{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.25, "eval_steps": 500, "global_step": 196000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5e-05, "grad_norm": 1.2729493379592896, "learning_rate": 4.5e-08, "loss": 1.2644, "step": 10 }, { "epoch": 0.0001, "grad_norm": 1.3188505172729492, "learning_rate": 9.5e-08, "loss": 1.2658, "step": 20 }, { "epoch": 0.00015, "grad_norm": 1.2601258754730225, "learning_rate": 1.45e-07, "loss": 1.2636, "step": 30 }, { "epoch": 0.0002, "grad_norm": 1.3004794120788574, "learning_rate": 1.95e-07, "loss": 1.2627, "step": 40 }, { "epoch": 0.00025, "grad_norm": 1.202286720275879, "learning_rate": 2.45e-07, "loss": 1.2534, "step": 50 }, { "epoch": 0.0003, "grad_norm": 1.0703670978546143, "learning_rate": 2.95e-07, "loss": 1.2429, "step": 60 }, { "epoch": 0.00035, "grad_norm": 0.9904623031616211, "learning_rate": 3.4500000000000003e-07, "loss": 1.223, "step": 70 }, { "epoch": 0.0004, "grad_norm": 0.7193464040756226, "learning_rate": 3.950000000000001e-07, "loss": 1.2048, "step": 80 }, { "epoch": 0.00045, "grad_norm": 0.5247392058372498, "learning_rate": 4.4500000000000003e-07, "loss": 1.1592, "step": 90 }, { "epoch": 0.0005, "grad_norm": 0.41689035296440125, "learning_rate": 4.95e-07, "loss": 1.15, "step": 100 }, { "epoch": 0.00055, "grad_norm": 0.29279381036758423, "learning_rate": 5.450000000000001e-07, "loss": 1.1358, "step": 110 }, { "epoch": 0.0006, "grad_norm": 0.17133663594722748, "learning_rate": 5.95e-07, "loss": 1.128, "step": 120 }, { "epoch": 0.00065, "grad_norm": 0.16452327370643616, "learning_rate": 6.450000000000001e-07, "loss": 1.1251, "step": 130 }, { "epoch": 0.0007, "grad_norm": 0.09243213385343552, "learning_rate": 6.95e-07, "loss": 1.1193, "step": 140 }, { "epoch": 0.00075, "grad_norm": 0.09495005011558533, "learning_rate": 7.450000000000001e-07, "loss": 1.1182, "step": 150 }, { "epoch": 0.0008, "grad_norm": 0.06381222605705261, "learning_rate": 7.950000000000001e-07, "loss": 1.1163, "step": 160 }, { "epoch": 0.00085, "grad_norm": 0.07385324686765671, "learning_rate": 8.45e-07, "loss": 1.1156, "step": 170 }, { "epoch": 0.0009, "grad_norm": 0.052240073680877686, "learning_rate": 8.95e-07, "loss": 1.1142, "step": 180 }, { "epoch": 0.00095, "grad_norm": 0.07249893993139267, "learning_rate": 9.450000000000001e-07, "loss": 1.1135, "step": 190 }, { "epoch": 0.001, "grad_norm": 0.05016590654850006, "learning_rate": 9.95e-07, "loss": 1.1123, "step": 200 }, { "epoch": 0.00105, "grad_norm": 0.0813603475689888, "learning_rate": 1.045e-06, "loss": 1.1118, "step": 210 }, { "epoch": 0.0011, "grad_norm": 0.04910163953900337, "learning_rate": 1.095e-06, "loss": 1.1108, "step": 220 }, { "epoch": 0.00115, "grad_norm": 0.07322562485933304, "learning_rate": 1.145e-06, "loss": 1.1103, "step": 230 }, { "epoch": 0.0012, "grad_norm": 0.05893868952989578, "learning_rate": 1.1950000000000002e-06, "loss": 1.1098, "step": 240 }, { "epoch": 0.00125, "grad_norm": 0.06626304984092712, "learning_rate": 1.245e-06, "loss": 1.11, "step": 250 }, { "epoch": 0.0013, "grad_norm": 0.05874939262866974, "learning_rate": 1.295e-06, "loss": 1.1075, "step": 260 }, { "epoch": 0.00135, "grad_norm": 0.052276477217674255, "learning_rate": 1.345e-06, "loss": 1.1041, "step": 270 }, { "epoch": 0.0014, "grad_norm": 0.06827472150325775, "learning_rate": 1.3950000000000002e-06, "loss": 1.1035, "step": 280 }, { "epoch": 0.00145, "grad_norm": 0.04688436537981033, "learning_rate": 1.445e-06, "loss": 1.1028, "step": 290 }, { "epoch": 0.0015, "grad_norm": 0.061367060989141464, "learning_rate": 1.495e-06, "loss": 1.1027, "step": 300 }, { "epoch": 0.00155, "grad_norm": 0.05163731053471565, "learning_rate": 1.545e-06, "loss": 1.1019, "step": 310 }, { "epoch": 0.0016, "grad_norm": 0.04336206987500191, "learning_rate": 1.595e-06, "loss": 1.1012, "step": 320 }, { "epoch": 0.00165, "grad_norm": 0.04618152230978012, "learning_rate": 1.645e-06, "loss": 1.0994, "step": 330 }, { "epoch": 0.0017, "grad_norm": 0.04556488245725632, "learning_rate": 1.695e-06, "loss": 1.0989, "step": 340 }, { "epoch": 0.00175, "grad_norm": 0.04560953006148338, "learning_rate": 1.745e-06, "loss": 1.0986, "step": 350 }, { "epoch": 0.0018, "grad_norm": 0.05721442773938179, "learning_rate": 1.7950000000000002e-06, "loss": 1.0982, "step": 360 }, { "epoch": 0.00185, "grad_norm": 0.045500755310058594, "learning_rate": 1.8450000000000001e-06, "loss": 1.099, "step": 370 }, { "epoch": 0.0019, "grad_norm": 0.04730851575732231, "learning_rate": 1.8950000000000003e-06, "loss": 1.0996, "step": 380 }, { "epoch": 0.00195, "grad_norm": 0.0756210908293724, "learning_rate": 1.945e-06, "loss": 1.098, "step": 390 }, { "epoch": 0.002, "grad_norm": 0.043138016015291214, "learning_rate": 1.995e-06, "loss": 1.0959, "step": 400 }, { "epoch": 0.00205, "grad_norm": 0.055947400629520416, "learning_rate": 2.045e-06, "loss": 1.095, "step": 410 }, { "epoch": 0.0021, "grad_norm": 0.0793597623705864, "learning_rate": 2.0950000000000003e-06, "loss": 1.0927, "step": 420 }, { "epoch": 0.00215, "grad_norm": 0.06133449450135231, "learning_rate": 2.1450000000000002e-06, "loss": 1.0914, "step": 430 }, { "epoch": 0.0022, "grad_norm": 0.238010436296463, "learning_rate": 2.195e-06, "loss": 1.0899, "step": 440 }, { "epoch": 0.00225, "grad_norm": 0.161293625831604, "learning_rate": 2.245e-06, "loss": 1.088, "step": 450 }, { "epoch": 0.0023, "grad_norm": 0.4534534811973572, "learning_rate": 2.2950000000000005e-06, "loss": 1.0862, "step": 460 }, { "epoch": 0.00235, "grad_norm": 0.13972555100917816, "learning_rate": 2.345e-06, "loss": 1.0843, "step": 470 }, { "epoch": 0.0024, "grad_norm": 0.5878572463989258, "learning_rate": 2.395e-06, "loss": 1.0831, "step": 480 }, { "epoch": 0.00245, "grad_norm": 1.0253840684890747, "learning_rate": 2.445e-06, "loss": 1.0819, "step": 490 }, { "epoch": 0.0025, "grad_norm": 0.5768185257911682, "learning_rate": 2.4950000000000003e-06, "loss": 1.0797, "step": 500 }, { "epoch": 0.00255, "grad_norm": 0.7738291025161743, "learning_rate": 2.545e-06, "loss": 1.077, "step": 510 }, { "epoch": 0.0026, "grad_norm": 0.8719862103462219, "learning_rate": 2.595e-06, "loss": 1.0732, "step": 520 }, { "epoch": 0.00265, "grad_norm": 1.0189906358718872, "learning_rate": 2.645e-06, "loss": 1.07, "step": 530 }, { "epoch": 0.0027, "grad_norm": 0.7507759928703308, "learning_rate": 2.6950000000000005e-06, "loss": 1.0668, "step": 540 }, { "epoch": 0.00275, "grad_norm": 0.181038960814476, "learning_rate": 2.745e-06, "loss": 1.059, "step": 550 }, { "epoch": 0.0028, "grad_norm": 0.3252950608730316, "learning_rate": 2.795e-06, "loss": 1.0507, "step": 560 }, { "epoch": 0.00285, "grad_norm": 0.8660540580749512, "learning_rate": 2.8450000000000003e-06, "loss": 1.04, "step": 570 }, { "epoch": 0.0029, "grad_norm": 0.36427515745162964, "learning_rate": 2.8950000000000002e-06, "loss": 1.0302, "step": 580 }, { "epoch": 0.00295, "grad_norm": 0.6166048049926758, "learning_rate": 2.945e-06, "loss": 1.0182, "step": 590 }, { "epoch": 0.003, "grad_norm": 1.1126497983932495, "learning_rate": 2.995e-06, "loss": 0.9998, "step": 600 }, { "epoch": 0.00305, "grad_norm": 0.7704527974128723, "learning_rate": 3.0450000000000005e-06, "loss": 0.9803, "step": 610 }, { "epoch": 0.0031, "grad_norm": 1.032020926475525, "learning_rate": 3.095e-06, "loss": 0.9623, "step": 620 }, { "epoch": 0.00315, "grad_norm": 0.41123878955841064, "learning_rate": 3.145e-06, "loss": 0.9493, "step": 630 }, { "epoch": 0.0032, "grad_norm": 0.44400614500045776, "learning_rate": 3.195e-06, "loss": 0.9173, "step": 640 }, { "epoch": 0.00325, "grad_norm": 0.7372226119041443, "learning_rate": 3.2450000000000003e-06, "loss": 0.8879, "step": 650 }, { "epoch": 0.0033, "grad_norm": 0.463133841753006, "learning_rate": 3.2950000000000002e-06, "loss": 0.8666, "step": 660 }, { "epoch": 0.00335, "grad_norm": 0.5725065469741821, "learning_rate": 3.345e-06, "loss": 0.8306, "step": 670 }, { "epoch": 0.0034, "grad_norm": 0.36958637833595276, "learning_rate": 3.395e-06, "loss": 0.8081, "step": 680 }, { "epoch": 0.00345, "grad_norm": 0.5271418690681458, "learning_rate": 3.4450000000000005e-06, "loss": 0.7744, "step": 690 }, { "epoch": 0.0035, "grad_norm": 0.48186737298965454, "learning_rate": 3.4950000000000004e-06, "loss": 0.752, "step": 700 }, { "epoch": 0.00355, "grad_norm": 0.7156515717506409, "learning_rate": 3.5450000000000004e-06, "loss": 0.7249, "step": 710 }, { "epoch": 0.0036, "grad_norm": 0.773932695388794, "learning_rate": 3.5950000000000003e-06, "loss": 0.7132, "step": 720 }, { "epoch": 0.00365, "grad_norm": 0.5371299386024475, "learning_rate": 3.6450000000000007e-06, "loss": 0.6724, "step": 730 }, { "epoch": 0.0037, "grad_norm": 0.579585075378418, "learning_rate": 3.6949999999999998e-06, "loss": 0.656, "step": 740 }, { "epoch": 0.00375, "grad_norm": 0.6134310364723206, "learning_rate": 3.7449999999999997e-06, "loss": 0.6222, "step": 750 }, { "epoch": 0.0038, "grad_norm": 0.4206959307193756, "learning_rate": 3.795e-06, "loss": 0.5999, "step": 760 }, { "epoch": 0.00385, "grad_norm": 0.5062888860702515, "learning_rate": 3.845e-06, "loss": 0.5768, "step": 770 }, { "epoch": 0.0039, "grad_norm": 0.470325767993927, "learning_rate": 3.895e-06, "loss": 0.5585, "step": 780 }, { "epoch": 0.00395, "grad_norm": 0.543121874332428, "learning_rate": 3.945e-06, "loss": 0.5445, "step": 790 }, { "epoch": 0.004, "grad_norm": 0.5184602737426758, "learning_rate": 3.995e-06, "loss": 0.5076, "step": 800 }, { "epoch": 0.00405, "grad_norm": 0.6378428339958191, "learning_rate": 4.045e-06, "loss": 0.5173, "step": 810 }, { "epoch": 0.0041, "grad_norm": 0.5006017684936523, "learning_rate": 4.095000000000001e-06, "loss": 0.4775, "step": 820 }, { "epoch": 0.00415, "grad_norm": 0.4863206744194031, "learning_rate": 4.145e-06, "loss": 0.4889, "step": 830 }, { "epoch": 0.0042, "grad_norm": 0.49730730056762695, "learning_rate": 4.1950000000000005e-06, "loss": 0.4476, "step": 840 }, { "epoch": 0.00425, "grad_norm": 0.7628925442695618, "learning_rate": 4.245e-06, "loss": 0.4368, "step": 850 }, { "epoch": 0.0043, "grad_norm": 0.49417269229888916, "learning_rate": 4.295e-06, "loss": 0.4226, "step": 860 }, { "epoch": 0.00435, "grad_norm": 0.6243632435798645, "learning_rate": 4.345000000000001e-06, "loss": 0.4138, "step": 870 }, { "epoch": 0.0044, "grad_norm": 0.4828900396823883, "learning_rate": 4.395e-06, "loss": 0.398, "step": 880 }, { "epoch": 0.00445, "grad_norm": 0.7151875495910645, "learning_rate": 4.445000000000001e-06, "loss": 0.3762, "step": 890 }, { "epoch": 0.0045, "grad_norm": 0.44865331053733826, "learning_rate": 4.495e-06, "loss": 0.3935, "step": 900 }, { "epoch": 0.00455, "grad_norm": 0.57523113489151, "learning_rate": 4.545e-06, "loss": 0.3625, "step": 910 }, { "epoch": 0.0046, "grad_norm": 0.4795694053173065, "learning_rate": 4.595e-06, "loss": 0.3682, "step": 920 }, { "epoch": 0.00465, "grad_norm": 0.8969322443008423, "learning_rate": 4.645e-06, "loss": 0.37, "step": 930 }, { "epoch": 0.0047, "grad_norm": 0.6106979250907898, "learning_rate": 4.695e-06, "loss": 0.3536, "step": 940 }, { "epoch": 0.00475, "grad_norm": 0.4854564666748047, "learning_rate": 4.745e-06, "loss": 0.3444, "step": 950 }, { "epoch": 0.0048, "grad_norm": 0.5885597467422485, "learning_rate": 4.795e-06, "loss": 0.328, "step": 960 }, { "epoch": 0.00485, "grad_norm": 0.5267831087112427, "learning_rate": 4.845e-06, "loss": 0.3361, "step": 970 }, { "epoch": 0.0049, "grad_norm": 0.6329275369644165, "learning_rate": 4.8950000000000006e-06, "loss": 0.3166, "step": 980 }, { "epoch": 0.00495, "grad_norm": 0.7038520574569702, "learning_rate": 4.945e-06, "loss": 0.3146, "step": 990 }, { "epoch": 0.005, "grad_norm": 0.9382172226905823, "learning_rate": 4.9950000000000005e-06, "loss": 0.3026, "step": 1000 }, { "epoch": 5e-05, "grad_norm": 0.5049734711647034, "learning_rate": 5.045000000000001e-06, "loss": 0.3008, "step": 1010 }, { "epoch": 0.0001, "grad_norm": 0.4181530773639679, "learning_rate": 5.095e-06, "loss": 0.285, "step": 1020 }, { "epoch": 0.00015, "grad_norm": 0.5890700221061707, "learning_rate": 5.145000000000001e-06, "loss": 0.2802, "step": 1030 }, { "epoch": 0.0002, "grad_norm": 0.7265661358833313, "learning_rate": 5.195e-06, "loss": 0.2882, "step": 1040 }, { "epoch": 0.00025, "grad_norm": 0.47236159443855286, "learning_rate": 5.245e-06, "loss": 0.2769, "step": 1050 }, { "epoch": 0.0003, "grad_norm": 0.8550785779953003, "learning_rate": 5.295e-06, "loss": 0.272, "step": 1060 }, { "epoch": 0.00035, "grad_norm": 0.5752397179603577, "learning_rate": 5.345e-06, "loss": 0.2686, "step": 1070 }, { "epoch": 0.0004, "grad_norm": 0.40073686838150024, "learning_rate": 5.395e-06, "loss": 0.292, "step": 1080 }, { "epoch": 0.00045, "grad_norm": 0.623715877532959, "learning_rate": 5.445e-06, "loss": 0.2762, "step": 1090 }, { "epoch": 0.0005, "grad_norm": 0.8721001744270325, "learning_rate": 5.495e-06, "loss": 0.2733, "step": 1100 }, { "epoch": 0.00055, "grad_norm": 0.4661286473274231, "learning_rate": 5.545e-06, "loss": 0.27, "step": 1110 }, { "epoch": 0.0006, "grad_norm": 0.38948312401771545, "learning_rate": 5.595000000000001e-06, "loss": 0.257, "step": 1120 }, { "epoch": 0.00065, "grad_norm": 0.3550063967704773, "learning_rate": 5.645e-06, "loss": 0.2786, "step": 1130 }, { "epoch": 0.0007, "grad_norm": 0.35984066128730774, "learning_rate": 5.6950000000000005e-06, "loss": 0.2508, "step": 1140 }, { "epoch": 0.00075, "grad_norm": 0.7620972990989685, "learning_rate": 5.745e-06, "loss": 0.2399, "step": 1150 }, { "epoch": 0.0008, "grad_norm": 0.8049952983856201, "learning_rate": 5.795e-06, "loss": 0.2466, "step": 1160 }, { "epoch": 0.00085, "grad_norm": 0.5977007150650024, "learning_rate": 5.845000000000001e-06, "loss": 0.2481, "step": 1170 }, { "epoch": 0.0009, "grad_norm": 0.6251347661018372, "learning_rate": 5.895e-06, "loss": 0.2506, "step": 1180 }, { "epoch": 0.00095, "grad_norm": 0.7045720219612122, "learning_rate": 5.945000000000001e-06, "loss": 0.2341, "step": 1190 }, { "epoch": 0.001, "grad_norm": 0.45146504044532776, "learning_rate": 5.995e-06, "loss": 0.2406, "step": 1200 }, { "epoch": 0.00105, "grad_norm": 0.8330421447753906, "learning_rate": 6.045e-06, "loss": 0.2667, "step": 1210 }, { "epoch": 0.0011, "grad_norm": 0.3268861174583435, "learning_rate": 6.095e-06, "loss": 0.2388, "step": 1220 }, { "epoch": 0.00115, "grad_norm": 0.45664694905281067, "learning_rate": 6.1450000000000005e-06, "loss": 0.2444, "step": 1230 }, { "epoch": 0.0012, "grad_norm": 0.48323923349380493, "learning_rate": 6.195e-06, "loss": 0.2396, "step": 1240 }, { "epoch": 0.00125, "grad_norm": 0.5909716486930847, "learning_rate": 6.245e-06, "loss": 0.2285, "step": 1250 }, { "epoch": 0.0013, "grad_norm": 0.3466012477874756, "learning_rate": 6.295000000000001e-06, "loss": 0.2328, "step": 1260 }, { "epoch": 0.00135, "grad_norm": 0.5698915719985962, "learning_rate": 6.345000000000001e-06, "loss": 0.2228, "step": 1270 }, { "epoch": 0.0014, "grad_norm": 0.5685576796531677, "learning_rate": 6.395000000000001e-06, "loss": 0.2153, "step": 1280 }, { "epoch": 0.00145, "grad_norm": 0.30952244997024536, "learning_rate": 6.444999999999999e-06, "loss": 0.219, "step": 1290 }, { "epoch": 0.0015, "grad_norm": 0.41652658581733704, "learning_rate": 6.495e-06, "loss": 0.2272, "step": 1300 }, { "epoch": 0.00155, "grad_norm": 0.43570876121520996, "learning_rate": 6.545e-06, "loss": 0.2158, "step": 1310 }, { "epoch": 0.0016, "grad_norm": 0.3096759617328644, "learning_rate": 6.5949999999999995e-06, "loss": 0.2184, "step": 1320 }, { "epoch": 0.00165, "grad_norm": 0.383800208568573, "learning_rate": 6.645e-06, "loss": 0.215, "step": 1330 }, { "epoch": 0.0017, "grad_norm": 0.3670880198478699, "learning_rate": 6.695e-06, "loss": 0.2012, "step": 1340 }, { "epoch": 0.00175, "grad_norm": 0.4803926944732666, "learning_rate": 6.745e-06, "loss": 0.2218, "step": 1350 }, { "epoch": 0.0018, "grad_norm": 0.3520296812057495, "learning_rate": 6.795e-06, "loss": 0.2182, "step": 1360 }, { "epoch": 0.00185, "grad_norm": 0.5916102528572083, "learning_rate": 6.845e-06, "loss": 0.2272, "step": 1370 }, { "epoch": 0.0019, "grad_norm": 0.46707865595817566, "learning_rate": 6.895e-06, "loss": 0.2129, "step": 1380 }, { "epoch": 0.00195, "grad_norm": 0.4104538559913635, "learning_rate": 6.945e-06, "loss": 0.2168, "step": 1390 }, { "epoch": 0.002, "grad_norm": 0.8709113001823425, "learning_rate": 6.995e-06, "loss": 0.203, "step": 1400 }, { "epoch": 0.00205, "grad_norm": 0.4051400125026703, "learning_rate": 7.045e-06, "loss": 0.2104, "step": 1410 }, { "epoch": 0.0021, "grad_norm": 0.6250728368759155, "learning_rate": 7.095000000000001e-06, "loss": 0.1993, "step": 1420 }, { "epoch": 0.00215, "grad_norm": 0.4261170029640198, "learning_rate": 7.145e-06, "loss": 0.2033, "step": 1430 }, { "epoch": 0.0022, "grad_norm": 0.6586616635322571, "learning_rate": 7.1950000000000006e-06, "loss": 0.1927, "step": 1440 }, { "epoch": 0.00225, "grad_norm": 0.2842698395252228, "learning_rate": 7.245e-06, "loss": 0.1917, "step": 1450 }, { "epoch": 0.0023, "grad_norm": 0.4493279457092285, "learning_rate": 7.2950000000000005e-06, "loss": 0.1979, "step": 1460 }, { "epoch": 0.00235, "grad_norm": 0.7227078676223755, "learning_rate": 7.345000000000001e-06, "loss": 0.2076, "step": 1470 }, { "epoch": 0.0024, "grad_norm": 0.5697221755981445, "learning_rate": 7.395e-06, "loss": 0.185, "step": 1480 }, { "epoch": 0.00245, "grad_norm": 0.5880600214004517, "learning_rate": 7.445000000000001e-06, "loss": 0.2147, "step": 1490 }, { "epoch": 0.0025, "grad_norm": 0.4969238042831421, "learning_rate": 7.495e-06, "loss": 0.2049, "step": 1500 }, { "epoch": 0.00255, "grad_norm": 0.7584699988365173, "learning_rate": 7.545000000000001e-06, "loss": 0.1988, "step": 1510 }, { "epoch": 0.0026, "grad_norm": 0.643905758857727, "learning_rate": 7.595000000000001e-06, "loss": 0.1977, "step": 1520 }, { "epoch": 0.00265, "grad_norm": 0.380588561296463, "learning_rate": 7.645e-06, "loss": 0.2, "step": 1530 }, { "epoch": 0.0027, "grad_norm": 0.5197119116783142, "learning_rate": 7.695e-06, "loss": 0.1853, "step": 1540 }, { "epoch": 0.00275, "grad_norm": 0.6001980900764465, "learning_rate": 7.745000000000001e-06, "loss": 0.1974, "step": 1550 }, { "epoch": 0.0028, "grad_norm": 0.32734277844429016, "learning_rate": 7.795e-06, "loss": 0.1848, "step": 1560 }, { "epoch": 0.00285, "grad_norm": 0.39121389389038086, "learning_rate": 7.845e-06, "loss": 0.186, "step": 1570 }, { "epoch": 0.0029, "grad_norm": 0.4204869568347931, "learning_rate": 7.895000000000001e-06, "loss": 0.1922, "step": 1580 }, { "epoch": 0.00295, "grad_norm": 0.5324739813804626, "learning_rate": 7.945000000000001e-06, "loss": 0.1857, "step": 1590 }, { "epoch": 0.003, "grad_norm": 0.6414231657981873, "learning_rate": 7.995e-06, "loss": 0.192, "step": 1600 }, { "epoch": 0.00305, "grad_norm": 0.8036292195320129, "learning_rate": 8.045e-06, "loss": 0.1826, "step": 1610 }, { "epoch": 0.0031, "grad_norm": 0.697265625, "learning_rate": 8.095e-06, "loss": 0.1863, "step": 1620 }, { "epoch": 0.00315, "grad_norm": 0.519187867641449, "learning_rate": 8.144999999999999e-06, "loss": 0.1984, "step": 1630 }, { "epoch": 0.0032, "grad_norm": 0.4846292734146118, "learning_rate": 8.195e-06, "loss": 0.1886, "step": 1640 }, { "epoch": 0.00325, "grad_norm": 0.4025842547416687, "learning_rate": 8.245e-06, "loss": 0.183, "step": 1650 }, { "epoch": 0.0033, "grad_norm": 0.4674290120601654, "learning_rate": 8.295e-06, "loss": 0.1735, "step": 1660 }, { "epoch": 0.00335, "grad_norm": 0.482946515083313, "learning_rate": 8.345e-06, "loss": 0.1792, "step": 1670 }, { "epoch": 0.0034, "grad_norm": 0.5781715512275696, "learning_rate": 8.395e-06, "loss": 0.1769, "step": 1680 }, { "epoch": 0.00345, "grad_norm": 0.34087538719177246, "learning_rate": 8.445e-06, "loss": 0.1768, "step": 1690 }, { "epoch": 0.0035, "grad_norm": 0.5387849807739258, "learning_rate": 8.495e-06, "loss": 0.1705, "step": 1700 }, { "epoch": 0.00355, "grad_norm": 0.3808085024356842, "learning_rate": 8.545e-06, "loss": 0.1821, "step": 1710 }, { "epoch": 0.0036, "grad_norm": 0.3419106602668762, "learning_rate": 8.595e-06, "loss": 0.1702, "step": 1720 }, { "epoch": 0.00365, "grad_norm": 0.5243216156959534, "learning_rate": 8.645000000000001e-06, "loss": 0.1826, "step": 1730 }, { "epoch": 0.0037, "grad_norm": 0.4391971826553345, "learning_rate": 8.695e-06, "loss": 0.1635, "step": 1740 }, { "epoch": 0.00375, "grad_norm": 0.4228607714176178, "learning_rate": 8.745e-06, "loss": 0.1705, "step": 1750 }, { "epoch": 0.0038, "grad_norm": 0.6283707022666931, "learning_rate": 8.795e-06, "loss": 0.1647, "step": 1760 }, { "epoch": 0.00385, "grad_norm": 0.5195528864860535, "learning_rate": 8.845000000000001e-06, "loss": 0.1706, "step": 1770 }, { "epoch": 0.0039, "grad_norm": 0.6638985276222229, "learning_rate": 8.895e-06, "loss": 0.169, "step": 1780 }, { "epoch": 0.00395, "grad_norm": 0.6162652969360352, "learning_rate": 8.945e-06, "loss": 0.1811, "step": 1790 }, { "epoch": 0.004, "grad_norm": 0.6614283919334412, "learning_rate": 8.995000000000001e-06, "loss": 0.1776, "step": 1800 }, { "epoch": 0.00405, "grad_norm": 0.38888028264045715, "learning_rate": 9.045e-06, "loss": 0.1795, "step": 1810 }, { "epoch": 0.0041, "grad_norm": 0.4616861045360565, "learning_rate": 9.095e-06, "loss": 0.1729, "step": 1820 }, { "epoch": 0.00415, "grad_norm": 0.7028610706329346, "learning_rate": 9.145000000000001e-06, "loss": 0.1695, "step": 1830 }, { "epoch": 0.0042, "grad_norm": 0.5122510194778442, "learning_rate": 9.195000000000001e-06, "loss": 0.1696, "step": 1840 }, { "epoch": 0.00425, "grad_norm": 0.411853164434433, "learning_rate": 9.245e-06, "loss": 0.1583, "step": 1850 }, { "epoch": 0.0043, "grad_norm": 0.41692647337913513, "learning_rate": 9.295000000000002e-06, "loss": 0.1699, "step": 1860 }, { "epoch": 0.00435, "grad_norm": 0.42551809549331665, "learning_rate": 9.345000000000001e-06, "loss": 0.1689, "step": 1870 }, { "epoch": 0.0044, "grad_norm": 0.587212860584259, "learning_rate": 9.395e-06, "loss": 0.1625, "step": 1880 }, { "epoch": 0.00445, "grad_norm": 1.0188299417495728, "learning_rate": 9.445000000000002e-06, "loss": 0.1639, "step": 1890 }, { "epoch": 0.0045, "grad_norm": 0.5616925954818726, "learning_rate": 9.495000000000001e-06, "loss": 0.1605, "step": 1900 }, { "epoch": 0.00455, "grad_norm": 0.7324318885803223, "learning_rate": 9.545e-06, "loss": 0.1598, "step": 1910 }, { "epoch": 0.0046, "grad_norm": 0.6417198181152344, "learning_rate": 9.595e-06, "loss": 0.1561, "step": 1920 }, { "epoch": 0.00465, "grad_norm": 0.4558139443397522, "learning_rate": 9.645e-06, "loss": 0.1637, "step": 1930 }, { "epoch": 0.0047, "grad_norm": 0.6513067483901978, "learning_rate": 9.695e-06, "loss": 0.1573, "step": 1940 }, { "epoch": 0.00475, "grad_norm": 0.5894631147384644, "learning_rate": 9.745e-06, "loss": 0.1725, "step": 1950 }, { "epoch": 0.0048, "grad_norm": 0.4687117040157318, "learning_rate": 9.795e-06, "loss": 0.1565, "step": 1960 }, { "epoch": 0.00485, "grad_norm": 0.9609802961349487, "learning_rate": 9.845e-06, "loss": 0.1537, "step": 1970 }, { "epoch": 0.0049, "grad_norm": 0.6374800801277161, "learning_rate": 9.895e-06, "loss": 0.1734, "step": 1980 }, { "epoch": 0.00495, "grad_norm": 0.659702718257904, "learning_rate": 9.945e-06, "loss": 0.1631, "step": 1990 }, { "epoch": 0.005, "grad_norm": 0.566866397857666, "learning_rate": 9.995e-06, "loss": 0.1729, "step": 2000 }, { "epoch": 0.00505, "grad_norm": 0.575867235660553, "learning_rate": 1.0045e-05, "loss": 0.1608, "step": 2010 }, { "epoch": 0.0051, "grad_norm": 0.5216620564460754, "learning_rate": 1.0095e-05, "loss": 0.1549, "step": 2020 }, { "epoch": 0.00515, "grad_norm": 0.4505414664745331, "learning_rate": 1.0145e-05, "loss": 0.1666, "step": 2030 }, { "epoch": 0.0052, "grad_norm": 0.47155532240867615, "learning_rate": 1.0195e-05, "loss": 0.156, "step": 2040 }, { "epoch": 0.00525, "grad_norm": 0.499719500541687, "learning_rate": 1.0245000000000001e-05, "loss": 0.1593, "step": 2050 }, { "epoch": 0.0053, "grad_norm": 0.5323207974433899, "learning_rate": 1.0295e-05, "loss": 0.1552, "step": 2060 }, { "epoch": 0.00535, "grad_norm": 0.48722875118255615, "learning_rate": 1.0345e-05, "loss": 0.156, "step": 2070 }, { "epoch": 0.0054, "grad_norm": 0.4714648425579071, "learning_rate": 1.0395000000000001e-05, "loss": 0.1562, "step": 2080 }, { "epoch": 0.00545, "grad_norm": 0.5482438206672668, "learning_rate": 1.0445e-05, "loss": 0.1521, "step": 2090 }, { "epoch": 0.0055, "grad_norm": 0.5565738081932068, "learning_rate": 1.0495e-05, "loss": 0.1519, "step": 2100 }, { "epoch": 0.00555, "grad_norm": 0.6969251036643982, "learning_rate": 1.0545000000000002e-05, "loss": 0.1658, "step": 2110 }, { "epoch": 0.0056, "grad_norm": 0.47207051515579224, "learning_rate": 1.0595000000000001e-05, "loss": 0.1571, "step": 2120 }, { "epoch": 0.00565, "grad_norm": 0.6089038252830505, "learning_rate": 1.0645e-05, "loss": 0.1522, "step": 2130 }, { "epoch": 0.0057, "grad_norm": 0.5770543217658997, "learning_rate": 1.0695e-05, "loss": 0.1515, "step": 2140 }, { "epoch": 0.00575, "grad_norm": 0.7493625283241272, "learning_rate": 1.0745000000000001e-05, "loss": 0.1452, "step": 2150 }, { "epoch": 0.0058, "grad_norm": 0.6182336807250977, "learning_rate": 1.0795e-05, "loss": 0.1439, "step": 2160 }, { "epoch": 0.00585, "grad_norm": 0.6780596971511841, "learning_rate": 1.0845e-05, "loss": 0.1441, "step": 2170 }, { "epoch": 0.0059, "grad_norm": 0.48616987466812134, "learning_rate": 1.0895000000000002e-05, "loss": 0.1405, "step": 2180 }, { "epoch": 0.00595, "grad_norm": 0.48511484265327454, "learning_rate": 1.0945000000000001e-05, "loss": 0.1431, "step": 2190 }, { "epoch": 0.006, "grad_norm": 0.5814856886863708, "learning_rate": 1.0995e-05, "loss": 0.1495, "step": 2200 }, { "epoch": 0.00605, "grad_norm": 0.5197475552558899, "learning_rate": 1.1045000000000002e-05, "loss": 0.143, "step": 2210 }, { "epoch": 0.0061, "grad_norm": 0.5951067805290222, "learning_rate": 1.1095e-05, "loss": 0.1574, "step": 2220 }, { "epoch": 0.00615, "grad_norm": 0.5585006475448608, "learning_rate": 1.1145e-05, "loss": 0.1489, "step": 2230 }, { "epoch": 0.0062, "grad_norm": 0.5013735890388489, "learning_rate": 1.1195e-05, "loss": 0.1411, "step": 2240 }, { "epoch": 0.00625, "grad_norm": 0.8280708193778992, "learning_rate": 1.1245e-05, "loss": 0.1369, "step": 2250 }, { "epoch": 0.0063, "grad_norm": 0.48556965589523315, "learning_rate": 1.1295e-05, "loss": 0.1497, "step": 2260 }, { "epoch": 0.00635, "grad_norm": 0.44708576798439026, "learning_rate": 1.1345e-05, "loss": 0.1385, "step": 2270 }, { "epoch": 0.0064, "grad_norm": 0.4834116995334625, "learning_rate": 1.1395e-05, "loss": 0.1413, "step": 2280 }, { "epoch": 0.00645, "grad_norm": 0.5674512386322021, "learning_rate": 1.1445e-05, "loss": 0.1512, "step": 2290 }, { "epoch": 0.0065, "grad_norm": 0.45389649271965027, "learning_rate": 1.1495000000000001e-05, "loss": 0.1555, "step": 2300 }, { "epoch": 0.00655, "grad_norm": 0.5489965081214905, "learning_rate": 1.1545e-05, "loss": 0.1524, "step": 2310 }, { "epoch": 0.0066, "grad_norm": 0.4980386793613434, "learning_rate": 1.1595e-05, "loss": 0.1499, "step": 2320 }, { "epoch": 0.00665, "grad_norm": 0.6992486715316772, "learning_rate": 1.1645000000000001e-05, "loss": 0.1447, "step": 2330 }, { "epoch": 0.0067, "grad_norm": 0.4216298460960388, "learning_rate": 1.1695e-05, "loss": 0.1414, "step": 2340 }, { "epoch": 0.00675, "grad_norm": 0.4966219961643219, "learning_rate": 1.1745e-05, "loss": 0.1419, "step": 2350 }, { "epoch": 0.0068, "grad_norm": 0.5875610709190369, "learning_rate": 1.1795e-05, "loss": 0.1369, "step": 2360 }, { "epoch": 0.00685, "grad_norm": 0.910181999206543, "learning_rate": 1.1845000000000001e-05, "loss": 0.1439, "step": 2370 }, { "epoch": 0.0069, "grad_norm": 0.5701887011528015, "learning_rate": 1.1895e-05, "loss": 0.1386, "step": 2380 }, { "epoch": 0.00695, "grad_norm": 0.7611976861953735, "learning_rate": 1.1945e-05, "loss": 0.1356, "step": 2390 }, { "epoch": 0.007, "grad_norm": 0.5266007781028748, "learning_rate": 1.1995000000000001e-05, "loss": 0.1357, "step": 2400 }, { "epoch": 0.00705, "grad_norm": 0.41671043634414673, "learning_rate": 1.2045e-05, "loss": 0.1338, "step": 2410 }, { "epoch": 0.0071, "grad_norm": 0.5773289799690247, "learning_rate": 1.2095e-05, "loss": 0.1329, "step": 2420 }, { "epoch": 0.00715, "grad_norm": 0.5124483108520508, "learning_rate": 1.2145000000000001e-05, "loss": 0.1354, "step": 2430 }, { "epoch": 0.0072, "grad_norm": 0.6027225255966187, "learning_rate": 1.2195000000000001e-05, "loss": 0.1314, "step": 2440 }, { "epoch": 0.00725, "grad_norm": 0.5789300203323364, "learning_rate": 1.2245e-05, "loss": 0.1377, "step": 2450 }, { "epoch": 0.0073, "grad_norm": 0.4970523715019226, "learning_rate": 1.2295000000000002e-05, "loss": 0.1434, "step": 2460 }, { "epoch": 0.00735, "grad_norm": 0.5187194347381592, "learning_rate": 1.2345000000000001e-05, "loss": 0.1349, "step": 2470 }, { "epoch": 0.0074, "grad_norm": 0.7817258238792419, "learning_rate": 1.2395e-05, "loss": 0.1409, "step": 2480 }, { "epoch": 0.00745, "grad_norm": 0.5194874405860901, "learning_rate": 1.2445e-05, "loss": 0.1337, "step": 2490 }, { "epoch": 0.0075, "grad_norm": 0.47469213604927063, "learning_rate": 1.2495000000000001e-05, "loss": 0.1341, "step": 2500 }, { "epoch": 0.00755, "grad_norm": 0.5221440196037292, "learning_rate": 1.2545000000000001e-05, "loss": 0.1304, "step": 2510 }, { "epoch": 0.0076, "grad_norm": 0.4986429214477539, "learning_rate": 1.2595e-05, "loss": 0.129, "step": 2520 }, { "epoch": 0.00765, "grad_norm": 0.5564189553260803, "learning_rate": 1.2645000000000002e-05, "loss": 0.1286, "step": 2530 }, { "epoch": 0.0077, "grad_norm": 0.5815272927284241, "learning_rate": 1.2695000000000001e-05, "loss": 0.1266, "step": 2540 }, { "epoch": 0.00775, "grad_norm": 0.4768863916397095, "learning_rate": 1.2745e-05, "loss": 0.1271, "step": 2550 }, { "epoch": 0.0078, "grad_norm": 0.5922735333442688, "learning_rate": 1.2795000000000002e-05, "loss": 0.1288, "step": 2560 }, { "epoch": 0.00785, "grad_norm": 0.8866589069366455, "learning_rate": 1.2845000000000002e-05, "loss": 0.1275, "step": 2570 }, { "epoch": 0.0079, "grad_norm": 0.5470432639122009, "learning_rate": 1.2895000000000001e-05, "loss": 0.1367, "step": 2580 }, { "epoch": 0.00795, "grad_norm": 0.45371824502944946, "learning_rate": 1.2945000000000002e-05, "loss": 0.1274, "step": 2590 }, { "epoch": 0.008, "grad_norm": 0.46164730191230774, "learning_rate": 1.2995000000000002e-05, "loss": 0.124, "step": 2600 }, { "epoch": 0.00805, "grad_norm": 0.5537484884262085, "learning_rate": 1.3045000000000001e-05, "loss": 0.1368, "step": 2610 }, { "epoch": 0.0081, "grad_norm": 0.6138302087783813, "learning_rate": 1.3095000000000003e-05, "loss": 0.1316, "step": 2620 }, { "epoch": 0.00815, "grad_norm": 0.6606197357177734, "learning_rate": 1.3145000000000002e-05, "loss": 0.1293, "step": 2630 }, { "epoch": 0.0082, "grad_norm": 0.5215857625007629, "learning_rate": 1.3195000000000002e-05, "loss": 0.1291, "step": 2640 }, { "epoch": 0.00825, "grad_norm": 0.5248004198074341, "learning_rate": 1.3245000000000001e-05, "loss": 0.1177, "step": 2650 }, { "epoch": 0.0083, "grad_norm": 0.49368399381637573, "learning_rate": 1.3295000000000002e-05, "loss": 0.1245, "step": 2660 }, { "epoch": 0.00835, "grad_norm": 0.6579505801200867, "learning_rate": 1.3345000000000002e-05, "loss": 0.1244, "step": 2670 }, { "epoch": 0.0084, "grad_norm": 0.6320722103118896, "learning_rate": 1.3395000000000001e-05, "loss": 0.1205, "step": 2680 }, { "epoch": 0.00845, "grad_norm": 0.4902285933494568, "learning_rate": 1.3445e-05, "loss": 0.1213, "step": 2690 }, { "epoch": 0.0085, "grad_norm": 0.6687859296798706, "learning_rate": 1.3494999999999999e-05, "loss": 0.1283, "step": 2700 }, { "epoch": 0.00855, "grad_norm": 0.5977720022201538, "learning_rate": 1.3545e-05, "loss": 0.1281, "step": 2710 }, { "epoch": 0.0086, "grad_norm": 0.5987256169319153, "learning_rate": 1.3595e-05, "loss": 0.1206, "step": 2720 }, { "epoch": 0.00865, "grad_norm": 0.6178602576255798, "learning_rate": 1.3644999999999999e-05, "loss": 0.1214, "step": 2730 }, { "epoch": 0.0087, "grad_norm": 0.5904673933982849, "learning_rate": 1.3695e-05, "loss": 0.1244, "step": 2740 }, { "epoch": 0.00875, "grad_norm": 0.7004421353340149, "learning_rate": 1.3745e-05, "loss": 0.1252, "step": 2750 }, { "epoch": 0.0088, "grad_norm": 0.4697619378566742, "learning_rate": 1.3795e-05, "loss": 0.124, "step": 2760 }, { "epoch": 0.00885, "grad_norm": 0.5584418773651123, "learning_rate": 1.3845e-05, "loss": 0.1318, "step": 2770 }, { "epoch": 0.0089, "grad_norm": 0.5180680751800537, "learning_rate": 1.3895e-05, "loss": 0.1291, "step": 2780 }, { "epoch": 0.00895, "grad_norm": 0.5268616676330566, "learning_rate": 1.3945e-05, "loss": 0.13, "step": 2790 }, { "epoch": 0.009, "grad_norm": 0.7439182996749878, "learning_rate": 1.3994999999999999e-05, "loss": 0.1256, "step": 2800 }, { "epoch": 0.00905, "grad_norm": 0.7283898591995239, "learning_rate": 1.4045e-05, "loss": 0.1227, "step": 2810 }, { "epoch": 0.0091, "grad_norm": 0.6830780506134033, "learning_rate": 1.4095e-05, "loss": 0.1164, "step": 2820 }, { "epoch": 0.00915, "grad_norm": 0.6911619901657104, "learning_rate": 1.4145e-05, "loss": 0.1265, "step": 2830 }, { "epoch": 0.0092, "grad_norm": 0.6404842734336853, "learning_rate": 1.4195e-05, "loss": 0.1306, "step": 2840 }, { "epoch": 0.00925, "grad_norm": 0.4563213586807251, "learning_rate": 1.4245e-05, "loss": 0.1255, "step": 2850 }, { "epoch": 0.0093, "grad_norm": 0.8771994113922119, "learning_rate": 1.4295e-05, "loss": 0.1282, "step": 2860 }, { "epoch": 0.00935, "grad_norm": 0.6573876142501831, "learning_rate": 1.4345e-05, "loss": 0.129, "step": 2870 }, { "epoch": 0.0094, "grad_norm": 0.6409444808959961, "learning_rate": 1.4395e-05, "loss": 0.129, "step": 2880 }, { "epoch": 0.00945, "grad_norm": 0.5570323467254639, "learning_rate": 1.4445e-05, "loss": 0.1206, "step": 2890 }, { "epoch": 0.0095, "grad_norm": 0.4876030385494232, "learning_rate": 1.4495000000000001e-05, "loss": 0.112, "step": 2900 }, { "epoch": 0.00955, "grad_norm": 0.5265260934829712, "learning_rate": 1.4545e-05, "loss": 0.121, "step": 2910 }, { "epoch": 0.0096, "grad_norm": 0.7372077703475952, "learning_rate": 1.4595e-05, "loss": 0.1226, "step": 2920 }, { "epoch": 0.00965, "grad_norm": 0.5543709993362427, "learning_rate": 1.4645e-05, "loss": 0.1223, "step": 2930 }, { "epoch": 0.0097, "grad_norm": 0.6461299061775208, "learning_rate": 1.4695e-05, "loss": 0.1272, "step": 2940 }, { "epoch": 0.00975, "grad_norm": 0.6065276861190796, "learning_rate": 1.4745e-05, "loss": 0.1213, "step": 2950 }, { "epoch": 0.0098, "grad_norm": 0.518893837928772, "learning_rate": 1.4795e-05, "loss": 0.1231, "step": 2960 }, { "epoch": 0.00985, "grad_norm": 0.41890275478363037, "learning_rate": 1.4845000000000001e-05, "loss": 0.1227, "step": 2970 }, { "epoch": 0.0099, "grad_norm": 0.7018951177597046, "learning_rate": 1.4895e-05, "loss": 0.121, "step": 2980 }, { "epoch": 0.00995, "grad_norm": 0.5778883695602417, "learning_rate": 1.4945e-05, "loss": 0.1131, "step": 2990 }, { "epoch": 0.01, "grad_norm": 0.5019136667251587, "learning_rate": 1.4995000000000001e-05, "loss": 0.1282, "step": 3000 }, { "epoch": 0.01005, "grad_norm": 0.547877311706543, "learning_rate": 1.5045e-05, "loss": 0.1088, "step": 3010 }, { "epoch": 0.0101, "grad_norm": 0.5180936455726624, "learning_rate": 1.5095e-05, "loss": 0.1261, "step": 3020 }, { "epoch": 0.01015, "grad_norm": 0.5541945099830627, "learning_rate": 1.5145000000000002e-05, "loss": 0.115, "step": 3030 }, { "epoch": 0.0102, "grad_norm": 0.4803144037723541, "learning_rate": 1.5195000000000001e-05, "loss": 0.1096, "step": 3040 }, { "epoch": 0.01025, "grad_norm": 0.5364857912063599, "learning_rate": 1.5245e-05, "loss": 0.1177, "step": 3050 }, { "epoch": 0.0103, "grad_norm": 0.40823110938072205, "learning_rate": 1.5295000000000002e-05, "loss": 0.1219, "step": 3060 }, { "epoch": 0.01035, "grad_norm": 0.46944060921669006, "learning_rate": 1.5345e-05, "loss": 0.1051, "step": 3070 }, { "epoch": 0.0104, "grad_norm": 0.517673909664154, "learning_rate": 1.5395e-05, "loss": 0.1183, "step": 3080 }, { "epoch": 0.01045, "grad_norm": 0.4942791759967804, "learning_rate": 1.5445000000000002e-05, "loss": 0.1111, "step": 3090 }, { "epoch": 0.0105, "grad_norm": 0.5908092260360718, "learning_rate": 1.5495e-05, "loss": 0.1149, "step": 3100 }, { "epoch": 0.01055, "grad_norm": 0.7741053104400635, "learning_rate": 1.5545e-05, "loss": 0.1102, "step": 3110 }, { "epoch": 0.0106, "grad_norm": 0.6076146364212036, "learning_rate": 1.5595000000000002e-05, "loss": 0.1085, "step": 3120 }, { "epoch": 0.01065, "grad_norm": 0.6643165349960327, "learning_rate": 1.5645e-05, "loss": 0.1132, "step": 3130 }, { "epoch": 0.0107, "grad_norm": 0.4119599461555481, "learning_rate": 1.5695e-05, "loss": 0.1197, "step": 3140 }, { "epoch": 0.01075, "grad_norm": 0.437264621257782, "learning_rate": 1.5745000000000003e-05, "loss": 0.1132, "step": 3150 }, { "epoch": 0.0108, "grad_norm": 0.44356924295425415, "learning_rate": 1.5795e-05, "loss": 0.107, "step": 3160 }, { "epoch": 0.01085, "grad_norm": 0.8218526840209961, "learning_rate": 1.5845e-05, "loss": 0.1124, "step": 3170 }, { "epoch": 0.0109, "grad_norm": 0.6016043424606323, "learning_rate": 1.5895000000000003e-05, "loss": 0.1073, "step": 3180 }, { "epoch": 0.01095, "grad_norm": 0.5623441934585571, "learning_rate": 1.5945e-05, "loss": 0.1037, "step": 3190 }, { "epoch": 0.011, "grad_norm": 0.4690852165222168, "learning_rate": 1.5995000000000002e-05, "loss": 0.117, "step": 3200 }, { "epoch": 0.01105, "grad_norm": 0.5373580455780029, "learning_rate": 1.6045000000000003e-05, "loss": 0.1096, "step": 3210 }, { "epoch": 0.0111, "grad_norm": 0.6314288973808289, "learning_rate": 1.6095e-05, "loss": 0.1077, "step": 3220 }, { "epoch": 0.01115, "grad_norm": 0.6033948063850403, "learning_rate": 1.6145000000000002e-05, "loss": 0.1107, "step": 3230 }, { "epoch": 0.0112, "grad_norm": 0.5538676977157593, "learning_rate": 1.6195000000000003e-05, "loss": 0.1038, "step": 3240 }, { "epoch": 0.01125, "grad_norm": 0.6222608089447021, "learning_rate": 1.6245e-05, "loss": 0.1073, "step": 3250 }, { "epoch": 0.0113, "grad_norm": 0.7525963187217712, "learning_rate": 1.6295000000000002e-05, "loss": 0.115, "step": 3260 }, { "epoch": 0.01135, "grad_norm": 0.6032587885856628, "learning_rate": 1.6345000000000004e-05, "loss": 0.1064, "step": 3270 }, { "epoch": 0.0114, "grad_norm": 0.5158464908599854, "learning_rate": 1.6395e-05, "loss": 0.1027, "step": 3280 }, { "epoch": 0.01145, "grad_norm": 0.5689551830291748, "learning_rate": 1.6445000000000003e-05, "loss": 0.1138, "step": 3290 }, { "epoch": 0.0115, "grad_norm": 0.3941485285758972, "learning_rate": 1.6495e-05, "loss": 0.1228, "step": 3300 }, { "epoch": 0.01155, "grad_norm": 0.4815775454044342, "learning_rate": 1.6545e-05, "loss": 0.1124, "step": 3310 }, { "epoch": 0.0116, "grad_norm": 0.6636756658554077, "learning_rate": 1.6595e-05, "loss": 0.1066, "step": 3320 }, { "epoch": 0.01165, "grad_norm": 0.7142220735549927, "learning_rate": 1.6645e-05, "loss": 0.1072, "step": 3330 }, { "epoch": 0.0117, "grad_norm": 0.5734850764274597, "learning_rate": 1.6695e-05, "loss": 0.1136, "step": 3340 }, { "epoch": 0.01175, "grad_norm": 0.4835772216320038, "learning_rate": 1.6745e-05, "loss": 0.1061, "step": 3350 }, { "epoch": 0.0118, "grad_norm": 0.444543719291687, "learning_rate": 1.6795e-05, "loss": 0.109, "step": 3360 }, { "epoch": 0.01185, "grad_norm": 0.5176830291748047, "learning_rate": 1.6845e-05, "loss": 0.1214, "step": 3370 }, { "epoch": 0.0119, "grad_norm": 0.47018465399742126, "learning_rate": 1.6895e-05, "loss": 0.1119, "step": 3380 }, { "epoch": 0.01195, "grad_norm": 0.41452693939208984, "learning_rate": 1.6945e-05, "loss": 0.1116, "step": 3390 }, { "epoch": 0.012, "grad_norm": 0.5852661728858948, "learning_rate": 1.6995e-05, "loss": 0.1133, "step": 3400 }, { "epoch": 0.01205, "grad_norm": 0.8678564429283142, "learning_rate": 1.7045e-05, "loss": 0.1123, "step": 3410 }, { "epoch": 0.0121, "grad_norm": 0.5551216006278992, "learning_rate": 1.7095e-05, "loss": 0.1061, "step": 3420 }, { "epoch": 0.01215, "grad_norm": 0.6764218807220459, "learning_rate": 1.7145e-05, "loss": 0.1097, "step": 3430 }, { "epoch": 0.0122, "grad_norm": 0.623274564743042, "learning_rate": 1.7195e-05, "loss": 0.1074, "step": 3440 }, { "epoch": 0.01225, "grad_norm": 0.5014644265174866, "learning_rate": 1.7245e-05, "loss": 0.1151, "step": 3450 }, { "epoch": 0.0123, "grad_norm": 0.7478466629981995, "learning_rate": 1.7295e-05, "loss": 0.1125, "step": 3460 }, { "epoch": 0.01235, "grad_norm": 0.6174198985099792, "learning_rate": 1.7345e-05, "loss": 0.1086, "step": 3470 }, { "epoch": 0.0124, "grad_norm": 0.5381947159767151, "learning_rate": 1.7395e-05, "loss": 0.1, "step": 3480 }, { "epoch": 0.01245, "grad_norm": 0.5227854251861572, "learning_rate": 1.7445e-05, "loss": 0.1083, "step": 3490 }, { "epoch": 0.0125, "grad_norm": 0.6526561379432678, "learning_rate": 1.7495e-05, "loss": 0.1185, "step": 3500 }, { "epoch": 0.01255, "grad_norm": 0.4748079478740692, "learning_rate": 1.7545e-05, "loss": 0.1052, "step": 3510 }, { "epoch": 0.0126, "grad_norm": 0.6691007614135742, "learning_rate": 1.7595e-05, "loss": 0.1043, "step": 3520 }, { "epoch": 0.01265, "grad_norm": 0.5588632225990295, "learning_rate": 1.7645e-05, "loss": 0.1119, "step": 3530 }, { "epoch": 0.0127, "grad_norm": 0.6040257215499878, "learning_rate": 1.7695e-05, "loss": 0.1086, "step": 3540 }, { "epoch": 0.01275, "grad_norm": 0.5624618530273438, "learning_rate": 1.7745e-05, "loss": 0.1108, "step": 3550 }, { "epoch": 0.0128, "grad_norm": 0.6275506019592285, "learning_rate": 1.7795e-05, "loss": 0.1032, "step": 3560 }, { "epoch": 0.01285, "grad_norm": 0.46647828817367554, "learning_rate": 1.7845e-05, "loss": 0.1, "step": 3570 }, { "epoch": 0.0129, "grad_norm": 0.5406060218811035, "learning_rate": 1.7895e-05, "loss": 0.1113, "step": 3580 }, { "epoch": 0.01295, "grad_norm": 0.4511054754257202, "learning_rate": 1.7945000000000002e-05, "loss": 0.1061, "step": 3590 }, { "epoch": 0.013, "grad_norm": 0.40886831283569336, "learning_rate": 1.7995e-05, "loss": 0.1028, "step": 3600 }, { "epoch": 0.01305, "grad_norm": 0.43077781796455383, "learning_rate": 1.8045e-05, "loss": 0.1115, "step": 3610 }, { "epoch": 0.0131, "grad_norm": 0.6307575106620789, "learning_rate": 1.8095000000000002e-05, "loss": 0.0984, "step": 3620 }, { "epoch": 0.01315, "grad_norm": 0.39847230911254883, "learning_rate": 1.8145e-05, "loss": 0.0988, "step": 3630 }, { "epoch": 0.0132, "grad_norm": 0.46950647234916687, "learning_rate": 1.8195e-05, "loss": 0.1033, "step": 3640 }, { "epoch": 0.01325, "grad_norm": 0.5256922841072083, "learning_rate": 1.8245000000000002e-05, "loss": 0.1037, "step": 3650 }, { "epoch": 0.0133, "grad_norm": 0.6507235765457153, "learning_rate": 1.8295e-05, "loss": 0.1042, "step": 3660 }, { "epoch": 0.01335, "grad_norm": 0.8489135503768921, "learning_rate": 1.8345e-05, "loss": 0.1016, "step": 3670 }, { "epoch": 0.0134, "grad_norm": 0.5893881916999817, "learning_rate": 1.8395000000000003e-05, "loss": 0.1077, "step": 3680 }, { "epoch": 0.01345, "grad_norm": 0.5649431943893433, "learning_rate": 1.8445e-05, "loss": 0.1168, "step": 3690 }, { "epoch": 0.0135, "grad_norm": 0.6583290100097656, "learning_rate": 1.8495e-05, "loss": 0.1063, "step": 3700 }, { "epoch": 0.01355, "grad_norm": 0.5168076157569885, "learning_rate": 1.8545000000000003e-05, "loss": 0.1033, "step": 3710 }, { "epoch": 0.0136, "grad_norm": 0.8110647201538086, "learning_rate": 1.8595e-05, "loss": 0.098, "step": 3720 }, { "epoch": 0.01365, "grad_norm": 0.4656333923339844, "learning_rate": 1.8645000000000002e-05, "loss": 0.0974, "step": 3730 }, { "epoch": 0.0137, "grad_norm": 0.5489840507507324, "learning_rate": 1.8695e-05, "loss": 0.0946, "step": 3740 }, { "epoch": 0.01375, "grad_norm": 0.9075152277946472, "learning_rate": 1.8745e-05, "loss": 0.107, "step": 3750 }, { "epoch": 0.0138, "grad_norm": 0.6150771379470825, "learning_rate": 1.8795000000000002e-05, "loss": 0.0987, "step": 3760 }, { "epoch": 0.01385, "grad_norm": 0.6698026657104492, "learning_rate": 1.8845e-05, "loss": 0.1006, "step": 3770 }, { "epoch": 0.0139, "grad_norm": 0.7191886901855469, "learning_rate": 1.8895e-05, "loss": 0.1032, "step": 3780 }, { "epoch": 0.01395, "grad_norm": 0.4110310673713684, "learning_rate": 1.8945000000000002e-05, "loss": 0.1016, "step": 3790 }, { "epoch": 0.014, "grad_norm": 0.4223337769508362, "learning_rate": 1.8995e-05, "loss": 0.1007, "step": 3800 }, { "epoch": 0.01405, "grad_norm": 0.5190854668617249, "learning_rate": 1.9045e-05, "loss": 0.1016, "step": 3810 }, { "epoch": 0.0141, "grad_norm": 0.4307669997215271, "learning_rate": 1.9095000000000003e-05, "loss": 0.1039, "step": 3820 }, { "epoch": 0.01415, "grad_norm": 0.37840327620506287, "learning_rate": 1.9145e-05, "loss": 0.0955, "step": 3830 }, { "epoch": 0.0142, "grad_norm": 0.47309333086013794, "learning_rate": 1.9195000000000002e-05, "loss": 0.103, "step": 3840 }, { "epoch": 0.01425, "grad_norm": 0.43094107508659363, "learning_rate": 1.9245000000000003e-05, "loss": 0.1027, "step": 3850 }, { "epoch": 0.0143, "grad_norm": 0.9020057916641235, "learning_rate": 1.9295e-05, "loss": 0.1007, "step": 3860 }, { "epoch": 0.01435, "grad_norm": 1.0408713817596436, "learning_rate": 1.9345000000000002e-05, "loss": 0.1092, "step": 3870 }, { "epoch": 0.0144, "grad_norm": 0.5754146575927734, "learning_rate": 1.9395000000000003e-05, "loss": 0.1043, "step": 3880 }, { "epoch": 0.01445, "grad_norm": 0.5601019859313965, "learning_rate": 1.9445e-05, "loss": 0.0969, "step": 3890 }, { "epoch": 0.0145, "grad_norm": 0.5026534795761108, "learning_rate": 1.9495000000000002e-05, "loss": 0.0923, "step": 3900 }, { "epoch": 0.01455, "grad_norm": 0.4332098364830017, "learning_rate": 1.9545000000000003e-05, "loss": 0.0949, "step": 3910 }, { "epoch": 0.0146, "grad_norm": 0.4272383749485016, "learning_rate": 1.9595e-05, "loss": 0.0946, "step": 3920 }, { "epoch": 0.01465, "grad_norm": 0.4689870774745941, "learning_rate": 1.9645000000000002e-05, "loss": 0.0998, "step": 3930 }, { "epoch": 0.0147, "grad_norm": 0.41271480917930603, "learning_rate": 1.9695e-05, "loss": 0.1012, "step": 3940 }, { "epoch": 0.01475, "grad_norm": 0.47752124071121216, "learning_rate": 1.9744999999999998e-05, "loss": 0.1015, "step": 3950 }, { "epoch": 0.0148, "grad_norm": 0.4925667345523834, "learning_rate": 1.9795e-05, "loss": 0.0945, "step": 3960 }, { "epoch": 0.01485, "grad_norm": 0.46057838201522827, "learning_rate": 1.9845e-05, "loss": 0.0945, "step": 3970 }, { "epoch": 0.0149, "grad_norm": 0.5354776382446289, "learning_rate": 1.9895e-05, "loss": 0.1022, "step": 3980 }, { "epoch": 0.01495, "grad_norm": 0.5683553218841553, "learning_rate": 1.9945e-05, "loss": 0.1012, "step": 3990 }, { "epoch": 0.015, "grad_norm": 0.5514432191848755, "learning_rate": 1.9995e-05, "loss": 0.0933, "step": 4000 }, { "epoch": 0.01505, "grad_norm": 0.6688148975372314, "learning_rate": 2.0045e-05, "loss": 0.1029, "step": 4010 }, { "epoch": 0.0151, "grad_norm": 0.5286933779716492, "learning_rate": 2.0095e-05, "loss": 0.094, "step": 4020 }, { "epoch": 0.01515, "grad_norm": 0.6586257815361023, "learning_rate": 2.0145e-05, "loss": 0.1005, "step": 4030 }, { "epoch": 0.0152, "grad_norm": 0.5654017329216003, "learning_rate": 2.0195e-05, "loss": 0.1011, "step": 4040 }, { "epoch": 0.01525, "grad_norm": 0.6501078009605408, "learning_rate": 2.0245e-05, "loss": 0.0976, "step": 4050 }, { "epoch": 0.0153, "grad_norm": 0.6477259397506714, "learning_rate": 2.0295e-05, "loss": 0.1002, "step": 4060 }, { "epoch": 0.01535, "grad_norm": 0.4997469186782837, "learning_rate": 2.0345e-05, "loss": 0.095, "step": 4070 }, { "epoch": 0.0154, "grad_norm": 0.418266236782074, "learning_rate": 2.0395e-05, "loss": 0.1032, "step": 4080 }, { "epoch": 0.01545, "grad_norm": 0.5838645100593567, "learning_rate": 2.0445e-05, "loss": 0.1019, "step": 4090 }, { "epoch": 0.0155, "grad_norm": 0.4280761778354645, "learning_rate": 2.0495e-05, "loss": 0.0961, "step": 4100 }, { "epoch": 0.01555, "grad_norm": 0.5512828230857849, "learning_rate": 2.0545e-05, "loss": 0.1019, "step": 4110 }, { "epoch": 0.0156, "grad_norm": 0.5964367985725403, "learning_rate": 2.0595000000000002e-05, "loss": 0.0943, "step": 4120 }, { "epoch": 0.01565, "grad_norm": 0.5994517803192139, "learning_rate": 2.0645e-05, "loss": 0.1026, "step": 4130 }, { "epoch": 0.0157, "grad_norm": 0.5050929188728333, "learning_rate": 2.0695e-05, "loss": 0.0997, "step": 4140 }, { "epoch": 0.01575, "grad_norm": 0.7495620846748352, "learning_rate": 2.0745000000000002e-05, "loss": 0.0993, "step": 4150 }, { "epoch": 0.0158, "grad_norm": 0.6496561765670776, "learning_rate": 2.0795e-05, "loss": 0.0977, "step": 4160 }, { "epoch": 0.01585, "grad_norm": 0.5183280110359192, "learning_rate": 2.0845e-05, "loss": 0.099, "step": 4170 }, { "epoch": 0.0159, "grad_norm": 0.4890337586402893, "learning_rate": 2.0895e-05, "loss": 0.1013, "step": 4180 }, { "epoch": 0.01595, "grad_norm": 0.48081013560295105, "learning_rate": 2.0945e-05, "loss": 0.1021, "step": 4190 }, { "epoch": 0.016, "grad_norm": 0.6270437240600586, "learning_rate": 2.0995e-05, "loss": 0.0953, "step": 4200 }, { "epoch": 0.01605, "grad_norm": 0.5585843920707703, "learning_rate": 2.1045e-05, "loss": 0.1001, "step": 4210 }, { "epoch": 0.0161, "grad_norm": 0.5349629521369934, "learning_rate": 2.1095e-05, "loss": 0.0927, "step": 4220 }, { "epoch": 0.01615, "grad_norm": 0.5161903500556946, "learning_rate": 2.1145e-05, "loss": 0.0901, "step": 4230 }, { "epoch": 0.0162, "grad_norm": 0.5629688501358032, "learning_rate": 2.1195e-05, "loss": 0.1021, "step": 4240 }, { "epoch": 0.01625, "grad_norm": 0.4266774356365204, "learning_rate": 2.1245e-05, "loss": 0.0968, "step": 4250 }, { "epoch": 0.0163, "grad_norm": 0.6050424575805664, "learning_rate": 2.1295000000000002e-05, "loss": 0.096, "step": 4260 }, { "epoch": 0.01635, "grad_norm": 0.48876550793647766, "learning_rate": 2.1345e-05, "loss": 0.0954, "step": 4270 }, { "epoch": 0.0164, "grad_norm": 0.4919767677783966, "learning_rate": 2.1395e-05, "loss": 0.0925, "step": 4280 }, { "epoch": 0.01645, "grad_norm": 0.55455482006073, "learning_rate": 2.1445000000000002e-05, "loss": 0.0863, "step": 4290 }, { "epoch": 0.0165, "grad_norm": 0.5026130676269531, "learning_rate": 2.1495e-05, "loss": 0.0923, "step": 4300 }, { "epoch": 0.01655, "grad_norm": 0.5634472370147705, "learning_rate": 2.1545e-05, "loss": 0.1001, "step": 4310 }, { "epoch": 0.0166, "grad_norm": 0.5411179661750793, "learning_rate": 2.1595000000000002e-05, "loss": 0.0928, "step": 4320 }, { "epoch": 0.01665, "grad_norm": 0.5356360077857971, "learning_rate": 2.1645e-05, "loss": 0.0948, "step": 4330 }, { "epoch": 0.0167, "grad_norm": 0.5214255452156067, "learning_rate": 2.1695e-05, "loss": 0.0913, "step": 4340 }, { "epoch": 0.01675, "grad_norm": 0.5843163728713989, "learning_rate": 2.1745000000000003e-05, "loss": 0.0923, "step": 4350 }, { "epoch": 0.0168, "grad_norm": 0.428416907787323, "learning_rate": 2.1795e-05, "loss": 0.0969, "step": 4360 }, { "epoch": 0.01685, "grad_norm": 0.5517915487289429, "learning_rate": 2.1845000000000002e-05, "loss": 0.0862, "step": 4370 }, { "epoch": 0.0169, "grad_norm": 0.48688235878944397, "learning_rate": 2.1895000000000003e-05, "loss": 0.096, "step": 4380 }, { "epoch": 0.01695, "grad_norm": 0.46745675802230835, "learning_rate": 2.1945e-05, "loss": 0.0964, "step": 4390 }, { "epoch": 0.017, "grad_norm": 0.42236295342445374, "learning_rate": 2.1995000000000002e-05, "loss": 0.0937, "step": 4400 }, { "epoch": 0.01705, "grad_norm": 0.5698501467704773, "learning_rate": 2.2045000000000003e-05, "loss": 0.0918, "step": 4410 }, { "epoch": 0.0171, "grad_norm": 0.4699753224849701, "learning_rate": 2.2095e-05, "loss": 0.0988, "step": 4420 }, { "epoch": 0.01715, "grad_norm": 0.512039065361023, "learning_rate": 2.2145000000000002e-05, "loss": 0.0967, "step": 4430 }, { "epoch": 0.0172, "grad_norm": 0.4880082905292511, "learning_rate": 2.2195000000000003e-05, "loss": 0.0979, "step": 4440 }, { "epoch": 0.01725, "grad_norm": 0.6383131742477417, "learning_rate": 2.2245e-05, "loss": 0.0949, "step": 4450 }, { "epoch": 0.0173, "grad_norm": 0.7075005173683167, "learning_rate": 2.2295000000000003e-05, "loss": 0.094, "step": 4460 }, { "epoch": 0.01735, "grad_norm": 0.5859620571136475, "learning_rate": 2.2345e-05, "loss": 0.0977, "step": 4470 }, { "epoch": 0.0174, "grad_norm": 0.4959677755832672, "learning_rate": 2.2395e-05, "loss": 0.0934, "step": 4480 }, { "epoch": 0.01745, "grad_norm": 0.5418904423713684, "learning_rate": 2.2445000000000003e-05, "loss": 0.0898, "step": 4490 }, { "epoch": 0.0175, "grad_norm": 0.5080021023750305, "learning_rate": 2.2495e-05, "loss": 0.091, "step": 4500 }, { "epoch": 0.01755, "grad_norm": 0.43675655126571655, "learning_rate": 2.2545000000000002e-05, "loss": 0.0953, "step": 4510 }, { "epoch": 0.0176, "grad_norm": 0.4278501272201538, "learning_rate": 2.2595000000000003e-05, "loss": 0.0977, "step": 4520 }, { "epoch": 0.01765, "grad_norm": 0.5013251304626465, "learning_rate": 2.2645e-05, "loss": 0.0915, "step": 4530 }, { "epoch": 0.0177, "grad_norm": 0.40271320939064026, "learning_rate": 2.2695000000000002e-05, "loss": 0.0921, "step": 4540 }, { "epoch": 0.01775, "grad_norm": 0.6630820631980896, "learning_rate": 2.2745000000000003e-05, "loss": 0.087, "step": 4550 }, { "epoch": 0.0178, "grad_norm": 0.547572910785675, "learning_rate": 2.2795e-05, "loss": 0.0881, "step": 4560 }, { "epoch": 0.01785, "grad_norm": 0.4887124300003052, "learning_rate": 2.2845e-05, "loss": 0.0897, "step": 4570 }, { "epoch": 0.0179, "grad_norm": 0.46305638551712036, "learning_rate": 2.2895e-05, "loss": 0.0893, "step": 4580 }, { "epoch": 0.01795, "grad_norm": 0.36096814274787903, "learning_rate": 2.2945e-05, "loss": 0.0919, "step": 4590 }, { "epoch": 0.018, "grad_norm": 0.4254930317401886, "learning_rate": 2.2995e-05, "loss": 0.0953, "step": 4600 }, { "epoch": 0.01805, "grad_norm": 0.4120861291885376, "learning_rate": 2.3045e-05, "loss": 0.0899, "step": 4610 }, { "epoch": 0.0181, "grad_norm": 0.519589364528656, "learning_rate": 2.3095e-05, "loss": 0.0907, "step": 4620 }, { "epoch": 0.01815, "grad_norm": 0.45753464102745056, "learning_rate": 2.3145e-05, "loss": 0.0856, "step": 4630 }, { "epoch": 0.0182, "grad_norm": 0.4146299362182617, "learning_rate": 2.3195e-05, "loss": 0.0892, "step": 4640 }, { "epoch": 0.01825, "grad_norm": 0.4657032787799835, "learning_rate": 2.3245e-05, "loss": 0.0876, "step": 4650 }, { "epoch": 0.0183, "grad_norm": 0.5032997131347656, "learning_rate": 2.3295e-05, "loss": 0.0858, "step": 4660 }, { "epoch": 0.01835, "grad_norm": 0.42685577273368835, "learning_rate": 2.3345e-05, "loss": 0.0925, "step": 4670 }, { "epoch": 0.0184, "grad_norm": 0.4389508366584778, "learning_rate": 2.3395e-05, "loss": 0.0839, "step": 4680 }, { "epoch": 0.01845, "grad_norm": 0.5109202861785889, "learning_rate": 2.3445e-05, "loss": 0.0876, "step": 4690 }, { "epoch": 0.0185, "grad_norm": 0.537704586982727, "learning_rate": 2.3495e-05, "loss": 0.087, "step": 4700 }, { "epoch": 0.01855, "grad_norm": 0.42653656005859375, "learning_rate": 2.3545e-05, "loss": 0.0847, "step": 4710 }, { "epoch": 0.0186, "grad_norm": 0.48544803261756897, "learning_rate": 2.3595e-05, "loss": 0.0808, "step": 4720 }, { "epoch": 0.01865, "grad_norm": 0.4926588237285614, "learning_rate": 2.3645e-05, "loss": 0.0995, "step": 4730 }, { "epoch": 0.0187, "grad_norm": 0.4736453890800476, "learning_rate": 2.3695e-05, "loss": 0.0862, "step": 4740 }, { "epoch": 0.01875, "grad_norm": 0.44785216450691223, "learning_rate": 2.3745e-05, "loss": 0.0837, "step": 4750 }, { "epoch": 0.0188, "grad_norm": 0.6035889387130737, "learning_rate": 2.3795000000000002e-05, "loss": 0.0869, "step": 4760 }, { "epoch": 0.01885, "grad_norm": 0.4353933334350586, "learning_rate": 2.3845e-05, "loss": 0.0834, "step": 4770 }, { "epoch": 0.0189, "grad_norm": 0.5200499296188354, "learning_rate": 2.3895e-05, "loss": 0.0907, "step": 4780 }, { "epoch": 0.01895, "grad_norm": 0.49414077401161194, "learning_rate": 2.3945000000000002e-05, "loss": 0.0863, "step": 4790 }, { "epoch": 0.019, "grad_norm": 0.44971001148223877, "learning_rate": 2.3995e-05, "loss": 0.0845, "step": 4800 }, { "epoch": 0.01905, "grad_norm": 0.4229947626590729, "learning_rate": 2.4045e-05, "loss": 0.0878, "step": 4810 }, { "epoch": 0.0191, "grad_norm": 0.5139651298522949, "learning_rate": 2.4095000000000002e-05, "loss": 0.0879, "step": 4820 }, { "epoch": 0.01915, "grad_norm": 0.5074255466461182, "learning_rate": 2.4145e-05, "loss": 0.0816, "step": 4830 }, { "epoch": 0.0192, "grad_norm": 0.605755090713501, "learning_rate": 2.4195e-05, "loss": 0.0921, "step": 4840 }, { "epoch": 0.01925, "grad_norm": 0.48389285802841187, "learning_rate": 2.4245000000000002e-05, "loss": 0.0829, "step": 4850 }, { "epoch": 0.0193, "grad_norm": 0.7070655226707458, "learning_rate": 2.4295e-05, "loss": 0.0931, "step": 4860 }, { "epoch": 0.01935, "grad_norm": 0.6098916530609131, "learning_rate": 2.4345e-05, "loss": 0.0903, "step": 4870 }, { "epoch": 0.0194, "grad_norm": 0.5935694575309753, "learning_rate": 2.4395000000000003e-05, "loss": 0.0884, "step": 4880 }, { "epoch": 0.01945, "grad_norm": 0.53770911693573, "learning_rate": 2.4445e-05, "loss": 0.0889, "step": 4890 }, { "epoch": 0.0195, "grad_norm": 0.3916023373603821, "learning_rate": 2.4495000000000002e-05, "loss": 0.0833, "step": 4900 }, { "epoch": 0.01955, "grad_norm": 0.567225456237793, "learning_rate": 2.4545000000000003e-05, "loss": 0.0889, "step": 4910 }, { "epoch": 0.0196, "grad_norm": 0.5302049517631531, "learning_rate": 2.4595e-05, "loss": 0.09, "step": 4920 }, { "epoch": 0.01965, "grad_norm": 0.5962851047515869, "learning_rate": 2.4645000000000002e-05, "loss": 0.0919, "step": 4930 }, { "epoch": 0.0197, "grad_norm": 0.49372929334640503, "learning_rate": 2.4695e-05, "loss": 0.0907, "step": 4940 }, { "epoch": 0.01975, "grad_norm": 0.5563930869102478, "learning_rate": 2.4745e-05, "loss": 0.0855, "step": 4950 }, { "epoch": 0.0198, "grad_norm": 0.44720908999443054, "learning_rate": 2.4795000000000002e-05, "loss": 0.0912, "step": 4960 }, { "epoch": 0.01985, "grad_norm": 0.4346630871295929, "learning_rate": 2.4845e-05, "loss": 0.0909, "step": 4970 }, { "epoch": 0.0199, "grad_norm": 0.5587462186813354, "learning_rate": 2.4895e-05, "loss": 0.092, "step": 4980 }, { "epoch": 0.01995, "grad_norm": 0.5791865587234497, "learning_rate": 2.4945000000000003e-05, "loss": 0.0893, "step": 4990 }, { "epoch": 0.02, "grad_norm": 0.44487252831459045, "learning_rate": 2.4995e-05, "loss": 0.0825, "step": 5000 }, { "epoch": 0.02005, "grad_norm": 0.5317064523696899, "learning_rate": 2.5045e-05, "loss": 0.0863, "step": 5010 }, { "epoch": 0.0201, "grad_norm": 0.41652870178222656, "learning_rate": 2.5095000000000003e-05, "loss": 0.086, "step": 5020 }, { "epoch": 0.02015, "grad_norm": 0.4210962653160095, "learning_rate": 2.5145e-05, "loss": 0.085, "step": 5030 }, { "epoch": 0.0202, "grad_norm": 0.4132222831249237, "learning_rate": 2.5195000000000002e-05, "loss": 0.083, "step": 5040 }, { "epoch": 0.02025, "grad_norm": 0.418788880109787, "learning_rate": 2.5245000000000003e-05, "loss": 0.0858, "step": 5050 }, { "epoch": 0.0203, "grad_norm": 0.4658764898777008, "learning_rate": 2.5295e-05, "loss": 0.082, "step": 5060 }, { "epoch": 0.02035, "grad_norm": 0.48415932059288025, "learning_rate": 2.5345000000000002e-05, "loss": 0.0833, "step": 5070 }, { "epoch": 0.0204, "grad_norm": 0.4540814459323883, "learning_rate": 2.5395000000000003e-05, "loss": 0.0866, "step": 5080 }, { "epoch": 0.02045, "grad_norm": 0.5398637056350708, "learning_rate": 2.5445e-05, "loss": 0.0879, "step": 5090 }, { "epoch": 0.0205, "grad_norm": 0.46198877692222595, "learning_rate": 2.5495000000000002e-05, "loss": 0.0858, "step": 5100 }, { "epoch": 0.02055, "grad_norm": 0.5064612627029419, "learning_rate": 2.5545000000000004e-05, "loss": 0.0843, "step": 5110 }, { "epoch": 0.0206, "grad_norm": 0.43292903900146484, "learning_rate": 2.5595e-05, "loss": 0.082, "step": 5120 }, { "epoch": 0.02065, "grad_norm": 0.42029690742492676, "learning_rate": 2.5645000000000003e-05, "loss": 0.084, "step": 5130 }, { "epoch": 0.0207, "grad_norm": 0.5056539177894592, "learning_rate": 2.5695000000000004e-05, "loss": 0.0815, "step": 5140 }, { "epoch": 0.02075, "grad_norm": 0.5533456206321716, "learning_rate": 2.5745e-05, "loss": 0.0833, "step": 5150 }, { "epoch": 0.0208, "grad_norm": 0.4594043791294098, "learning_rate": 2.5795000000000003e-05, "loss": 0.0848, "step": 5160 }, { "epoch": 0.02085, "grad_norm": 0.6164016723632812, "learning_rate": 2.5845000000000004e-05, "loss": 0.089, "step": 5170 }, { "epoch": 0.0209, "grad_norm": 0.506243884563446, "learning_rate": 2.5895000000000002e-05, "loss": 0.0824, "step": 5180 }, { "epoch": 0.02095, "grad_norm": 0.7490360736846924, "learning_rate": 2.5945000000000003e-05, "loss": 0.082, "step": 5190 }, { "epoch": 0.021, "grad_norm": 0.5026156306266785, "learning_rate": 2.5995000000000004e-05, "loss": 0.0934, "step": 5200 }, { "epoch": 0.02105, "grad_norm": 0.5008965134620667, "learning_rate": 2.6045000000000002e-05, "loss": 0.0893, "step": 5210 }, { "epoch": 0.0211, "grad_norm": 0.43096840381622314, "learning_rate": 2.6095000000000003e-05, "loss": 0.0798, "step": 5220 }, { "epoch": 0.02115, "grad_norm": 0.3479880690574646, "learning_rate": 2.6145e-05, "loss": 0.0821, "step": 5230 }, { "epoch": 0.0212, "grad_norm": 0.3658469617366791, "learning_rate": 2.6195000000000002e-05, "loss": 0.0806, "step": 5240 }, { "epoch": 0.02125, "grad_norm": 0.5226966142654419, "learning_rate": 2.6245000000000004e-05, "loss": 0.0808, "step": 5250 }, { "epoch": 0.0213, "grad_norm": 0.5437954068183899, "learning_rate": 2.6295e-05, "loss": 0.0851, "step": 5260 }, { "epoch": 0.02135, "grad_norm": 0.4487932026386261, "learning_rate": 2.6345000000000003e-05, "loss": 0.0831, "step": 5270 }, { "epoch": 0.0214, "grad_norm": 0.44730430841445923, "learning_rate": 2.6395000000000004e-05, "loss": 0.09, "step": 5280 }, { "epoch": 0.02145, "grad_norm": 0.5502302050590515, "learning_rate": 2.6445000000000002e-05, "loss": 0.0812, "step": 5290 }, { "epoch": 0.0215, "grad_norm": 0.6342505216598511, "learning_rate": 2.6495000000000003e-05, "loss": 0.0876, "step": 5300 }, { "epoch": 0.02155, "grad_norm": 0.4663097858428955, "learning_rate": 2.6545000000000004e-05, "loss": 0.0892, "step": 5310 }, { "epoch": 0.0216, "grad_norm": 0.5896298289299011, "learning_rate": 2.6595000000000002e-05, "loss": 0.0848, "step": 5320 }, { "epoch": 0.02165, "grad_norm": 0.4614022374153137, "learning_rate": 2.6645000000000003e-05, "loss": 0.0838, "step": 5330 }, { "epoch": 0.0217, "grad_norm": 0.4756334125995636, "learning_rate": 2.6695000000000004e-05, "loss": 0.0856, "step": 5340 }, { "epoch": 0.02175, "grad_norm": 0.6175363063812256, "learning_rate": 2.6745000000000002e-05, "loss": 0.0891, "step": 5350 }, { "epoch": 0.0218, "grad_norm": 0.49067890644073486, "learning_rate": 2.6795000000000003e-05, "loss": 0.091, "step": 5360 }, { "epoch": 0.02185, "grad_norm": 0.4190593957901001, "learning_rate": 2.6845000000000005e-05, "loss": 0.0818, "step": 5370 }, { "epoch": 0.0219, "grad_norm": 0.42757564783096313, "learning_rate": 2.6895000000000003e-05, "loss": 0.0829, "step": 5380 }, { "epoch": 0.02195, "grad_norm": 0.4748636782169342, "learning_rate": 2.6945000000000004e-05, "loss": 0.086, "step": 5390 }, { "epoch": 0.022, "grad_norm": 0.4459311366081238, "learning_rate": 2.6995000000000005e-05, "loss": 0.0845, "step": 5400 }, { "epoch": 0.02205, "grad_norm": 0.521063506603241, "learning_rate": 2.7045000000000003e-05, "loss": 0.0815, "step": 5410 }, { "epoch": 0.0221, "grad_norm": 0.5046432614326477, "learning_rate": 2.7095000000000004e-05, "loss": 0.0913, "step": 5420 }, { "epoch": 0.02215, "grad_norm": 0.6038782000541687, "learning_rate": 2.7145000000000005e-05, "loss": 0.0819, "step": 5430 }, { "epoch": 0.0222, "grad_norm": 0.46855854988098145, "learning_rate": 2.7195000000000003e-05, "loss": 0.0882, "step": 5440 }, { "epoch": 0.02225, "grad_norm": 0.43038228154182434, "learning_rate": 2.7245000000000004e-05, "loss": 0.0891, "step": 5450 }, { "epoch": 0.0223, "grad_norm": 0.4552011787891388, "learning_rate": 2.7295000000000005e-05, "loss": 0.0781, "step": 5460 }, { "epoch": 0.02235, "grad_norm": 0.4150822162628174, "learning_rate": 2.7345000000000003e-05, "loss": 0.0881, "step": 5470 }, { "epoch": 0.0224, "grad_norm": 0.4220922887325287, "learning_rate": 2.7395000000000005e-05, "loss": 0.0774, "step": 5480 }, { "epoch": 0.02245, "grad_norm": 0.39791256189346313, "learning_rate": 2.7445000000000002e-05, "loss": 0.0818, "step": 5490 }, { "epoch": 0.0225, "grad_norm": 0.4505294859409332, "learning_rate": 2.7495000000000004e-05, "loss": 0.0865, "step": 5500 }, { "epoch": 0.02255, "grad_norm": 0.6119654774665833, "learning_rate": 2.7544999999999998e-05, "loss": 0.0863, "step": 5510 }, { "epoch": 0.0226, "grad_norm": 0.5199548602104187, "learning_rate": 2.7595e-05, "loss": 0.0825, "step": 5520 }, { "epoch": 0.02265, "grad_norm": 0.4721256494522095, "learning_rate": 2.7644999999999997e-05, "loss": 0.0815, "step": 5530 }, { "epoch": 0.0227, "grad_norm": 0.5406511425971985, "learning_rate": 2.7694999999999998e-05, "loss": 0.08, "step": 5540 }, { "epoch": 0.02275, "grad_norm": 0.5193853378295898, "learning_rate": 2.7745e-05, "loss": 0.0855, "step": 5550 }, { "epoch": 0.0228, "grad_norm": 0.4746890962123871, "learning_rate": 2.7794999999999997e-05, "loss": 0.0822, "step": 5560 }, { "epoch": 0.02285, "grad_norm": 0.3871111273765564, "learning_rate": 2.7845e-05, "loss": 0.0829, "step": 5570 }, { "epoch": 0.0229, "grad_norm": 0.5385257005691528, "learning_rate": 2.7895e-05, "loss": 0.0865, "step": 5580 }, { "epoch": 0.02295, "grad_norm": 0.45264989137649536, "learning_rate": 2.7944999999999998e-05, "loss": 0.09, "step": 5590 }, { "epoch": 0.023, "grad_norm": 0.6268983483314514, "learning_rate": 2.7995e-05, "loss": 0.0808, "step": 5600 }, { "epoch": 0.02305, "grad_norm": 0.43054303526878357, "learning_rate": 2.8045e-05, "loss": 0.0812, "step": 5610 }, { "epoch": 0.0231, "grad_norm": 0.71108078956604, "learning_rate": 2.8094999999999998e-05, "loss": 0.0818, "step": 5620 }, { "epoch": 0.02315, "grad_norm": 0.6452078223228455, "learning_rate": 2.8145e-05, "loss": 0.0848, "step": 5630 }, { "epoch": 0.0232, "grad_norm": 0.5041331648826599, "learning_rate": 2.8195e-05, "loss": 0.081, "step": 5640 }, { "epoch": 0.02325, "grad_norm": 0.40575382113456726, "learning_rate": 2.8244999999999998e-05, "loss": 0.084, "step": 5650 }, { "epoch": 0.0233, "grad_norm": 0.4588361978530884, "learning_rate": 2.8295e-05, "loss": 0.0818, "step": 5660 }, { "epoch": 0.02335, "grad_norm": 0.47744446992874146, "learning_rate": 2.8345e-05, "loss": 0.0856, "step": 5670 }, { "epoch": 0.0234, "grad_norm": 0.49122774600982666, "learning_rate": 2.8395e-05, "loss": 0.0852, "step": 5680 }, { "epoch": 0.02345, "grad_norm": 0.45338350534439087, "learning_rate": 2.8445e-05, "loss": 0.0839, "step": 5690 }, { "epoch": 0.0235, "grad_norm": 0.4360921084880829, "learning_rate": 2.8495e-05, "loss": 0.0788, "step": 5700 }, { "epoch": 0.02355, "grad_norm": 0.4302518367767334, "learning_rate": 2.8545e-05, "loss": 0.0821, "step": 5710 }, { "epoch": 0.0236, "grad_norm": 0.46977418661117554, "learning_rate": 2.8595e-05, "loss": 0.0777, "step": 5720 }, { "epoch": 0.02365, "grad_norm": 0.4087386131286621, "learning_rate": 2.8645e-05, "loss": 0.0813, "step": 5730 }, { "epoch": 0.0237, "grad_norm": 0.5564868450164795, "learning_rate": 2.8695e-05, "loss": 0.0843, "step": 5740 }, { "epoch": 0.02375, "grad_norm": 0.5684245824813843, "learning_rate": 2.8745e-05, "loss": 0.0835, "step": 5750 }, { "epoch": 0.0238, "grad_norm": 0.498969167470932, "learning_rate": 2.8795e-05, "loss": 0.08, "step": 5760 }, { "epoch": 0.02385, "grad_norm": 0.43636438250541687, "learning_rate": 2.8845e-05, "loss": 0.0804, "step": 5770 }, { "epoch": 0.0239, "grad_norm": 0.5187768936157227, "learning_rate": 2.8895e-05, "loss": 0.0781, "step": 5780 }, { "epoch": 0.02395, "grad_norm": 0.4015440046787262, "learning_rate": 2.8945e-05, "loss": 0.0901, "step": 5790 }, { "epoch": 0.024, "grad_norm": 0.4077690541744232, "learning_rate": 2.8995e-05, "loss": 0.0779, "step": 5800 }, { "epoch": 0.02405, "grad_norm": 0.6242225766181946, "learning_rate": 2.9045e-05, "loss": 0.0827, "step": 5810 }, { "epoch": 0.0241, "grad_norm": 0.4288853108882904, "learning_rate": 2.9095e-05, "loss": 0.0851, "step": 5820 }, { "epoch": 0.02415, "grad_norm": 0.43137598037719727, "learning_rate": 2.9145e-05, "loss": 0.085, "step": 5830 }, { "epoch": 0.0242, "grad_norm": 0.540719211101532, "learning_rate": 2.9195e-05, "loss": 0.0856, "step": 5840 }, { "epoch": 0.02425, "grad_norm": 0.3808692395687103, "learning_rate": 2.9245e-05, "loss": 0.0823, "step": 5850 }, { "epoch": 0.0243, "grad_norm": 0.4207990765571594, "learning_rate": 2.9295e-05, "loss": 0.0817, "step": 5860 }, { "epoch": 0.02435, "grad_norm": 0.4244430959224701, "learning_rate": 2.9345e-05, "loss": 0.0812, "step": 5870 }, { "epoch": 0.0244, "grad_norm": 0.5179128050804138, "learning_rate": 2.9395e-05, "loss": 0.0846, "step": 5880 }, { "epoch": 0.02445, "grad_norm": 0.5072123408317566, "learning_rate": 2.9445e-05, "loss": 0.0811, "step": 5890 }, { "epoch": 0.0245, "grad_norm": 0.5344266295433044, "learning_rate": 2.9495e-05, "loss": 0.0906, "step": 5900 }, { "epoch": 0.02455, "grad_norm": 0.42122307419776917, "learning_rate": 2.9545e-05, "loss": 0.0803, "step": 5910 }, { "epoch": 0.0246, "grad_norm": 0.3986223638057709, "learning_rate": 2.9595e-05, "loss": 0.0807, "step": 5920 }, { "epoch": 0.02465, "grad_norm": 0.4583601653575897, "learning_rate": 2.9645e-05, "loss": 0.0848, "step": 5930 }, { "epoch": 0.0247, "grad_norm": 0.44839999079704285, "learning_rate": 2.9695e-05, "loss": 0.0757, "step": 5940 }, { "epoch": 0.02475, "grad_norm": 0.41972583532333374, "learning_rate": 2.9745e-05, "loss": 0.0897, "step": 5950 }, { "epoch": 0.0248, "grad_norm": 0.6261882185935974, "learning_rate": 2.9795000000000002e-05, "loss": 0.0822, "step": 5960 }, { "epoch": 0.02485, "grad_norm": 0.5626884698867798, "learning_rate": 2.9845e-05, "loss": 0.0873, "step": 5970 }, { "epoch": 0.0249, "grad_norm": 0.4882666766643524, "learning_rate": 2.9895e-05, "loss": 0.0781, "step": 5980 }, { "epoch": 0.02495, "grad_norm": 0.40979450941085815, "learning_rate": 2.9945000000000002e-05, "loss": 0.0763, "step": 5990 }, { "epoch": 0.025, "grad_norm": 0.32592901587486267, "learning_rate": 2.9995e-05, "loss": 0.078, "step": 6000 }, { "epoch": 0.02505, "grad_norm": 0.5241904258728027, "learning_rate": 3.0045e-05, "loss": 0.0852, "step": 6010 }, { "epoch": 0.0251, "grad_norm": 0.48974916338920593, "learning_rate": 3.0095000000000002e-05, "loss": 0.0791, "step": 6020 }, { "epoch": 0.02515, "grad_norm": 0.4012157917022705, "learning_rate": 3.0145e-05, "loss": 0.077, "step": 6030 }, { "epoch": 0.0252, "grad_norm": 0.4815496802330017, "learning_rate": 3.0195e-05, "loss": 0.0832, "step": 6040 }, { "epoch": 0.02525, "grad_norm": 0.4560248851776123, "learning_rate": 3.0245000000000003e-05, "loss": 0.0769, "step": 6050 }, { "epoch": 0.0253, "grad_norm": 0.43385860323905945, "learning_rate": 3.0295e-05, "loss": 0.0793, "step": 6060 }, { "epoch": 0.02535, "grad_norm": 0.3584393858909607, "learning_rate": 3.0345e-05, "loss": 0.0813, "step": 6070 }, { "epoch": 0.0254, "grad_norm": 0.426179975271225, "learning_rate": 3.0395000000000003e-05, "loss": 0.0804, "step": 6080 }, { "epoch": 0.02545, "grad_norm": 0.47968167066574097, "learning_rate": 3.0445e-05, "loss": 0.0865, "step": 6090 }, { "epoch": 0.0255, "grad_norm": 0.4998861253261566, "learning_rate": 3.0495000000000002e-05, "loss": 0.083, "step": 6100 }, { "epoch": 0.02555, "grad_norm": 0.38313713669776917, "learning_rate": 3.0545e-05, "loss": 0.0756, "step": 6110 }, { "epoch": 0.0256, "grad_norm": 0.44031354784965515, "learning_rate": 3.0595e-05, "loss": 0.0755, "step": 6120 }, { "epoch": 0.02565, "grad_norm": 0.3908822238445282, "learning_rate": 3.0645e-05, "loss": 0.0796, "step": 6130 }, { "epoch": 0.0257, "grad_norm": 0.39873838424682617, "learning_rate": 3.0695000000000003e-05, "loss": 0.0762, "step": 6140 }, { "epoch": 0.02575, "grad_norm": 0.3386397063732147, "learning_rate": 3.0745000000000005e-05, "loss": 0.0779, "step": 6150 }, { "epoch": 0.0258, "grad_norm": 0.4112503230571747, "learning_rate": 3.0795e-05, "loss": 0.0791, "step": 6160 }, { "epoch": 0.02585, "grad_norm": 0.41174352169036865, "learning_rate": 3.0845e-05, "loss": 0.0819, "step": 6170 }, { "epoch": 0.0259, "grad_norm": 0.4976944029331207, "learning_rate": 3.0895e-05, "loss": 0.0782, "step": 6180 }, { "epoch": 0.02595, "grad_norm": 0.42394208908081055, "learning_rate": 3.0945e-05, "loss": 0.0785, "step": 6190 }, { "epoch": 0.026, "grad_norm": 0.5694761276245117, "learning_rate": 3.0995000000000004e-05, "loss": 0.0773, "step": 6200 }, { "epoch": 0.02605, "grad_norm": 0.46636876463890076, "learning_rate": 3.1045000000000005e-05, "loss": 0.0818, "step": 6210 }, { "epoch": 0.0261, "grad_norm": 0.4229767918586731, "learning_rate": 3.1095e-05, "loss": 0.0792, "step": 6220 }, { "epoch": 0.02615, "grad_norm": 0.41858726739883423, "learning_rate": 3.1145e-05, "loss": 0.076, "step": 6230 }, { "epoch": 0.0262, "grad_norm": 0.43213996291160583, "learning_rate": 3.1195e-05, "loss": 0.076, "step": 6240 }, { "epoch": 0.02625, "grad_norm": 0.4471328556537628, "learning_rate": 3.1245e-05, "loss": 0.0742, "step": 6250 }, { "epoch": 0.0263, "grad_norm": 0.4880267381668091, "learning_rate": 3.1295000000000004e-05, "loss": 0.0847, "step": 6260 }, { "epoch": 0.02635, "grad_norm": 0.4235740900039673, "learning_rate": 3.1345e-05, "loss": 0.081, "step": 6270 }, { "epoch": 0.0264, "grad_norm": 0.4439496099948883, "learning_rate": 3.1395e-05, "loss": 0.0757, "step": 6280 }, { "epoch": 0.02645, "grad_norm": 0.37231093645095825, "learning_rate": 3.1445e-05, "loss": 0.0816, "step": 6290 }, { "epoch": 0.0265, "grad_norm": 0.5958653688430786, "learning_rate": 3.1495e-05, "loss": 0.074, "step": 6300 }, { "epoch": 0.02655, "grad_norm": 0.5390610098838806, "learning_rate": 3.1545000000000004e-05, "loss": 0.0807, "step": 6310 }, { "epoch": 0.0266, "grad_norm": 0.44329580664634705, "learning_rate": 3.1595000000000005e-05, "loss": 0.0826, "step": 6320 }, { "epoch": 0.02665, "grad_norm": 0.4327351748943329, "learning_rate": 3.1645e-05, "loss": 0.0789, "step": 6330 }, { "epoch": 0.0267, "grad_norm": 0.4001625180244446, "learning_rate": 3.1695e-05, "loss": 0.0794, "step": 6340 }, { "epoch": 0.02675, "grad_norm": 0.36040255427360535, "learning_rate": 3.1745e-05, "loss": 0.0763, "step": 6350 }, { "epoch": 0.0268, "grad_norm": 0.423082560300827, "learning_rate": 3.1795e-05, "loss": 0.0804, "step": 6360 }, { "epoch": 0.02685, "grad_norm": 0.42768681049346924, "learning_rate": 3.1845000000000004e-05, "loss": 0.0818, "step": 6370 }, { "epoch": 0.0269, "grad_norm": 0.4242507219314575, "learning_rate": 3.1895000000000005e-05, "loss": 0.0769, "step": 6380 }, { "epoch": 0.02695, "grad_norm": 0.5090714693069458, "learning_rate": 3.1945e-05, "loss": 0.0837, "step": 6390 }, { "epoch": 0.027, "grad_norm": 0.45466625690460205, "learning_rate": 3.1995e-05, "loss": 0.0751, "step": 6400 }, { "epoch": 0.02705, "grad_norm": 0.3438904285430908, "learning_rate": 3.2045e-05, "loss": 0.0809, "step": 6410 }, { "epoch": 0.0271, "grad_norm": 0.47308188676834106, "learning_rate": 3.2095000000000004e-05, "loss": 0.0762, "step": 6420 }, { "epoch": 0.02715, "grad_norm": 0.41979026794433594, "learning_rate": 3.2145000000000005e-05, "loss": 0.0812, "step": 6430 }, { "epoch": 0.0272, "grad_norm": 0.432265043258667, "learning_rate": 3.2195000000000006e-05, "loss": 0.0747, "step": 6440 }, { "epoch": 0.02725, "grad_norm": 0.39878150820732117, "learning_rate": 3.2245e-05, "loss": 0.0744, "step": 6450 }, { "epoch": 0.0273, "grad_norm": 0.4525047540664673, "learning_rate": 3.2295e-05, "loss": 0.0753, "step": 6460 }, { "epoch": 0.02735, "grad_norm": 0.46699413657188416, "learning_rate": 3.2345e-05, "loss": 0.0792, "step": 6470 }, { "epoch": 0.0274, "grad_norm": 0.48744940757751465, "learning_rate": 3.2395000000000004e-05, "loss": 0.0783, "step": 6480 }, { "epoch": 0.02745, "grad_norm": 0.4602070748806, "learning_rate": 3.2445000000000005e-05, "loss": 0.0746, "step": 6490 }, { "epoch": 0.0275, "grad_norm": 0.46310096979141235, "learning_rate": 3.2495000000000007e-05, "loss": 0.0779, "step": 6500 }, { "epoch": 0.02755, "grad_norm": 0.3661746084690094, "learning_rate": 3.2545e-05, "loss": 0.0774, "step": 6510 }, { "epoch": 0.0276, "grad_norm": 0.3644971549510956, "learning_rate": 3.2595e-05, "loss": 0.0737, "step": 6520 }, { "epoch": 0.02765, "grad_norm": 0.3705560863018036, "learning_rate": 3.2645e-05, "loss": 0.0748, "step": 6530 }, { "epoch": 0.0277, "grad_norm": 0.4240495562553406, "learning_rate": 3.2695000000000005e-05, "loss": 0.0767, "step": 6540 }, { "epoch": 0.02775, "grad_norm": 0.34379133582115173, "learning_rate": 3.2745000000000006e-05, "loss": 0.0715, "step": 6550 }, { "epoch": 0.0278, "grad_norm": 0.3436594605445862, "learning_rate": 3.2795e-05, "loss": 0.0732, "step": 6560 }, { "epoch": 0.02785, "grad_norm": 0.42003849148750305, "learning_rate": 3.2845e-05, "loss": 0.0821, "step": 6570 }, { "epoch": 0.0279, "grad_norm": 0.4620581865310669, "learning_rate": 3.2895e-05, "loss": 0.0711, "step": 6580 }, { "epoch": 0.02795, "grad_norm": 0.40812432765960693, "learning_rate": 3.2945000000000004e-05, "loss": 0.0781, "step": 6590 }, { "epoch": 0.028, "grad_norm": 0.41272908449172974, "learning_rate": 3.2995000000000005e-05, "loss": 0.0765, "step": 6600 }, { "epoch": 0.02805, "grad_norm": 0.43230175971984863, "learning_rate": 3.3045000000000006e-05, "loss": 0.0752, "step": 6610 }, { "epoch": 0.0281, "grad_norm": 0.4300050735473633, "learning_rate": 3.3095e-05, "loss": 0.0731, "step": 6620 }, { "epoch": 0.02815, "grad_norm": 0.44373124837875366, "learning_rate": 3.3145e-05, "loss": 0.0753, "step": 6630 }, { "epoch": 0.0282, "grad_norm": 0.49943041801452637, "learning_rate": 3.3195e-05, "loss": 0.0796, "step": 6640 }, { "epoch": 0.02825, "grad_norm": 0.3910506069660187, "learning_rate": 3.3245000000000004e-05, "loss": 0.0741, "step": 6650 }, { "epoch": 0.0283, "grad_norm": 0.394123911857605, "learning_rate": 3.3295000000000006e-05, "loss": 0.0744, "step": 6660 }, { "epoch": 0.02835, "grad_norm": 0.431263267993927, "learning_rate": 3.334500000000001e-05, "loss": 0.0779, "step": 6670 }, { "epoch": 0.0284, "grad_norm": 0.5385127067565918, "learning_rate": 3.3395e-05, "loss": 0.0731, "step": 6680 }, { "epoch": 0.02845, "grad_norm": 0.3577653765678406, "learning_rate": 3.3445e-05, "loss": 0.0731, "step": 6690 }, { "epoch": 0.0285, "grad_norm": 0.5011255145072937, "learning_rate": 3.3495000000000004e-05, "loss": 0.0797, "step": 6700 }, { "epoch": 0.02855, "grad_norm": 0.5084308981895447, "learning_rate": 3.3545000000000005e-05, "loss": 0.0772, "step": 6710 }, { "epoch": 0.0286, "grad_norm": 0.47546619176864624, "learning_rate": 3.3595000000000006e-05, "loss": 0.0739, "step": 6720 }, { "epoch": 0.02865, "grad_norm": 0.38033586740493774, "learning_rate": 3.364500000000001e-05, "loss": 0.0795, "step": 6730 }, { "epoch": 0.0287, "grad_norm": 0.4411650002002716, "learning_rate": 3.3695e-05, "loss": 0.0734, "step": 6740 }, { "epoch": 0.02875, "grad_norm": 0.32608819007873535, "learning_rate": 3.3745e-05, "loss": 0.0713, "step": 6750 }, { "epoch": 0.0288, "grad_norm": 0.4803429841995239, "learning_rate": 3.3795e-05, "loss": 0.0765, "step": 6760 }, { "epoch": 0.02885, "grad_norm": 0.38406142592430115, "learning_rate": 3.3845e-05, "loss": 0.0717, "step": 6770 }, { "epoch": 0.0289, "grad_norm": 0.4204261004924774, "learning_rate": 3.3895e-05, "loss": 0.0737, "step": 6780 }, { "epoch": 0.02895, "grad_norm": 0.37816375494003296, "learning_rate": 3.3945e-05, "loss": 0.0743, "step": 6790 }, { "epoch": 0.029, "grad_norm": 0.3421754240989685, "learning_rate": 3.3995e-05, "loss": 0.0693, "step": 6800 }, { "epoch": 0.02905, "grad_norm": 0.4057287871837616, "learning_rate": 3.4045e-05, "loss": 0.0754, "step": 6810 }, { "epoch": 0.0291, "grad_norm": 0.329377144575119, "learning_rate": 3.4095e-05, "loss": 0.071, "step": 6820 }, { "epoch": 0.02915, "grad_norm": 0.41998857259750366, "learning_rate": 3.4145e-05, "loss": 0.0811, "step": 6830 }, { "epoch": 0.0292, "grad_norm": 0.5486347675323486, "learning_rate": 3.4195e-05, "loss": 0.0773, "step": 6840 }, { "epoch": 0.02925, "grad_norm": 0.3410201966762543, "learning_rate": 3.4245e-05, "loss": 0.0707, "step": 6850 }, { "epoch": 0.0293, "grad_norm": 0.4516546130180359, "learning_rate": 3.4294999999999996e-05, "loss": 0.0742, "step": 6860 }, { "epoch": 0.02935, "grad_norm": 0.37678903341293335, "learning_rate": 3.4345e-05, "loss": 0.0717, "step": 6870 }, { "epoch": 0.0294, "grad_norm": 0.46914374828338623, "learning_rate": 3.4395e-05, "loss": 0.0718, "step": 6880 }, { "epoch": 0.02945, "grad_norm": 0.4036213755607605, "learning_rate": 3.4445e-05, "loss": 0.0751, "step": 6890 }, { "epoch": 0.0295, "grad_norm": 0.41932258009910583, "learning_rate": 3.4495e-05, "loss": 0.0699, "step": 6900 }, { "epoch": 0.02955, "grad_norm": 0.37729623913764954, "learning_rate": 3.4545e-05, "loss": 0.0725, "step": 6910 }, { "epoch": 0.0296, "grad_norm": 0.33412089943885803, "learning_rate": 3.4594999999999997e-05, "loss": 0.0677, "step": 6920 }, { "epoch": 0.02965, "grad_norm": 0.42308661341667175, "learning_rate": 3.4645e-05, "loss": 0.0758, "step": 6930 }, { "epoch": 0.0297, "grad_norm": 0.41113191843032837, "learning_rate": 3.4695e-05, "loss": 0.0746, "step": 6940 }, { "epoch": 0.02975, "grad_norm": 0.5308328866958618, "learning_rate": 3.4745e-05, "loss": 0.0713, "step": 6950 }, { "epoch": 0.0298, "grad_norm": 0.44157448410987854, "learning_rate": 3.4795e-05, "loss": 0.0756, "step": 6960 }, { "epoch": 0.02985, "grad_norm": 0.5505300760269165, "learning_rate": 3.4845e-05, "loss": 0.0765, "step": 6970 }, { "epoch": 0.0299, "grad_norm": 0.39519307017326355, "learning_rate": 3.4895e-05, "loss": 0.0755, "step": 6980 }, { "epoch": 0.02995, "grad_norm": 0.4539121687412262, "learning_rate": 3.4945e-05, "loss": 0.0747, "step": 6990 }, { "epoch": 0.03, "grad_norm": 0.5236254930496216, "learning_rate": 3.4995e-05, "loss": 0.0763, "step": 7000 }, { "epoch": 0.03005, "grad_norm": 0.36866137385368347, "learning_rate": 3.5045e-05, "loss": 0.0777, "step": 7010 }, { "epoch": 0.0301, "grad_norm": 0.3630446195602417, "learning_rate": 3.5095e-05, "loss": 0.0745, "step": 7020 }, { "epoch": 0.03015, "grad_norm": 0.4937750995159149, "learning_rate": 3.5145e-05, "loss": 0.0762, "step": 7030 }, { "epoch": 0.0302, "grad_norm": 0.39128226041793823, "learning_rate": 3.5195e-05, "loss": 0.0838, "step": 7040 }, { "epoch": 0.03025, "grad_norm": 0.41970768570899963, "learning_rate": 3.5245e-05, "loss": 0.0794, "step": 7050 }, { "epoch": 0.0303, "grad_norm": 0.4620768129825592, "learning_rate": 3.5295e-05, "loss": 0.0764, "step": 7060 }, { "epoch": 0.03035, "grad_norm": 0.5133053064346313, "learning_rate": 3.5345e-05, "loss": 0.0736, "step": 7070 }, { "epoch": 0.0304, "grad_norm": 0.3818191587924957, "learning_rate": 3.5395e-05, "loss": 0.0729, "step": 7080 }, { "epoch": 0.03045, "grad_norm": 0.4005877375602722, "learning_rate": 3.5445000000000004e-05, "loss": 0.0757, "step": 7090 }, { "epoch": 0.0305, "grad_norm": 0.4175821840763092, "learning_rate": 3.5495e-05, "loss": 0.0756, "step": 7100 }, { "epoch": 0.03055, "grad_norm": 0.38189101219177246, "learning_rate": 3.5545e-05, "loss": 0.0746, "step": 7110 }, { "epoch": 0.0306, "grad_norm": 0.3358469009399414, "learning_rate": 3.5595e-05, "loss": 0.0737, "step": 7120 }, { "epoch": 0.03065, "grad_norm": 0.33312109112739563, "learning_rate": 3.5645e-05, "loss": 0.0712, "step": 7130 }, { "epoch": 0.0307, "grad_norm": 0.3312031924724579, "learning_rate": 3.5695e-05, "loss": 0.0753, "step": 7140 }, { "epoch": 0.03075, "grad_norm": 0.3859456181526184, "learning_rate": 3.5745e-05, "loss": 0.074, "step": 7150 }, { "epoch": 0.0308, "grad_norm": 0.3346497118473053, "learning_rate": 3.5795e-05, "loss": 0.0787, "step": 7160 }, { "epoch": 0.03085, "grad_norm": 0.4235798418521881, "learning_rate": 3.5845e-05, "loss": 0.0695, "step": 7170 }, { "epoch": 0.0309, "grad_norm": 0.41039881110191345, "learning_rate": 3.5895e-05, "loss": 0.0694, "step": 7180 }, { "epoch": 0.03095, "grad_norm": 0.3948202431201935, "learning_rate": 3.5945e-05, "loss": 0.0736, "step": 7190 }, { "epoch": 0.031, "grad_norm": 0.37504926323890686, "learning_rate": 3.5995000000000004e-05, "loss": 0.0734, "step": 7200 }, { "epoch": 0.03105, "grad_norm": 0.34111249446868896, "learning_rate": 3.6045e-05, "loss": 0.068, "step": 7210 }, { "epoch": 0.0311, "grad_norm": 0.32981258630752563, "learning_rate": 3.6095e-05, "loss": 0.0727, "step": 7220 }, { "epoch": 0.03115, "grad_norm": 0.3568899631500244, "learning_rate": 3.6145e-05, "loss": 0.0717, "step": 7230 }, { "epoch": 0.0312, "grad_norm": 0.3659535348415375, "learning_rate": 3.6195e-05, "loss": 0.0763, "step": 7240 }, { "epoch": 0.03125, "grad_norm": 0.32447150349617004, "learning_rate": 3.6245e-05, "loss": 0.0704, "step": 7250 }, { "epoch": 0.0313, "grad_norm": 0.516423225402832, "learning_rate": 3.6295000000000004e-05, "loss": 0.0727, "step": 7260 }, { "epoch": 0.03135, "grad_norm": 0.5282039642333984, "learning_rate": 3.6345e-05, "loss": 0.0726, "step": 7270 }, { "epoch": 0.0314, "grad_norm": 0.4386370778083801, "learning_rate": 3.6395e-05, "loss": 0.0707, "step": 7280 }, { "epoch": 0.03145, "grad_norm": 0.41988563537597656, "learning_rate": 3.6445e-05, "loss": 0.0733, "step": 7290 }, { "epoch": 0.0315, "grad_norm": 0.355773001909256, "learning_rate": 3.6495e-05, "loss": 0.0722, "step": 7300 }, { "epoch": 0.03155, "grad_norm": 0.33527055382728577, "learning_rate": 3.6545e-05, "loss": 0.0695, "step": 7310 }, { "epoch": 0.0316, "grad_norm": 0.30877190828323364, "learning_rate": 3.6595000000000005e-05, "loss": 0.0696, "step": 7320 }, { "epoch": 0.03165, "grad_norm": 0.4743344783782959, "learning_rate": 3.6645e-05, "loss": 0.0692, "step": 7330 }, { "epoch": 0.0317, "grad_norm": 0.49953508377075195, "learning_rate": 3.6695e-05, "loss": 0.0759, "step": 7340 }, { "epoch": 0.03175, "grad_norm": 0.4573311507701874, "learning_rate": 3.6745e-05, "loss": 0.0721, "step": 7350 }, { "epoch": 0.0318, "grad_norm": 0.3666287958621979, "learning_rate": 3.6795e-05, "loss": 0.071, "step": 7360 }, { "epoch": 0.03185, "grad_norm": 0.3707403242588043, "learning_rate": 3.6845000000000004e-05, "loss": 0.0764, "step": 7370 }, { "epoch": 0.0319, "grad_norm": 0.36320701241493225, "learning_rate": 3.6895000000000005e-05, "loss": 0.0701, "step": 7380 }, { "epoch": 0.03195, "grad_norm": 0.34615108370780945, "learning_rate": 3.6945e-05, "loss": 0.0679, "step": 7390 }, { "epoch": 0.032, "grad_norm": 0.42103487253189087, "learning_rate": 3.6995e-05, "loss": 0.0725, "step": 7400 }, { "epoch": 0.03205, "grad_norm": 0.5118808746337891, "learning_rate": 3.7045e-05, "loss": 0.071, "step": 7410 }, { "epoch": 0.0321, "grad_norm": 0.502199649810791, "learning_rate": 3.7095e-05, "loss": 0.0749, "step": 7420 }, { "epoch": 0.03215, "grad_norm": 0.31548601388931274, "learning_rate": 3.7145000000000004e-05, "loss": 0.0739, "step": 7430 }, { "epoch": 0.0322, "grad_norm": 0.3683542311191559, "learning_rate": 3.7195e-05, "loss": 0.07, "step": 7440 }, { "epoch": 0.03225, "grad_norm": 0.3420860469341278, "learning_rate": 3.7245e-05, "loss": 0.0708, "step": 7450 }, { "epoch": 0.0323, "grad_norm": 0.32849380373954773, "learning_rate": 3.7295e-05, "loss": 0.0726, "step": 7460 }, { "epoch": 0.03235, "grad_norm": 0.4532844126224518, "learning_rate": 3.7345e-05, "loss": 0.07, "step": 7470 }, { "epoch": 0.0324, "grad_norm": 0.5196378827095032, "learning_rate": 3.7395000000000004e-05, "loss": 0.0726, "step": 7480 }, { "epoch": 0.03245, "grad_norm": 0.346918523311615, "learning_rate": 3.7445000000000005e-05, "loss": 0.0713, "step": 7490 }, { "epoch": 0.0325, "grad_norm": 0.402946412563324, "learning_rate": 3.7495e-05, "loss": 0.0727, "step": 7500 }, { "epoch": 0.03255, "grad_norm": 0.3576841354370117, "learning_rate": 3.7545e-05, "loss": 0.0747, "step": 7510 }, { "epoch": 0.0326, "grad_norm": 0.3537071645259857, "learning_rate": 3.7595e-05, "loss": 0.072, "step": 7520 }, { "epoch": 0.03265, "grad_norm": 0.3819756507873535, "learning_rate": 3.7645e-05, "loss": 0.0653, "step": 7530 }, { "epoch": 0.0327, "grad_norm": 0.44822728633880615, "learning_rate": 3.7695000000000004e-05, "loss": 0.0711, "step": 7540 }, { "epoch": 0.03275, "grad_norm": 0.3307861089706421, "learning_rate": 3.7745000000000005e-05, "loss": 0.0706, "step": 7550 }, { "epoch": 0.0328, "grad_norm": 0.3189656138420105, "learning_rate": 3.7795e-05, "loss": 0.0691, "step": 7560 }, { "epoch": 0.03285, "grad_norm": 0.45738330483436584, "learning_rate": 3.7845e-05, "loss": 0.0713, "step": 7570 }, { "epoch": 0.0329, "grad_norm": 0.4482582211494446, "learning_rate": 3.7895e-05, "loss": 0.0707, "step": 7580 }, { "epoch": 0.03295, "grad_norm": 0.4654321074485779, "learning_rate": 3.7945000000000003e-05, "loss": 0.0749, "step": 7590 }, { "epoch": 0.033, "grad_norm": 0.4446773827075958, "learning_rate": 3.7995000000000005e-05, "loss": 0.0719, "step": 7600 }, { "epoch": 0.03305, "grad_norm": 0.3221088647842407, "learning_rate": 3.8045000000000006e-05, "loss": 0.0741, "step": 7610 }, { "epoch": 0.0331, "grad_norm": 0.33431246876716614, "learning_rate": 3.8095e-05, "loss": 0.0727, "step": 7620 }, { "epoch": 0.03315, "grad_norm": 0.3938763439655304, "learning_rate": 3.8145e-05, "loss": 0.0763, "step": 7630 }, { "epoch": 0.0332, "grad_norm": 0.34474390745162964, "learning_rate": 3.8195e-05, "loss": 0.0743, "step": 7640 }, { "epoch": 0.03325, "grad_norm": 0.3402387797832489, "learning_rate": 3.8245000000000004e-05, "loss": 0.071, "step": 7650 }, { "epoch": 0.0333, "grad_norm": 0.34557628631591797, "learning_rate": 3.8295000000000005e-05, "loss": 0.0764, "step": 7660 }, { "epoch": 0.03335, "grad_norm": 0.2767280042171478, "learning_rate": 3.8345000000000006e-05, "loss": 0.0712, "step": 7670 }, { "epoch": 0.0334, "grad_norm": 0.37553533911705017, "learning_rate": 3.8395e-05, "loss": 0.0698, "step": 7680 }, { "epoch": 0.03345, "grad_norm": 0.4499758183956146, "learning_rate": 3.8445e-05, "loss": 0.0727, "step": 7690 }, { "epoch": 0.0335, "grad_norm": 0.4133255183696747, "learning_rate": 3.8495e-05, "loss": 0.0702, "step": 7700 }, { "epoch": 0.03355, "grad_norm": 0.3341597020626068, "learning_rate": 3.8545000000000004e-05, "loss": 0.0682, "step": 7710 }, { "epoch": 0.0336, "grad_norm": 0.3567278981208801, "learning_rate": 3.8595000000000006e-05, "loss": 0.0674, "step": 7720 }, { "epoch": 0.03365, "grad_norm": 0.43361714482307434, "learning_rate": 3.8645e-05, "loss": 0.0722, "step": 7730 }, { "epoch": 0.0337, "grad_norm": 0.41051551699638367, "learning_rate": 3.8695e-05, "loss": 0.0757, "step": 7740 }, { "epoch": 0.03375, "grad_norm": 0.4041409492492676, "learning_rate": 3.8745e-05, "loss": 0.0752, "step": 7750 }, { "epoch": 0.0338, "grad_norm": 0.41050779819488525, "learning_rate": 3.8795000000000004e-05, "loss": 0.0677, "step": 7760 }, { "epoch": 0.03385, "grad_norm": 0.36437398195266724, "learning_rate": 3.8845000000000005e-05, "loss": 0.0712, "step": 7770 }, { "epoch": 0.0339, "grad_norm": 0.4467727839946747, "learning_rate": 3.8895000000000006e-05, "loss": 0.0737, "step": 7780 }, { "epoch": 0.03395, "grad_norm": 0.4188295900821686, "learning_rate": 3.8945e-05, "loss": 0.0718, "step": 7790 }, { "epoch": 0.034, "grad_norm": 0.3202160894870758, "learning_rate": 3.8995e-05, "loss": 0.0675, "step": 7800 }, { "epoch": 0.03405, "grad_norm": 0.35536912083625793, "learning_rate": 3.9045e-05, "loss": 0.0711, "step": 7810 }, { "epoch": 0.0341, "grad_norm": 0.3401013910770416, "learning_rate": 3.9095000000000004e-05, "loss": 0.0665, "step": 7820 }, { "epoch": 0.03415, "grad_norm": 0.32901763916015625, "learning_rate": 3.9145000000000006e-05, "loss": 0.0709, "step": 7830 }, { "epoch": 0.0342, "grad_norm": 0.4919268786907196, "learning_rate": 3.919500000000001e-05, "loss": 0.0813, "step": 7840 }, { "epoch": 0.03425, "grad_norm": 0.33046674728393555, "learning_rate": 3.9245e-05, "loss": 0.0723, "step": 7850 }, { "epoch": 0.0343, "grad_norm": 0.29371678829193115, "learning_rate": 3.9295e-05, "loss": 0.0743, "step": 7860 }, { "epoch": 0.03435, "grad_norm": 0.3690441846847534, "learning_rate": 3.9345000000000004e-05, "loss": 0.071, "step": 7870 }, { "epoch": 0.0344, "grad_norm": 0.30826297402381897, "learning_rate": 3.9395000000000005e-05, "loss": 0.0704, "step": 7880 }, { "epoch": 0.03445, "grad_norm": 0.3399566113948822, "learning_rate": 3.9445000000000006e-05, "loss": 0.0708, "step": 7890 }, { "epoch": 0.0345, "grad_norm": 0.35554730892181396, "learning_rate": 3.949500000000001e-05, "loss": 0.0726, "step": 7900 }, { "epoch": 0.03455, "grad_norm": 0.3770660161972046, "learning_rate": 3.9545e-05, "loss": 0.0737, "step": 7910 }, { "epoch": 0.0346, "grad_norm": 0.3023923933506012, "learning_rate": 3.9595e-05, "loss": 0.0699, "step": 7920 }, { "epoch": 0.03465, "grad_norm": 0.31832584738731384, "learning_rate": 3.9645000000000004e-05, "loss": 0.0692, "step": 7930 }, { "epoch": 0.0347, "grad_norm": 0.30358853936195374, "learning_rate": 3.9695000000000005e-05, "loss": 0.0709, "step": 7940 }, { "epoch": 0.03475, "grad_norm": 0.3580412268638611, "learning_rate": 3.9745000000000007e-05, "loss": 0.0671, "step": 7950 }, { "epoch": 0.0348, "grad_norm": 0.36572298407554626, "learning_rate": 3.979500000000001e-05, "loss": 0.0712, "step": 7960 }, { "epoch": 0.03485, "grad_norm": 0.2903812527656555, "learning_rate": 3.9845e-05, "loss": 0.0748, "step": 7970 }, { "epoch": 0.0349, "grad_norm": 0.25910863280296326, "learning_rate": 3.9895000000000003e-05, "loss": 0.0686, "step": 7980 }, { "epoch": 0.03495, "grad_norm": 0.37598222494125366, "learning_rate": 3.9945000000000005e-05, "loss": 0.0651, "step": 7990 }, { "epoch": 0.035, "grad_norm": 0.32963132858276367, "learning_rate": 3.9995000000000006e-05, "loss": 0.0662, "step": 8000 }, { "epoch": 0.03505, "grad_norm": 0.36862391233444214, "learning_rate": 4.0045e-05, "loss": 0.068, "step": 8010 }, { "epoch": 0.0351, "grad_norm": 0.3572233021259308, "learning_rate": 4.0095e-05, "loss": 0.0682, "step": 8020 }, { "epoch": 0.03515, "grad_norm": 0.33086565136909485, "learning_rate": 4.0144999999999996e-05, "loss": 0.0644, "step": 8030 }, { "epoch": 0.0352, "grad_norm": 0.39653196930885315, "learning_rate": 4.0195e-05, "loss": 0.0727, "step": 8040 }, { "epoch": 0.03525, "grad_norm": 0.42832210659980774, "learning_rate": 4.0245e-05, "loss": 0.0723, "step": 8050 }, { "epoch": 0.0353, "grad_norm": 0.3951289653778076, "learning_rate": 4.0295e-05, "loss": 0.0741, "step": 8060 }, { "epoch": 0.03535, "grad_norm": 0.40113726258277893, "learning_rate": 4.0345e-05, "loss": 0.067, "step": 8070 }, { "epoch": 0.0354, "grad_norm": 0.3262624740600586, "learning_rate": 4.0395e-05, "loss": 0.0747, "step": 8080 }, { "epoch": 0.03545, "grad_norm": 0.35799640417099, "learning_rate": 4.0444999999999996e-05, "loss": 0.0733, "step": 8090 }, { "epoch": 0.0355, "grad_norm": 0.357382595539093, "learning_rate": 4.0495e-05, "loss": 0.0696, "step": 8100 }, { "epoch": 0.03555, "grad_norm": 0.36806145310401917, "learning_rate": 4.0545e-05, "loss": 0.0689, "step": 8110 }, { "epoch": 0.0356, "grad_norm": 0.4190371334552765, "learning_rate": 4.0595e-05, "loss": 0.0712, "step": 8120 }, { "epoch": 0.03565, "grad_norm": 0.3320857584476471, "learning_rate": 4.0645e-05, "loss": 0.0675, "step": 8130 }, { "epoch": 0.0357, "grad_norm": 0.4370461106300354, "learning_rate": 4.0695e-05, "loss": 0.0741, "step": 8140 }, { "epoch": 0.03575, "grad_norm": 0.3096306324005127, "learning_rate": 4.0745e-05, "loss": 0.0684, "step": 8150 }, { "epoch": 0.0358, "grad_norm": 0.42305925488471985, "learning_rate": 4.0795e-05, "loss": 0.0671, "step": 8160 }, { "epoch": 0.03585, "grad_norm": 0.4075618386268616, "learning_rate": 4.0845e-05, "loss": 0.0694, "step": 8170 }, { "epoch": 0.0359, "grad_norm": 0.3317747116088867, "learning_rate": 4.0895e-05, "loss": 0.0712, "step": 8180 }, { "epoch": 0.03595, "grad_norm": 0.34016817808151245, "learning_rate": 4.0945e-05, "loss": 0.0683, "step": 8190 }, { "epoch": 0.036, "grad_norm": 0.29989123344421387, "learning_rate": 4.0995e-05, "loss": 0.0683, "step": 8200 }, { "epoch": 0.03605, "grad_norm": 0.3664803206920624, "learning_rate": 4.1045e-05, "loss": 0.0675, "step": 8210 }, { "epoch": 0.0361, "grad_norm": 0.40872564911842346, "learning_rate": 4.1095e-05, "loss": 0.0663, "step": 8220 }, { "epoch": 0.03615, "grad_norm": 0.3482436239719391, "learning_rate": 4.1145e-05, "loss": 0.065, "step": 8230 }, { "epoch": 0.0362, "grad_norm": 0.3456083834171295, "learning_rate": 4.1195e-05, "loss": 0.0669, "step": 8240 }, { "epoch": 0.03625, "grad_norm": 0.45253556966781616, "learning_rate": 4.1245e-05, "loss": 0.0695, "step": 8250 }, { "epoch": 0.0363, "grad_norm": 0.329023540019989, "learning_rate": 4.1295000000000004e-05, "loss": 0.0665, "step": 8260 }, { "epoch": 0.03635, "grad_norm": 0.3185300827026367, "learning_rate": 4.1345e-05, "loss": 0.0655, "step": 8270 }, { "epoch": 0.0364, "grad_norm": 0.3669206500053406, "learning_rate": 4.1395e-05, "loss": 0.0668, "step": 8280 }, { "epoch": 0.03645, "grad_norm": 0.43072745203971863, "learning_rate": 4.1445e-05, "loss": 0.0665, "step": 8290 }, { "epoch": 0.0365, "grad_norm": 0.28496241569519043, "learning_rate": 4.1495e-05, "loss": 0.0677, "step": 8300 }, { "epoch": 0.03655, "grad_norm": 0.4361313581466675, "learning_rate": 4.1545e-05, "loss": 0.0696, "step": 8310 }, { "epoch": 0.0366, "grad_norm": 0.3698161244392395, "learning_rate": 4.1595e-05, "loss": 0.0742, "step": 8320 }, { "epoch": 0.03665, "grad_norm": 0.32634246349334717, "learning_rate": 4.1645e-05, "loss": 0.0704, "step": 8330 }, { "epoch": 0.0367, "grad_norm": 0.33289647102355957, "learning_rate": 4.1695e-05, "loss": 0.0674, "step": 8340 }, { "epoch": 0.03675, "grad_norm": 0.32005950808525085, "learning_rate": 4.1745e-05, "loss": 0.0693, "step": 8350 }, { "epoch": 0.0368, "grad_norm": 0.5573751926422119, "learning_rate": 4.1795e-05, "loss": 0.0707, "step": 8360 }, { "epoch": 0.03685, "grad_norm": 0.3519611656665802, "learning_rate": 4.1845000000000003e-05, "loss": 0.0673, "step": 8370 }, { "epoch": 0.0369, "grad_norm": 0.43575596809387207, "learning_rate": 4.1895e-05, "loss": 0.0664, "step": 8380 }, { "epoch": 0.03695, "grad_norm": 0.33544063568115234, "learning_rate": 4.1945e-05, "loss": 0.0642, "step": 8390 }, { "epoch": 0.037, "grad_norm": 0.3813968002796173, "learning_rate": 4.1995e-05, "loss": 0.0707, "step": 8400 }, { "epoch": 0.03705, "grad_norm": 0.25771671533584595, "learning_rate": 4.2045e-05, "loss": 0.0655, "step": 8410 }, { "epoch": 0.0371, "grad_norm": 0.3025599420070648, "learning_rate": 4.2095e-05, "loss": 0.0637, "step": 8420 }, { "epoch": 0.03715, "grad_norm": 0.30885520577430725, "learning_rate": 4.2145000000000004e-05, "loss": 0.0681, "step": 8430 }, { "epoch": 0.0372, "grad_norm": 0.37138083577156067, "learning_rate": 4.2195e-05, "loss": 0.065, "step": 8440 }, { "epoch": 0.03725, "grad_norm": 0.33118507266044617, "learning_rate": 4.2245e-05, "loss": 0.0632, "step": 8450 }, { "epoch": 0.0373, "grad_norm": 0.4132819175720215, "learning_rate": 4.2295e-05, "loss": 0.0687, "step": 8460 }, { "epoch": 0.03735, "grad_norm": 0.43236279487609863, "learning_rate": 4.2345e-05, "loss": 0.0646, "step": 8470 }, { "epoch": 0.0374, "grad_norm": 0.3995482921600342, "learning_rate": 4.2395e-05, "loss": 0.0656, "step": 8480 }, { "epoch": 0.03745, "grad_norm": 0.42754000425338745, "learning_rate": 4.2445000000000004e-05, "loss": 0.0671, "step": 8490 }, { "epoch": 0.0375, "grad_norm": 0.39753013849258423, "learning_rate": 4.2495e-05, "loss": 0.069, "step": 8500 }, { "epoch": 0.03755, "grad_norm": 0.38752812147140503, "learning_rate": 4.2545e-05, "loss": 0.0651, "step": 8510 }, { "epoch": 0.0376, "grad_norm": 0.44947749376296997, "learning_rate": 4.2595e-05, "loss": 0.0753, "step": 8520 }, { "epoch": 0.03765, "grad_norm": 0.33878234028816223, "learning_rate": 4.2645e-05, "loss": 0.0666, "step": 8530 }, { "epoch": 0.0377, "grad_norm": 0.3755720555782318, "learning_rate": 4.2695000000000004e-05, "loss": 0.0688, "step": 8540 }, { "epoch": 0.03775, "grad_norm": 0.3157779574394226, "learning_rate": 4.2745000000000005e-05, "loss": 0.0676, "step": 8550 }, { "epoch": 0.0378, "grad_norm": 0.29580649733543396, "learning_rate": 4.2795e-05, "loss": 0.0677, "step": 8560 }, { "epoch": 0.03785, "grad_norm": 0.3034764230251312, "learning_rate": 4.2845e-05, "loss": 0.0656, "step": 8570 }, { "epoch": 0.0379, "grad_norm": 0.30412721633911133, "learning_rate": 4.2895e-05, "loss": 0.0672, "step": 8580 }, { "epoch": 0.03795, "grad_norm": 0.3045472502708435, "learning_rate": 4.2945e-05, "loss": 0.0663, "step": 8590 }, { "epoch": 0.038, "grad_norm": 0.3242882490158081, "learning_rate": 4.2995000000000004e-05, "loss": 0.067, "step": 8600 }, { "epoch": 0.03805, "grad_norm": 0.3049640357494354, "learning_rate": 4.3045e-05, "loss": 0.068, "step": 8610 }, { "epoch": 0.0381, "grad_norm": 0.2670859098434448, "learning_rate": 4.3095e-05, "loss": 0.0676, "step": 8620 }, { "epoch": 0.03815, "grad_norm": 0.34805092215538025, "learning_rate": 4.3145e-05, "loss": 0.0645, "step": 8630 }, { "epoch": 0.0382, "grad_norm": 0.34513089060783386, "learning_rate": 4.3195e-05, "loss": 0.0654, "step": 8640 }, { "epoch": 0.03825, "grad_norm": 0.36328983306884766, "learning_rate": 4.3245000000000004e-05, "loss": 0.0657, "step": 8650 }, { "epoch": 0.0383, "grad_norm": 0.34282222390174866, "learning_rate": 4.3295000000000005e-05, "loss": 0.068, "step": 8660 }, { "epoch": 0.03835, "grad_norm": 0.3558262586593628, "learning_rate": 4.3345e-05, "loss": 0.0724, "step": 8670 }, { "epoch": 0.0384, "grad_norm": 0.33850905299186707, "learning_rate": 4.3395e-05, "loss": 0.0671, "step": 8680 }, { "epoch": 0.03845, "grad_norm": 0.43555590510368347, "learning_rate": 4.3445e-05, "loss": 0.0657, "step": 8690 }, { "epoch": 0.0385, "grad_norm": 0.4613133668899536, "learning_rate": 4.3495e-05, "loss": 0.0696, "step": 8700 }, { "epoch": 0.03855, "grad_norm": 0.3406635820865631, "learning_rate": 4.3545000000000004e-05, "loss": 0.07, "step": 8710 }, { "epoch": 0.0386, "grad_norm": 0.32975125312805176, "learning_rate": 4.3595000000000005e-05, "loss": 0.0713, "step": 8720 }, { "epoch": 0.03865, "grad_norm": 0.4285775125026703, "learning_rate": 4.3645e-05, "loss": 0.069, "step": 8730 }, { "epoch": 0.0387, "grad_norm": 0.2535092830657959, "learning_rate": 4.3695e-05, "loss": 0.0685, "step": 8740 }, { "epoch": 0.03875, "grad_norm": 0.30236580967903137, "learning_rate": 4.3745e-05, "loss": 0.0693, "step": 8750 }, { "epoch": 0.0388, "grad_norm": 0.3505745828151703, "learning_rate": 4.3795e-05, "loss": 0.0655, "step": 8760 }, { "epoch": 0.03885, "grad_norm": 0.2992234230041504, "learning_rate": 4.3845000000000005e-05, "loss": 0.0661, "step": 8770 }, { "epoch": 0.0389, "grad_norm": 0.34178248047828674, "learning_rate": 4.3895000000000006e-05, "loss": 0.0703, "step": 8780 }, { "epoch": 0.03895, "grad_norm": 0.36263760924339294, "learning_rate": 4.3945e-05, "loss": 0.0674, "step": 8790 }, { "epoch": 0.039, "grad_norm": 0.26600268483161926, "learning_rate": 4.3995e-05, "loss": 0.0649, "step": 8800 }, { "epoch": 0.03905, "grad_norm": 0.2860242426395416, "learning_rate": 4.4045e-05, "loss": 0.0646, "step": 8810 }, { "epoch": 0.0391, "grad_norm": 0.326742023229599, "learning_rate": 4.4095000000000004e-05, "loss": 0.0661, "step": 8820 }, { "epoch": 0.03915, "grad_norm": 0.32793718576431274, "learning_rate": 4.4145000000000005e-05, "loss": 0.0651, "step": 8830 }, { "epoch": 0.0392, "grad_norm": 0.3359118103981018, "learning_rate": 4.4195000000000006e-05, "loss": 0.0649, "step": 8840 }, { "epoch": 0.03925, "grad_norm": 0.3868449032306671, "learning_rate": 4.4245e-05, "loss": 0.0645, "step": 8850 }, { "epoch": 0.0393, "grad_norm": 0.27781063318252563, "learning_rate": 4.4295e-05, "loss": 0.0622, "step": 8860 }, { "epoch": 0.03935, "grad_norm": 0.30407023429870605, "learning_rate": 4.4345e-05, "loss": 0.0622, "step": 8870 }, { "epoch": 0.0394, "grad_norm": 0.3161301612854004, "learning_rate": 4.4395000000000004e-05, "loss": 0.0639, "step": 8880 }, { "epoch": 0.03945, "grad_norm": 0.33792781829833984, "learning_rate": 4.4445000000000006e-05, "loss": 0.0646, "step": 8890 }, { "epoch": 0.0395, "grad_norm": 0.30785366892814636, "learning_rate": 4.4495e-05, "loss": 0.0652, "step": 8900 }, { "epoch": 0.03955, "grad_norm": 0.4145120084285736, "learning_rate": 4.4545e-05, "loss": 0.0661, "step": 8910 }, { "epoch": 0.0396, "grad_norm": 0.3020155131816864, "learning_rate": 4.4595e-05, "loss": 0.0693, "step": 8920 }, { "epoch": 0.03965, "grad_norm": 0.4017809331417084, "learning_rate": 4.4645000000000004e-05, "loss": 0.0677, "step": 8930 }, { "epoch": 0.0397, "grad_norm": 0.3599850535392761, "learning_rate": 4.4695000000000005e-05, "loss": 0.0681, "step": 8940 }, { "epoch": 0.03975, "grad_norm": 0.3779388666152954, "learning_rate": 4.4745000000000006e-05, "loss": 0.0663, "step": 8950 }, { "epoch": 0.0398, "grad_norm": 0.30472904443740845, "learning_rate": 4.4795e-05, "loss": 0.0685, "step": 8960 }, { "epoch": 0.03985, "grad_norm": 0.38446706533432007, "learning_rate": 4.4845e-05, "loss": 0.0669, "step": 8970 }, { "epoch": 0.0399, "grad_norm": 0.49220919609069824, "learning_rate": 4.4895e-05, "loss": 0.0696, "step": 8980 }, { "epoch": 0.03995, "grad_norm": 0.353708416223526, "learning_rate": 4.4945000000000004e-05, "loss": 0.0688, "step": 8990 }, { "epoch": 0.04, "grad_norm": 0.3147698938846588, "learning_rate": 4.4995000000000005e-05, "loss": 0.0646, "step": 9000 }, { "epoch": 0.04005, "grad_norm": 0.3192857503890991, "learning_rate": 4.504500000000001e-05, "loss": 0.069, "step": 9010 }, { "epoch": 0.0401, "grad_norm": 0.38566654920578003, "learning_rate": 4.5095e-05, "loss": 0.0652, "step": 9020 }, { "epoch": 0.04015, "grad_norm": 0.37652164697647095, "learning_rate": 4.5145e-05, "loss": 0.0689, "step": 9030 }, { "epoch": 0.0402, "grad_norm": 0.32042309641838074, "learning_rate": 4.5195000000000004e-05, "loss": 0.0651, "step": 9040 }, { "epoch": 0.04025, "grad_norm": 0.3594474792480469, "learning_rate": 4.5245000000000005e-05, "loss": 0.0665, "step": 9050 }, { "epoch": 0.0403, "grad_norm": 0.33755066990852356, "learning_rate": 4.5295000000000006e-05, "loss": 0.0672, "step": 9060 }, { "epoch": 0.04035, "grad_norm": 0.30055055022239685, "learning_rate": 4.534500000000001e-05, "loss": 0.0671, "step": 9070 }, { "epoch": 0.0404, "grad_norm": 0.30003151297569275, "learning_rate": 4.5395e-05, "loss": 0.0641, "step": 9080 }, { "epoch": 0.04045, "grad_norm": 0.3057911992073059, "learning_rate": 4.5445e-05, "loss": 0.0664, "step": 9090 }, { "epoch": 0.0405, "grad_norm": 0.30321067571640015, "learning_rate": 4.5495000000000004e-05, "loss": 0.0642, "step": 9100 }, { "epoch": 0.04055, "grad_norm": 0.2936624586582184, "learning_rate": 4.5545000000000005e-05, "loss": 0.0654, "step": 9110 }, { "epoch": 0.0406, "grad_norm": 0.3208167850971222, "learning_rate": 4.5595000000000006e-05, "loss": 0.0616, "step": 9120 }, { "epoch": 0.04065, "grad_norm": 0.3730127513408661, "learning_rate": 4.564500000000001e-05, "loss": 0.0649, "step": 9130 }, { "epoch": 0.0407, "grad_norm": 0.3969462811946869, "learning_rate": 4.5695e-05, "loss": 0.0656, "step": 9140 }, { "epoch": 0.04075, "grad_norm": 0.3340781033039093, "learning_rate": 4.5745e-05, "loss": 0.0676, "step": 9150 }, { "epoch": 0.0408, "grad_norm": 0.3071921169757843, "learning_rate": 4.5795000000000005e-05, "loss": 0.0696, "step": 9160 }, { "epoch": 0.04085, "grad_norm": 0.31838589906692505, "learning_rate": 4.5845000000000006e-05, "loss": 0.0602, "step": 9170 }, { "epoch": 0.0409, "grad_norm": 0.3813740909099579, "learning_rate": 4.589500000000001e-05, "loss": 0.0678, "step": 9180 }, { "epoch": 0.04095, "grad_norm": 0.30758917331695557, "learning_rate": 4.5945e-05, "loss": 0.0698, "step": 9190 }, { "epoch": 0.041, "grad_norm": 0.41876235604286194, "learning_rate": 4.5995e-05, "loss": 0.064, "step": 9200 }, { "epoch": 0.04105, "grad_norm": 0.32810330390930176, "learning_rate": 4.6045000000000004e-05, "loss": 0.0635, "step": 9210 }, { "epoch": 0.0411, "grad_norm": 0.41681545972824097, "learning_rate": 4.6095000000000005e-05, "loss": 0.0628, "step": 9220 }, { "epoch": 0.04115, "grad_norm": 0.33085331320762634, "learning_rate": 4.6145000000000006e-05, "loss": 0.0606, "step": 9230 }, { "epoch": 0.0412, "grad_norm": 0.3884361684322357, "learning_rate": 4.619500000000001e-05, "loss": 0.0672, "step": 9240 }, { "epoch": 0.04125, "grad_norm": 0.3489012122154236, "learning_rate": 4.6245e-05, "loss": 0.0677, "step": 9250 }, { "epoch": 0.0413, "grad_norm": 0.29971393942832947, "learning_rate": 4.6294999999999996e-05, "loss": 0.0643, "step": 9260 }, { "epoch": 0.04135, "grad_norm": 0.37529256939888, "learning_rate": 4.6345e-05, "loss": 0.065, "step": 9270 }, { "epoch": 0.0414, "grad_norm": 0.32133549451828003, "learning_rate": 4.6395e-05, "loss": 0.0641, "step": 9280 }, { "epoch": 0.04145, "grad_norm": 0.3157539963722229, "learning_rate": 4.6445e-05, "loss": 0.0597, "step": 9290 }, { "epoch": 0.0415, "grad_norm": 0.28702154755592346, "learning_rate": 4.6495e-05, "loss": 0.0635, "step": 9300 }, { "epoch": 0.04155, "grad_norm": 0.3248656988143921, "learning_rate": 4.6545e-05, "loss": 0.0687, "step": 9310 }, { "epoch": 0.0416, "grad_norm": 0.30441874265670776, "learning_rate": 4.6595e-05, "loss": 0.0648, "step": 9320 }, { "epoch": 0.04165, "grad_norm": 0.3399603068828583, "learning_rate": 4.6645e-05, "loss": 0.0654, "step": 9330 }, { "epoch": 0.0417, "grad_norm": 0.3140687048435211, "learning_rate": 4.6695e-05, "loss": 0.0657, "step": 9340 }, { "epoch": 0.04175, "grad_norm": 0.37293708324432373, "learning_rate": 4.6745e-05, "loss": 0.0621, "step": 9350 }, { "epoch": 0.0418, "grad_norm": 0.3210163414478302, "learning_rate": 4.6795e-05, "loss": 0.0633, "step": 9360 }, { "epoch": 0.04185, "grad_norm": 0.3903563320636749, "learning_rate": 4.6845e-05, "loss": 0.0769, "step": 9370 }, { "epoch": 0.0419, "grad_norm": 0.3045681118965149, "learning_rate": 4.6895e-05, "loss": 0.0667, "step": 9380 }, { "epoch": 0.04195, "grad_norm": 0.3322887718677521, "learning_rate": 4.6945e-05, "loss": 0.0637, "step": 9390 }, { "epoch": 0.042, "grad_norm": 0.2262994796037674, "learning_rate": 4.6995e-05, "loss": 0.0638, "step": 9400 }, { "epoch": 0.04205, "grad_norm": 0.2820618152618408, "learning_rate": 4.7045e-05, "loss": 0.0638, "step": 9410 }, { "epoch": 0.0421, "grad_norm": 0.3317321538925171, "learning_rate": 4.7095e-05, "loss": 0.0691, "step": 9420 }, { "epoch": 0.04215, "grad_norm": 0.43117380142211914, "learning_rate": 4.7145000000000003e-05, "loss": 0.0686, "step": 9430 }, { "epoch": 0.0422, "grad_norm": 0.3164169490337372, "learning_rate": 4.7195e-05, "loss": 0.0666, "step": 9440 }, { "epoch": 0.04225, "grad_norm": 0.27061155438423157, "learning_rate": 4.7245e-05, "loss": 0.0652, "step": 9450 }, { "epoch": 0.0423, "grad_norm": 0.32976457476615906, "learning_rate": 4.7295e-05, "loss": 0.0652, "step": 9460 }, { "epoch": 0.04235, "grad_norm": 0.2550129294395447, "learning_rate": 4.7345e-05, "loss": 0.064, "step": 9470 }, { "epoch": 0.0424, "grad_norm": 0.3532482385635376, "learning_rate": 4.7395e-05, "loss": 0.0667, "step": 9480 }, { "epoch": 0.04245, "grad_norm": 0.3003195524215698, "learning_rate": 4.7445e-05, "loss": 0.0681, "step": 9490 }, { "epoch": 0.0425, "grad_norm": 0.42255571484565735, "learning_rate": 4.7495e-05, "loss": 0.0643, "step": 9500 }, { "epoch": 0.04255, "grad_norm": 0.3724454939365387, "learning_rate": 4.7545e-05, "loss": 0.0669, "step": 9510 }, { "epoch": 0.0426, "grad_norm": 0.3489012122154236, "learning_rate": 4.7595e-05, "loss": 0.0632, "step": 9520 }, { "epoch": 0.04265, "grad_norm": 0.33121049404144287, "learning_rate": 4.7645e-05, "loss": 0.0667, "step": 9530 }, { "epoch": 0.0427, "grad_norm": 0.35532277822494507, "learning_rate": 4.7695e-05, "loss": 0.0692, "step": 9540 }, { "epoch": 0.04275, "grad_norm": 0.28395581245422363, "learning_rate": 4.7745e-05, "loss": 0.0634, "step": 9550 }, { "epoch": 0.0428, "grad_norm": 0.25116708874702454, "learning_rate": 4.7795e-05, "loss": 0.0647, "step": 9560 }, { "epoch": 0.04285, "grad_norm": 0.32771411538124084, "learning_rate": 4.7845e-05, "loss": 0.0633, "step": 9570 }, { "epoch": 0.0429, "grad_norm": 0.3212730884552002, "learning_rate": 4.7895e-05, "loss": 0.0629, "step": 9580 }, { "epoch": 0.04295, "grad_norm": 0.2865977883338928, "learning_rate": 4.7945e-05, "loss": 0.0633, "step": 9590 }, { "epoch": 0.043, "grad_norm": 0.29332104325294495, "learning_rate": 4.7995000000000004e-05, "loss": 0.064, "step": 9600 }, { "epoch": 0.04305, "grad_norm": 0.28664249181747437, "learning_rate": 4.8045e-05, "loss": 0.0602, "step": 9610 }, { "epoch": 0.0431, "grad_norm": 0.3150465786457062, "learning_rate": 4.8095e-05, "loss": 0.0644, "step": 9620 }, { "epoch": 0.04315, "grad_norm": 0.3172779083251953, "learning_rate": 4.8145e-05, "loss": 0.065, "step": 9630 }, { "epoch": 0.0432, "grad_norm": 0.2557525038719177, "learning_rate": 4.8195e-05, "loss": 0.0621, "step": 9640 }, { "epoch": 0.04325, "grad_norm": 0.35068729519844055, "learning_rate": 4.8245e-05, "loss": 0.0641, "step": 9650 }, { "epoch": 0.0433, "grad_norm": 0.36322087049484253, "learning_rate": 4.8295000000000004e-05, "loss": 0.0623, "step": 9660 }, { "epoch": 0.04335, "grad_norm": 0.2819206416606903, "learning_rate": 4.8345e-05, "loss": 0.0635, "step": 9670 }, { "epoch": 0.0434, "grad_norm": 0.25681859254837036, "learning_rate": 4.8395e-05, "loss": 0.0603, "step": 9680 }, { "epoch": 0.04345, "grad_norm": 0.35684749484062195, "learning_rate": 4.8445e-05, "loss": 0.0655, "step": 9690 }, { "epoch": 0.0435, "grad_norm": 0.3112817704677582, "learning_rate": 4.8495e-05, "loss": 0.069, "step": 9700 }, { "epoch": 0.04355, "grad_norm": 0.3422347605228424, "learning_rate": 4.8545000000000004e-05, "loss": 0.064, "step": 9710 }, { "epoch": 0.0436, "grad_norm": 0.2875858247280121, "learning_rate": 4.8595000000000005e-05, "loss": 0.0626, "step": 9720 }, { "epoch": 0.04365, "grad_norm": 0.25618475675582886, "learning_rate": 4.8645e-05, "loss": 0.0648, "step": 9730 }, { "epoch": 0.0437, "grad_norm": 0.26029500365257263, "learning_rate": 4.8695e-05, "loss": 0.0634, "step": 9740 }, { "epoch": 0.04375, "grad_norm": 0.2977071702480316, "learning_rate": 4.8745e-05, "loss": 0.0673, "step": 9750 }, { "epoch": 0.0438, "grad_norm": 0.21176737546920776, "learning_rate": 4.8795e-05, "loss": 0.0613, "step": 9760 }, { "epoch": 0.04385, "grad_norm": 0.30652615427970886, "learning_rate": 4.8845000000000004e-05, "loss": 0.0639, "step": 9770 }, { "epoch": 0.0439, "grad_norm": 0.38696739077568054, "learning_rate": 4.8895e-05, "loss": 0.0628, "step": 9780 }, { "epoch": 0.04395, "grad_norm": 0.3368676006793976, "learning_rate": 4.8945e-05, "loss": 0.0657, "step": 9790 }, { "epoch": 0.044, "grad_norm": 0.3058677315711975, "learning_rate": 4.8995e-05, "loss": 0.0602, "step": 9800 }, { "epoch": 0.04405, "grad_norm": 0.2631063461303711, "learning_rate": 4.9045e-05, "loss": 0.0631, "step": 9810 }, { "epoch": 0.0441, "grad_norm": 0.2672768533229828, "learning_rate": 4.9095000000000003e-05, "loss": 0.0631, "step": 9820 }, { "epoch": 0.04415, "grad_norm": 0.3295259177684784, "learning_rate": 4.9145000000000005e-05, "loss": 0.0675, "step": 9830 }, { "epoch": 0.0442, "grad_norm": 0.32516661286354065, "learning_rate": 4.9195e-05, "loss": 0.0616, "step": 9840 }, { "epoch": 0.04425, "grad_norm": 0.3430582284927368, "learning_rate": 4.9245e-05, "loss": 0.0659, "step": 9850 }, { "epoch": 0.0443, "grad_norm": 0.27236413955688477, "learning_rate": 4.9295e-05, "loss": 0.0613, "step": 9860 }, { "epoch": 0.04435, "grad_norm": 0.30401864647865295, "learning_rate": 4.9345e-05, "loss": 0.0637, "step": 9870 }, { "epoch": 0.0444, "grad_norm": 0.3249342143535614, "learning_rate": 4.9395000000000004e-05, "loss": 0.0651, "step": 9880 }, { "epoch": 0.04445, "grad_norm": 0.3392452299594879, "learning_rate": 4.9445000000000005e-05, "loss": 0.0622, "step": 9890 }, { "epoch": 0.0445, "grad_norm": 0.31362077593803406, "learning_rate": 4.9495e-05, "loss": 0.0629, "step": 9900 }, { "epoch": 0.04455, "grad_norm": 0.26321685314178467, "learning_rate": 4.9545e-05, "loss": 0.0616, "step": 9910 }, { "epoch": 0.0446, "grad_norm": 0.3222457766532898, "learning_rate": 4.9595e-05, "loss": 0.0646, "step": 9920 }, { "epoch": 0.04465, "grad_norm": 0.3142569065093994, "learning_rate": 4.9645e-05, "loss": 0.0607, "step": 9930 }, { "epoch": 0.0447, "grad_norm": 0.2683854103088379, "learning_rate": 4.9695000000000004e-05, "loss": 0.0615, "step": 9940 }, { "epoch": 0.04475, "grad_norm": 0.31706681847572327, "learning_rate": 4.9745000000000006e-05, "loss": 0.0608, "step": 9950 }, { "epoch": 0.0448, "grad_norm": 0.38556215167045593, "learning_rate": 4.9795e-05, "loss": 0.0637, "step": 9960 }, { "epoch": 0.04485, "grad_norm": 0.29253655672073364, "learning_rate": 4.9845e-05, "loss": 0.0634, "step": 9970 }, { "epoch": 0.0449, "grad_norm": 0.3062693774700165, "learning_rate": 4.9895e-05, "loss": 0.0652, "step": 9980 }, { "epoch": 0.04495, "grad_norm": 0.3026590943336487, "learning_rate": 4.9945000000000004e-05, "loss": 0.0602, "step": 9990 }, { "epoch": 0.045, "grad_norm": 0.35445278882980347, "learning_rate": 4.9995000000000005e-05, "loss": 0.0611, "step": 10000 }, { "epoch": 0.04505, "grad_norm": 0.3197009265422821, "learning_rate": 4.999999972318631e-05, "loss": 0.0633, "step": 10010 }, { "epoch": 0.0451, "grad_norm": 0.3079759478569031, "learning_rate": 4.999999876629946e-05, "loss": 0.0673, "step": 10020 }, { "epoch": 0.04515, "grad_norm": 0.29146572947502136, "learning_rate": 4.9999997125922047e-05, "loss": 0.0636, "step": 10030 }, { "epoch": 0.0452, "grad_norm": 0.26313549280166626, "learning_rate": 4.9999994802054094e-05, "loss": 0.0613, "step": 10040 }, { "epoch": 0.04525, "grad_norm": 0.30985012650489807, "learning_rate": 4.999999179469568e-05, "loss": 0.0636, "step": 10050 }, { "epoch": 0.0453, "grad_norm": 0.3062456250190735, "learning_rate": 4.9999988103846886e-05, "loss": 0.0641, "step": 10060 }, { "epoch": 0.04535, "grad_norm": 0.32784801721572876, "learning_rate": 4.9999983729507806e-05, "loss": 0.0591, "step": 10070 }, { "epoch": 0.0454, "grad_norm": 0.32923054695129395, "learning_rate": 4.999997867167856e-05, "loss": 0.0635, "step": 10080 }, { "epoch": 0.04545, "grad_norm": 0.2901904881000519, "learning_rate": 4.99999729303593e-05, "loss": 0.0636, "step": 10090 }, { "epoch": 0.0455, "grad_norm": 0.29496315121650696, "learning_rate": 4.999996650555017e-05, "loss": 0.0649, "step": 10100 }, { "epoch": 0.04555, "grad_norm": 0.33649277687072754, "learning_rate": 4.999995939725134e-05, "loss": 0.0634, "step": 10110 }, { "epoch": 0.0456, "grad_norm": 0.30145928263664246, "learning_rate": 4.9999951605463015e-05, "loss": 0.0625, "step": 10120 }, { "epoch": 0.04565, "grad_norm": 0.3132384717464447, "learning_rate": 4.99999431301854e-05, "loss": 0.0645, "step": 10130 }, { "epoch": 0.0457, "grad_norm": 0.24817843735218048, "learning_rate": 4.999993397141874e-05, "loss": 0.0618, "step": 10140 }, { "epoch": 0.04575, "grad_norm": 0.2636968195438385, "learning_rate": 4.999992412916327e-05, "loss": 0.068, "step": 10150 }, { "epoch": 0.0458, "grad_norm": 0.36269840598106384, "learning_rate": 4.999991360341927e-05, "loss": 0.0638, "step": 10160 }, { "epoch": 0.04585, "grad_norm": 0.43733730912208557, "learning_rate": 4.9999902394187024e-05, "loss": 0.0663, "step": 10170 }, { "epoch": 0.0459, "grad_norm": 0.32769593596458435, "learning_rate": 4.999989050146684e-05, "loss": 0.0639, "step": 10180 }, { "epoch": 0.04595, "grad_norm": 0.269916296005249, "learning_rate": 4.999987792525904e-05, "loss": 0.0656, "step": 10190 }, { "epoch": 0.046, "grad_norm": 0.2898986041545868, "learning_rate": 4.9999864665563975e-05, "loss": 0.0661, "step": 10200 }, { "epoch": 0.04605, "grad_norm": 0.32004767656326294, "learning_rate": 4.999985072238199e-05, "loss": 0.0655, "step": 10210 }, { "epoch": 0.0461, "grad_norm": 0.2851164937019348, "learning_rate": 4.999983609571349e-05, "loss": 0.0605, "step": 10220 }, { "epoch": 0.04615, "grad_norm": 0.22278539836406708, "learning_rate": 4.999982078555886e-05, "loss": 0.0658, "step": 10230 }, { "epoch": 0.0462, "grad_norm": 0.31230485439300537, "learning_rate": 4.999980479191852e-05, "loss": 0.0636, "step": 10240 }, { "epoch": 0.04625, "grad_norm": 0.2774222493171692, "learning_rate": 4.999978811479291e-05, "loss": 0.0618, "step": 10250 }, { "epoch": 0.0463, "grad_norm": 0.22640889883041382, "learning_rate": 4.999977075418248e-05, "loss": 0.0625, "step": 10260 }, { "epoch": 0.04635, "grad_norm": 0.23389208316802979, "learning_rate": 4.999975271008772e-05, "loss": 0.0623, "step": 10270 }, { "epoch": 0.0464, "grad_norm": 0.25524914264678955, "learning_rate": 4.99997339825091e-05, "loss": 0.0583, "step": 10280 }, { "epoch": 0.04645, "grad_norm": 0.26234862208366394, "learning_rate": 4.999971457144715e-05, "loss": 0.0585, "step": 10290 }, { "epoch": 0.0465, "grad_norm": 0.2599146366119385, "learning_rate": 4.9999694476902404e-05, "loss": 0.0618, "step": 10300 }, { "epoch": 0.04655, "grad_norm": 0.24796922504901886, "learning_rate": 4.9999673698875393e-05, "loss": 0.0576, "step": 10310 }, { "epoch": 0.0466, "grad_norm": 0.31469812989234924, "learning_rate": 4.99996522373667e-05, "loss": 0.061, "step": 10320 }, { "epoch": 0.04665, "grad_norm": 0.2860850989818573, "learning_rate": 4.99996300923769e-05, "loss": 0.0617, "step": 10330 }, { "epoch": 0.0467, "grad_norm": 0.2586144804954529, "learning_rate": 4.999960726390662e-05, "loss": 0.059, "step": 10340 }, { "epoch": 0.04675, "grad_norm": 0.2587771415710449, "learning_rate": 4.9999583751956455e-05, "loss": 0.0596, "step": 10350 }, { "epoch": 0.0468, "grad_norm": 0.22751501202583313, "learning_rate": 4.999955955652706e-05, "loss": 0.0582, "step": 10360 }, { "epoch": 0.04685, "grad_norm": 0.2889695167541504, "learning_rate": 4.999953467761911e-05, "loss": 0.0612, "step": 10370 }, { "epoch": 0.0469, "grad_norm": 0.27310308814048767, "learning_rate": 4.9999509115233275e-05, "loss": 0.0612, "step": 10380 }, { "epoch": 0.04695, "grad_norm": 0.2685157358646393, "learning_rate": 4.999948286937024e-05, "loss": 0.0592, "step": 10390 }, { "epoch": 0.047, "grad_norm": 0.24792355298995972, "learning_rate": 4.9999455940030746e-05, "loss": 0.059, "step": 10400 }, { "epoch": 0.04705, "grad_norm": 0.26915544271469116, "learning_rate": 4.999942832721551e-05, "loss": 0.0577, "step": 10410 }, { "epoch": 0.0471, "grad_norm": 0.28766775131225586, "learning_rate": 4.99994000309253e-05, "loss": 0.0606, "step": 10420 }, { "epoch": 0.04715, "grad_norm": 0.32877805829048157, "learning_rate": 4.9999371051160893e-05, "loss": 0.0605, "step": 10430 }, { "epoch": 0.0472, "grad_norm": 0.3041923940181732, "learning_rate": 4.999934138792306e-05, "loss": 0.0591, "step": 10440 }, { "epoch": 0.04725, "grad_norm": 0.2929264307022095, "learning_rate": 4.999931104121263e-05, "loss": 0.0621, "step": 10450 }, { "epoch": 0.0473, "grad_norm": 0.2993503510951996, "learning_rate": 4.999928001103043e-05, "loss": 0.0598, "step": 10460 }, { "epoch": 0.04735, "grad_norm": 0.33387285470962524, "learning_rate": 4.99992482973773e-05, "loss": 0.0581, "step": 10470 }, { "epoch": 0.0474, "grad_norm": 0.30160704255104065, "learning_rate": 4.999921590025412e-05, "loss": 0.0656, "step": 10480 }, { "epoch": 0.04745, "grad_norm": 0.2685299217700958, "learning_rate": 4.9999182819661774e-05, "loss": 0.0628, "step": 10490 }, { "epoch": 0.0475, "grad_norm": 0.2762181758880615, "learning_rate": 4.999914905560115e-05, "loss": 0.0636, "step": 10500 }, { "epoch": 0.04755, "grad_norm": 0.315581351518631, "learning_rate": 4.999911460807318e-05, "loss": 0.0625, "step": 10510 }, { "epoch": 0.0476, "grad_norm": 0.34477338194847107, "learning_rate": 4.999907947707882e-05, "loss": 0.0626, "step": 10520 }, { "epoch": 0.04765, "grad_norm": 0.2868213355541229, "learning_rate": 4.999904366261902e-05, "loss": 0.0612, "step": 10530 }, { "epoch": 0.0477, "grad_norm": 0.27307435870170593, "learning_rate": 4.999900716469475e-05, "loss": 0.0634, "step": 10540 }, { "epoch": 0.04775, "grad_norm": 0.3192185163497925, "learning_rate": 4.999896998330701e-05, "loss": 0.059, "step": 10550 }, { "epoch": 0.0478, "grad_norm": 0.2786751091480255, "learning_rate": 4.999893211845684e-05, "loss": 0.0579, "step": 10560 }, { "epoch": 0.04785, "grad_norm": 0.26636555790901184, "learning_rate": 4.9998893570145243e-05, "loss": 0.0598, "step": 10570 }, { "epoch": 0.0479, "grad_norm": 0.28624773025512695, "learning_rate": 4.999885433837329e-05, "loss": 0.0639, "step": 10580 }, { "epoch": 0.04795, "grad_norm": 0.26311632990837097, "learning_rate": 4.999881442314206e-05, "loss": 0.0599, "step": 10590 }, { "epoch": 0.048, "grad_norm": 0.33889976143836975, "learning_rate": 4.999877382445263e-05, "loss": 0.0602, "step": 10600 }, { "epoch": 0.04805, "grad_norm": 0.25893542170524597, "learning_rate": 4.999873254230611e-05, "loss": 0.0615, "step": 10610 }, { "epoch": 0.0481, "grad_norm": 0.2611432671546936, "learning_rate": 4.9998690576703635e-05, "loss": 0.0589, "step": 10620 }, { "epoch": 0.04815, "grad_norm": 0.23470397293567657, "learning_rate": 4.9998647927646355e-05, "loss": 0.0582, "step": 10630 }, { "epoch": 0.0482, "grad_norm": 0.2380250096321106, "learning_rate": 4.9998604595135435e-05, "loss": 0.0593, "step": 10640 }, { "epoch": 0.04825, "grad_norm": 0.35195785760879517, "learning_rate": 4.999856057917205e-05, "loss": 0.0636, "step": 10650 }, { "epoch": 0.0483, "grad_norm": 0.3239285349845886, "learning_rate": 4.9998515879757415e-05, "loss": 0.0652, "step": 10660 }, { "epoch": 0.04835, "grad_norm": 0.32478657364845276, "learning_rate": 4.9998470496892746e-05, "loss": 0.0632, "step": 10670 }, { "epoch": 0.0484, "grad_norm": 0.2464042752981186, "learning_rate": 4.999842443057928e-05, "loss": 0.0584, "step": 10680 }, { "epoch": 0.04845, "grad_norm": 0.2592853009700775, "learning_rate": 4.9998377680818286e-05, "loss": 0.0595, "step": 10690 }, { "epoch": 0.0485, "grad_norm": 0.25503969192504883, "learning_rate": 4.999833024761104e-05, "loss": 0.058, "step": 10700 }, { "epoch": 0.04855, "grad_norm": 0.2166227400302887, "learning_rate": 4.999828213095883e-05, "loss": 0.0581, "step": 10710 }, { "epoch": 0.0486, "grad_norm": 0.3099343180656433, "learning_rate": 4.9998233330862984e-05, "loss": 0.0616, "step": 10720 }, { "epoch": 0.04865, "grad_norm": 0.3154681324958801, "learning_rate": 4.9998183847324833e-05, "loss": 0.0577, "step": 10730 }, { "epoch": 0.0487, "grad_norm": 0.2956450283527374, "learning_rate": 4.999813368034572e-05, "loss": 0.0591, "step": 10740 }, { "epoch": 0.04875, "grad_norm": 0.34081724286079407, "learning_rate": 4.999808282992703e-05, "loss": 0.0616, "step": 10750 }, { "epoch": 0.0488, "grad_norm": 0.24640336632728577, "learning_rate": 4.9998031296070144e-05, "loss": 0.0605, "step": 10760 }, { "epoch": 0.04885, "grad_norm": 0.25215527415275574, "learning_rate": 4.999797907877647e-05, "loss": 0.0593, "step": 10770 }, { "epoch": 0.0489, "grad_norm": 0.2916519343852997, "learning_rate": 4.999792617804744e-05, "loss": 0.059, "step": 10780 }, { "epoch": 0.04895, "grad_norm": 0.30040955543518066, "learning_rate": 4.99978725938845e-05, "loss": 0.0611, "step": 10790 }, { "epoch": 0.049, "grad_norm": 0.2178073674440384, "learning_rate": 4.999781832628911e-05, "loss": 0.0586, "step": 10800 }, { "epoch": 0.04905, "grad_norm": 0.2435062974691391, "learning_rate": 4.999776337526277e-05, "loss": 0.0605, "step": 10810 }, { "epoch": 0.0491, "grad_norm": 0.2754117548465729, "learning_rate": 4.999770774080696e-05, "loss": 0.0585, "step": 10820 }, { "epoch": 0.04915, "grad_norm": 0.25847530364990234, "learning_rate": 4.999765142292322e-05, "loss": 0.0574, "step": 10830 }, { "epoch": 0.0492, "grad_norm": 0.2868664860725403, "learning_rate": 4.999759442161308e-05, "loss": 0.0591, "step": 10840 }, { "epoch": 0.04925, "grad_norm": 0.2413976490497589, "learning_rate": 4.99975367368781e-05, "loss": 0.0602, "step": 10850 }, { "epoch": 0.0493, "grad_norm": 0.2344127595424652, "learning_rate": 4.999747836871985e-05, "loss": 0.0636, "step": 10860 }, { "epoch": 0.04935, "grad_norm": 0.24767453968524933, "learning_rate": 4.999741931713994e-05, "loss": 0.0589, "step": 10870 }, { "epoch": 0.0494, "grad_norm": 0.28867772221565247, "learning_rate": 4.9997359582139966e-05, "loss": 0.0627, "step": 10880 }, { "epoch": 0.04945, "grad_norm": 0.3197946846485138, "learning_rate": 4.999729916372159e-05, "loss": 0.0634, "step": 10890 }, { "epoch": 0.0495, "grad_norm": 0.29469308257102966, "learning_rate": 4.999723806188643e-05, "loss": 0.0612, "step": 10900 }, { "epoch": 0.04955, "grad_norm": 0.23049208521842957, "learning_rate": 4.999717627663618e-05, "loss": 0.0595, "step": 10910 }, { "epoch": 0.0496, "grad_norm": 0.24875399470329285, "learning_rate": 4.9997113807972516e-05, "loss": 0.0623, "step": 10920 }, { "epoch": 0.04965, "grad_norm": 0.21667809784412384, "learning_rate": 4.999705065589716e-05, "loss": 0.0618, "step": 10930 }, { "epoch": 0.0497, "grad_norm": 0.26928454637527466, "learning_rate": 4.9996986820411824e-05, "loss": 0.0607, "step": 10940 }, { "epoch": 0.04975, "grad_norm": 0.30384913086891174, "learning_rate": 4.999692230151825e-05, "loss": 0.0617, "step": 10950 }, { "epoch": 0.0498, "grad_norm": 0.3048796057701111, "learning_rate": 4.999685709921823e-05, "loss": 0.0608, "step": 10960 }, { "epoch": 0.04985, "grad_norm": 0.25846514105796814, "learning_rate": 4.999679121351352e-05, "loss": 0.0623, "step": 10970 }, { "epoch": 0.0499, "grad_norm": 0.28279492259025574, "learning_rate": 4.999672464440592e-05, "loss": 0.0604, "step": 10980 }, { "epoch": 0.04995, "grad_norm": 0.24530085921287537, "learning_rate": 4.999665739189727e-05, "loss": 0.0569, "step": 10990 }, { "epoch": 0.05, "grad_norm": 0.42206844687461853, "learning_rate": 4.9996589455989404e-05, "loss": 0.0626, "step": 11000 }, { "epoch": 0.05005, "grad_norm": 0.24767158925533295, "learning_rate": 4.999652083668416e-05, "loss": 0.0609, "step": 11010 }, { "epoch": 0.0501, "grad_norm": 0.273266077041626, "learning_rate": 4.9996451533983435e-05, "loss": 0.0619, "step": 11020 }, { "epoch": 0.05015, "grad_norm": 0.30322688817977905, "learning_rate": 4.99963815478891e-05, "loss": 0.0622, "step": 11030 }, { "epoch": 0.0502, "grad_norm": 0.2822934091091156, "learning_rate": 4.9996310878403106e-05, "loss": 0.06, "step": 11040 }, { "epoch": 0.05025, "grad_norm": 0.3341050446033478, "learning_rate": 4.9996239525527356e-05, "loss": 0.0579, "step": 11050 }, { "epoch": 0.0503, "grad_norm": 0.2769171893596649, "learning_rate": 4.99961674892638e-05, "loss": 0.0567, "step": 11060 }, { "epoch": 0.05035, "grad_norm": 0.2524805963039398, "learning_rate": 4.999609476961442e-05, "loss": 0.0596, "step": 11070 }, { "epoch": 0.0504, "grad_norm": 0.2670571208000183, "learning_rate": 4.99960213665812e-05, "loss": 0.0632, "step": 11080 }, { "epoch": 0.05045, "grad_norm": 0.26169538497924805, "learning_rate": 4.999594728016615e-05, "loss": 0.0636, "step": 11090 }, { "epoch": 0.0505, "grad_norm": 0.30789557099342346, "learning_rate": 4.999587251037129e-05, "loss": 0.0651, "step": 11100 }, { "epoch": 0.05055, "grad_norm": 0.2783627510070801, "learning_rate": 4.999579705719866e-05, "loss": 0.0615, "step": 11110 }, { "epoch": 0.0506, "grad_norm": 0.3286147117614746, "learning_rate": 4.999572092065034e-05, "loss": 0.0612, "step": 11120 }, { "epoch": 0.05065, "grad_norm": 0.2502022385597229, "learning_rate": 4.999564410072839e-05, "loss": 0.0613, "step": 11130 }, { "epoch": 0.0507, "grad_norm": 0.23253153264522552, "learning_rate": 4.999556659743493e-05, "loss": 0.0585, "step": 11140 }, { "epoch": 0.05075, "grad_norm": 0.21726743876934052, "learning_rate": 4.999548841077206e-05, "loss": 0.059, "step": 11150 }, { "epoch": 0.0508, "grad_norm": 0.2250111848115921, "learning_rate": 4.9995409540741934e-05, "loss": 0.0597, "step": 11160 }, { "epoch": 0.05085, "grad_norm": 0.22477279603481293, "learning_rate": 4.99953299873467e-05, "loss": 0.0629, "step": 11170 }, { "epoch": 0.0509, "grad_norm": 0.2970663011074066, "learning_rate": 4.9995249750588535e-05, "loss": 0.0627, "step": 11180 }, { "epoch": 0.05095, "grad_norm": 0.2567269802093506, "learning_rate": 4.999516883046963e-05, "loss": 0.0585, "step": 11190 }, { "epoch": 0.051, "grad_norm": 0.2891165614128113, "learning_rate": 4.99950872269922e-05, "loss": 0.06, "step": 11200 }, { "epoch": 0.05105, "grad_norm": 0.3167526125907898, "learning_rate": 4.9995004940158476e-05, "loss": 0.0616, "step": 11210 }, { "epoch": 0.0511, "grad_norm": 0.3454287648200989, "learning_rate": 4.99949219699707e-05, "loss": 0.0652, "step": 11220 }, { "epoch": 0.05115, "grad_norm": 0.28335583209991455, "learning_rate": 4.999483831643116e-05, "loss": 0.0659, "step": 11230 }, { "epoch": 0.0512, "grad_norm": 0.28895190358161926, "learning_rate": 4.999475397954213e-05, "loss": 0.0603, "step": 11240 }, { "epoch": 0.05125, "grad_norm": 0.2975988984107971, "learning_rate": 4.99946689593059e-05, "loss": 0.0614, "step": 11250 }, { "epoch": 0.0513, "grad_norm": 0.2765286862850189, "learning_rate": 4.999458325572483e-05, "loss": 0.0584, "step": 11260 }, { "epoch": 0.05135, "grad_norm": 0.2741754651069641, "learning_rate": 4.999449686880123e-05, "loss": 0.0663, "step": 11270 }, { "epoch": 0.0514, "grad_norm": 0.28087082505226135, "learning_rate": 4.999440979853749e-05, "loss": 0.0589, "step": 11280 }, { "epoch": 0.05145, "grad_norm": 0.2920054793357849, "learning_rate": 4.999432204493596e-05, "loss": 0.0627, "step": 11290 }, { "epoch": 0.0515, "grad_norm": 0.25342732667922974, "learning_rate": 4.9994233607999064e-05, "loss": 0.0571, "step": 11300 }, { "epoch": 0.05155, "grad_norm": 0.258687824010849, "learning_rate": 4.999414448772921e-05, "loss": 0.0605, "step": 11310 }, { "epoch": 0.0516, "grad_norm": 0.26928308606147766, "learning_rate": 4.999405468412883e-05, "loss": 0.0582, "step": 11320 }, { "epoch": 0.05165, "grad_norm": 0.20206747949123383, "learning_rate": 4.9993964197200394e-05, "loss": 0.0551, "step": 11330 }, { "epoch": 0.0517, "grad_norm": 0.2057802826166153, "learning_rate": 4.999387302694636e-05, "loss": 0.0537, "step": 11340 }, { "epoch": 0.05175, "grad_norm": 0.23069089651107788, "learning_rate": 4.999378117336924e-05, "loss": 0.0584, "step": 11350 }, { "epoch": 0.0518, "grad_norm": 0.33335962891578674, "learning_rate": 4.9993688636471516e-05, "loss": 0.0623, "step": 11360 }, { "epoch": 0.05185, "grad_norm": 0.2621992528438568, "learning_rate": 4.999359541625574e-05, "loss": 0.0598, "step": 11370 }, { "epoch": 0.0519, "grad_norm": 0.2107490450143814, "learning_rate": 4.9993501512724463e-05, "loss": 0.0545, "step": 11380 }, { "epoch": 0.05195, "grad_norm": 0.24193622171878815, "learning_rate": 4.9993406925880233e-05, "loss": 0.0563, "step": 11390 }, { "epoch": 0.052, "grad_norm": 0.23652245104312897, "learning_rate": 4.999331165572565e-05, "loss": 0.058, "step": 11400 }, { "epoch": 0.05205, "grad_norm": 0.29875287413597107, "learning_rate": 4.9993215702263316e-05, "loss": 0.0597, "step": 11410 }, { "epoch": 0.0521, "grad_norm": 0.2521936297416687, "learning_rate": 4.999311906549585e-05, "loss": 0.057, "step": 11420 }, { "epoch": 0.05215, "grad_norm": 0.3101290762424469, "learning_rate": 4.999302174542591e-05, "loss": 0.0588, "step": 11430 }, { "epoch": 0.0522, "grad_norm": 0.2589113712310791, "learning_rate": 4.999292374205614e-05, "loss": 0.0603, "step": 11440 }, { "epoch": 0.05225, "grad_norm": 0.25411316752433777, "learning_rate": 4.999282505538922e-05, "loss": 0.0636, "step": 11450 }, { "epoch": 0.0523, "grad_norm": 0.2493228316307068, "learning_rate": 4.999272568542785e-05, "loss": 0.0572, "step": 11460 }, { "epoch": 0.05235, "grad_norm": 0.27624061703681946, "learning_rate": 4.999262563217476e-05, "loss": 0.0577, "step": 11470 }, { "epoch": 0.0524, "grad_norm": 0.23362897336483002, "learning_rate": 4.999252489563267e-05, "loss": 0.0585, "step": 11480 }, { "epoch": 0.05245, "grad_norm": 0.27527329325675964, "learning_rate": 4.999242347580434e-05, "loss": 0.0609, "step": 11490 }, { "epoch": 0.0525, "grad_norm": 0.2193654328584671, "learning_rate": 4.9992321372692535e-05, "loss": 0.0578, "step": 11500 }, { "epoch": 0.05255, "grad_norm": 0.2288377285003662, "learning_rate": 4.999221858630005e-05, "loss": 0.0581, "step": 11510 }, { "epoch": 0.0526, "grad_norm": 0.2176082879304886, "learning_rate": 4.9992115116629714e-05, "loss": 0.058, "step": 11520 }, { "epoch": 0.05265, "grad_norm": 0.21936215460300446, "learning_rate": 4.9992010963684325e-05, "loss": 0.0577, "step": 11530 }, { "epoch": 0.0527, "grad_norm": 0.2650482952594757, "learning_rate": 4.999190612746675e-05, "loss": 0.0592, "step": 11540 }, { "epoch": 0.05275, "grad_norm": 0.2885134816169739, "learning_rate": 4.999180060797986e-05, "loss": 0.0575, "step": 11550 }, { "epoch": 0.0528, "grad_norm": 0.21044647693634033, "learning_rate": 4.999169440522652e-05, "loss": 0.0638, "step": 11560 }, { "epoch": 0.05285, "grad_norm": 0.21848969161510468, "learning_rate": 4.999158751920964e-05, "loss": 0.0567, "step": 11570 }, { "epoch": 0.0529, "grad_norm": 0.2269868552684784, "learning_rate": 4.999147994993215e-05, "loss": 0.0605, "step": 11580 }, { "epoch": 0.05295, "grad_norm": 0.24068693816661835, "learning_rate": 4.999137169739699e-05, "loss": 0.0562, "step": 11590 }, { "epoch": 0.053, "grad_norm": 0.2279026210308075, "learning_rate": 4.999126276160711e-05, "loss": 0.0551, "step": 11600 }, { "epoch": 0.05305, "grad_norm": 0.2190355807542801, "learning_rate": 4.9991153142565506e-05, "loss": 0.0563, "step": 11610 }, { "epoch": 0.0531, "grad_norm": 0.2589868903160095, "learning_rate": 4.999104284027516e-05, "loss": 0.0586, "step": 11620 }, { "epoch": 0.05315, "grad_norm": 0.24582260847091675, "learning_rate": 4.9990931854739084e-05, "loss": 0.0571, "step": 11630 }, { "epoch": 0.0532, "grad_norm": 0.2815797030925751, "learning_rate": 4.999082018596033e-05, "loss": 0.0587, "step": 11640 }, { "epoch": 0.05325, "grad_norm": 0.3051389753818512, "learning_rate": 4.999070783394193e-05, "loss": 0.0615, "step": 11650 }, { "epoch": 0.0533, "grad_norm": 0.2518411874771118, "learning_rate": 4.9990594798686975e-05, "loss": 0.057, "step": 11660 }, { "epoch": 0.05335, "grad_norm": 0.2967738211154938, "learning_rate": 4.9990481080198546e-05, "loss": 0.0617, "step": 11670 }, { "epoch": 0.0534, "grad_norm": 0.32430994510650635, "learning_rate": 4.999036667847975e-05, "loss": 0.0586, "step": 11680 }, { "epoch": 0.05345, "grad_norm": 0.2484043687582016, "learning_rate": 4.999025159353372e-05, "loss": 0.0581, "step": 11690 }, { "epoch": 0.0535, "grad_norm": 0.2939985394477844, "learning_rate": 4.999013582536359e-05, "loss": 0.0574, "step": 11700 }, { "epoch": 0.05355, "grad_norm": 0.23284538090229034, "learning_rate": 4.9990019373972554e-05, "loss": 0.0581, "step": 11710 }, { "epoch": 0.0536, "grad_norm": 0.2365046888589859, "learning_rate": 4.9989902239363765e-05, "loss": 0.057, "step": 11720 }, { "epoch": 0.05365, "grad_norm": 0.2517213821411133, "learning_rate": 4.998978442154043e-05, "loss": 0.0563, "step": 11730 }, { "epoch": 0.0537, "grad_norm": 0.21513904631137848, "learning_rate": 4.998966592050579e-05, "loss": 0.0566, "step": 11740 }, { "epoch": 0.05375, "grad_norm": 0.2141963541507721, "learning_rate": 4.9989546736263066e-05, "loss": 0.0611, "step": 11750 }, { "epoch": 0.0538, "grad_norm": 0.2445666640996933, "learning_rate": 4.998942686881553e-05, "loss": 0.0566, "step": 11760 }, { "epoch": 0.05385, "grad_norm": 0.3019103705883026, "learning_rate": 4.998930631816644e-05, "loss": 0.0597, "step": 11770 }, { "epoch": 0.0539, "grad_norm": 0.25654977560043335, "learning_rate": 4.99891850843191e-05, "loss": 0.0568, "step": 11780 }, { "epoch": 0.05395, "grad_norm": 0.30889660120010376, "learning_rate": 4.998906316727684e-05, "loss": 0.0589, "step": 11790 }, { "epoch": 0.054, "grad_norm": 0.26153361797332764, "learning_rate": 4.9988940567042975e-05, "loss": 0.057, "step": 11800 }, { "epoch": 0.05405, "grad_norm": 0.25084272027015686, "learning_rate": 4.998881728362086e-05, "loss": 0.0605, "step": 11810 }, { "epoch": 0.0541, "grad_norm": 0.25785118341445923, "learning_rate": 4.998869331701388e-05, "loss": 0.0651, "step": 11820 }, { "epoch": 0.05415, "grad_norm": 0.2896030843257904, "learning_rate": 4.99885686672254e-05, "loss": 0.0596, "step": 11830 }, { "epoch": 0.0542, "grad_norm": 0.2596687078475952, "learning_rate": 4.998844333425885e-05, "loss": 0.0586, "step": 11840 }, { "epoch": 0.05425, "grad_norm": 0.28756245970726013, "learning_rate": 4.998831731811764e-05, "loss": 0.0591, "step": 11850 }, { "epoch": 0.0543, "grad_norm": 0.23252467811107635, "learning_rate": 4.998819061880523e-05, "loss": 0.0589, "step": 11860 }, { "epoch": 0.05435, "grad_norm": 0.3250674903392792, "learning_rate": 4.998806323632507e-05, "loss": 0.0593, "step": 11870 }, { "epoch": 0.0544, "grad_norm": 0.23451420664787292, "learning_rate": 4.998793517068065e-05, "loss": 0.0575, "step": 11880 }, { "epoch": 0.05445, "grad_norm": 0.25425106287002563, "learning_rate": 4.9987806421875465e-05, "loss": 0.0572, "step": 11890 }, { "epoch": 0.0545, "grad_norm": 0.23875631392002106, "learning_rate": 4.998767698991304e-05, "loss": 0.0602, "step": 11900 }, { "epoch": 0.05455, "grad_norm": 0.24599017202854156, "learning_rate": 4.998754687479692e-05, "loss": 0.0591, "step": 11910 }, { "epoch": 0.0546, "grad_norm": 0.2623789310455322, "learning_rate": 4.998741607653066e-05, "loss": 0.0617, "step": 11920 }, { "epoch": 0.05465, "grad_norm": 0.2282388061285019, "learning_rate": 4.9987284595117825e-05, "loss": 0.0566, "step": 11930 }, { "epoch": 0.0547, "grad_norm": 0.22660212218761444, "learning_rate": 4.998715243056201e-05, "loss": 0.0545, "step": 11940 }, { "epoch": 0.05475, "grad_norm": 0.20677420496940613, "learning_rate": 4.9987019582866844e-05, "loss": 0.0552, "step": 11950 }, { "epoch": 0.0548, "grad_norm": 0.24677255749702454, "learning_rate": 4.9986886052035954e-05, "loss": 0.0555, "step": 11960 }, { "epoch": 0.05485, "grad_norm": 0.23030756413936615, "learning_rate": 4.998675183807298e-05, "loss": 0.0596, "step": 11970 }, { "epoch": 0.0549, "grad_norm": 0.2902722656726837, "learning_rate": 4.99866169409816e-05, "loss": 0.0591, "step": 11980 }, { "epoch": 0.05495, "grad_norm": 0.310017853975296, "learning_rate": 4.998648136076549e-05, "loss": 0.0576, "step": 11990 }, { "epoch": 0.055, "grad_norm": 0.2971934378147125, "learning_rate": 4.9986345097428375e-05, "loss": 0.0583, "step": 12000 }, { "epoch": 0.05505, "grad_norm": 0.23468483984470367, "learning_rate": 4.9986208150973975e-05, "loss": 0.0587, "step": 12010 }, { "epoch": 0.0551, "grad_norm": 0.22812537848949432, "learning_rate": 4.998607052140603e-05, "loss": 0.0582, "step": 12020 }, { "epoch": 0.05515, "grad_norm": 0.2039913386106491, "learning_rate": 4.99859322087283e-05, "loss": 0.0573, "step": 12030 }, { "epoch": 0.0552, "grad_norm": 0.21972723305225372, "learning_rate": 4.998579321294456e-05, "loss": 0.0598, "step": 12040 }, { "epoch": 0.05525, "grad_norm": 0.2677394151687622, "learning_rate": 4.998565353405864e-05, "loss": 0.0588, "step": 12050 }, { "epoch": 0.0553, "grad_norm": 0.24771089851856232, "learning_rate": 4.998551317207433e-05, "loss": 0.0577, "step": 12060 }, { "epoch": 0.05535, "grad_norm": 0.22934655845165253, "learning_rate": 4.9985372126995475e-05, "loss": 0.059, "step": 12070 }, { "epoch": 0.0554, "grad_norm": 0.31099027395248413, "learning_rate": 4.998523039882594e-05, "loss": 0.0591, "step": 12080 }, { "epoch": 0.05545, "grad_norm": 0.2668203115463257, "learning_rate": 4.9985087987569586e-05, "loss": 0.0593, "step": 12090 }, { "epoch": 0.0555, "grad_norm": 0.22269988059997559, "learning_rate": 4.998494489323031e-05, "loss": 0.0576, "step": 12100 }, { "epoch": 0.05555, "grad_norm": 0.27321428060531616, "learning_rate": 4.998480111581203e-05, "loss": 0.0588, "step": 12110 }, { "epoch": 0.0556, "grad_norm": 0.2842467427253723, "learning_rate": 4.998465665531868e-05, "loss": 0.0565, "step": 12120 }, { "epoch": 0.05565, "grad_norm": 0.26087847352027893, "learning_rate": 4.998451151175419e-05, "loss": 0.0555, "step": 12130 }, { "epoch": 0.0557, "grad_norm": 0.2942187190055847, "learning_rate": 4.998436568512256e-05, "loss": 0.0597, "step": 12140 }, { "epoch": 0.05575, "grad_norm": 0.2344774305820465, "learning_rate": 4.998421917542775e-05, "loss": 0.0604, "step": 12150 }, { "epoch": 0.0558, "grad_norm": 0.27858108282089233, "learning_rate": 4.998407198267376e-05, "loss": 0.0556, "step": 12160 }, { "epoch": 0.05585, "grad_norm": 0.2372090369462967, "learning_rate": 4.998392410686465e-05, "loss": 0.06, "step": 12170 }, { "epoch": 0.0559, "grad_norm": 0.24095700681209564, "learning_rate": 4.998377554800443e-05, "loss": 0.0579, "step": 12180 }, { "epoch": 0.05595, "grad_norm": 0.22147738933563232, "learning_rate": 4.998362630609717e-05, "loss": 0.0578, "step": 12190 }, { "epoch": 0.056, "grad_norm": 0.31405651569366455, "learning_rate": 4.998347638114696e-05, "loss": 0.0601, "step": 12200 }, { "epoch": 0.05605, "grad_norm": 0.28502020239830017, "learning_rate": 4.9983325773157886e-05, "loss": 0.0577, "step": 12210 }, { "epoch": 0.0561, "grad_norm": 0.3050804138183594, "learning_rate": 4.9983174482134076e-05, "loss": 0.0605, "step": 12220 }, { "epoch": 0.05615, "grad_norm": 0.22686448693275452, "learning_rate": 4.9983022508079655e-05, "loss": 0.0556, "step": 12230 }, { "epoch": 0.0562, "grad_norm": 0.2321311980485916, "learning_rate": 4.998286985099879e-05, "loss": 0.0586, "step": 12240 }, { "epoch": 0.05625, "grad_norm": 0.24789652228355408, "learning_rate": 4.998271651089564e-05, "loss": 0.0614, "step": 12250 }, { "epoch": 0.0563, "grad_norm": 0.27146685123443604, "learning_rate": 4.998256248777442e-05, "loss": 0.0595, "step": 12260 }, { "epoch": 0.05635, "grad_norm": 0.2287423312664032, "learning_rate": 4.998240778163932e-05, "loss": 0.056, "step": 12270 }, { "epoch": 0.0564, "grad_norm": 0.25738126039505005, "learning_rate": 4.9982252392494576e-05, "loss": 0.0573, "step": 12280 }, { "epoch": 0.05645, "grad_norm": 0.21796013414859772, "learning_rate": 4.998209632034444e-05, "loss": 0.0566, "step": 12290 }, { "epoch": 0.0565, "grad_norm": 0.18819575011730194, "learning_rate": 4.998193956519317e-05, "loss": 0.0569, "step": 12300 }, { "epoch": 0.05655, "grad_norm": 0.3370819091796875, "learning_rate": 4.998178212704506e-05, "loss": 0.0607, "step": 12310 }, { "epoch": 0.0566, "grad_norm": 0.3050325810909271, "learning_rate": 4.998162400590442e-05, "loss": 0.0553, "step": 12320 }, { "epoch": 0.05665, "grad_norm": 0.3089597821235657, "learning_rate": 4.9981465201775554e-05, "loss": 0.0576, "step": 12330 }, { "epoch": 0.0567, "grad_norm": 0.23376913368701935, "learning_rate": 4.998130571466282e-05, "loss": 0.056, "step": 12340 }, { "epoch": 0.05675, "grad_norm": 0.22346380352973938, "learning_rate": 4.9981145544570565e-05, "loss": 0.0554, "step": 12350 }, { "epoch": 0.0568, "grad_norm": 0.22319258749485016, "learning_rate": 4.998098469150319e-05, "loss": 0.057, "step": 12360 }, { "epoch": 0.05685, "grad_norm": 0.19363442063331604, "learning_rate": 4.998082315546506e-05, "loss": 0.0584, "step": 12370 }, { "epoch": 0.0569, "grad_norm": 0.23074665665626526, "learning_rate": 4.9980660936460624e-05, "loss": 0.0537, "step": 12380 }, { "epoch": 0.05695, "grad_norm": 0.25440314412117004, "learning_rate": 4.99804980344943e-05, "loss": 0.0591, "step": 12390 }, { "epoch": 0.057, "grad_norm": 0.24444711208343506, "learning_rate": 4.998033444957054e-05, "loss": 0.0549, "step": 12400 }, { "epoch": 0.05705, "grad_norm": 0.28419366478919983, "learning_rate": 4.998017018169383e-05, "loss": 0.0555, "step": 12410 }, { "epoch": 0.0571, "grad_norm": 0.24973613023757935, "learning_rate": 4.998000523086864e-05, "loss": 0.0555, "step": 12420 }, { "epoch": 0.05715, "grad_norm": 0.23273035883903503, "learning_rate": 4.99798395970995e-05, "loss": 0.0576, "step": 12430 }, { "epoch": 0.0572, "grad_norm": 0.18866272270679474, "learning_rate": 4.997967328039093e-05, "loss": 0.0565, "step": 12440 }, { "epoch": 0.05725, "grad_norm": 0.22517941892147064, "learning_rate": 4.997950628074747e-05, "loss": 0.0588, "step": 12450 }, { "epoch": 0.0573, "grad_norm": 0.24999724328517914, "learning_rate": 4.99793385981737e-05, "loss": 0.0601, "step": 12460 }, { "epoch": 0.05735, "grad_norm": 0.21959833800792694, "learning_rate": 4.997917023267419e-05, "loss": 0.0563, "step": 12470 }, { "epoch": 0.0574, "grad_norm": 0.2291775792837143, "learning_rate": 4.997900118425356e-05, "loss": 0.0549, "step": 12480 }, { "epoch": 0.05745, "grad_norm": 0.2492663860321045, "learning_rate": 4.997883145291641e-05, "loss": 0.0554, "step": 12490 }, { "epoch": 0.0575, "grad_norm": 0.23691225051879883, "learning_rate": 4.99786610386674e-05, "loss": 0.0576, "step": 12500 }, { "epoch": 0.05755, "grad_norm": 0.21633702516555786, "learning_rate": 4.9978489941511184e-05, "loss": 0.0547, "step": 12510 }, { "epoch": 0.0576, "grad_norm": 0.23120734095573425, "learning_rate": 4.997831816145243e-05, "loss": 0.0547, "step": 12520 }, { "epoch": 0.05765, "grad_norm": 0.23655107617378235, "learning_rate": 4.9978145698495845e-05, "loss": 0.0542, "step": 12530 }, { "epoch": 0.0577, "grad_norm": 0.2183103710412979, "learning_rate": 4.9977972552646136e-05, "loss": 0.0552, "step": 12540 }, { "epoch": 0.05775, "grad_norm": 0.2202279269695282, "learning_rate": 4.997779872390805e-05, "loss": 0.0562, "step": 12550 }, { "epoch": 0.0578, "grad_norm": 0.24577485024929047, "learning_rate": 4.9977624212286324e-05, "loss": 0.0535, "step": 12560 }, { "epoch": 0.05785, "grad_norm": 0.24386066198349, "learning_rate": 4.997744901778574e-05, "loss": 0.0607, "step": 12570 }, { "epoch": 0.0579, "grad_norm": 0.2942847013473511, "learning_rate": 4.997727314041107e-05, "loss": 0.0546, "step": 12580 }, { "epoch": 0.05795, "grad_norm": 0.23982101678848267, "learning_rate": 4.997709658016715e-05, "loss": 0.0558, "step": 12590 }, { "epoch": 0.058, "grad_norm": 0.23010914027690887, "learning_rate": 4.997691933705879e-05, "loss": 0.0566, "step": 12600 }, { "epoch": 0.05805, "grad_norm": 0.23606491088867188, "learning_rate": 4.997674141109084e-05, "loss": 0.0557, "step": 12610 }, { "epoch": 0.0581, "grad_norm": 0.22416196763515472, "learning_rate": 4.997656280226816e-05, "loss": 0.0553, "step": 12620 }, { "epoch": 0.05815, "grad_norm": 0.23933905363082886, "learning_rate": 4.997638351059564e-05, "loss": 0.0568, "step": 12630 }, { "epoch": 0.0582, "grad_norm": 0.2400108426809311, "learning_rate": 4.997620353607817e-05, "loss": 0.0538, "step": 12640 }, { "epoch": 0.05825, "grad_norm": 0.20764097571372986, "learning_rate": 4.9976022878720684e-05, "loss": 0.0566, "step": 12650 }, { "epoch": 0.0583, "grad_norm": 0.22233030200004578, "learning_rate": 4.997584153852812e-05, "loss": 0.056, "step": 12660 }, { "epoch": 0.05835, "grad_norm": 0.23320798575878143, "learning_rate": 4.997565951550542e-05, "loss": 0.0566, "step": 12670 }, { "epoch": 0.0584, "grad_norm": 0.2351810187101364, "learning_rate": 4.997547680965758e-05, "loss": 0.054, "step": 12680 }, { "epoch": 0.05845, "grad_norm": 0.22825945913791656, "learning_rate": 4.997529342098959e-05, "loss": 0.0581, "step": 12690 }, { "epoch": 0.0585, "grad_norm": 0.21631501615047455, "learning_rate": 4.9975109349506455e-05, "loss": 0.0542, "step": 12700 }, { "epoch": 0.05855, "grad_norm": 0.26137256622314453, "learning_rate": 4.997492459521321e-05, "loss": 0.058, "step": 12710 }, { "epoch": 0.0586, "grad_norm": 0.21283724904060364, "learning_rate": 4.9974739158114916e-05, "loss": 0.0568, "step": 12720 }, { "epoch": 0.05865, "grad_norm": 0.22252531349658966, "learning_rate": 4.9974553038216635e-05, "loss": 0.057, "step": 12730 }, { "epoch": 0.0587, "grad_norm": 0.21466626226902008, "learning_rate": 4.997436623552345e-05, "loss": 0.058, "step": 12740 }, { "epoch": 0.05875, "grad_norm": 0.225613072514534, "learning_rate": 4.997417875004048e-05, "loss": 0.0541, "step": 12750 }, { "epoch": 0.0588, "grad_norm": 0.22769001126289368, "learning_rate": 4.997399058177284e-05, "loss": 0.0557, "step": 12760 }, { "epoch": 0.05885, "grad_norm": 0.24786868691444397, "learning_rate": 4.997380173072569e-05, "loss": 0.054, "step": 12770 }, { "epoch": 0.0589, "grad_norm": 0.2872532308101654, "learning_rate": 4.997361219690417e-05, "loss": 0.0587, "step": 12780 }, { "epoch": 0.05895, "grad_norm": 0.2511669993400574, "learning_rate": 4.997342198031348e-05, "loss": 0.0546, "step": 12790 }, { "epoch": 0.059, "grad_norm": 0.3343372046947479, "learning_rate": 4.997323108095883e-05, "loss": 0.0593, "step": 12800 }, { "epoch": 0.05905, "grad_norm": 0.26693952083587646, "learning_rate": 4.997303949884541e-05, "loss": 0.0564, "step": 12810 }, { "epoch": 0.0591, "grad_norm": 0.23090007901191711, "learning_rate": 4.997284723397847e-05, "loss": 0.0598, "step": 12820 }, { "epoch": 0.05915, "grad_norm": 0.22998195886611938, "learning_rate": 4.997265428636328e-05, "loss": 0.0573, "step": 12830 }, { "epoch": 0.0592, "grad_norm": 0.24856556951999664, "learning_rate": 4.997246065600508e-05, "loss": 0.0601, "step": 12840 }, { "epoch": 0.05925, "grad_norm": 0.2634192705154419, "learning_rate": 4.997226634290921e-05, "loss": 0.0584, "step": 12850 }, { "epoch": 0.0593, "grad_norm": 0.3157845735549927, "learning_rate": 4.997207134708095e-05, "loss": 0.0557, "step": 12860 }, { "epoch": 0.05935, "grad_norm": 0.22737300395965576, "learning_rate": 4.9971875668525646e-05, "loss": 0.0552, "step": 12870 }, { "epoch": 0.0594, "grad_norm": 0.28176745772361755, "learning_rate": 4.997167930724864e-05, "loss": 0.0552, "step": 12880 }, { "epoch": 0.05945, "grad_norm": 0.3244107663631439, "learning_rate": 4.99714822632553e-05, "loss": 0.058, "step": 12890 }, { "epoch": 0.0595, "grad_norm": 0.26062488555908203, "learning_rate": 4.9971284536551025e-05, "loss": 0.0582, "step": 12900 }, { "epoch": 0.05955, "grad_norm": 0.22571077942848206, "learning_rate": 4.9971086127141206e-05, "loss": 0.0548, "step": 12910 }, { "epoch": 0.0596, "grad_norm": 0.2939530611038208, "learning_rate": 4.9970887035031274e-05, "loss": 0.0574, "step": 12920 }, { "epoch": 0.05965, "grad_norm": 0.24956828355789185, "learning_rate": 4.9970687260226665e-05, "loss": 0.0569, "step": 12930 }, { "epoch": 0.0597, "grad_norm": 0.2565845251083374, "learning_rate": 4.997048680273286e-05, "loss": 0.0536, "step": 12940 }, { "epoch": 0.05975, "grad_norm": 0.22084426879882812, "learning_rate": 4.9970285662555315e-05, "loss": 0.0544, "step": 12950 }, { "epoch": 0.0598, "grad_norm": 0.2750712037086487, "learning_rate": 4.997008383969955e-05, "loss": 0.0571, "step": 12960 }, { "epoch": 0.05985, "grad_norm": 0.24832278490066528, "learning_rate": 4.996988133417107e-05, "loss": 0.0573, "step": 12970 }, { "epoch": 0.0599, "grad_norm": 0.24103182554244995, "learning_rate": 4.996967814597542e-05, "loss": 0.054, "step": 12980 }, { "epoch": 0.05995, "grad_norm": 0.2249392718076706, "learning_rate": 4.996947427511814e-05, "loss": 0.0548, "step": 12990 }, { "epoch": 0.06, "grad_norm": 0.23457661271095276, "learning_rate": 4.996926972160482e-05, "loss": 0.0537, "step": 13000 }, { "epoch": 0.06005, "grad_norm": 0.26607438921928406, "learning_rate": 4.996906448544105e-05, "loss": 0.057, "step": 13010 }, { "epoch": 0.0601, "grad_norm": 0.24230334162712097, "learning_rate": 4.9968858566632435e-05, "loss": 0.0534, "step": 13020 }, { "epoch": 0.06015, "grad_norm": 0.2050572633743286, "learning_rate": 4.99686519651846e-05, "loss": 0.0542, "step": 13030 }, { "epoch": 0.0602, "grad_norm": 0.2492501586675644, "learning_rate": 4.996844468110321e-05, "loss": 0.0544, "step": 13040 }, { "epoch": 0.06025, "grad_norm": 0.20426131784915924, "learning_rate": 4.9968236714393916e-05, "loss": 0.0544, "step": 13050 }, { "epoch": 0.0603, "grad_norm": 0.21557781100273132, "learning_rate": 4.996802806506241e-05, "loss": 0.0535, "step": 13060 }, { "epoch": 0.06035, "grad_norm": 0.22966895997524261, "learning_rate": 4.9967818733114404e-05, "loss": 0.0522, "step": 13070 }, { "epoch": 0.0604, "grad_norm": 0.16762161254882812, "learning_rate": 4.996760871855561e-05, "loss": 0.0532, "step": 13080 }, { "epoch": 0.06045, "grad_norm": 0.24853459000587463, "learning_rate": 4.996739802139177e-05, "loss": 0.0547, "step": 13090 }, { "epoch": 0.0605, "grad_norm": 0.2116885632276535, "learning_rate": 4.996718664162865e-05, "loss": 0.0559, "step": 13100 }, { "epoch": 0.06055, "grad_norm": 0.23208336532115936, "learning_rate": 4.996697457927203e-05, "loss": 0.0565, "step": 13110 }, { "epoch": 0.0606, "grad_norm": 0.2170572280883789, "learning_rate": 4.99667618343277e-05, "loss": 0.0531, "step": 13120 }, { "epoch": 0.06065, "grad_norm": 0.22632868587970734, "learning_rate": 4.9966548406801486e-05, "loss": 0.0547, "step": 13130 }, { "epoch": 0.0607, "grad_norm": 0.24190941452980042, "learning_rate": 4.996633429669921e-05, "loss": 0.0537, "step": 13140 }, { "epoch": 0.06075, "grad_norm": 0.1979205310344696, "learning_rate": 4.996611950402674e-05, "loss": 0.055, "step": 13150 }, { "epoch": 0.0608, "grad_norm": 0.23045705258846283, "learning_rate": 4.9965904028789945e-05, "loss": 0.0538, "step": 13160 }, { "epoch": 0.06085, "grad_norm": 0.24325931072235107, "learning_rate": 4.9965687870994716e-05, "loss": 0.0562, "step": 13170 }, { "epoch": 0.0609, "grad_norm": 0.28811779618263245, "learning_rate": 4.996547103064695e-05, "loss": 0.055, "step": 13180 }, { "epoch": 0.06095, "grad_norm": 0.2513379454612732, "learning_rate": 4.9965253507752585e-05, "loss": 0.0536, "step": 13190 }, { "epoch": 0.061, "grad_norm": 0.22728721797466278, "learning_rate": 4.9965035302317574e-05, "loss": 0.0549, "step": 13200 }, { "epoch": 0.06105, "grad_norm": 0.272308886051178, "learning_rate": 4.9964816414347874e-05, "loss": 0.0567, "step": 13210 }, { "epoch": 0.0611, "grad_norm": 0.26510074734687805, "learning_rate": 4.9964596843849474e-05, "loss": 0.0551, "step": 13220 }, { "epoch": 0.06115, "grad_norm": 0.2139650285243988, "learning_rate": 4.996437659082838e-05, "loss": 0.0543, "step": 13230 }, { "epoch": 0.0612, "grad_norm": 0.2072010636329651, "learning_rate": 4.9964155655290596e-05, "loss": 0.0553, "step": 13240 }, { "epoch": 0.06125, "grad_norm": 0.23949231207370758, "learning_rate": 4.996393403724218e-05, "loss": 0.0552, "step": 13250 }, { "epoch": 0.0613, "grad_norm": 0.20085765421390533, "learning_rate": 4.996371173668919e-05, "loss": 0.0552, "step": 13260 }, { "epoch": 0.06135, "grad_norm": 0.20907004177570343, "learning_rate": 4.9963488753637696e-05, "loss": 0.0562, "step": 13270 }, { "epoch": 0.0614, "grad_norm": 0.18924008309841156, "learning_rate": 4.99632650880938e-05, "loss": 0.0541, "step": 13280 }, { "epoch": 0.06145, "grad_norm": 0.2130063772201538, "learning_rate": 4.996304074006361e-05, "loss": 0.0531, "step": 13290 }, { "epoch": 0.0615, "grad_norm": 0.2611365020275116, "learning_rate": 4.996281570955327e-05, "loss": 0.0541, "step": 13300 }, { "epoch": 0.06155, "grad_norm": 0.25117290019989014, "learning_rate": 4.996258999656892e-05, "loss": 0.0538, "step": 13310 }, { "epoch": 0.0616, "grad_norm": 0.23625993728637695, "learning_rate": 4.9962363601116745e-05, "loss": 0.0554, "step": 13320 }, { "epoch": 0.06165, "grad_norm": 0.23082759976387024, "learning_rate": 4.996213652320292e-05, "loss": 0.0581, "step": 13330 }, { "epoch": 0.0617, "grad_norm": 0.1999133825302124, "learning_rate": 4.9961908762833666e-05, "loss": 0.0558, "step": 13340 }, { "epoch": 0.06175, "grad_norm": 0.23757296800613403, "learning_rate": 4.9961680320015205e-05, "loss": 0.0553, "step": 13350 }, { "epoch": 0.0618, "grad_norm": 0.25344860553741455, "learning_rate": 4.996145119475377e-05, "loss": 0.0553, "step": 13360 }, { "epoch": 0.06185, "grad_norm": 0.24485984444618225, "learning_rate": 4.996122138705565e-05, "loss": 0.0537, "step": 13370 }, { "epoch": 0.0619, "grad_norm": 0.21683093905448914, "learning_rate": 4.9960990896927116e-05, "loss": 0.0539, "step": 13380 }, { "epoch": 0.06195, "grad_norm": 0.2736802101135254, "learning_rate": 4.9960759724374464e-05, "loss": 0.0562, "step": 13390 }, { "epoch": 0.062, "grad_norm": 0.253433495759964, "learning_rate": 4.996052786940402e-05, "loss": 0.0579, "step": 13400 }, { "epoch": 0.06205, "grad_norm": 0.28744834661483765, "learning_rate": 4.996029533202211e-05, "loss": 0.0576, "step": 13410 }, { "epoch": 0.0621, "grad_norm": 0.23220312595367432, "learning_rate": 4.996006211223511e-05, "loss": 0.0553, "step": 13420 }, { "epoch": 0.06215, "grad_norm": 0.24789561331272125, "learning_rate": 4.99598282100494e-05, "loss": 0.0562, "step": 13430 }, { "epoch": 0.0622, "grad_norm": 0.2644476890563965, "learning_rate": 4.9959593625471344e-05, "loss": 0.0572, "step": 13440 }, { "epoch": 0.06225, "grad_norm": 0.2106294333934784, "learning_rate": 4.995935835850739e-05, "loss": 0.0584, "step": 13450 }, { "epoch": 0.0623, "grad_norm": 0.2522731423377991, "learning_rate": 4.995912240916395e-05, "loss": 0.0593, "step": 13460 }, { "epoch": 0.06235, "grad_norm": 0.2467738538980484, "learning_rate": 4.995888577744748e-05, "loss": 0.0532, "step": 13470 }, { "epoch": 0.0624, "grad_norm": 0.2429942637681961, "learning_rate": 4.995864846336445e-05, "loss": 0.0566, "step": 13480 }, { "epoch": 0.06245, "grad_norm": 0.2741338908672333, "learning_rate": 4.995841046692135e-05, "loss": 0.0559, "step": 13490 }, { "epoch": 0.0625, "grad_norm": 0.21140851080417633, "learning_rate": 4.995817178812468e-05, "loss": 0.0524, "step": 13500 }, { "epoch": 0.06255, "grad_norm": 0.23873601853847504, "learning_rate": 4.9957932426980966e-05, "loss": 0.0565, "step": 13510 }, { "epoch": 0.0626, "grad_norm": 0.2380428910255432, "learning_rate": 4.9957692383496765e-05, "loss": 0.0527, "step": 13520 }, { "epoch": 0.06265, "grad_norm": 0.19534370303153992, "learning_rate": 4.995745165767863e-05, "loss": 0.0577, "step": 13530 }, { "epoch": 0.0627, "grad_norm": 0.21950581669807434, "learning_rate": 4.995721024953314e-05, "loss": 0.0546, "step": 13540 }, { "epoch": 0.06275, "grad_norm": 0.26917189359664917, "learning_rate": 4.9956968159066894e-05, "loss": 0.0564, "step": 13550 }, { "epoch": 0.0628, "grad_norm": 0.24399952590465546, "learning_rate": 4.995672538628652e-05, "loss": 0.0546, "step": 13560 }, { "epoch": 0.06285, "grad_norm": 0.27379703521728516, "learning_rate": 4.9956481931198644e-05, "loss": 0.0544, "step": 13570 }, { "epoch": 0.0629, "grad_norm": 0.2227669209241867, "learning_rate": 4.995623779380993e-05, "loss": 0.0535, "step": 13580 }, { "epoch": 0.06295, "grad_norm": 0.20523680746555328, "learning_rate": 4.9955992974127055e-05, "loss": 0.0518, "step": 13590 }, { "epoch": 0.063, "grad_norm": 0.21672451496124268, "learning_rate": 4.99557474721567e-05, "loss": 0.0522, "step": 13600 }, { "epoch": 0.06305, "grad_norm": 0.2435152381658554, "learning_rate": 4.995550128790559e-05, "loss": 0.0531, "step": 13610 }, { "epoch": 0.0631, "grad_norm": 0.19593331217765808, "learning_rate": 4.9955254421380446e-05, "loss": 0.0524, "step": 13620 }, { "epoch": 0.06315, "grad_norm": 0.202594593167305, "learning_rate": 4.995500687258803e-05, "loss": 0.0524, "step": 13630 }, { "epoch": 0.0632, "grad_norm": 0.17885445058345795, "learning_rate": 4.9954758641535094e-05, "loss": 0.0539, "step": 13640 }, { "epoch": 0.06325, "grad_norm": 0.2962518632411957, "learning_rate": 4.9954509728228434e-05, "loss": 0.0588, "step": 13650 }, { "epoch": 0.0633, "grad_norm": 0.20670145750045776, "learning_rate": 4.9954260132674844e-05, "loss": 0.055, "step": 13660 }, { "epoch": 0.06335, "grad_norm": 0.2579496502876282, "learning_rate": 4.995400985488117e-05, "loss": 0.0546, "step": 13670 }, { "epoch": 0.0634, "grad_norm": 0.2677033841609955, "learning_rate": 4.995375889485424e-05, "loss": 0.054, "step": 13680 }, { "epoch": 0.06345, "grad_norm": 0.22976121306419373, "learning_rate": 4.9953507252600906e-05, "loss": 0.0551, "step": 13690 }, { "epoch": 0.0635, "grad_norm": 0.2728902995586395, "learning_rate": 4.995325492812807e-05, "loss": 0.0552, "step": 13700 }, { "epoch": 0.06355, "grad_norm": 0.2082107663154602, "learning_rate": 4.9953001921442613e-05, "loss": 0.0563, "step": 13710 }, { "epoch": 0.0636, "grad_norm": 0.20261944830417633, "learning_rate": 4.995274823255146e-05, "loss": 0.0587, "step": 13720 }, { "epoch": 0.06365, "grad_norm": 0.23577085137367249, "learning_rate": 4.9952493861461544e-05, "loss": 0.0554, "step": 13730 }, { "epoch": 0.0637, "grad_norm": 0.2136303335428238, "learning_rate": 4.995223880817982e-05, "loss": 0.0535, "step": 13740 }, { "epoch": 0.06375, "grad_norm": 0.250302255153656, "learning_rate": 4.995198307271326e-05, "loss": 0.0571, "step": 13750 }, { "epoch": 0.0638, "grad_norm": 0.1985165923833847, "learning_rate": 4.995172665506886e-05, "loss": 0.056, "step": 13760 }, { "epoch": 0.06385, "grad_norm": 0.2227594256401062, "learning_rate": 4.9951469555253624e-05, "loss": 0.0558, "step": 13770 }, { "epoch": 0.0639, "grad_norm": 0.2320021539926529, "learning_rate": 4.995121177327458e-05, "loss": 0.0586, "step": 13780 }, { "epoch": 0.06395, "grad_norm": 0.23888349533081055, "learning_rate": 4.9950953309138784e-05, "loss": 0.0597, "step": 13790 }, { "epoch": 0.064, "grad_norm": 0.23809459805488586, "learning_rate": 4.99506941628533e-05, "loss": 0.0544, "step": 13800 }, { "epoch": 0.06405, "grad_norm": 0.23160304129123688, "learning_rate": 4.995043433442521e-05, "loss": 0.0554, "step": 13810 }, { "epoch": 0.0641, "grad_norm": 0.20707209408283234, "learning_rate": 4.995017382386162e-05, "loss": 0.0557, "step": 13820 }, { "epoch": 0.06415, "grad_norm": 0.20726770162582397, "learning_rate": 4.994991263116965e-05, "loss": 0.0568, "step": 13830 }, { "epoch": 0.0642, "grad_norm": 0.2668224573135376, "learning_rate": 4.9949650756356434e-05, "loss": 0.0531, "step": 13840 }, { "epoch": 0.06425, "grad_norm": 0.23629474639892578, "learning_rate": 4.994938819942915e-05, "loss": 0.0547, "step": 13850 }, { "epoch": 0.0643, "grad_norm": 0.2216833382844925, "learning_rate": 4.994912496039496e-05, "loss": 0.055, "step": 13860 }, { "epoch": 0.06435, "grad_norm": 0.20070704817771912, "learning_rate": 4.9948861039261074e-05, "loss": 0.058, "step": 13870 }, { "epoch": 0.0644, "grad_norm": 0.23889175057411194, "learning_rate": 4.994859643603469e-05, "loss": 0.0557, "step": 13880 }, { "epoch": 0.06445, "grad_norm": 0.2590092718601227, "learning_rate": 4.994833115072306e-05, "loss": 0.0543, "step": 13890 }, { "epoch": 0.0645, "grad_norm": 0.1995381861925125, "learning_rate": 4.994806518333343e-05, "loss": 0.0558, "step": 13900 }, { "epoch": 0.06455, "grad_norm": 0.24994847178459167, "learning_rate": 4.994779853387307e-05, "loss": 0.0612, "step": 13910 }, { "epoch": 0.0646, "grad_norm": 0.21314527094364166, "learning_rate": 4.994753120234926e-05, "loss": 0.0564, "step": 13920 }, { "epoch": 0.06465, "grad_norm": 0.24399550259113312, "learning_rate": 4.9947263188769337e-05, "loss": 0.0567, "step": 13930 }, { "epoch": 0.0647, "grad_norm": 0.20098140835762024, "learning_rate": 4.9946994493140595e-05, "loss": 0.0581, "step": 13940 }, { "epoch": 0.06475, "grad_norm": 0.2497253566980362, "learning_rate": 4.99467251154704e-05, "loss": 0.0528, "step": 13950 }, { "epoch": 0.0648, "grad_norm": 0.20846222341060638, "learning_rate": 4.994645505576612e-05, "loss": 0.0555, "step": 13960 }, { "epoch": 0.06485, "grad_norm": 0.20436640083789825, "learning_rate": 4.9946184314035116e-05, "loss": 0.0542, "step": 13970 }, { "epoch": 0.0649, "grad_norm": 0.18963970243930817, "learning_rate": 4.994591289028482e-05, "loss": 0.052, "step": 13980 }, { "epoch": 0.06495, "grad_norm": 0.23356103897094727, "learning_rate": 4.994564078452262e-05, "loss": 0.0536, "step": 13990 }, { "epoch": 0.065, "grad_norm": 0.23844048380851746, "learning_rate": 4.994536799675599e-05, "loss": 0.0544, "step": 14000 }, { "epoch": 0.06505, "grad_norm": 0.23758967220783234, "learning_rate": 4.9945094526992364e-05, "loss": 0.0543, "step": 14010 }, { "epoch": 0.0651, "grad_norm": 0.2923468351364136, "learning_rate": 4.994482037523922e-05, "loss": 0.0549, "step": 14020 }, { "epoch": 0.06515, "grad_norm": 0.23966705799102783, "learning_rate": 4.994454554150406e-05, "loss": 0.0552, "step": 14030 }, { "epoch": 0.0652, "grad_norm": 0.18790079653263092, "learning_rate": 4.99442700257944e-05, "loss": 0.0546, "step": 14040 }, { "epoch": 0.06525, "grad_norm": 0.24292923510074615, "learning_rate": 4.9943993828117776e-05, "loss": 0.0547, "step": 14050 }, { "epoch": 0.0653, "grad_norm": 0.2541712820529938, "learning_rate": 4.9943716948481715e-05, "loss": 0.053, "step": 14060 }, { "epoch": 0.06535, "grad_norm": 0.23627446591854095, "learning_rate": 4.994343938689381e-05, "loss": 0.0541, "step": 14070 }, { "epoch": 0.0654, "grad_norm": 0.23709315061569214, "learning_rate": 4.994316114336165e-05, "loss": 0.0539, "step": 14080 }, { "epoch": 0.06545, "grad_norm": 0.22304728627204895, "learning_rate": 4.9942882217892825e-05, "loss": 0.0524, "step": 14090 }, { "epoch": 0.0655, "grad_norm": 0.23598924279212952, "learning_rate": 4.994260261049498e-05, "loss": 0.0525, "step": 14100 }, { "epoch": 0.06555, "grad_norm": 0.26575416326522827, "learning_rate": 4.994232232117574e-05, "loss": 0.0593, "step": 14110 }, { "epoch": 0.0656, "grad_norm": 0.23113156855106354, "learning_rate": 4.9942041349942795e-05, "loss": 0.053, "step": 14120 }, { "epoch": 0.06565, "grad_norm": 0.2666196823120117, "learning_rate": 4.994175969680379e-05, "loss": 0.0527, "step": 14130 }, { "epoch": 0.0657, "grad_norm": 0.2423446625471115, "learning_rate": 4.994147736176645e-05, "loss": 0.0525, "step": 14140 }, { "epoch": 0.06575, "grad_norm": 0.27136650681495667, "learning_rate": 4.9941194344838496e-05, "loss": 0.0554, "step": 14150 }, { "epoch": 0.0658, "grad_norm": 0.31948310136795044, "learning_rate": 4.994091064602766e-05, "loss": 0.0557, "step": 14160 }, { "epoch": 0.06585, "grad_norm": 0.2882412075996399, "learning_rate": 4.994062626534169e-05, "loss": 0.0578, "step": 14170 }, { "epoch": 0.0659, "grad_norm": 0.2573373019695282, "learning_rate": 4.994034120278837e-05, "loss": 0.057, "step": 14180 }, { "epoch": 0.06595, "grad_norm": 0.2526601552963257, "learning_rate": 4.994005545837549e-05, "loss": 0.0554, "step": 14190 }, { "epoch": 0.066, "grad_norm": 0.24789269268512726, "learning_rate": 4.9939769032110864e-05, "loss": 0.0543, "step": 14200 }, { "epoch": 0.06605, "grad_norm": 0.24072995781898499, "learning_rate": 4.993948192400232e-05, "loss": 0.0529, "step": 14210 }, { "epoch": 0.0661, "grad_norm": 0.23017463088035583, "learning_rate": 4.993919413405772e-05, "loss": 0.0563, "step": 14220 }, { "epoch": 0.06615, "grad_norm": 0.25913599133491516, "learning_rate": 4.993890566228491e-05, "loss": 0.0568, "step": 14230 }, { "epoch": 0.0662, "grad_norm": 0.25078967213630676, "learning_rate": 4.993861650869179e-05, "loss": 0.0588, "step": 14240 }, { "epoch": 0.06625, "grad_norm": 0.23285934329032898, "learning_rate": 4.993832667328626e-05, "loss": 0.0546, "step": 14250 }, { "epoch": 0.0663, "grad_norm": 0.2456798553466797, "learning_rate": 4.9938036156076256e-05, "loss": 0.0547, "step": 14260 }, { "epoch": 0.06635, "grad_norm": 0.23873870074748993, "learning_rate": 4.993774495706971e-05, "loss": 0.0576, "step": 14270 }, { "epoch": 0.0664, "grad_norm": 0.2455510050058365, "learning_rate": 4.9937453076274584e-05, "loss": 0.0554, "step": 14280 }, { "epoch": 0.06645, "grad_norm": 0.23358359932899475, "learning_rate": 4.993716051369886e-05, "loss": 0.0566, "step": 14290 }, { "epoch": 0.0665, "grad_norm": 0.22073981165885925, "learning_rate": 4.993686726935054e-05, "loss": 0.0559, "step": 14300 }, { "epoch": 0.06655, "grad_norm": 0.22967039048671722, "learning_rate": 4.993657334323763e-05, "loss": 0.0578, "step": 14310 }, { "epoch": 0.0666, "grad_norm": 0.2374074012041092, "learning_rate": 4.993627873536818e-05, "loss": 0.0556, "step": 14320 }, { "epoch": 0.06665, "grad_norm": 0.25563186407089233, "learning_rate": 4.993598344575023e-05, "loss": 0.0556, "step": 14330 }, { "epoch": 0.0667, "grad_norm": 0.22516918182373047, "learning_rate": 4.993568747439187e-05, "loss": 0.0524, "step": 14340 }, { "epoch": 0.06675, "grad_norm": 0.2268334925174713, "learning_rate": 4.993539082130117e-05, "loss": 0.0524, "step": 14350 }, { "epoch": 0.0668, "grad_norm": 0.20074786245822906, "learning_rate": 4.993509348648626e-05, "loss": 0.0535, "step": 14360 }, { "epoch": 0.06685, "grad_norm": 0.23249930143356323, "learning_rate": 4.9934795469955266e-05, "loss": 0.0519, "step": 14370 }, { "epoch": 0.0669, "grad_norm": 0.21201986074447632, "learning_rate": 4.9934496771716326e-05, "loss": 0.0581, "step": 14380 }, { "epoch": 0.06695, "grad_norm": 0.22310949862003326, "learning_rate": 4.993419739177761e-05, "loss": 0.0555, "step": 14390 }, { "epoch": 0.067, "grad_norm": 0.2250770777463913, "learning_rate": 4.9933897330147305e-05, "loss": 0.0571, "step": 14400 }, { "epoch": 0.06705, "grad_norm": 0.23023316264152527, "learning_rate": 4.993359658683362e-05, "loss": 0.0589, "step": 14410 }, { "epoch": 0.0671, "grad_norm": 0.19881965219974518, "learning_rate": 4.9933295161844765e-05, "loss": 0.0531, "step": 14420 }, { "epoch": 0.06715, "grad_norm": 0.16332776844501495, "learning_rate": 4.993299305518899e-05, "loss": 0.0506, "step": 14430 }, { "epoch": 0.0672, "grad_norm": 0.23525168001651764, "learning_rate": 4.993269026687456e-05, "loss": 0.0556, "step": 14440 }, { "epoch": 0.06725, "grad_norm": 0.19342724978923798, "learning_rate": 4.993238679690974e-05, "loss": 0.0549, "step": 14450 }, { "epoch": 0.0673, "grad_norm": 0.24618756771087646, "learning_rate": 4.993208264530282e-05, "loss": 0.0537, "step": 14460 }, { "epoch": 0.06735, "grad_norm": 0.21478991210460663, "learning_rate": 4.9931777812062134e-05, "loss": 0.0541, "step": 14470 }, { "epoch": 0.0674, "grad_norm": 0.2020798772573471, "learning_rate": 4.9931472297196015e-05, "loss": 0.0532, "step": 14480 }, { "epoch": 0.06745, "grad_norm": 0.20711049437522888, "learning_rate": 4.99311661007128e-05, "loss": 0.0528, "step": 14490 }, { "epoch": 0.0675, "grad_norm": 0.2520681619644165, "learning_rate": 4.993085922262088e-05, "loss": 0.0552, "step": 14500 }, { "epoch": 0.06755, "grad_norm": 0.24167174100875854, "learning_rate": 4.993055166292863e-05, "loss": 0.0529, "step": 14510 }, { "epoch": 0.0676, "grad_norm": 0.2574155628681183, "learning_rate": 4.9930243421644466e-05, "loss": 0.0518, "step": 14520 }, { "epoch": 0.06765, "grad_norm": 0.27726471424102783, "learning_rate": 4.992993449877681e-05, "loss": 0.0527, "step": 14530 }, { "epoch": 0.0677, "grad_norm": 0.26100972294807434, "learning_rate": 4.992962489433411e-05, "loss": 0.0518, "step": 14540 }, { "epoch": 0.06775, "grad_norm": 0.23531347513198853, "learning_rate": 4.992931460832483e-05, "loss": 0.0527, "step": 14550 }, { "epoch": 0.0678, "grad_norm": 0.23893924057483673, "learning_rate": 4.992900364075746e-05, "loss": 0.0563, "step": 14560 }, { "epoch": 0.06785, "grad_norm": 0.22128519415855408, "learning_rate": 4.992869199164048e-05, "loss": 0.0524, "step": 14570 }, { "epoch": 0.0679, "grad_norm": 0.2469291090965271, "learning_rate": 4.992837966098245e-05, "loss": 0.0524, "step": 14580 }, { "epoch": 0.06795, "grad_norm": 0.24957656860351562, "learning_rate": 4.992806664879187e-05, "loss": 0.0527, "step": 14590 }, { "epoch": 0.068, "grad_norm": 0.21424804627895355, "learning_rate": 4.9927752955077314e-05, "loss": 0.0545, "step": 14600 }, { "epoch": 0.06805, "grad_norm": 0.18640349805355072, "learning_rate": 4.9927438579847364e-05, "loss": 0.0531, "step": 14610 }, { "epoch": 0.0681, "grad_norm": 0.2083619236946106, "learning_rate": 4.9927123523110595e-05, "loss": 0.0531, "step": 14620 }, { "epoch": 0.06815, "grad_norm": 0.17825178802013397, "learning_rate": 4.9926807784875654e-05, "loss": 0.0538, "step": 14630 }, { "epoch": 0.0682, "grad_norm": 0.21923069655895233, "learning_rate": 4.992649136515113e-05, "loss": 0.0585, "step": 14640 }, { "epoch": 0.06825, "grad_norm": 0.21128062903881073, "learning_rate": 4.992617426394571e-05, "loss": 0.055, "step": 14650 }, { "epoch": 0.0683, "grad_norm": 0.23529167473316193, "learning_rate": 4.992585648126805e-05, "loss": 0.052, "step": 14660 }, { "epoch": 0.06835, "grad_norm": 0.23567888140678406, "learning_rate": 4.9925538017126836e-05, "loss": 0.0549, "step": 14670 }, { "epoch": 0.0684, "grad_norm": 0.19339175522327423, "learning_rate": 4.992521887153078e-05, "loss": 0.0513, "step": 14680 }, { "epoch": 0.06845, "grad_norm": 0.20444408059120178, "learning_rate": 4.9924899044488594e-05, "loss": 0.0561, "step": 14690 }, { "epoch": 0.0685, "grad_norm": 0.18966902792453766, "learning_rate": 4.9924578536009035e-05, "loss": 0.0543, "step": 14700 }, { "epoch": 0.06855, "grad_norm": 0.2409667819738388, "learning_rate": 4.992425734610087e-05, "loss": 0.0553, "step": 14710 }, { "epoch": 0.0686, "grad_norm": 0.19646379351615906, "learning_rate": 4.9923935474772864e-05, "loss": 0.0529, "step": 14720 }, { "epoch": 0.06865, "grad_norm": 0.21415621042251587, "learning_rate": 4.9923612922033836e-05, "loss": 0.0535, "step": 14730 }, { "epoch": 0.0687, "grad_norm": 0.18555407226085663, "learning_rate": 4.992328968789258e-05, "loss": 0.0533, "step": 14740 }, { "epoch": 0.06875, "grad_norm": 0.2298015058040619, "learning_rate": 4.992296577235796e-05, "loss": 0.055, "step": 14750 }, { "epoch": 0.0688, "grad_norm": 0.1745564043521881, "learning_rate": 4.9922641175438813e-05, "loss": 0.0536, "step": 14760 }, { "epoch": 0.06885, "grad_norm": 0.21434536576271057, "learning_rate": 4.992231589714402e-05, "loss": 0.0532, "step": 14770 }, { "epoch": 0.0689, "grad_norm": 0.21135056018829346, "learning_rate": 4.992198993748247e-05, "loss": 0.0549, "step": 14780 }, { "epoch": 0.06895, "grad_norm": 0.1799994707107544, "learning_rate": 4.992166329646308e-05, "loss": 0.0503, "step": 14790 }, { "epoch": 0.069, "grad_norm": 0.18514901399612427, "learning_rate": 4.992133597409478e-05, "loss": 0.05, "step": 14800 }, { "epoch": 0.06905, "grad_norm": 0.26623496413230896, "learning_rate": 4.992100797038652e-05, "loss": 0.0557, "step": 14810 }, { "epoch": 0.0691, "grad_norm": 0.23814378678798676, "learning_rate": 4.992067928534726e-05, "loss": 0.0497, "step": 14820 }, { "epoch": 0.06915, "grad_norm": 0.18923845887184143, "learning_rate": 4.9920349918985995e-05, "loss": 0.0512, "step": 14830 }, { "epoch": 0.0692, "grad_norm": 0.1813831627368927, "learning_rate": 4.992001987131172e-05, "loss": 0.0511, "step": 14840 }, { "epoch": 0.06925, "grad_norm": 0.23114556074142456, "learning_rate": 4.991968914233347e-05, "loss": 0.0534, "step": 14850 }, { "epoch": 0.0693, "grad_norm": 0.28188079595565796, "learning_rate": 4.991935773206027e-05, "loss": 0.0534, "step": 14860 }, { "epoch": 0.06935, "grad_norm": 0.20539860427379608, "learning_rate": 4.99190256405012e-05, "loss": 0.0554, "step": 14870 }, { "epoch": 0.0694, "grad_norm": 0.1767107993364334, "learning_rate": 4.9918692867665327e-05, "loss": 0.0511, "step": 14880 }, { "epoch": 0.06945, "grad_norm": 0.16256114840507507, "learning_rate": 4.991835941356176e-05, "loss": 0.0519, "step": 14890 }, { "epoch": 0.0695, "grad_norm": 0.23473677039146423, "learning_rate": 4.9918025278199597e-05, "loss": 0.0538, "step": 14900 }, { "epoch": 0.06955, "grad_norm": 0.23456275463104248, "learning_rate": 4.991769046158799e-05, "loss": 0.0531, "step": 14910 }, { "epoch": 0.0696, "grad_norm": 0.22146765887737274, "learning_rate": 4.991735496373609e-05, "loss": 0.053, "step": 14920 }, { "epoch": 0.06965, "grad_norm": 0.2178531438112259, "learning_rate": 4.9917018784653056e-05, "loss": 0.0513, "step": 14930 }, { "epoch": 0.0697, "grad_norm": 0.20697830617427826, "learning_rate": 4.99166819243481e-05, "loss": 0.0512, "step": 14940 }, { "epoch": 0.06975, "grad_norm": 0.17402079701423645, "learning_rate": 4.9916344382830414e-05, "loss": 0.0516, "step": 14950 }, { "epoch": 0.0698, "grad_norm": 0.20043763518333435, "learning_rate": 4.9916006160109235e-05, "loss": 0.0537, "step": 14960 }, { "epoch": 0.06985, "grad_norm": 0.1847904920578003, "learning_rate": 4.991566725619381e-05, "loss": 0.0518, "step": 14970 }, { "epoch": 0.0699, "grad_norm": 0.2037106603384018, "learning_rate": 4.99153276710934e-05, "loss": 0.0561, "step": 14980 }, { "epoch": 0.06995, "grad_norm": 0.21404510736465454, "learning_rate": 4.991498740481729e-05, "loss": 0.0521, "step": 14990 }, { "epoch": 0.07, "grad_norm": 0.2313455194234848, "learning_rate": 4.991464645737479e-05, "loss": 0.0522, "step": 15000 }, { "epoch": 0.07005, "grad_norm": 0.22190025448799133, "learning_rate": 4.9914304828775215e-05, "loss": 0.0533, "step": 15010 }, { "epoch": 0.0701, "grad_norm": 0.18861819803714752, "learning_rate": 4.99139625190279e-05, "loss": 0.0541, "step": 15020 }, { "epoch": 0.07015, "grad_norm": 0.20761419832706451, "learning_rate": 4.991361952814222e-05, "loss": 0.0538, "step": 15030 }, { "epoch": 0.0702, "grad_norm": 0.2672784626483917, "learning_rate": 4.9913275856127534e-05, "loss": 0.0583, "step": 15040 }, { "epoch": 0.07025, "grad_norm": 0.22271452844142914, "learning_rate": 4.991293150299324e-05, "loss": 0.0536, "step": 15050 }, { "epoch": 0.0703, "grad_norm": 0.2635791003704071, "learning_rate": 4.9912586468748774e-05, "loss": 0.0535, "step": 15060 }, { "epoch": 0.07035, "grad_norm": 0.21073433756828308, "learning_rate": 4.991224075340355e-05, "loss": 0.0536, "step": 15070 }, { "epoch": 0.0704, "grad_norm": 0.22948896884918213, "learning_rate": 4.991189435696701e-05, "loss": 0.053, "step": 15080 }, { "epoch": 0.07045, "grad_norm": 0.19976653158664703, "learning_rate": 4.9911547279448644e-05, "loss": 0.0546, "step": 15090 }, { "epoch": 0.0705, "grad_norm": 0.20821602642536163, "learning_rate": 4.9911199520857935e-05, "loss": 0.0512, "step": 15100 }, { "epoch": 0.07055, "grad_norm": 0.20193876326084137, "learning_rate": 4.991085108120439e-05, "loss": 0.0518, "step": 15110 }, { "epoch": 0.0706, "grad_norm": 0.18943262100219727, "learning_rate": 4.9910501960497536e-05, "loss": 0.0516, "step": 15120 }, { "epoch": 0.07065, "grad_norm": 0.2120610475540161, "learning_rate": 4.9910152158746914e-05, "loss": 0.0522, "step": 15130 }, { "epoch": 0.0707, "grad_norm": 0.1962938755750656, "learning_rate": 4.990980167596209e-05, "loss": 0.0513, "step": 15140 }, { "epoch": 0.07075, "grad_norm": 0.1755223572254181, "learning_rate": 4.990945051215265e-05, "loss": 0.0517, "step": 15150 }, { "epoch": 0.0708, "grad_norm": 0.17874115705490112, "learning_rate": 4.990909866732819e-05, "loss": 0.0522, "step": 15160 }, { "epoch": 0.07085, "grad_norm": 0.24599440395832062, "learning_rate": 4.990874614149833e-05, "loss": 0.0541, "step": 15170 }, { "epoch": 0.0709, "grad_norm": 0.22264912724494934, "learning_rate": 4.9908392934672705e-05, "loss": 0.0526, "step": 15180 }, { "epoch": 0.07095, "grad_norm": 0.1884782463312149, "learning_rate": 4.990803904686098e-05, "loss": 0.053, "step": 15190 }, { "epoch": 0.071, "grad_norm": 0.1697518229484558, "learning_rate": 4.990768447807282e-05, "loss": 0.0518, "step": 15200 }, { "epoch": 0.07105, "grad_norm": 0.22185924649238586, "learning_rate": 4.990732922831792e-05, "loss": 0.0543, "step": 15210 }, { "epoch": 0.0711, "grad_norm": 0.2215665727853775, "learning_rate": 4.990697329760601e-05, "loss": 0.0519, "step": 15220 }, { "epoch": 0.07115, "grad_norm": 0.21765001118183136, "learning_rate": 4.99066166859468e-05, "loss": 0.0511, "step": 15230 }, { "epoch": 0.0712, "grad_norm": 0.2361811399459839, "learning_rate": 4.990625939335004e-05, "loss": 0.0544, "step": 15240 }, { "epoch": 0.07125, "grad_norm": 0.25048092007637024, "learning_rate": 4.990590141982552e-05, "loss": 0.0544, "step": 15250 }, { "epoch": 0.0713, "grad_norm": 0.2167491912841797, "learning_rate": 4.9905542765382996e-05, "loss": 0.0538, "step": 15260 }, { "epoch": 0.07135, "grad_norm": 0.2278248518705368, "learning_rate": 4.9905183430032296e-05, "loss": 0.0551, "step": 15270 }, { "epoch": 0.0714, "grad_norm": 0.2136402279138565, "learning_rate": 4.990482341378324e-05, "loss": 0.0535, "step": 15280 }, { "epoch": 0.07145, "grad_norm": 0.18827708065509796, "learning_rate": 4.9904462716645675e-05, "loss": 0.051, "step": 15290 }, { "epoch": 0.0715, "grad_norm": 0.20222033560276031, "learning_rate": 4.990410133862944e-05, "loss": 0.0537, "step": 15300 }, { "epoch": 0.07155, "grad_norm": 0.2278817594051361, "learning_rate": 4.9903739279744436e-05, "loss": 0.0559, "step": 15310 }, { "epoch": 0.0716, "grad_norm": 0.26084935665130615, "learning_rate": 4.9903376540000555e-05, "loss": 0.0533, "step": 15320 }, { "epoch": 0.07165, "grad_norm": 0.24685749411582947, "learning_rate": 4.990301311940772e-05, "loss": 0.0535, "step": 15330 }, { "epoch": 0.0717, "grad_norm": 0.1969948559999466, "learning_rate": 4.990264901797586e-05, "loss": 0.052, "step": 15340 }, { "epoch": 0.07175, "grad_norm": 0.19155311584472656, "learning_rate": 4.990228423571493e-05, "loss": 0.0532, "step": 15350 }, { "epoch": 0.0718, "grad_norm": 0.19653113186359406, "learning_rate": 4.9901918772634906e-05, "loss": 0.051, "step": 15360 }, { "epoch": 0.07185, "grad_norm": 0.17781168222427368, "learning_rate": 4.990155262874577e-05, "loss": 0.0522, "step": 15370 }, { "epoch": 0.0719, "grad_norm": 0.1654060333967209, "learning_rate": 4.990118580405755e-05, "loss": 0.0514, "step": 15380 }, { "epoch": 0.07195, "grad_norm": 0.19361428916454315, "learning_rate": 4.9900818298580263e-05, "loss": 0.0523, "step": 15390 }, { "epoch": 0.072, "grad_norm": 0.2262229472398758, "learning_rate": 4.990045011232396e-05, "loss": 0.0506, "step": 15400 }, { "epoch": 0.07205, "grad_norm": 0.20477080345153809, "learning_rate": 4.9900081245298703e-05, "loss": 0.0491, "step": 15410 }, { "epoch": 0.0721, "grad_norm": 0.20688781142234802, "learning_rate": 4.9899711697514586e-05, "loss": 0.0504, "step": 15420 }, { "epoch": 0.07215, "grad_norm": 0.24755077064037323, "learning_rate": 4.9899341468981696e-05, "loss": 0.0512, "step": 15430 }, { "epoch": 0.0722, "grad_norm": 0.2354496866464615, "learning_rate": 4.9898970559710165e-05, "loss": 0.051, "step": 15440 }, { "epoch": 0.07225, "grad_norm": 0.1773119419813156, "learning_rate": 4.9898598969710137e-05, "loss": 0.0505, "step": 15450 }, { "epoch": 0.0723, "grad_norm": 0.20547893643379211, "learning_rate": 4.989822669899177e-05, "loss": 0.0513, "step": 15460 }, { "epoch": 0.07235, "grad_norm": 0.22126734256744385, "learning_rate": 4.9897853747565225e-05, "loss": 0.0522, "step": 15470 }, { "epoch": 0.0724, "grad_norm": 0.2021513730287552, "learning_rate": 4.9897480115440724e-05, "loss": 0.0556, "step": 15480 }, { "epoch": 0.07245, "grad_norm": 0.215627521276474, "learning_rate": 4.989710580262847e-05, "loss": 0.0513, "step": 15490 }, { "epoch": 0.0725, "grad_norm": 0.2203914225101471, "learning_rate": 4.9896730809138694e-05, "loss": 0.0567, "step": 15500 }, { "epoch": 0.07255, "grad_norm": 0.19085222482681274, "learning_rate": 4.9896355134981655e-05, "loss": 0.0541, "step": 15510 }, { "epoch": 0.0726, "grad_norm": 0.18488481640815735, "learning_rate": 4.9895978780167615e-05, "loss": 0.0518, "step": 15520 }, { "epoch": 0.07265, "grad_norm": 0.18811728060245514, "learning_rate": 4.989560174470687e-05, "loss": 0.0522, "step": 15530 }, { "epoch": 0.0727, "grad_norm": 0.233742818236351, "learning_rate": 4.989522402860972e-05, "loss": 0.0525, "step": 15540 }, { "epoch": 0.07275, "grad_norm": 0.20568574965000153, "learning_rate": 4.989484563188651e-05, "loss": 0.0564, "step": 15550 }, { "epoch": 0.0728, "grad_norm": 0.185214564204216, "learning_rate": 4.9894466554547566e-05, "loss": 0.0528, "step": 15560 }, { "epoch": 0.07285, "grad_norm": 0.17654481530189514, "learning_rate": 4.989408679660326e-05, "loss": 0.051, "step": 15570 }, { "epoch": 0.0729, "grad_norm": 0.1739576905965805, "learning_rate": 4.989370635806398e-05, "loss": 0.0533, "step": 15580 }, { "epoch": 0.07295, "grad_norm": 0.197053924202919, "learning_rate": 4.98933252389401e-05, "loss": 0.0499, "step": 15590 }, { "epoch": 0.073, "grad_norm": 0.2722446918487549, "learning_rate": 4.9892943439242076e-05, "loss": 0.0537, "step": 15600 }, { "epoch": 0.07305, "grad_norm": 0.20078343152999878, "learning_rate": 4.9892560958980326e-05, "loss": 0.0516, "step": 15610 }, { "epoch": 0.0731, "grad_norm": 0.21441137790679932, "learning_rate": 4.989217779816532e-05, "loss": 0.0568, "step": 15620 }, { "epoch": 0.07315, "grad_norm": 0.1925336718559265, "learning_rate": 4.9891793956807506e-05, "loss": 0.0512, "step": 15630 }, { "epoch": 0.0732, "grad_norm": 0.16830724477767944, "learning_rate": 4.9891409434917414e-05, "loss": 0.0505, "step": 15640 }, { "epoch": 0.07325, "grad_norm": 0.2086031436920166, "learning_rate": 4.9891024232505536e-05, "loss": 0.0504, "step": 15650 }, { "epoch": 0.0733, "grad_norm": 0.22745658457279205, "learning_rate": 4.98906383495824e-05, "loss": 0.0531, "step": 15660 }, { "epoch": 0.07335, "grad_norm": 0.17610715329647064, "learning_rate": 4.9890251786158565e-05, "loss": 0.0491, "step": 15670 }, { "epoch": 0.0734, "grad_norm": 0.1811055839061737, "learning_rate": 4.9889864542244594e-05, "loss": 0.0486, "step": 15680 }, { "epoch": 0.07345, "grad_norm": 0.18700256943702698, "learning_rate": 4.9889476617851085e-05, "loss": 0.0543, "step": 15690 }, { "epoch": 0.0735, "grad_norm": 0.20162612199783325, "learning_rate": 4.988908801298863e-05, "loss": 0.0522, "step": 15700 }, { "epoch": 0.07355, "grad_norm": 0.16793783009052277, "learning_rate": 4.988869872766786e-05, "loss": 0.0527, "step": 15710 }, { "epoch": 0.0736, "grad_norm": 0.2012702226638794, "learning_rate": 4.988830876189942e-05, "loss": 0.0496, "step": 15720 }, { "epoch": 0.07365, "grad_norm": 0.18021146953105927, "learning_rate": 4.988791811569396e-05, "loss": 0.0487, "step": 15730 }, { "epoch": 0.0737, "grad_norm": 0.2230038046836853, "learning_rate": 4.988752678906218e-05, "loss": 0.0497, "step": 15740 }, { "epoch": 0.07375, "grad_norm": 0.20034830272197723, "learning_rate": 4.9887134782014764e-05, "loss": 0.0515, "step": 15750 }, { "epoch": 0.0738, "grad_norm": 0.18192660808563232, "learning_rate": 4.988674209456243e-05, "loss": 0.051, "step": 15760 }, { "epoch": 0.07385, "grad_norm": 0.21604807674884796, "learning_rate": 4.988634872671592e-05, "loss": 0.0529, "step": 15770 }, { "epoch": 0.0739, "grad_norm": 0.17603836953639984, "learning_rate": 4.988595467848598e-05, "loss": 0.051, "step": 15780 }, { "epoch": 0.07395, "grad_norm": 0.1528969407081604, "learning_rate": 4.988555994988339e-05, "loss": 0.053, "step": 15790 }, { "epoch": 0.074, "grad_norm": 0.18262742459774017, "learning_rate": 4.988516454091894e-05, "loss": 0.0488, "step": 15800 }, { "epoch": 0.07405, "grad_norm": 0.23504868149757385, "learning_rate": 4.988476845160345e-05, "loss": 0.0517, "step": 15810 }, { "epoch": 0.0741, "grad_norm": 0.1953057050704956, "learning_rate": 4.988437168194773e-05, "loss": 0.0505, "step": 15820 }, { "epoch": 0.07415, "grad_norm": 0.19112664461135864, "learning_rate": 4.988397423196264e-05, "loss": 0.0496, "step": 15830 }, { "epoch": 0.0742, "grad_norm": 0.17466707527637482, "learning_rate": 4.9883576101659037e-05, "loss": 0.0486, "step": 15840 }, { "epoch": 0.07425, "grad_norm": 0.2254481315612793, "learning_rate": 4.988317729104781e-05, "loss": 0.052, "step": 15850 }, { "epoch": 0.0743, "grad_norm": 0.20193511247634888, "learning_rate": 4.9882777800139875e-05, "loss": 0.0524, "step": 15860 }, { "epoch": 0.07435, "grad_norm": 0.22816495597362518, "learning_rate": 4.988237762894613e-05, "loss": 0.0542, "step": 15870 }, { "epoch": 0.0744, "grad_norm": 0.2185070514678955, "learning_rate": 4.9881976777477545e-05, "loss": 0.0574, "step": 15880 }, { "epoch": 0.07445, "grad_norm": 0.18639391660690308, "learning_rate": 4.9881575245745046e-05, "loss": 0.0568, "step": 15890 }, { "epoch": 0.0745, "grad_norm": 0.22813963890075684, "learning_rate": 4.988117303375964e-05, "loss": 0.0512, "step": 15900 }, { "epoch": 0.07455, "grad_norm": 0.1964694708585739, "learning_rate": 4.9880770141532304e-05, "loss": 0.0524, "step": 15910 }, { "epoch": 0.0746, "grad_norm": 0.17903338372707367, "learning_rate": 4.988036656907407e-05, "loss": 0.0491, "step": 15920 }, { "epoch": 0.07465, "grad_norm": 0.21408379077911377, "learning_rate": 4.987996231639594e-05, "loss": 0.0503, "step": 15930 }, { "epoch": 0.0747, "grad_norm": 0.222556933760643, "learning_rate": 4.9879557383509005e-05, "loss": 0.0515, "step": 15940 }, { "epoch": 0.07475, "grad_norm": 0.18890510499477386, "learning_rate": 4.9879151770424314e-05, "loss": 0.0494, "step": 15950 }, { "epoch": 0.0748, "grad_norm": 0.1653035432100296, "learning_rate": 4.9878745477152955e-05, "loss": 0.0527, "step": 15960 }, { "epoch": 0.07485, "grad_norm": 0.21293434500694275, "learning_rate": 4.987833850370605e-05, "loss": 0.0536, "step": 15970 }, { "epoch": 0.0749, "grad_norm": 0.17240995168685913, "learning_rate": 4.9877930850094715e-05, "loss": 0.0518, "step": 15980 }, { "epoch": 0.07495, "grad_norm": 0.1972210854291916, "learning_rate": 4.987752251633009e-05, "loss": 0.0486, "step": 15990 }, { "epoch": 0.075, "grad_norm": 0.16621819138526917, "learning_rate": 4.9877113502423345e-05, "loss": 0.0507, "step": 16000 }, { "epoch": 0.07505, "grad_norm": 0.16641293466091156, "learning_rate": 4.987670380838567e-05, "loss": 0.0498, "step": 16010 }, { "epoch": 0.0751, "grad_norm": 0.21070459485054016, "learning_rate": 4.987629343422825e-05, "loss": 0.0531, "step": 16020 }, { "epoch": 0.07515, "grad_norm": 0.15260066092014313, "learning_rate": 4.987588237996232e-05, "loss": 0.0531, "step": 16030 }, { "epoch": 0.0752, "grad_norm": 0.16156025230884552, "learning_rate": 4.987547064559911e-05, "loss": 0.0487, "step": 16040 }, { "epoch": 0.07525, "grad_norm": 0.19028621912002563, "learning_rate": 4.987505823114988e-05, "loss": 0.0475, "step": 16050 }, { "epoch": 0.0753, "grad_norm": 0.22921723127365112, "learning_rate": 4.9874645136625894e-05, "loss": 0.0516, "step": 16060 }, { "epoch": 0.07535, "grad_norm": 0.24922367930412292, "learning_rate": 4.987423136203847e-05, "loss": 0.0523, "step": 16070 }, { "epoch": 0.0754, "grad_norm": 0.24001844227313995, "learning_rate": 4.98738169073989e-05, "loss": 0.0519, "step": 16080 }, { "epoch": 0.07545, "grad_norm": 0.18395234644412994, "learning_rate": 4.987340177271851e-05, "loss": 0.0501, "step": 16090 }, { "epoch": 0.0755, "grad_norm": 0.2519650459289551, "learning_rate": 4.9872985958008664e-05, "loss": 0.0514, "step": 16100 }, { "epoch": 0.07555, "grad_norm": 0.22571155428886414, "learning_rate": 4.9872569463280736e-05, "loss": 0.0567, "step": 16110 }, { "epoch": 0.0756, "grad_norm": 0.22015048563480377, "learning_rate": 4.987215228854609e-05, "loss": 0.0506, "step": 16120 }, { "epoch": 0.07565, "grad_norm": 0.21603775024414062, "learning_rate": 4.9871734433816156e-05, "loss": 0.0521, "step": 16130 }, { "epoch": 0.0757, "grad_norm": 0.2373482584953308, "learning_rate": 4.9871315899102345e-05, "loss": 0.0512, "step": 16140 }, { "epoch": 0.07575, "grad_norm": 0.19632349908351898, "learning_rate": 4.98708966844161e-05, "loss": 0.0542, "step": 16150 }, { "epoch": 0.0758, "grad_norm": 0.2170940786600113, "learning_rate": 4.987047678976887e-05, "loss": 0.0484, "step": 16160 }, { "epoch": 0.07585, "grad_norm": 0.2200576663017273, "learning_rate": 4.987005621517217e-05, "loss": 0.0514, "step": 16170 }, { "epoch": 0.0759, "grad_norm": 0.30243274569511414, "learning_rate": 4.9869634960637454e-05, "loss": 0.0518, "step": 16180 }, { "epoch": 0.07595, "grad_norm": 0.2449166476726532, "learning_rate": 4.9869213026176275e-05, "loss": 0.0512, "step": 16190 }, { "epoch": 0.076, "grad_norm": 0.22707422077655792, "learning_rate": 4.986879041180016e-05, "loss": 0.05, "step": 16200 }, { "epoch": 0.07605, "grad_norm": 0.17047467827796936, "learning_rate": 4.986836711752064e-05, "loss": 0.0505, "step": 16210 }, { "epoch": 0.0761, "grad_norm": 0.19080035388469696, "learning_rate": 4.986794314334932e-05, "loss": 0.0497, "step": 16220 }, { "epoch": 0.07615, "grad_norm": 0.20476721227169037, "learning_rate": 4.986751848929777e-05, "loss": 0.0501, "step": 16230 }, { "epoch": 0.0762, "grad_norm": 0.31264495849609375, "learning_rate": 4.9867093155377606e-05, "loss": 0.0536, "step": 16240 }, { "epoch": 0.07625, "grad_norm": 0.21649867296218872, "learning_rate": 4.986666714160047e-05, "loss": 0.0512, "step": 16250 }, { "epoch": 0.0763, "grad_norm": 0.2247818261384964, "learning_rate": 4.986624044797799e-05, "loss": 0.0525, "step": 16260 }, { "epoch": 0.07635, "grad_norm": 0.19337685406208038, "learning_rate": 4.9865813074521825e-05, "loss": 0.0508, "step": 16270 }, { "epoch": 0.0764, "grad_norm": 0.23544394969940186, "learning_rate": 4.9865385021243686e-05, "loss": 0.0517, "step": 16280 }, { "epoch": 0.07645, "grad_norm": 0.177505761384964, "learning_rate": 4.986495628815526e-05, "loss": 0.0517, "step": 16290 }, { "epoch": 0.0765, "grad_norm": 0.19319675862789154, "learning_rate": 4.986452687526827e-05, "loss": 0.0524, "step": 16300 }, { "epoch": 0.07655, "grad_norm": 0.20353886485099792, "learning_rate": 4.9864096782594446e-05, "loss": 0.0526, "step": 16310 }, { "epoch": 0.0766, "grad_norm": 0.2161436676979065, "learning_rate": 4.986366601014557e-05, "loss": 0.0532, "step": 16320 }, { "epoch": 0.07665, "grad_norm": 0.23269261419773102, "learning_rate": 4.98632345579334e-05, "loss": 0.0508, "step": 16330 }, { "epoch": 0.0767, "grad_norm": 0.16609209775924683, "learning_rate": 4.9862802425969744e-05, "loss": 0.0526, "step": 16340 }, { "epoch": 0.07675, "grad_norm": 0.24200305342674255, "learning_rate": 4.9862369614266404e-05, "loss": 0.0542, "step": 16350 }, { "epoch": 0.0768, "grad_norm": 0.21339236199855804, "learning_rate": 4.9861936122835223e-05, "loss": 0.0496, "step": 16360 }, { "epoch": 0.07685, "grad_norm": 0.218331441283226, "learning_rate": 4.986150195168805e-05, "loss": 0.0522, "step": 16370 }, { "epoch": 0.0769, "grad_norm": 0.19578105211257935, "learning_rate": 4.9861067100836744e-05, "loss": 0.052, "step": 16380 }, { "epoch": 0.07695, "grad_norm": 0.24065200984477997, "learning_rate": 4.9860631570293216e-05, "loss": 0.0508, "step": 16390 }, { "epoch": 0.077, "grad_norm": 0.19160734117031097, "learning_rate": 4.986019536006935e-05, "loss": 0.0533, "step": 16400 }, { "epoch": 0.07705, "grad_norm": 0.24766413867473602, "learning_rate": 4.9859758470177084e-05, "loss": 0.0503, "step": 16410 }, { "epoch": 0.0771, "grad_norm": 0.21655277907848358, "learning_rate": 4.985932090062837e-05, "loss": 0.0537, "step": 16420 }, { "epoch": 0.07715, "grad_norm": 0.22482611238956451, "learning_rate": 4.985888265143515e-05, "loss": 0.051, "step": 16430 }, { "epoch": 0.0772, "grad_norm": 0.19084882736206055, "learning_rate": 4.9858443722609426e-05, "loss": 0.0489, "step": 16440 }, { "epoch": 0.07725, "grad_norm": 0.1958894431591034, "learning_rate": 4.985800411416318e-05, "loss": 0.0507, "step": 16450 }, { "epoch": 0.0773, "grad_norm": 0.19690927863121033, "learning_rate": 4.9857563826108456e-05, "loss": 0.0507, "step": 16460 }, { "epoch": 0.07735, "grad_norm": 0.19239164888858795, "learning_rate": 4.985712285845726e-05, "loss": 0.0521, "step": 16470 }, { "epoch": 0.0774, "grad_norm": 0.19204671680927277, "learning_rate": 4.9856681211221666e-05, "loss": 0.0536, "step": 16480 }, { "epoch": 0.07745, "grad_norm": 0.20469598472118378, "learning_rate": 4.9856238884413754e-05, "loss": 0.0493, "step": 16490 }, { "epoch": 0.0775, "grad_norm": 0.1652633398771286, "learning_rate": 4.9855795878045606e-05, "loss": 0.051, "step": 16500 }, { "epoch": 0.07755, "grad_norm": 0.16923579573631287, "learning_rate": 4.985535219212933e-05, "loss": 0.0493, "step": 16510 }, { "epoch": 0.0776, "grad_norm": 0.1931590586900711, "learning_rate": 4.9854907826677074e-05, "loss": 0.0499, "step": 16520 }, { "epoch": 0.07765, "grad_norm": 0.17787854373455048, "learning_rate": 4.985446278170097e-05, "loss": 0.0499, "step": 16530 }, { "epoch": 0.0777, "grad_norm": 0.21355508267879486, "learning_rate": 4.9854017057213187e-05, "loss": 0.0539, "step": 16540 }, { "epoch": 0.07775, "grad_norm": 0.17724072933197021, "learning_rate": 4.985357065322592e-05, "loss": 0.052, "step": 16550 }, { "epoch": 0.0778, "grad_norm": 0.19531121850013733, "learning_rate": 4.985312356975137e-05, "loss": 0.0519, "step": 16560 }, { "epoch": 0.07785, "grad_norm": 0.17170202732086182, "learning_rate": 4.985267580680175e-05, "loss": 0.0568, "step": 16570 }, { "epoch": 0.0779, "grad_norm": 0.22292238473892212, "learning_rate": 4.9852227364389316e-05, "loss": 0.0529, "step": 16580 }, { "epoch": 0.07795, "grad_norm": 0.1876860409975052, "learning_rate": 4.985177824252632e-05, "loss": 0.0508, "step": 16590 }, { "epoch": 0.078, "grad_norm": 0.19387325644493103, "learning_rate": 4.9851328441225044e-05, "loss": 0.05, "step": 16600 }, { "epoch": 0.07805, "grad_norm": 0.19660423696041107, "learning_rate": 4.9850877960497786e-05, "loss": 0.0498, "step": 16610 }, { "epoch": 0.0781, "grad_norm": 0.2226826399564743, "learning_rate": 4.9850426800356855e-05, "loss": 0.0552, "step": 16620 }, { "epoch": 0.07815, "grad_norm": 0.16555924713611603, "learning_rate": 4.9849974960814606e-05, "loss": 0.0496, "step": 16630 }, { "epoch": 0.0782, "grad_norm": 0.21441705524921417, "learning_rate": 4.9849522441883364e-05, "loss": 0.0563, "step": 16640 }, { "epoch": 0.07825, "grad_norm": 0.20419728755950928, "learning_rate": 4.984906924357552e-05, "loss": 0.0528, "step": 16650 }, { "epoch": 0.0783, "grad_norm": 0.2105439454317093, "learning_rate": 4.984861536590345e-05, "loss": 0.0516, "step": 16660 }, { "epoch": 0.07835, "grad_norm": 0.19244681298732758, "learning_rate": 4.984816080887958e-05, "loss": 0.0517, "step": 16670 }, { "epoch": 0.0784, "grad_norm": 0.1926605999469757, "learning_rate": 4.9847705572516326e-05, "loss": 0.0495, "step": 16680 }, { "epoch": 0.07845, "grad_norm": 0.16235984861850739, "learning_rate": 4.9847249656826136e-05, "loss": 0.0509, "step": 16690 }, { "epoch": 0.0785, "grad_norm": 0.18869003653526306, "learning_rate": 4.984679306182147e-05, "loss": 0.0505, "step": 16700 }, { "epoch": 0.07855, "grad_norm": 0.1517942249774933, "learning_rate": 4.984633578751482e-05, "loss": 0.0498, "step": 16710 }, { "epoch": 0.0786, "grad_norm": 0.18166415393352509, "learning_rate": 4.984587783391869e-05, "loss": 0.0566, "step": 16720 }, { "epoch": 0.07865, "grad_norm": 0.19320838153362274, "learning_rate": 4.984541920104558e-05, "loss": 0.0502, "step": 16730 }, { "epoch": 0.0787, "grad_norm": 0.21377691626548767, "learning_rate": 4.984495988890806e-05, "loss": 0.0522, "step": 16740 }, { "epoch": 0.07875, "grad_norm": 0.1650165170431137, "learning_rate": 4.9844499897518656e-05, "loss": 0.0476, "step": 16750 }, { "epoch": 0.0788, "grad_norm": 0.23033910989761353, "learning_rate": 4.984403922688997e-05, "loss": 0.0549, "step": 16760 }, { "epoch": 0.07885, "grad_norm": 0.19480597972869873, "learning_rate": 4.984357787703458e-05, "loss": 0.0538, "step": 16770 }, { "epoch": 0.0789, "grad_norm": 0.21139460802078247, "learning_rate": 4.98431158479651e-05, "loss": 0.0529, "step": 16780 }, { "epoch": 0.07895, "grad_norm": 0.17839471995830536, "learning_rate": 4.984265313969417e-05, "loss": 0.0541, "step": 16790 }, { "epoch": 0.079, "grad_norm": 0.18361295759677887, "learning_rate": 4.9842189752234435e-05, "loss": 0.0487, "step": 16800 }, { "epoch": 0.07905, "grad_norm": 0.1692679226398468, "learning_rate": 4.9841725685598574e-05, "loss": 0.0492, "step": 16810 }, { "epoch": 0.0791, "grad_norm": 0.1974862962961197, "learning_rate": 4.984126093979925e-05, "loss": 0.051, "step": 16820 }, { "epoch": 0.07915, "grad_norm": 0.18256103992462158, "learning_rate": 4.9840795514849196e-05, "loss": 0.0524, "step": 16830 }, { "epoch": 0.0792, "grad_norm": 0.1663244068622589, "learning_rate": 4.9840329410761124e-05, "loss": 0.049, "step": 16840 }, { "epoch": 0.07925, "grad_norm": 0.18855704367160797, "learning_rate": 4.983986262754777e-05, "loss": 0.0506, "step": 16850 }, { "epoch": 0.0793, "grad_norm": 0.168840229511261, "learning_rate": 4.983939516522191e-05, "loss": 0.0522, "step": 16860 }, { "epoch": 0.07935, "grad_norm": 0.183172807097435, "learning_rate": 4.9838927023796315e-05, "loss": 0.0504, "step": 16870 }, { "epoch": 0.0794, "grad_norm": 0.26104679703712463, "learning_rate": 4.9838458203283786e-05, "loss": 0.0545, "step": 16880 }, { "epoch": 0.07945, "grad_norm": 0.18943244218826294, "learning_rate": 4.9837988703697144e-05, "loss": 0.0493, "step": 16890 }, { "epoch": 0.0795, "grad_norm": 0.20340140163898468, "learning_rate": 4.983751852504922e-05, "loss": 0.0517, "step": 16900 }, { "epoch": 0.07955, "grad_norm": 0.16128084063529968, "learning_rate": 4.983704766735288e-05, "loss": 0.0493, "step": 16910 }, { "epoch": 0.0796, "grad_norm": 0.17783492803573608, "learning_rate": 4.983657613062097e-05, "loss": 0.047, "step": 16920 }, { "epoch": 0.07965, "grad_norm": 0.15998443961143494, "learning_rate": 4.983610391486641e-05, "loss": 0.0516, "step": 16930 }, { "epoch": 0.0797, "grad_norm": 0.1869068592786789, "learning_rate": 4.9835631020102104e-05, "loss": 0.0555, "step": 16940 }, { "epoch": 0.07975, "grad_norm": 0.21481378376483917, "learning_rate": 4.9835157446340965e-05, "loss": 0.0509, "step": 16950 }, { "epoch": 0.0798, "grad_norm": 0.19575315713882446, "learning_rate": 4.983468319359595e-05, "loss": 0.0519, "step": 16960 }, { "epoch": 0.07985, "grad_norm": 0.18808448314666748, "learning_rate": 4.983420826188004e-05, "loss": 0.0522, "step": 16970 }, { "epoch": 0.0799, "grad_norm": 0.161651149392128, "learning_rate": 4.98337326512062e-05, "loss": 0.0492, "step": 16980 }, { "epoch": 0.07995, "grad_norm": 0.16255159676074982, "learning_rate": 4.983325636158744e-05, "loss": 0.0482, "step": 16990 }, { "epoch": 0.08, "grad_norm": 0.17627467215061188, "learning_rate": 4.9832779393036777e-05, "loss": 0.0467, "step": 17000 }, { "epoch": 0.08005, "grad_norm": 0.17127850651741028, "learning_rate": 4.983230174556725e-05, "loss": 0.0503, "step": 17010 }, { "epoch": 0.0801, "grad_norm": 0.1601397693157196, "learning_rate": 4.983182341919194e-05, "loss": 0.0497, "step": 17020 }, { "epoch": 0.08015, "grad_norm": 0.17131473124027252, "learning_rate": 4.9831344413923885e-05, "loss": 0.047, "step": 17030 }, { "epoch": 0.0802, "grad_norm": 0.20350749790668488, "learning_rate": 4.983086472977622e-05, "loss": 0.0493, "step": 17040 }, { "epoch": 0.08025, "grad_norm": 0.15694685280323029, "learning_rate": 4.9830384366762026e-05, "loss": 0.0508, "step": 17050 }, { "epoch": 0.0803, "grad_norm": 0.18018211424350739, "learning_rate": 4.9829903324894466e-05, "loss": 0.05, "step": 17060 }, { "epoch": 0.08035, "grad_norm": 0.19399897754192352, "learning_rate": 4.982942160418667e-05, "loss": 0.0523, "step": 17070 }, { "epoch": 0.0804, "grad_norm": 0.18660689890384674, "learning_rate": 4.982893920465181e-05, "loss": 0.0497, "step": 17080 }, { "epoch": 0.08045, "grad_norm": 0.1875895857810974, "learning_rate": 4.9828456126303094e-05, "loss": 0.0493, "step": 17090 }, { "epoch": 0.0805, "grad_norm": 0.1983502358198166, "learning_rate": 4.982797236915371e-05, "loss": 0.0497, "step": 17100 }, { "epoch": 0.08055, "grad_norm": 0.24940334260463715, "learning_rate": 4.9827487933216884e-05, "loss": 0.0525, "step": 17110 }, { "epoch": 0.0806, "grad_norm": 0.2184525579214096, "learning_rate": 4.982700281850586e-05, "loss": 0.0509, "step": 17120 }, { "epoch": 0.08065, "grad_norm": 0.21082714200019836, "learning_rate": 4.982651702503392e-05, "loss": 0.053, "step": 17130 }, { "epoch": 0.0807, "grad_norm": 0.2231767475605011, "learning_rate": 4.982603055281432e-05, "loss": 0.0515, "step": 17140 }, { "epoch": 0.08075, "grad_norm": 0.250836044549942, "learning_rate": 4.982554340186038e-05, "loss": 0.0605, "step": 17150 }, { "epoch": 0.0808, "grad_norm": 0.22837120294570923, "learning_rate": 4.982505557218541e-05, "loss": 0.0522, "step": 17160 }, { "epoch": 0.08085, "grad_norm": 0.2457517683506012, "learning_rate": 4.9824567063802744e-05, "loss": 0.0554, "step": 17170 }, { "epoch": 0.0809, "grad_norm": 0.23828142881393433, "learning_rate": 4.982407787672574e-05, "loss": 0.0516, "step": 17180 }, { "epoch": 0.08095, "grad_norm": 0.20121601223945618, "learning_rate": 4.982358801096777e-05, "loss": 0.0505, "step": 17190 }, { "epoch": 0.081, "grad_norm": 0.20422187447547913, "learning_rate": 4.9823097466542236e-05, "loss": 0.0544, "step": 17200 }, { "epoch": 0.08105, "grad_norm": 0.15830935537815094, "learning_rate": 4.9822606243462534e-05, "loss": 0.0501, "step": 17210 }, { "epoch": 0.0811, "grad_norm": 0.166924387216568, "learning_rate": 4.982211434174211e-05, "loss": 0.0519, "step": 17220 }, { "epoch": 0.08115, "grad_norm": 0.15573611855506897, "learning_rate": 4.98216217613944e-05, "loss": 0.0494, "step": 17230 }, { "epoch": 0.0812, "grad_norm": 0.15739154815673828, "learning_rate": 4.982112850243288e-05, "loss": 0.0528, "step": 17240 }, { "epoch": 0.08125, "grad_norm": 0.1668911725282669, "learning_rate": 4.9820634564871034e-05, "loss": 0.0516, "step": 17250 }, { "epoch": 0.0813, "grad_norm": 0.17671068012714386, "learning_rate": 4.982013994872236e-05, "loss": 0.051, "step": 17260 }, { "epoch": 0.08135, "grad_norm": 0.16582036018371582, "learning_rate": 4.9819644654000387e-05, "loss": 0.0499, "step": 17270 }, { "epoch": 0.0814, "grad_norm": 0.1869654655456543, "learning_rate": 4.981914868071865e-05, "loss": 0.054, "step": 17280 }, { "epoch": 0.08145, "grad_norm": 0.19672545790672302, "learning_rate": 4.981865202889071e-05, "loss": 0.0493, "step": 17290 }, { "epoch": 0.0815, "grad_norm": 0.20300154387950897, "learning_rate": 4.981815469853015e-05, "loss": 0.0507, "step": 17300 }, { "epoch": 0.08155, "grad_norm": 0.1864527016878128, "learning_rate": 4.981765668965057e-05, "loss": 0.0513, "step": 17310 }, { "epoch": 0.0816, "grad_norm": 0.15904299914836884, "learning_rate": 4.9817158002265576e-05, "loss": 0.0493, "step": 17320 }, { "epoch": 0.08165, "grad_norm": 0.19241668283939362, "learning_rate": 4.98166586363888e-05, "loss": 0.0503, "step": 17330 }, { "epoch": 0.0817, "grad_norm": 0.17900395393371582, "learning_rate": 4.98161585920339e-05, "loss": 0.0508, "step": 17340 }, { "epoch": 0.08175, "grad_norm": 0.16683508455753326, "learning_rate": 4.981565786921456e-05, "loss": 0.0528, "step": 17350 }, { "epoch": 0.0818, "grad_norm": 0.17146515846252441, "learning_rate": 4.9815156467944446e-05, "loss": 0.0484, "step": 17360 }, { "epoch": 0.08185, "grad_norm": 0.195095032453537, "learning_rate": 4.9814654388237284e-05, "loss": 0.0491, "step": 17370 }, { "epoch": 0.0819, "grad_norm": 0.16585716605186462, "learning_rate": 4.981415163010679e-05, "loss": 0.0496, "step": 17380 }, { "epoch": 0.08195, "grad_norm": 0.15615801513195038, "learning_rate": 4.9813648193566705e-05, "loss": 0.049, "step": 17390 }, { "epoch": 0.082, "grad_norm": 0.20243534445762634, "learning_rate": 4.98131440786308e-05, "loss": 0.0562, "step": 17400 }, { "epoch": 0.08205, "grad_norm": 0.19609931111335754, "learning_rate": 4.981263928531287e-05, "loss": 0.0514, "step": 17410 }, { "epoch": 0.0821, "grad_norm": 0.19932952523231506, "learning_rate": 4.98121338136267e-05, "loss": 0.0521, "step": 17420 }, { "epoch": 0.08215, "grad_norm": 0.2170480191707611, "learning_rate": 4.981162766358611e-05, "loss": 0.0506, "step": 17430 }, { "epoch": 0.0822, "grad_norm": 0.21314817667007446, "learning_rate": 4.981112083520494e-05, "loss": 0.0499, "step": 17440 }, { "epoch": 0.08225, "grad_norm": 0.15489475429058075, "learning_rate": 4.981061332849705e-05, "loss": 0.0518, "step": 17450 }, { "epoch": 0.0823, "grad_norm": 0.18252049386501312, "learning_rate": 4.9810105143476315e-05, "loss": 0.0492, "step": 17460 }, { "epoch": 0.08235, "grad_norm": 0.1492038071155548, "learning_rate": 4.980959628015662e-05, "loss": 0.0495, "step": 17470 }, { "epoch": 0.0824, "grad_norm": 0.1855282336473465, "learning_rate": 4.980908673855189e-05, "loss": 0.0495, "step": 17480 }, { "epoch": 0.08245, "grad_norm": 0.15187287330627441, "learning_rate": 4.980857651867604e-05, "loss": 0.0496, "step": 17490 }, { "epoch": 0.0825, "grad_norm": 0.1776946783065796, "learning_rate": 4.980806562054303e-05, "loss": 0.0515, "step": 17500 }, { "epoch": 0.08255, "grad_norm": 0.19719122350215912, "learning_rate": 4.980755404416684e-05, "loss": 0.052, "step": 17510 }, { "epoch": 0.0826, "grad_norm": 0.1953863650560379, "learning_rate": 4.980704178956143e-05, "loss": 0.0538, "step": 17520 }, { "epoch": 0.08265, "grad_norm": 0.1708361655473709, "learning_rate": 4.9806528856740814e-05, "loss": 0.0519, "step": 17530 }, { "epoch": 0.0827, "grad_norm": 0.23801736533641815, "learning_rate": 4.9806015245719025e-05, "loss": 0.0522, "step": 17540 }, { "epoch": 0.08275, "grad_norm": 0.19790126383304596, "learning_rate": 4.9805500956510095e-05, "loss": 0.056, "step": 17550 }, { "epoch": 0.0828, "grad_norm": 0.17115181684494019, "learning_rate": 4.980498598912809e-05, "loss": 0.049, "step": 17560 }, { "epoch": 0.08285, "grad_norm": 0.15244634449481964, "learning_rate": 4.980447034358708e-05, "loss": 0.0501, "step": 17570 }, { "epoch": 0.0829, "grad_norm": 0.18820776045322418, "learning_rate": 4.9803954019901175e-05, "loss": 0.0512, "step": 17580 }, { "epoch": 0.08295, "grad_norm": 0.16231533885002136, "learning_rate": 4.980343701808449e-05, "loss": 0.051, "step": 17590 }, { "epoch": 0.083, "grad_norm": 0.18385298550128937, "learning_rate": 4.9802919338151154e-05, "loss": 0.0498, "step": 17600 }, { "epoch": 0.08305, "grad_norm": 0.145505890250206, "learning_rate": 4.980240098011532e-05, "loss": 0.0531, "step": 17610 }, { "epoch": 0.0831, "grad_norm": 0.19038888812065125, "learning_rate": 4.980188194399116e-05, "loss": 0.0501, "step": 17620 }, { "epoch": 0.08315, "grad_norm": 0.19386428594589233, "learning_rate": 4.980136222979286e-05, "loss": 0.05, "step": 17630 }, { "epoch": 0.0832, "grad_norm": 0.19215770065784454, "learning_rate": 4.9800841837534636e-05, "loss": 0.0495, "step": 17640 }, { "epoch": 0.08325, "grad_norm": 0.1821633279323578, "learning_rate": 4.980032076723073e-05, "loss": 0.0496, "step": 17650 }, { "epoch": 0.0833, "grad_norm": 0.16347461938858032, "learning_rate": 4.979979901889535e-05, "loss": 0.0512, "step": 17660 }, { "epoch": 0.08335, "grad_norm": 0.19061973690986633, "learning_rate": 4.979927659254279e-05, "loss": 0.0493, "step": 17670 }, { "epoch": 0.0834, "grad_norm": 0.21797873079776764, "learning_rate": 4.9798753488187324e-05, "loss": 0.0508, "step": 17680 }, { "epoch": 0.08345, "grad_norm": 0.189432293176651, "learning_rate": 4.979822970584325e-05, "loss": 0.0498, "step": 17690 }, { "epoch": 0.0835, "grad_norm": 0.23013871908187866, "learning_rate": 4.979770524552489e-05, "loss": 0.0541, "step": 17700 }, { "epoch": 0.08355, "grad_norm": 0.19550783932209015, "learning_rate": 4.97971801072466e-05, "loss": 0.0503, "step": 17710 }, { "epoch": 0.0836, "grad_norm": 0.20004889369010925, "learning_rate": 4.979665429102271e-05, "loss": 0.0516, "step": 17720 }, { "epoch": 0.08365, "grad_norm": 0.2468462437391281, "learning_rate": 4.979612779686761e-05, "loss": 0.0506, "step": 17730 }, { "epoch": 0.0837, "grad_norm": 0.2049475908279419, "learning_rate": 4.979560062479569e-05, "loss": 0.0534, "step": 17740 }, { "epoch": 0.08375, "grad_norm": 0.18321344256401062, "learning_rate": 4.9795072774821366e-05, "loss": 0.0506, "step": 17750 }, { "epoch": 0.0838, "grad_norm": 0.20350444316864014, "learning_rate": 4.979454424695906e-05, "loss": 0.0575, "step": 17760 }, { "epoch": 0.08385, "grad_norm": 0.18226414918899536, "learning_rate": 4.979401504122324e-05, "loss": 0.0484, "step": 17770 }, { "epoch": 0.0839, "grad_norm": 0.21651868522167206, "learning_rate": 4.979348515762836e-05, "loss": 0.0519, "step": 17780 }, { "epoch": 0.08395, "grad_norm": 0.15989641845226288, "learning_rate": 4.9792954596188914e-05, "loss": 0.052, "step": 17790 }, { "epoch": 0.084, "grad_norm": 0.20612096786499023, "learning_rate": 4.979242335691939e-05, "loss": 0.0499, "step": 17800 }, { "epoch": 0.08405, "grad_norm": 0.16908518970012665, "learning_rate": 4.979189143983434e-05, "loss": 0.0487, "step": 17810 }, { "epoch": 0.0841, "grad_norm": 0.18822510540485382, "learning_rate": 4.979135884494829e-05, "loss": 0.0504, "step": 17820 }, { "epoch": 0.08415, "grad_norm": 0.19432739913463593, "learning_rate": 4.97908255722758e-05, "loss": 0.0516, "step": 17830 }, { "epoch": 0.0842, "grad_norm": 0.1897604763507843, "learning_rate": 4.9790291621831456e-05, "loss": 0.0509, "step": 17840 }, { "epoch": 0.08425, "grad_norm": 0.18982402980327606, "learning_rate": 4.978975699362984e-05, "loss": 0.0519, "step": 17850 }, { "epoch": 0.0843, "grad_norm": 0.187894806265831, "learning_rate": 4.97892216876856e-05, "loss": 0.0485, "step": 17860 }, { "epoch": 0.08435, "grad_norm": 0.16973808407783508, "learning_rate": 4.978868570401333e-05, "loss": 0.0478, "step": 17870 }, { "epoch": 0.0844, "grad_norm": 0.15670092403888702, "learning_rate": 4.978814904262772e-05, "loss": 0.0495, "step": 17880 }, { "epoch": 0.08445, "grad_norm": 0.16168759763240814, "learning_rate": 4.9787611703543426e-05, "loss": 0.0487, "step": 17890 }, { "epoch": 0.0845, "grad_norm": 0.178312286734581, "learning_rate": 4.9787073686775136e-05, "loss": 0.049, "step": 17900 }, { "epoch": 0.08455, "grad_norm": 0.15167541801929474, "learning_rate": 4.978653499233756e-05, "loss": 0.0492, "step": 17910 }, { "epoch": 0.0846, "grad_norm": 0.17063525319099426, "learning_rate": 4.978599562024544e-05, "loss": 0.0532, "step": 17920 }, { "epoch": 0.08465, "grad_norm": 0.18666213750839233, "learning_rate": 4.978545557051351e-05, "loss": 0.0507, "step": 17930 }, { "epoch": 0.0847, "grad_norm": 0.1855110377073288, "learning_rate": 4.978491484315653e-05, "loss": 0.0475, "step": 17940 }, { "epoch": 0.08475, "grad_norm": 0.20270107686519623, "learning_rate": 4.978437343818929e-05, "loss": 0.0476, "step": 17950 }, { "epoch": 0.0848, "grad_norm": 0.22121216356754303, "learning_rate": 4.97838313556266e-05, "loss": 0.0481, "step": 17960 }, { "epoch": 0.08485, "grad_norm": 0.17030639946460724, "learning_rate": 4.978328859548326e-05, "loss": 0.0468, "step": 17970 }, { "epoch": 0.0849, "grad_norm": 0.17538540065288544, "learning_rate": 4.978274515777413e-05, "loss": 0.0513, "step": 17980 }, { "epoch": 0.08495, "grad_norm": 0.1701241135597229, "learning_rate": 4.9782201042514056e-05, "loss": 0.049, "step": 17990 }, { "epoch": 0.085, "grad_norm": 0.166990265250206, "learning_rate": 4.9781656249717914e-05, "loss": 0.0496, "step": 18000 }, { "epoch": 0.08505, "grad_norm": 0.20476819574832916, "learning_rate": 4.978111077940059e-05, "loss": 0.0508, "step": 18010 }, { "epoch": 0.0851, "grad_norm": 0.19486652314662933, "learning_rate": 4.978056463157702e-05, "loss": 0.049, "step": 18020 }, { "epoch": 0.08515, "grad_norm": 0.1732991337776184, "learning_rate": 4.978001780626212e-05, "loss": 0.0488, "step": 18030 }, { "epoch": 0.0852, "grad_norm": 0.19245341420173645, "learning_rate": 4.977947030347084e-05, "loss": 0.0484, "step": 18040 }, { "epoch": 0.08525, "grad_norm": 0.18052397668361664, "learning_rate": 4.977892212321815e-05, "loss": 0.0511, "step": 18050 }, { "epoch": 0.0853, "grad_norm": 0.23231765627861023, "learning_rate": 4.977837326551904e-05, "loss": 0.0537, "step": 18060 }, { "epoch": 0.08535, "grad_norm": 0.1854337900876999, "learning_rate": 4.977782373038852e-05, "loss": 0.0499, "step": 18070 }, { "epoch": 0.0854, "grad_norm": 0.20114870369434357, "learning_rate": 4.9777273517841597e-05, "loss": 0.0483, "step": 18080 }, { "epoch": 0.08545, "grad_norm": 0.1717618703842163, "learning_rate": 4.9776722627893334e-05, "loss": 0.0489, "step": 18090 }, { "epoch": 0.0855, "grad_norm": 0.16750378906726837, "learning_rate": 4.977617106055878e-05, "loss": 0.0513, "step": 18100 }, { "epoch": 0.08555, "grad_norm": 0.21718497574329376, "learning_rate": 4.977561881585301e-05, "loss": 0.0562, "step": 18110 }, { "epoch": 0.0856, "grad_norm": 0.19127683341503143, "learning_rate": 4.977506589379114e-05, "loss": 0.0526, "step": 18120 }, { "epoch": 0.08565, "grad_norm": 0.2382887452840805, "learning_rate": 4.977451229438827e-05, "loss": 0.054, "step": 18130 }, { "epoch": 0.0857, "grad_norm": 0.19061945378780365, "learning_rate": 4.977395801765954e-05, "loss": 0.0521, "step": 18140 }, { "epoch": 0.08575, "grad_norm": 0.18431106209754944, "learning_rate": 4.9773403063620104e-05, "loss": 0.0506, "step": 18150 }, { "epoch": 0.0858, "grad_norm": 0.16501373052597046, "learning_rate": 4.977284743228514e-05, "loss": 0.0486, "step": 18160 }, { "epoch": 0.08585, "grad_norm": 0.18923909962177277, "learning_rate": 4.977229112366983e-05, "loss": 0.0558, "step": 18170 }, { "epoch": 0.0859, "grad_norm": 0.17071984708309174, "learning_rate": 4.9771734137789394e-05, "loss": 0.0551, "step": 18180 }, { "epoch": 0.08595, "grad_norm": 0.20952653884887695, "learning_rate": 4.9771176474659045e-05, "loss": 0.0528, "step": 18190 }, { "epoch": 0.086, "grad_norm": 0.17564013600349426, "learning_rate": 4.977061813429404e-05, "loss": 0.0541, "step": 18200 }, { "epoch": 0.08605, "grad_norm": 0.17267075181007385, "learning_rate": 4.977005911670964e-05, "loss": 0.0501, "step": 18210 }, { "epoch": 0.0861, "grad_norm": 0.206747367978096, "learning_rate": 4.976949942192114e-05, "loss": 0.049, "step": 18220 }, { "epoch": 0.08615, "grad_norm": 0.19832727313041687, "learning_rate": 4.976893904994382e-05, "loss": 0.0493, "step": 18230 }, { "epoch": 0.0862, "grad_norm": 0.18825599551200867, "learning_rate": 4.9768378000793015e-05, "loss": 0.0542, "step": 18240 }, { "epoch": 0.08625, "grad_norm": 0.21697713434696198, "learning_rate": 4.976781627448406e-05, "loss": 0.0544, "step": 18250 }, { "epoch": 0.0863, "grad_norm": 0.21250379085540771, "learning_rate": 4.976725387103231e-05, "loss": 0.0498, "step": 18260 }, { "epoch": 0.08635, "grad_norm": 0.2346934974193573, "learning_rate": 4.9766690790453144e-05, "loss": 0.0479, "step": 18270 }, { "epoch": 0.0864, "grad_norm": 0.2579614222049713, "learning_rate": 4.976612703276196e-05, "loss": 0.0515, "step": 18280 }, { "epoch": 0.08645, "grad_norm": 0.261024534702301, "learning_rate": 4.976556259797417e-05, "loss": 0.0511, "step": 18290 }, { "epoch": 0.0865, "grad_norm": 0.2698875963687897, "learning_rate": 4.976499748610519e-05, "loss": 0.052, "step": 18300 }, { "epoch": 0.08655, "grad_norm": 0.23211123049259186, "learning_rate": 4.97644316971705e-05, "loss": 0.0506, "step": 18310 }, { "epoch": 0.0866, "grad_norm": 0.26521798968315125, "learning_rate": 4.976386523118554e-05, "loss": 0.0496, "step": 18320 }, { "epoch": 0.08665, "grad_norm": 0.24303749203681946, "learning_rate": 4.97632980881658e-05, "loss": 0.0493, "step": 18330 }, { "epoch": 0.0867, "grad_norm": 0.2587723731994629, "learning_rate": 4.976273026812681e-05, "loss": 0.0521, "step": 18340 }, { "epoch": 0.08675, "grad_norm": 0.20306704938411713, "learning_rate": 4.976216177108407e-05, "loss": 0.0495, "step": 18350 }, { "epoch": 0.0868, "grad_norm": 0.24386657774448395, "learning_rate": 4.976159259705313e-05, "loss": 0.0521, "step": 18360 }, { "epoch": 0.08685, "grad_norm": 0.22963173687458038, "learning_rate": 4.9761022746049544e-05, "loss": 0.0497, "step": 18370 }, { "epoch": 0.0869, "grad_norm": 0.2091069370508194, "learning_rate": 4.9760452218088915e-05, "loss": 0.0486, "step": 18380 }, { "epoch": 0.08695, "grad_norm": 0.2133539766073227, "learning_rate": 4.975988101318682e-05, "loss": 0.0527, "step": 18390 }, { "epoch": 0.087, "grad_norm": 0.19983141124248505, "learning_rate": 4.975930913135887e-05, "loss": 0.0493, "step": 18400 }, { "epoch": 0.08705, "grad_norm": 0.20242683589458466, "learning_rate": 4.9758736572620714e-05, "loss": 0.0517, "step": 18410 }, { "epoch": 0.0871, "grad_norm": 0.163058802485466, "learning_rate": 4.9758163336988e-05, "loss": 0.0484, "step": 18420 }, { "epoch": 0.08715, "grad_norm": 0.18098005652427673, "learning_rate": 4.97575894244764e-05, "loss": 0.0481, "step": 18430 }, { "epoch": 0.0872, "grad_norm": 0.2012663632631302, "learning_rate": 4.975701483510161e-05, "loss": 0.0521, "step": 18440 }, { "epoch": 0.08725, "grad_norm": 0.19771431386470795, "learning_rate": 4.9756439568879345e-05, "loss": 0.0493, "step": 18450 }, { "epoch": 0.0873, "grad_norm": 0.1853446364402771, "learning_rate": 4.975586362582531e-05, "loss": 0.0492, "step": 18460 }, { "epoch": 0.08735, "grad_norm": 0.17247962951660156, "learning_rate": 4.9755287005955264e-05, "loss": 0.0506, "step": 18470 }, { "epoch": 0.0874, "grad_norm": 0.1908915936946869, "learning_rate": 4.975470970928498e-05, "loss": 0.0497, "step": 18480 }, { "epoch": 0.08745, "grad_norm": 0.14187689125537872, "learning_rate": 4.9754131735830223e-05, "loss": 0.0493, "step": 18490 }, { "epoch": 0.0875, "grad_norm": 0.20994983613491058, "learning_rate": 4.975355308560681e-05, "loss": 0.0483, "step": 18500 }, { "epoch": 0.08755, "grad_norm": 0.21409648656845093, "learning_rate": 4.975297375863055e-05, "loss": 0.049, "step": 18510 }, { "epoch": 0.0876, "grad_norm": 0.1786034107208252, "learning_rate": 4.975239375491729e-05, "loss": 0.0514, "step": 18520 }, { "epoch": 0.08765, "grad_norm": 0.20096784830093384, "learning_rate": 4.9751813074482876e-05, "loss": 0.0474, "step": 18530 }, { "epoch": 0.0877, "grad_norm": 0.18679524958133698, "learning_rate": 4.975123171734321e-05, "loss": 0.0492, "step": 18540 }, { "epoch": 0.08775, "grad_norm": 0.19444356858730316, "learning_rate": 4.975064968351415e-05, "loss": 0.0476, "step": 18550 }, { "epoch": 0.0878, "grad_norm": 0.20263616740703583, "learning_rate": 4.975006697301163e-05, "loss": 0.0545, "step": 18560 }, { "epoch": 0.08785, "grad_norm": 0.23749925196170807, "learning_rate": 4.974948358585158e-05, "loss": 0.0511, "step": 18570 }, { "epoch": 0.0879, "grad_norm": 0.19112126529216766, "learning_rate": 4.9748899522049944e-05, "loss": 0.0504, "step": 18580 }, { "epoch": 0.08795, "grad_norm": 0.1887068897485733, "learning_rate": 4.9748314781622696e-05, "loss": 0.0495, "step": 18590 }, { "epoch": 0.088, "grad_norm": 0.1935853362083435, "learning_rate": 4.974772936458582e-05, "loss": 0.0496, "step": 18600 }, { "epoch": 0.08805, "grad_norm": 0.1678624302148819, "learning_rate": 4.9747143270955324e-05, "loss": 0.0472, "step": 18610 }, { "epoch": 0.0881, "grad_norm": 0.21695537865161896, "learning_rate": 4.974655650074722e-05, "loss": 0.0474, "step": 18620 }, { "epoch": 0.08815, "grad_norm": 0.1442045122385025, "learning_rate": 4.974596905397756e-05, "loss": 0.0481, "step": 18630 }, { "epoch": 0.0882, "grad_norm": 0.17364618182182312, "learning_rate": 4.97453809306624e-05, "loss": 0.0469, "step": 18640 }, { "epoch": 0.08825, "grad_norm": 0.16962353885173798, "learning_rate": 4.974479213081783e-05, "loss": 0.0521, "step": 18650 }, { "epoch": 0.0883, "grad_norm": 0.21104495227336884, "learning_rate": 4.9744202654459935e-05, "loss": 0.0505, "step": 18660 }, { "epoch": 0.08835, "grad_norm": 0.13724002242088318, "learning_rate": 4.974361250160483e-05, "loss": 0.051, "step": 18670 }, { "epoch": 0.0884, "grad_norm": 0.19101646542549133, "learning_rate": 4.974302167226866e-05, "loss": 0.0493, "step": 18680 }, { "epoch": 0.08845, "grad_norm": 0.14986075460910797, "learning_rate": 4.974243016646758e-05, "loss": 0.0493, "step": 18690 }, { "epoch": 0.0885, "grad_norm": 0.18582309782505035, "learning_rate": 4.9741837984217746e-05, "loss": 0.0479, "step": 18700 }, { "epoch": 0.08855, "grad_norm": 0.18872737884521484, "learning_rate": 4.974124512553535e-05, "loss": 0.0477, "step": 18710 }, { "epoch": 0.0886, "grad_norm": 0.20704255998134613, "learning_rate": 4.9740651590436624e-05, "loss": 0.0481, "step": 18720 }, { "epoch": 0.08865, "grad_norm": 0.2147834151983261, "learning_rate": 4.9740057378937764e-05, "loss": 0.0486, "step": 18730 }, { "epoch": 0.0887, "grad_norm": 0.166178360581398, "learning_rate": 4.9739462491055035e-05, "loss": 0.0483, "step": 18740 }, { "epoch": 0.08875, "grad_norm": 0.2481415867805481, "learning_rate": 4.9738866926804694e-05, "loss": 0.0486, "step": 18750 }, { "epoch": 0.0888, "grad_norm": 0.1964527815580368, "learning_rate": 4.973827068620303e-05, "loss": 0.0496, "step": 18760 }, { "epoch": 0.08885, "grad_norm": 0.22712422907352448, "learning_rate": 4.973767376926633e-05, "loss": 0.0529, "step": 18770 }, { "epoch": 0.0889, "grad_norm": 0.21424773335456848, "learning_rate": 4.9737076176010935e-05, "loss": 0.0489, "step": 18780 }, { "epoch": 0.08895, "grad_norm": 0.19034512341022491, "learning_rate": 4.973647790645316e-05, "loss": 0.0487, "step": 18790 }, { "epoch": 0.089, "grad_norm": 0.1828261762857437, "learning_rate": 4.9735878960609385e-05, "loss": 0.05, "step": 18800 }, { "epoch": 0.08905, "grad_norm": 0.210920050740242, "learning_rate": 4.973527933849596e-05, "loss": 0.0502, "step": 18810 }, { "epoch": 0.0891, "grad_norm": 0.15590353310108185, "learning_rate": 4.9734679040129296e-05, "loss": 0.0499, "step": 18820 }, { "epoch": 0.08915, "grad_norm": 0.17563462257385254, "learning_rate": 4.973407806552579e-05, "loss": 0.0503, "step": 18830 }, { "epoch": 0.0892, "grad_norm": 0.23146726191043854, "learning_rate": 4.97334764147019e-05, "loss": 0.0536, "step": 18840 }, { "epoch": 0.08925, "grad_norm": 0.18807169795036316, "learning_rate": 4.9732874087674044e-05, "loss": 0.0483, "step": 18850 }, { "epoch": 0.0893, "grad_norm": 0.16290698945522308, "learning_rate": 4.9732271084458704e-05, "loss": 0.0481, "step": 18860 }, { "epoch": 0.08935, "grad_norm": 0.15102915465831757, "learning_rate": 4.973166740507236e-05, "loss": 0.0478, "step": 18870 }, { "epoch": 0.0894, "grad_norm": 0.17469939589500427, "learning_rate": 4.9731063049531527e-05, "loss": 0.0491, "step": 18880 }, { "epoch": 0.08945, "grad_norm": 0.14090858399868011, "learning_rate": 4.973045801785272e-05, "loss": 0.0472, "step": 18890 }, { "epoch": 0.0895, "grad_norm": 0.16017688810825348, "learning_rate": 4.9729852310052475e-05, "loss": 0.0488, "step": 18900 }, { "epoch": 0.08955, "grad_norm": 0.24661116302013397, "learning_rate": 4.9729245926147364e-05, "loss": 0.0481, "step": 18910 }, { "epoch": 0.0896, "grad_norm": 0.19585567712783813, "learning_rate": 4.9728638866153965e-05, "loss": 0.0508, "step": 18920 }, { "epoch": 0.08965, "grad_norm": 0.1510034054517746, "learning_rate": 4.972803113008886e-05, "loss": 0.0456, "step": 18930 }, { "epoch": 0.0897, "grad_norm": 0.23765966296195984, "learning_rate": 4.972742271796868e-05, "loss": 0.0485, "step": 18940 }, { "epoch": 0.08975, "grad_norm": 0.16872632503509521, "learning_rate": 4.9726813629810056e-05, "loss": 0.0468, "step": 18950 }, { "epoch": 0.0898, "grad_norm": 0.1598125398159027, "learning_rate": 4.972620386562963e-05, "loss": 0.048, "step": 18960 }, { "epoch": 0.08985, "grad_norm": 0.2012661248445511, "learning_rate": 4.9725593425444075e-05, "loss": 0.0475, "step": 18970 }, { "epoch": 0.0899, "grad_norm": 0.16737717390060425, "learning_rate": 4.972498230927009e-05, "loss": 0.0474, "step": 18980 }, { "epoch": 0.08995, "grad_norm": 0.16477149724960327, "learning_rate": 4.972437051712438e-05, "loss": 0.0475, "step": 18990 }, { "epoch": 0.09, "grad_norm": 0.1504310518503189, "learning_rate": 4.972375804902366e-05, "loss": 0.0489, "step": 19000 }, { "epoch": 0.09005, "grad_norm": 0.16693048179149628, "learning_rate": 4.97231449049847e-05, "loss": 0.0472, "step": 19010 }, { "epoch": 0.0901, "grad_norm": 0.15752717852592468, "learning_rate": 4.9722531085024234e-05, "loss": 0.0481, "step": 19020 }, { "epoch": 0.09015, "grad_norm": 0.1765477955341339, "learning_rate": 4.972191658915906e-05, "loss": 0.0471, "step": 19030 }, { "epoch": 0.0902, "grad_norm": 0.16440802812576294, "learning_rate": 4.972130141740597e-05, "loss": 0.0489, "step": 19040 }, { "epoch": 0.09025, "grad_norm": 0.1679811179637909, "learning_rate": 4.972068556978179e-05, "loss": 0.0485, "step": 19050 }, { "epoch": 0.0903, "grad_norm": 0.14989008009433746, "learning_rate": 4.9720069046303355e-05, "loss": 0.0478, "step": 19060 }, { "epoch": 0.09035, "grad_norm": 0.19462595880031586, "learning_rate": 4.971945184698751e-05, "loss": 0.0519, "step": 19070 }, { "epoch": 0.0904, "grad_norm": 0.2007419466972351, "learning_rate": 4.971883397185114e-05, "loss": 0.0481, "step": 19080 }, { "epoch": 0.09045, "grad_norm": 0.1667453795671463, "learning_rate": 4.9718215420911145e-05, "loss": 0.0489, "step": 19090 }, { "epoch": 0.0905, "grad_norm": 0.17430023849010468, "learning_rate": 4.9717596194184426e-05, "loss": 0.0471, "step": 19100 }, { "epoch": 0.09055, "grad_norm": 0.18542641401290894, "learning_rate": 4.9716976291687904e-05, "loss": 0.048, "step": 19110 }, { "epoch": 0.0906, "grad_norm": 0.17246194183826447, "learning_rate": 4.9716355713438546e-05, "loss": 0.0506, "step": 19120 }, { "epoch": 0.09065, "grad_norm": 0.19071803987026215, "learning_rate": 4.97157344594533e-05, "loss": 0.0481, "step": 19130 }, { "epoch": 0.0907, "grad_norm": 0.17150120437145233, "learning_rate": 4.9715112529749165e-05, "loss": 0.0511, "step": 19140 }, { "epoch": 0.09075, "grad_norm": 0.18714603781700134, "learning_rate": 4.971448992434313e-05, "loss": 0.0491, "step": 19150 }, { "epoch": 0.0908, "grad_norm": 0.1737683117389679, "learning_rate": 4.9713866643252235e-05, "loss": 0.0482, "step": 19160 }, { "epoch": 0.09085, "grad_norm": 0.15026921033859253, "learning_rate": 4.9713242686493504e-05, "loss": 0.0482, "step": 19170 }, { "epoch": 0.0909, "grad_norm": 0.1403944045305252, "learning_rate": 4.9712618054084e-05, "loss": 0.0468, "step": 19180 }, { "epoch": 0.09095, "grad_norm": 0.17783498764038086, "learning_rate": 4.9711992746040815e-05, "loss": 0.0472, "step": 19190 }, { "epoch": 0.091, "grad_norm": 0.16668634116649628, "learning_rate": 4.9711366762381023e-05, "loss": 0.0489, "step": 19200 }, { "epoch": 0.09105, "grad_norm": 0.14376750588417053, "learning_rate": 4.971074010312175e-05, "loss": 0.0469, "step": 19210 }, { "epoch": 0.0911, "grad_norm": 0.15709319710731506, "learning_rate": 4.971011276828012e-05, "loss": 0.0471, "step": 19220 }, { "epoch": 0.09115, "grad_norm": 0.15818123519420624, "learning_rate": 4.9709484757873296e-05, "loss": 0.0457, "step": 19230 }, { "epoch": 0.0912, "grad_norm": 0.20621873438358307, "learning_rate": 4.9708856071918444e-05, "loss": 0.0496, "step": 19240 }, { "epoch": 0.09125, "grad_norm": 0.16149362921714783, "learning_rate": 4.970822671043275e-05, "loss": 0.0461, "step": 19250 }, { "epoch": 0.0913, "grad_norm": 0.19152836501598358, "learning_rate": 4.970759667343341e-05, "loss": 0.0467, "step": 19260 }, { "epoch": 0.09135, "grad_norm": 0.1924281120300293, "learning_rate": 4.970696596093767e-05, "loss": 0.0472, "step": 19270 }, { "epoch": 0.0914, "grad_norm": 0.2085145264863968, "learning_rate": 4.9706334572962754e-05, "loss": 0.0463, "step": 19280 }, { "epoch": 0.09145, "grad_norm": 0.2535405457019806, "learning_rate": 4.970570250952594e-05, "loss": 0.0542, "step": 19290 }, { "epoch": 0.0915, "grad_norm": 0.20979748666286469, "learning_rate": 4.9705069770644495e-05, "loss": 0.0497, "step": 19300 }, { "epoch": 0.09155, "grad_norm": 0.24943894147872925, "learning_rate": 4.9704436356335726e-05, "loss": 0.051, "step": 19310 }, { "epoch": 0.0916, "grad_norm": 0.19635538756847382, "learning_rate": 4.970380226661695e-05, "loss": 0.0471, "step": 19320 }, { "epoch": 0.09165, "grad_norm": 0.1994963139295578, "learning_rate": 4.97031675015055e-05, "loss": 0.0484, "step": 19330 }, { "epoch": 0.0917, "grad_norm": 0.2685892879962921, "learning_rate": 4.970253206101873e-05, "loss": 0.0474, "step": 19340 }, { "epoch": 0.09175, "grad_norm": 0.24149328470230103, "learning_rate": 4.970189594517401e-05, "loss": 0.052, "step": 19350 }, { "epoch": 0.0918, "grad_norm": 0.24114196002483368, "learning_rate": 4.9701259153988746e-05, "loss": 0.0479, "step": 19360 }, { "epoch": 0.09185, "grad_norm": 0.21207568049430847, "learning_rate": 4.9700621687480326e-05, "loss": 0.0475, "step": 19370 }, { "epoch": 0.0919, "grad_norm": 0.21124345064163208, "learning_rate": 4.9699983545666196e-05, "loss": 0.0492, "step": 19380 }, { "epoch": 0.09195, "grad_norm": 0.19619761407375336, "learning_rate": 4.969934472856379e-05, "loss": 0.0485, "step": 19390 }, { "epoch": 0.092, "grad_norm": 0.18321619927883148, "learning_rate": 4.9698705236190576e-05, "loss": 0.0477, "step": 19400 }, { "epoch": 0.09205, "grad_norm": 0.17496579885482788, "learning_rate": 4.9698065068564046e-05, "loss": 0.0511, "step": 19410 }, { "epoch": 0.0921, "grad_norm": 0.18823853135108948, "learning_rate": 4.9697424225701695e-05, "loss": 0.0491, "step": 19420 }, { "epoch": 0.09215, "grad_norm": 0.1598740518093109, "learning_rate": 4.9696782707621034e-05, "loss": 0.0491, "step": 19430 }, { "epoch": 0.0922, "grad_norm": 0.21306900680065155, "learning_rate": 4.969614051433963e-05, "loss": 0.0517, "step": 19440 }, { "epoch": 0.09225, "grad_norm": 0.16243909299373627, "learning_rate": 4.969549764587501e-05, "loss": 0.0482, "step": 19450 }, { "epoch": 0.0923, "grad_norm": 0.17526249587535858, "learning_rate": 4.9694854102244756e-05, "loss": 0.0478, "step": 19460 }, { "epoch": 0.09235, "grad_norm": 0.1852334886789322, "learning_rate": 4.969420988346648e-05, "loss": 0.0493, "step": 19470 }, { "epoch": 0.0924, "grad_norm": 0.14817926287651062, "learning_rate": 4.9693564989557784e-05, "loss": 0.0472, "step": 19480 }, { "epoch": 0.09245, "grad_norm": 0.15732935070991516, "learning_rate": 4.9692919420536285e-05, "loss": 0.0474, "step": 19490 }, { "epoch": 0.0925, "grad_norm": 0.19525741040706635, "learning_rate": 4.969227317641966e-05, "loss": 0.0541, "step": 19500 }, { "epoch": 0.09255, "grad_norm": 0.18633055686950684, "learning_rate": 4.969162625722556e-05, "loss": 0.0489, "step": 19510 }, { "epoch": 0.0926, "grad_norm": 0.17439649999141693, "learning_rate": 4.9690978662971674e-05, "loss": 0.052, "step": 19520 }, { "epoch": 0.09265, "grad_norm": 0.172563835978508, "learning_rate": 4.96903303936757e-05, "loss": 0.0501, "step": 19530 }, { "epoch": 0.0927, "grad_norm": 0.1648489534854889, "learning_rate": 4.968968144935538e-05, "loss": 0.0521, "step": 19540 }, { "epoch": 0.09275, "grad_norm": 0.17998361587524414, "learning_rate": 4.968903183002843e-05, "loss": 0.0495, "step": 19550 }, { "epoch": 0.0928, "grad_norm": 0.15992969274520874, "learning_rate": 4.968838153571263e-05, "loss": 0.05, "step": 19560 }, { "epoch": 0.09285, "grad_norm": 0.17172817885875702, "learning_rate": 4.968773056642576e-05, "loss": 0.0472, "step": 19570 }, { "epoch": 0.0929, "grad_norm": 0.1717754751443863, "learning_rate": 4.9687078922185614e-05, "loss": 0.0484, "step": 19580 }, { "epoch": 0.09295, "grad_norm": 0.17016863822937012, "learning_rate": 4.9686426603009996e-05, "loss": 0.05, "step": 19590 }, { "epoch": 0.093, "grad_norm": 0.20225080847740173, "learning_rate": 4.968577360891675e-05, "loss": 0.052, "step": 19600 }, { "epoch": 0.09305, "grad_norm": 0.19731438159942627, "learning_rate": 4.968511993992373e-05, "loss": 0.0492, "step": 19610 }, { "epoch": 0.0931, "grad_norm": 0.16638094186782837, "learning_rate": 4.9684465596048804e-05, "loss": 0.0486, "step": 19620 }, { "epoch": 0.09315, "grad_norm": 0.19387859106063843, "learning_rate": 4.968381057730986e-05, "loss": 0.0484, "step": 19630 }, { "epoch": 0.0932, "grad_norm": 0.1559034287929535, "learning_rate": 4.9683154883724815e-05, "loss": 0.0477, "step": 19640 }, { "epoch": 0.09325, "grad_norm": 0.16176250576972961, "learning_rate": 4.968249851531158e-05, "loss": 0.0528, "step": 19650 }, { "epoch": 0.0933, "grad_norm": 0.1534813493490219, "learning_rate": 4.9681841472088116e-05, "loss": 0.0474, "step": 19660 }, { "epoch": 0.09335, "grad_norm": 0.1552479863166809, "learning_rate": 4.968118375407238e-05, "loss": 0.0466, "step": 19670 }, { "epoch": 0.0934, "grad_norm": 0.13360746204853058, "learning_rate": 4.9680525361282335e-05, "loss": 0.0476, "step": 19680 }, { "epoch": 0.09345, "grad_norm": 0.15875092148780823, "learning_rate": 4.9679866293736015e-05, "loss": 0.0486, "step": 19690 }, { "epoch": 0.0935, "grad_norm": 0.18327684700489044, "learning_rate": 4.9679206551451415e-05, "loss": 0.048, "step": 19700 }, { "epoch": 0.09355, "grad_norm": 0.15746480226516724, "learning_rate": 4.967854613444659e-05, "loss": 0.0485, "step": 19710 }, { "epoch": 0.0936, "grad_norm": 0.2012982964515686, "learning_rate": 4.9677885042739575e-05, "loss": 0.048, "step": 19720 }, { "epoch": 0.09365, "grad_norm": 0.17083559930324554, "learning_rate": 4.967722327634846e-05, "loss": 0.0466, "step": 19730 }, { "epoch": 0.0937, "grad_norm": 0.1629980057477951, "learning_rate": 4.9676560835291324e-05, "loss": 0.049, "step": 19740 }, { "epoch": 0.09375, "grad_norm": 0.17821356654167175, "learning_rate": 4.967589771958629e-05, "loss": 0.0479, "step": 19750 }, { "epoch": 0.0938, "grad_norm": 0.1729680448770523, "learning_rate": 4.9675233929251486e-05, "loss": 0.0463, "step": 19760 }, { "epoch": 0.09385, "grad_norm": 0.13014718890190125, "learning_rate": 4.967456946430505e-05, "loss": 0.0476, "step": 19770 }, { "epoch": 0.0939, "grad_norm": 0.15263806283473969, "learning_rate": 4.9673904324765154e-05, "loss": 0.0481, "step": 19780 }, { "epoch": 0.09395, "grad_norm": 0.18089689314365387, "learning_rate": 4.967323851064999e-05, "loss": 0.0499, "step": 19790 }, { "epoch": 0.094, "grad_norm": 0.17258678376674652, "learning_rate": 4.9672572021977747e-05, "loss": 0.0488, "step": 19800 }, { "epoch": 0.09405, "grad_norm": 0.1985284686088562, "learning_rate": 4.967190485876666e-05, "loss": 0.0516, "step": 19810 }, { "epoch": 0.0941, "grad_norm": 0.1818893700838089, "learning_rate": 4.967123702103496e-05, "loss": 0.0476, "step": 19820 }, { "epoch": 0.09415, "grad_norm": 0.16856974363327026, "learning_rate": 4.9670568508800905e-05, "loss": 0.05, "step": 19830 }, { "epoch": 0.0942, "grad_norm": 0.17095766961574554, "learning_rate": 4.966989932208279e-05, "loss": 0.0499, "step": 19840 }, { "epoch": 0.09425, "grad_norm": 0.18125998973846436, "learning_rate": 4.966922946089888e-05, "loss": 0.05, "step": 19850 }, { "epoch": 0.0943, "grad_norm": 0.14619998633861542, "learning_rate": 4.966855892526751e-05, "loss": 0.0472, "step": 19860 }, { "epoch": 0.09435, "grad_norm": 0.17567703127861023, "learning_rate": 4.9667887715207004e-05, "loss": 0.0479, "step": 19870 }, { "epoch": 0.0944, "grad_norm": 0.16288483142852783, "learning_rate": 4.966721583073572e-05, "loss": 0.0488, "step": 19880 }, { "epoch": 0.09445, "grad_norm": 0.17857293784618378, "learning_rate": 4.9666543271872017e-05, "loss": 0.0493, "step": 19890 }, { "epoch": 0.0945, "grad_norm": 0.1777510941028595, "learning_rate": 4.966587003863429e-05, "loss": 0.0471, "step": 19900 }, { "epoch": 0.09455, "grad_norm": 0.20810148119926453, "learning_rate": 4.9665196131040946e-05, "loss": 0.0506, "step": 19910 }, { "epoch": 0.0946, "grad_norm": 0.18452343344688416, "learning_rate": 4.96645215491104e-05, "loss": 0.0525, "step": 19920 }, { "epoch": 0.09465, "grad_norm": 0.1990736871957779, "learning_rate": 4.96638462928611e-05, "loss": 0.0506, "step": 19930 }, { "epoch": 0.0947, "grad_norm": 0.1752295345067978, "learning_rate": 4.966317036231152e-05, "loss": 0.048, "step": 19940 }, { "epoch": 0.09475, "grad_norm": 0.18605153262615204, "learning_rate": 4.966249375748012e-05, "loss": 0.0482, "step": 19950 }, { "epoch": 0.0948, "grad_norm": 0.17329007387161255, "learning_rate": 4.96618164783854e-05, "loss": 0.0484, "step": 19960 }, { "epoch": 0.09485, "grad_norm": 0.2310418337583542, "learning_rate": 4.966113852504589e-05, "loss": 0.0499, "step": 19970 }, { "epoch": 0.0949, "grad_norm": 0.14862242341041565, "learning_rate": 4.966045989748011e-05, "loss": 0.0472, "step": 19980 }, { "epoch": 0.09495, "grad_norm": 0.20593217015266418, "learning_rate": 4.965978059570663e-05, "loss": 0.0489, "step": 19990 }, { "epoch": 0.095, "grad_norm": 0.15908844769001007, "learning_rate": 4.9659100619744016e-05, "loss": 0.0468, "step": 20000 }, { "epoch": 0.09505, "grad_norm": 0.15577799081802368, "learning_rate": 4.965841996961084e-05, "loss": 0.0479, "step": 20010 }, { "epoch": 0.0951, "grad_norm": 0.20530954003334045, "learning_rate": 4.965773864532573e-05, "loss": 0.0483, "step": 20020 }, { "epoch": 0.09515, "grad_norm": 0.16188617050647736, "learning_rate": 4.965705664690732e-05, "loss": 0.0471, "step": 20030 }, { "epoch": 0.0952, "grad_norm": 0.1475004404783249, "learning_rate": 4.9656373974374235e-05, "loss": 0.0474, "step": 20040 }, { "epoch": 0.09525, "grad_norm": 0.14627623558044434, "learning_rate": 4.9655690627745156e-05, "loss": 0.0475, "step": 20050 }, { "epoch": 0.0953, "grad_norm": 0.14917166531085968, "learning_rate": 4.965500660703875e-05, "loss": 0.0482, "step": 20060 }, { "epoch": 0.09535, "grad_norm": 0.16681663691997528, "learning_rate": 4.965432191227373e-05, "loss": 0.0502, "step": 20070 }, { "epoch": 0.0954, "grad_norm": 0.18023552000522614, "learning_rate": 4.965363654346881e-05, "loss": 0.0507, "step": 20080 }, { "epoch": 0.09545, "grad_norm": 0.14802071452140808, "learning_rate": 4.9652950500642724e-05, "loss": 0.0491, "step": 20090 }, { "epoch": 0.0955, "grad_norm": 0.20631712675094604, "learning_rate": 4.965226378381424e-05, "loss": 0.0502, "step": 20100 }, { "epoch": 0.09555, "grad_norm": 0.16245928406715393, "learning_rate": 4.9651576393002124e-05, "loss": 0.0474, "step": 20110 }, { "epoch": 0.0956, "grad_norm": 0.18286171555519104, "learning_rate": 4.965088832822517e-05, "loss": 0.0484, "step": 20120 }, { "epoch": 0.09565, "grad_norm": 0.1389743834733963, "learning_rate": 4.965019958950219e-05, "loss": 0.0474, "step": 20130 }, { "epoch": 0.0957, "grad_norm": 0.15614444017410278, "learning_rate": 4.9649510176852016e-05, "loss": 0.0488, "step": 20140 }, { "epoch": 0.09575, "grad_norm": 0.15054257214069366, "learning_rate": 4.964882009029349e-05, "loss": 0.047, "step": 20150 }, { "epoch": 0.0958, "grad_norm": 0.14017704129219055, "learning_rate": 4.964812932984548e-05, "loss": 0.0482, "step": 20160 }, { "epoch": 0.09585, "grad_norm": 0.17021861672401428, "learning_rate": 4.964743789552688e-05, "loss": 0.0473, "step": 20170 }, { "epoch": 0.0959, "grad_norm": 0.16860070824623108, "learning_rate": 4.964674578735659e-05, "loss": 0.0487, "step": 20180 }, { "epoch": 0.09595, "grad_norm": 0.1646297574043274, "learning_rate": 4.964605300535353e-05, "loss": 0.0475, "step": 20190 }, { "epoch": 0.096, "grad_norm": 0.14261919260025024, "learning_rate": 4.964535954953663e-05, "loss": 0.047, "step": 20200 }, { "epoch": 0.09605, "grad_norm": 0.1402837634086609, "learning_rate": 4.9644665419924864e-05, "loss": 0.0445, "step": 20210 }, { "epoch": 0.0961, "grad_norm": 0.17717242240905762, "learning_rate": 4.96439706165372e-05, "loss": 0.045, "step": 20220 }, { "epoch": 0.09615, "grad_norm": 0.16690470278263092, "learning_rate": 4.9643275139392646e-05, "loss": 0.0456, "step": 20230 }, { "epoch": 0.0962, "grad_norm": 0.20035287737846375, "learning_rate": 4.96425789885102e-05, "loss": 0.0485, "step": 20240 }, { "epoch": 0.09625, "grad_norm": 0.1991281360387802, "learning_rate": 4.964188216390891e-05, "loss": 0.0506, "step": 20250 }, { "epoch": 0.0963, "grad_norm": 0.20741082727909088, "learning_rate": 4.964118466560782e-05, "loss": 0.0516, "step": 20260 }, { "epoch": 0.09635, "grad_norm": 0.18098628520965576, "learning_rate": 4.9640486493625996e-05, "loss": 0.0476, "step": 20270 }, { "epoch": 0.0964, "grad_norm": 0.16041788458824158, "learning_rate": 4.9639787647982525e-05, "loss": 0.0481, "step": 20280 }, { "epoch": 0.09645, "grad_norm": 0.15006977319717407, "learning_rate": 4.963908812869652e-05, "loss": 0.0477, "step": 20290 }, { "epoch": 0.0965, "grad_norm": 0.137539803981781, "learning_rate": 4.963838793578709e-05, "loss": 0.0466, "step": 20300 }, { "epoch": 0.09655, "grad_norm": 0.1478036642074585, "learning_rate": 4.96376870692734e-05, "loss": 0.0462, "step": 20310 }, { "epoch": 0.0966, "grad_norm": 0.1680741310119629, "learning_rate": 4.963698552917461e-05, "loss": 0.0463, "step": 20320 }, { "epoch": 0.09665, "grad_norm": 0.141627699136734, "learning_rate": 4.963628331550988e-05, "loss": 0.0454, "step": 20330 }, { "epoch": 0.0967, "grad_norm": 0.17239850759506226, "learning_rate": 4.963558042829842e-05, "loss": 0.0505, "step": 20340 }, { "epoch": 0.09675, "grad_norm": 0.17625150084495544, "learning_rate": 4.9634876867559445e-05, "loss": 0.0454, "step": 20350 }, { "epoch": 0.0968, "grad_norm": 0.23776133358478546, "learning_rate": 4.963417263331219e-05, "loss": 0.0482, "step": 20360 }, { "epoch": 0.09685, "grad_norm": 0.20735669136047363, "learning_rate": 4.963346772557592e-05, "loss": 0.0483, "step": 20370 }, { "epoch": 0.0969, "grad_norm": 0.18635834753513336, "learning_rate": 4.9632762144369894e-05, "loss": 0.0464, "step": 20380 }, { "epoch": 0.09695, "grad_norm": 0.1918153315782547, "learning_rate": 4.9632055889713405e-05, "loss": 0.0472, "step": 20390 }, { "epoch": 0.097, "grad_norm": 0.18084578216075897, "learning_rate": 4.9631348961625756e-05, "loss": 0.0457, "step": 20400 }, { "epoch": 0.09705, "grad_norm": 0.16984711587429047, "learning_rate": 4.963064136012629e-05, "loss": 0.0496, "step": 20410 }, { "epoch": 0.0971, "grad_norm": 0.18165351450443268, "learning_rate": 4.9629933085234324e-05, "loss": 0.0471, "step": 20420 }, { "epoch": 0.09715, "grad_norm": 0.18519999086856842, "learning_rate": 4.9629224136969254e-05, "loss": 0.0478, "step": 20430 }, { "epoch": 0.0972, "grad_norm": 0.1832815706729889, "learning_rate": 4.962851451535044e-05, "loss": 0.0478, "step": 20440 }, { "epoch": 0.09725, "grad_norm": 0.18598419427871704, "learning_rate": 4.9627804220397306e-05, "loss": 0.0482, "step": 20450 }, { "epoch": 0.0973, "grad_norm": 0.16671109199523926, "learning_rate": 4.962709325212925e-05, "loss": 0.0467, "step": 20460 }, { "epoch": 0.09735, "grad_norm": 0.14696188271045685, "learning_rate": 4.9626381610565714e-05, "loss": 0.0486, "step": 20470 }, { "epoch": 0.0974, "grad_norm": 0.15696905553340912, "learning_rate": 4.962566929572616e-05, "loss": 0.0506, "step": 20480 }, { "epoch": 0.09745, "grad_norm": 0.18372030556201935, "learning_rate": 4.9624956307630054e-05, "loss": 0.0471, "step": 20490 }, { "epoch": 0.0975, "grad_norm": 0.18625737726688385, "learning_rate": 4.96242426462969e-05, "loss": 0.0475, "step": 20500 }, { "epoch": 0.09755, "grad_norm": 0.15121020376682281, "learning_rate": 4.96235283117462e-05, "loss": 0.0491, "step": 20510 }, { "epoch": 0.0976, "grad_norm": 0.16401106119155884, "learning_rate": 4.9622813303997486e-05, "loss": 0.0469, "step": 20520 }, { "epoch": 0.09765, "grad_norm": 0.1643715500831604, "learning_rate": 4.9622097623070306e-05, "loss": 0.0464, "step": 20530 }, { "epoch": 0.0977, "grad_norm": 0.1850992739200592, "learning_rate": 4.9621381268984224e-05, "loss": 0.0483, "step": 20540 }, { "epoch": 0.09775, "grad_norm": 0.15636497735977173, "learning_rate": 4.9620664241758835e-05, "loss": 0.0493, "step": 20550 }, { "epoch": 0.0978, "grad_norm": 0.17449866235256195, "learning_rate": 4.961994654141373e-05, "loss": 0.0483, "step": 20560 }, { "epoch": 0.09785, "grad_norm": 0.17442123591899872, "learning_rate": 4.961922816796854e-05, "loss": 0.0467, "step": 20570 }, { "epoch": 0.0979, "grad_norm": 0.16407912969589233, "learning_rate": 4.96185091214429e-05, "loss": 0.0465, "step": 20580 }, { "epoch": 0.09795, "grad_norm": 0.15191154181957245, "learning_rate": 4.961778940185647e-05, "loss": 0.0483, "step": 20590 }, { "epoch": 0.098, "grad_norm": 0.17933468520641327, "learning_rate": 4.9617069009228924e-05, "loss": 0.0454, "step": 20600 }, { "epoch": 0.09805, "grad_norm": 0.1651156097650528, "learning_rate": 4.9616347943579955e-05, "loss": 0.0482, "step": 20610 }, { "epoch": 0.0981, "grad_norm": 0.14247684180736542, "learning_rate": 4.9615626204929285e-05, "loss": 0.0484, "step": 20620 }, { "epoch": 0.09815, "grad_norm": 0.1793159544467926, "learning_rate": 4.9614903793296655e-05, "loss": 0.0483, "step": 20630 }, { "epoch": 0.0982, "grad_norm": 0.1951449066400528, "learning_rate": 4.961418070870178e-05, "loss": 0.0483, "step": 20640 }, { "epoch": 0.09825, "grad_norm": 0.16701920330524445, "learning_rate": 4.961345695116447e-05, "loss": 0.0481, "step": 20650 }, { "epoch": 0.0983, "grad_norm": 0.18166546523571014, "learning_rate": 4.9612732520704486e-05, "loss": 0.0468, "step": 20660 }, { "epoch": 0.09835, "grad_norm": 0.17654158174991608, "learning_rate": 4.9612007417341635e-05, "loss": 0.0519, "step": 20670 }, { "epoch": 0.0984, "grad_norm": 0.17844170331954956, "learning_rate": 4.9611281641095757e-05, "loss": 0.0506, "step": 20680 }, { "epoch": 0.09845, "grad_norm": 0.1816539615392685, "learning_rate": 4.9610555191986685e-05, "loss": 0.0523, "step": 20690 }, { "epoch": 0.0985, "grad_norm": 0.1649259477853775, "learning_rate": 4.9609828070034274e-05, "loss": 0.0479, "step": 20700 }, { "epoch": 0.09855, "grad_norm": 0.13955436646938324, "learning_rate": 4.960910027525841e-05, "loss": 0.0491, "step": 20710 }, { "epoch": 0.0986, "grad_norm": 0.13358356058597565, "learning_rate": 4.9608371807679e-05, "loss": 0.0471, "step": 20720 }, { "epoch": 0.09865, "grad_norm": 0.14095593988895416, "learning_rate": 4.960764266731593e-05, "loss": 0.0468, "step": 20730 }, { "epoch": 0.0987, "grad_norm": 0.16070382297039032, "learning_rate": 4.960691285418918e-05, "loss": 0.0479, "step": 20740 }, { "epoch": 0.09875, "grad_norm": 0.13572952151298523, "learning_rate": 4.9606182368318654e-05, "loss": 0.0496, "step": 20750 }, { "epoch": 0.0988, "grad_norm": 0.17128685116767883, "learning_rate": 4.960545120972436e-05, "loss": 0.0461, "step": 20760 }, { "epoch": 0.09885, "grad_norm": 0.16364675760269165, "learning_rate": 4.960471937842627e-05, "loss": 0.0499, "step": 20770 }, { "epoch": 0.0989, "grad_norm": 0.21058125793933868, "learning_rate": 4.9603986874444393e-05, "loss": 0.0474, "step": 20780 }, { "epoch": 0.09895, "grad_norm": 0.16382066905498505, "learning_rate": 4.960325369779876e-05, "loss": 0.0479, "step": 20790 }, { "epoch": 0.099, "grad_norm": 0.16887398064136505, "learning_rate": 4.960251984850941e-05, "loss": 0.047, "step": 20800 }, { "epoch": 0.09905, "grad_norm": 0.16802550852298737, "learning_rate": 4.960178532659642e-05, "loss": 0.0478, "step": 20810 }, { "epoch": 0.0991, "grad_norm": 0.14457744359970093, "learning_rate": 4.960105013207985e-05, "loss": 0.047, "step": 20820 }, { "epoch": 0.09915, "grad_norm": 0.1769077330827713, "learning_rate": 4.960031426497982e-05, "loss": 0.0459, "step": 20830 }, { "epoch": 0.0992, "grad_norm": 0.19485503435134888, "learning_rate": 4.959957772531643e-05, "loss": 0.0485, "step": 20840 }, { "epoch": 0.09925, "grad_norm": 0.17297333478927612, "learning_rate": 4.959884051310983e-05, "loss": 0.0462, "step": 20850 }, { "epoch": 0.0993, "grad_norm": 0.14237897098064423, "learning_rate": 4.959810262838018e-05, "loss": 0.0476, "step": 20860 }, { "epoch": 0.09935, "grad_norm": 0.17594468593597412, "learning_rate": 4.959736407114764e-05, "loss": 0.0462, "step": 20870 }, { "epoch": 0.0994, "grad_norm": 0.17022374272346497, "learning_rate": 4.9596624841432404e-05, "loss": 0.0469, "step": 20880 }, { "epoch": 0.09945, "grad_norm": 0.14756067097187042, "learning_rate": 4.959588493925469e-05, "loss": 0.049, "step": 20890 }, { "epoch": 0.0995, "grad_norm": 0.18401427567005157, "learning_rate": 4.959514436463473e-05, "loss": 0.0491, "step": 20900 }, { "epoch": 0.09955, "grad_norm": 0.15568624436855316, "learning_rate": 4.9594403117592746e-05, "loss": 0.0456, "step": 20910 }, { "epoch": 0.0996, "grad_norm": 0.18284066021442413, "learning_rate": 4.959366119814903e-05, "loss": 0.0474, "step": 20920 }, { "epoch": 0.09965, "grad_norm": 0.179499551653862, "learning_rate": 4.9592918606323856e-05, "loss": 0.0475, "step": 20930 }, { "epoch": 0.0997, "grad_norm": 0.16402794420719147, "learning_rate": 4.959217534213753e-05, "loss": 0.049, "step": 20940 }, { "epoch": 0.09975, "grad_norm": 0.16333773732185364, "learning_rate": 4.959143140561036e-05, "loss": 0.0472, "step": 20950 }, { "epoch": 0.0998, "grad_norm": 0.1984231323003769, "learning_rate": 4.9590686796762695e-05, "loss": 0.0505, "step": 20960 }, { "epoch": 0.09985, "grad_norm": 0.14446966350078583, "learning_rate": 4.958994151561489e-05, "loss": 0.0457, "step": 20970 }, { "epoch": 0.0999, "grad_norm": 0.15792179107666016, "learning_rate": 4.958919556218733e-05, "loss": 0.0461, "step": 20980 }, { "epoch": 0.09995, "grad_norm": 0.1875765323638916, "learning_rate": 4.9588448936500395e-05, "loss": 0.0473, "step": 20990 }, { "epoch": 0.1, "grad_norm": 0.16804379224777222, "learning_rate": 4.958770163857451e-05, "loss": 0.0458, "step": 21000 }, { "epoch": 0.10005, "grad_norm": 0.17551161348819733, "learning_rate": 4.958695366843009e-05, "loss": 0.0473, "step": 21010 }, { "epoch": 0.1001, "grad_norm": 0.19064000248908997, "learning_rate": 4.95862050260876e-05, "loss": 0.0487, "step": 21020 }, { "epoch": 0.10015, "grad_norm": 0.17545157670974731, "learning_rate": 4.9585455711567495e-05, "loss": 0.0475, "step": 21030 }, { "epoch": 0.1002, "grad_norm": 0.13496017456054688, "learning_rate": 4.958470572489028e-05, "loss": 0.0455, "step": 21040 }, { "epoch": 0.10025, "grad_norm": 0.1735667735338211, "learning_rate": 4.958395506607644e-05, "loss": 0.0461, "step": 21050 }, { "epoch": 0.1003, "grad_norm": 0.15615443885326385, "learning_rate": 4.95832037351465e-05, "loss": 0.0486, "step": 21060 }, { "epoch": 0.10035, "grad_norm": 0.2032114714384079, "learning_rate": 4.9582451732121e-05, "loss": 0.0467, "step": 21070 }, { "epoch": 0.1004, "grad_norm": 0.173023521900177, "learning_rate": 4.958169905702052e-05, "loss": 0.0495, "step": 21080 }, { "epoch": 0.10045, "grad_norm": 0.1550341099500656, "learning_rate": 4.958094570986561e-05, "loss": 0.0481, "step": 21090 }, { "epoch": 0.1005, "grad_norm": 0.16095350682735443, "learning_rate": 4.958019169067689e-05, "loss": 0.0457, "step": 21100 }, { "epoch": 0.10055, "grad_norm": 0.16583847999572754, "learning_rate": 4.957943699947496e-05, "loss": 0.0471, "step": 21110 }, { "epoch": 0.1006, "grad_norm": 0.13893641531467438, "learning_rate": 4.957868163628045e-05, "loss": 0.0467, "step": 21120 }, { "epoch": 0.10065, "grad_norm": 0.1380900740623474, "learning_rate": 4.957792560111403e-05, "loss": 0.0466, "step": 21130 }, { "epoch": 0.1007, "grad_norm": 0.19369691610336304, "learning_rate": 4.9577168893996346e-05, "loss": 0.0469, "step": 21140 }, { "epoch": 0.10075, "grad_norm": 0.12918607890605927, "learning_rate": 4.9576411514948095e-05, "loss": 0.046, "step": 21150 }, { "epoch": 0.1008, "grad_norm": 0.16536854207515717, "learning_rate": 4.957565346399e-05, "loss": 0.0496, "step": 21160 }, { "epoch": 0.10085, "grad_norm": 0.15015877783298492, "learning_rate": 4.9574894741142765e-05, "loss": 0.0468, "step": 21170 }, { "epoch": 0.1009, "grad_norm": 0.1411079466342926, "learning_rate": 4.957413534642714e-05, "loss": 0.0456, "step": 21180 }, { "epoch": 0.10095, "grad_norm": 0.12581391632556915, "learning_rate": 4.957337527986389e-05, "loss": 0.0453, "step": 21190 }, { "epoch": 0.101, "grad_norm": 0.1657503843307495, "learning_rate": 4.9572614541473786e-05, "loss": 0.0476, "step": 21200 }, { "epoch": 0.10105, "grad_norm": 0.13735216856002808, "learning_rate": 4.957185313127763e-05, "loss": 0.0469, "step": 21210 }, { "epoch": 0.1011, "grad_norm": 0.15780487656593323, "learning_rate": 4.9571091049296246e-05, "loss": 0.0448, "step": 21220 }, { "epoch": 0.10115, "grad_norm": 0.18495288491249084, "learning_rate": 4.957032829555046e-05, "loss": 0.051, "step": 21230 }, { "epoch": 0.1012, "grad_norm": 0.16596247255802155, "learning_rate": 4.956956487006114e-05, "loss": 0.0447, "step": 21240 }, { "epoch": 0.10125, "grad_norm": 0.169532909989357, "learning_rate": 4.9568800772849136e-05, "loss": 0.045, "step": 21250 }, { "epoch": 0.1013, "grad_norm": 0.20555630326271057, "learning_rate": 4.9568036003935344e-05, "loss": 0.0473, "step": 21260 }, { "epoch": 0.10135, "grad_norm": 0.17142048478126526, "learning_rate": 4.956727056334068e-05, "loss": 0.0476, "step": 21270 }, { "epoch": 0.1014, "grad_norm": 0.1793658286333084, "learning_rate": 4.956650445108608e-05, "loss": 0.0468, "step": 21280 }, { "epoch": 0.10145, "grad_norm": 0.1630728393793106, "learning_rate": 4.956573766719247e-05, "loss": 0.0474, "step": 21290 }, { "epoch": 0.1015, "grad_norm": 0.16996127367019653, "learning_rate": 4.956497021168082e-05, "loss": 0.0459, "step": 21300 }, { "epoch": 0.10155, "grad_norm": 0.16866938769817352, "learning_rate": 4.9564202084572114e-05, "loss": 0.0459, "step": 21310 }, { "epoch": 0.1016, "grad_norm": 0.1557689607143402, "learning_rate": 4.956343328588735e-05, "loss": 0.0473, "step": 21320 }, { "epoch": 0.10165, "grad_norm": 0.1726214587688446, "learning_rate": 4.9562663815647555e-05, "loss": 0.0456, "step": 21330 }, { "epoch": 0.1017, "grad_norm": 0.1973710060119629, "learning_rate": 4.956189367387375e-05, "loss": 0.0473, "step": 21340 }, { "epoch": 0.10175, "grad_norm": 0.17411907017230988, "learning_rate": 4.956112286058701e-05, "loss": 0.0452, "step": 21350 }, { "epoch": 0.1018, "grad_norm": 0.18137136101722717, "learning_rate": 4.9560351375808386e-05, "loss": 0.0469, "step": 21360 }, { "epoch": 0.10185, "grad_norm": 0.20940333604812622, "learning_rate": 4.9559579219558985e-05, "loss": 0.0484, "step": 21370 }, { "epoch": 0.1019, "grad_norm": 0.16980507969856262, "learning_rate": 4.9558806391859925e-05, "loss": 0.0464, "step": 21380 }, { "epoch": 0.10195, "grad_norm": 0.20325854420661926, "learning_rate": 4.955803289273231e-05, "loss": 0.0465, "step": 21390 }, { "epoch": 0.102, "grad_norm": 0.17460691928863525, "learning_rate": 4.9557258722197305e-05, "loss": 0.0482, "step": 21400 }, { "epoch": 0.10205, "grad_norm": 0.18925505876541138, "learning_rate": 4.955648388027608e-05, "loss": 0.0472, "step": 21410 }, { "epoch": 0.1021, "grad_norm": 0.2193133533000946, "learning_rate": 4.9555708366989804e-05, "loss": 0.0469, "step": 21420 }, { "epoch": 0.10215, "grad_norm": 0.1568610519170761, "learning_rate": 4.955493218235969e-05, "loss": 0.0475, "step": 21430 }, { "epoch": 0.1022, "grad_norm": 0.18509572744369507, "learning_rate": 4.9554155326406956e-05, "loss": 0.0514, "step": 21440 }, { "epoch": 0.10225, "grad_norm": 0.20055852830410004, "learning_rate": 4.955337779915285e-05, "loss": 0.0488, "step": 21450 }, { "epoch": 0.1023, "grad_norm": 0.22387243807315826, "learning_rate": 4.9552599600618596e-05, "loss": 0.0503, "step": 21460 }, { "epoch": 0.10235, "grad_norm": 0.16432657837867737, "learning_rate": 4.955182073082551e-05, "loss": 0.048, "step": 21470 }, { "epoch": 0.1024, "grad_norm": 0.169547900557518, "learning_rate": 4.955104118979487e-05, "loss": 0.0486, "step": 21480 }, { "epoch": 0.10245, "grad_norm": 0.19012103974819183, "learning_rate": 4.9550260977547974e-05, "loss": 0.0473, "step": 21490 }, { "epoch": 0.1025, "grad_norm": 0.18030716478824615, "learning_rate": 4.954948009410617e-05, "loss": 0.048, "step": 21500 }, { "epoch": 0.10255, "grad_norm": 0.16843658685684204, "learning_rate": 4.954869853949081e-05, "loss": 0.0467, "step": 21510 }, { "epoch": 0.1026, "grad_norm": 0.18063050508499146, "learning_rate": 4.9547916313723254e-05, "loss": 0.0467, "step": 21520 }, { "epoch": 0.10265, "grad_norm": 0.16403870284557343, "learning_rate": 4.9547133416824875e-05, "loss": 0.0476, "step": 21530 }, { "epoch": 0.1027, "grad_norm": 0.1540970355272293, "learning_rate": 4.954634984881711e-05, "loss": 0.0481, "step": 21540 }, { "epoch": 0.10275, "grad_norm": 0.1952674686908722, "learning_rate": 4.9545565609721346e-05, "loss": 0.0486, "step": 21550 }, { "epoch": 0.1028, "grad_norm": 0.17950768768787384, "learning_rate": 4.954478069955905e-05, "loss": 0.047, "step": 21560 }, { "epoch": 0.10285, "grad_norm": 0.17310073971748352, "learning_rate": 4.954399511835166e-05, "loss": 0.046, "step": 21570 }, { "epoch": 0.1029, "grad_norm": 0.16321447491645813, "learning_rate": 4.954320886612067e-05, "loss": 0.0474, "step": 21580 }, { "epoch": 0.10295, "grad_norm": 0.16060872375965118, "learning_rate": 4.954242194288757e-05, "loss": 0.049, "step": 21590 }, { "epoch": 0.103, "grad_norm": 0.1784420907497406, "learning_rate": 4.9541634348673875e-05, "loss": 0.047, "step": 21600 }, { "epoch": 0.10305, "grad_norm": 0.17316702008247375, "learning_rate": 4.9540846083501115e-05, "loss": 0.0521, "step": 21610 }, { "epoch": 0.1031, "grad_norm": 0.19821666181087494, "learning_rate": 4.954005714739085e-05, "loss": 0.0487, "step": 21620 }, { "epoch": 0.10315, "grad_norm": 0.16755364835262299, "learning_rate": 4.953926754036463e-05, "loss": 0.05, "step": 21630 }, { "epoch": 0.1032, "grad_norm": 0.1867600977420807, "learning_rate": 4.953847726244406e-05, "loss": 0.0486, "step": 21640 }, { "epoch": 0.10325, "grad_norm": 0.17544685304164886, "learning_rate": 4.9537686313650744e-05, "loss": 0.0488, "step": 21650 }, { "epoch": 0.1033, "grad_norm": 0.14893613755702972, "learning_rate": 4.9536894694006295e-05, "loss": 0.0482, "step": 21660 }, { "epoch": 0.10335, "grad_norm": 0.1764899045228958, "learning_rate": 4.953610240353237e-05, "loss": 0.0495, "step": 21670 }, { "epoch": 0.1034, "grad_norm": 0.1684151291847229, "learning_rate": 4.9535309442250624e-05, "loss": 0.0497, "step": 21680 }, { "epoch": 0.10345, "grad_norm": 0.16186705231666565, "learning_rate": 4.9534515810182724e-05, "loss": 0.0486, "step": 21690 }, { "epoch": 0.1035, "grad_norm": 0.17481642961502075, "learning_rate": 4.9533721507350395e-05, "loss": 0.0467, "step": 21700 }, { "epoch": 0.10355, "grad_norm": 0.1745694875717163, "learning_rate": 4.953292653377533e-05, "loss": 0.05, "step": 21710 }, { "epoch": 0.1036, "grad_norm": 0.17581479251384735, "learning_rate": 4.953213088947926e-05, "loss": 0.0479, "step": 21720 }, { "epoch": 0.10365, "grad_norm": 0.21984413266181946, "learning_rate": 4.953133457448395e-05, "loss": 0.0555, "step": 21730 }, { "epoch": 0.1037, "grad_norm": 0.20761926472187042, "learning_rate": 4.953053758881119e-05, "loss": 0.047, "step": 21740 }, { "epoch": 0.10375, "grad_norm": 0.1760006844997406, "learning_rate": 4.952973993248273e-05, "loss": 0.0501, "step": 21750 }, { "epoch": 0.1038, "grad_norm": 0.14995700120925903, "learning_rate": 4.952894160552039e-05, "loss": 0.0475, "step": 21760 }, { "epoch": 0.10385, "grad_norm": 0.18868878483772278, "learning_rate": 4.952814260794602e-05, "loss": 0.0494, "step": 21770 }, { "epoch": 0.1039, "grad_norm": 0.1648269146680832, "learning_rate": 4.9527342939781426e-05, "loss": 0.048, "step": 21780 }, { "epoch": 0.10395, "grad_norm": 0.16374856233596802, "learning_rate": 4.952654260104851e-05, "loss": 0.0517, "step": 21790 }, { "epoch": 0.104, "grad_norm": 0.1632968932390213, "learning_rate": 4.952574159176912e-05, "loss": 0.0502, "step": 21800 }, { "epoch": 0.10405, "grad_norm": 0.15796631574630737, "learning_rate": 4.9524939911965176e-05, "loss": 0.0479, "step": 21810 }, { "epoch": 0.1041, "grad_norm": 0.16355274617671967, "learning_rate": 4.9524137561658586e-05, "loss": 0.049, "step": 21820 }, { "epoch": 0.10415, "grad_norm": 0.19500236213207245, "learning_rate": 4.952333454087128e-05, "loss": 0.0476, "step": 21830 }, { "epoch": 0.1042, "grad_norm": 0.1601325124502182, "learning_rate": 4.9522530849625235e-05, "loss": 0.0463, "step": 21840 }, { "epoch": 0.10425, "grad_norm": 0.14225871860980988, "learning_rate": 4.95217264879424e-05, "loss": 0.0469, "step": 21850 }, { "epoch": 0.1043, "grad_norm": 0.14632344245910645, "learning_rate": 4.952092145584478e-05, "loss": 0.0511, "step": 21860 }, { "epoch": 0.10435, "grad_norm": 0.13829264044761658, "learning_rate": 4.952011575335438e-05, "loss": 0.0462, "step": 21870 }, { "epoch": 0.1044, "grad_norm": 0.15746837854385376, "learning_rate": 4.951930938049322e-05, "loss": 0.0472, "step": 21880 }, { "epoch": 0.10445, "grad_norm": 0.15343759953975677, "learning_rate": 4.951850233728336e-05, "loss": 0.0462, "step": 21890 }, { "epoch": 0.1045, "grad_norm": 0.1506185531616211, "learning_rate": 4.9517694623746855e-05, "loss": 0.0467, "step": 21900 }, { "epoch": 0.10455, "grad_norm": 0.13890409469604492, "learning_rate": 4.9516886239905794e-05, "loss": 0.0459, "step": 21910 }, { "epoch": 0.1046, "grad_norm": 0.15097202360630035, "learning_rate": 4.951607718578226e-05, "loss": 0.0463, "step": 21920 }, { "epoch": 0.10465, "grad_norm": 0.14566144347190857, "learning_rate": 4.9515267461398396e-05, "loss": 0.0481, "step": 21930 }, { "epoch": 0.1047, "grad_norm": 0.11933682858943939, "learning_rate": 4.9514457066776334e-05, "loss": 0.0481, "step": 21940 }, { "epoch": 0.10475, "grad_norm": 0.13602997362613678, "learning_rate": 4.951364600193822e-05, "loss": 0.044, "step": 21950 }, { "epoch": 0.1048, "grad_norm": 0.13590200245380402, "learning_rate": 4.951283426690623e-05, "loss": 0.0455, "step": 21960 }, { "epoch": 0.10485, "grad_norm": 0.15556122362613678, "learning_rate": 4.951202186170257e-05, "loss": 0.0462, "step": 21970 }, { "epoch": 0.1049, "grad_norm": 0.2523100972175598, "learning_rate": 4.951120878634943e-05, "loss": 0.0474, "step": 21980 }, { "epoch": 0.10495, "grad_norm": 0.14426535367965698, "learning_rate": 4.9510395040869054e-05, "loss": 0.0466, "step": 21990 }, { "epoch": 0.105, "grad_norm": 0.15779918432235718, "learning_rate": 4.950958062528369e-05, "loss": 0.0495, "step": 22000 }, { "epoch": 0.10505, "grad_norm": 0.1777641624212265, "learning_rate": 4.95087655396156e-05, "loss": 0.0462, "step": 22010 }, { "epoch": 0.1051, "grad_norm": 0.1580272912979126, "learning_rate": 4.950794978388706e-05, "loss": 0.0468, "step": 22020 }, { "epoch": 0.10515, "grad_norm": 0.14673081040382385, "learning_rate": 4.950713335812038e-05, "loss": 0.0484, "step": 22030 }, { "epoch": 0.1052, "grad_norm": 0.1237303763628006, "learning_rate": 4.9506316262337896e-05, "loss": 0.0462, "step": 22040 }, { "epoch": 0.10525, "grad_norm": 0.1669149547815323, "learning_rate": 4.950549849656192e-05, "loss": 0.0468, "step": 22050 }, { "epoch": 0.1053, "grad_norm": 0.15633957087993622, "learning_rate": 4.9504680060814823e-05, "loss": 0.0452, "step": 22060 }, { "epoch": 0.10535, "grad_norm": 0.16523058712482452, "learning_rate": 4.9503860955118976e-05, "loss": 0.0507, "step": 22070 }, { "epoch": 0.1054, "grad_norm": 0.1778353899717331, "learning_rate": 4.950304117949678e-05, "loss": 0.0492, "step": 22080 }, { "epoch": 0.10545, "grad_norm": 0.18347936868667603, "learning_rate": 4.950222073397064e-05, "loss": 0.0489, "step": 22090 }, { "epoch": 0.1055, "grad_norm": 0.20095577836036682, "learning_rate": 4.950139961856299e-05, "loss": 0.0491, "step": 22100 }, { "epoch": 0.10555, "grad_norm": 0.14817671477794647, "learning_rate": 4.9500577833296284e-05, "loss": 0.0481, "step": 22110 }, { "epoch": 0.1056, "grad_norm": 0.17643176019191742, "learning_rate": 4.949975537819298e-05, "loss": 0.0483, "step": 22120 }, { "epoch": 0.10565, "grad_norm": 0.156997412443161, "learning_rate": 4.949893225327558e-05, "loss": 0.0486, "step": 22130 }, { "epoch": 0.1057, "grad_norm": 0.14414997398853302, "learning_rate": 4.949810845856656e-05, "loss": 0.0478, "step": 22140 }, { "epoch": 0.10575, "grad_norm": 0.1429523229598999, "learning_rate": 4.949728399408846e-05, "loss": 0.0459, "step": 22150 }, { "epoch": 0.1058, "grad_norm": 0.16600804030895233, "learning_rate": 4.9496458859863824e-05, "loss": 0.0475, "step": 22160 }, { "epoch": 0.10585, "grad_norm": 0.1708773672580719, "learning_rate": 4.949563305591521e-05, "loss": 0.0451, "step": 22170 }, { "epoch": 0.1059, "grad_norm": 0.13618949055671692, "learning_rate": 4.949480658226518e-05, "loss": 0.0475, "step": 22180 }, { "epoch": 0.10595, "grad_norm": 0.14604583382606506, "learning_rate": 4.9493979438936356e-05, "loss": 0.0464, "step": 22190 }, { "epoch": 0.106, "grad_norm": 0.1562792807817459, "learning_rate": 4.9493151625951326e-05, "loss": 0.0488, "step": 22200 }, { "epoch": 0.10605, "grad_norm": 0.1627453714609146, "learning_rate": 4.949232314333273e-05, "loss": 0.0472, "step": 22210 }, { "epoch": 0.1061, "grad_norm": 0.149214968085289, "learning_rate": 4.949149399110322e-05, "loss": 0.0461, "step": 22220 }, { "epoch": 0.10615, "grad_norm": 0.16553807258605957, "learning_rate": 4.949066416928547e-05, "loss": 0.0458, "step": 22230 }, { "epoch": 0.1062, "grad_norm": 0.14975816011428833, "learning_rate": 4.9489833677902164e-05, "loss": 0.0442, "step": 22240 }, { "epoch": 0.10625, "grad_norm": 0.14777584373950958, "learning_rate": 4.9489002516976e-05, "loss": 0.0473, "step": 22250 }, { "epoch": 0.1063, "grad_norm": 0.1806098222732544, "learning_rate": 4.9488170686529714e-05, "loss": 0.048, "step": 22260 }, { "epoch": 0.10635, "grad_norm": 0.17245250940322876, "learning_rate": 4.948733818658604e-05, "loss": 0.0505, "step": 22270 }, { "epoch": 0.1064, "grad_norm": 0.19378966093063354, "learning_rate": 4.9486505017167726e-05, "loss": 0.0458, "step": 22280 }, { "epoch": 0.10645, "grad_norm": 0.2093646377325058, "learning_rate": 4.9485671178297576e-05, "loss": 0.0481, "step": 22290 }, { "epoch": 0.1065, "grad_norm": 0.16001328825950623, "learning_rate": 4.948483666999838e-05, "loss": 0.0472, "step": 22300 }, { "epoch": 0.10655, "grad_norm": 0.1635342389345169, "learning_rate": 4.948400149229294e-05, "loss": 0.0459, "step": 22310 }, { "epoch": 0.1066, "grad_norm": 0.16335773468017578, "learning_rate": 4.9483165645204097e-05, "loss": 0.0457, "step": 22320 }, { "epoch": 0.10665, "grad_norm": 0.1378794014453888, "learning_rate": 4.94823291287547e-05, "loss": 0.0461, "step": 22330 }, { "epoch": 0.1067, "grad_norm": 0.1463647335767746, "learning_rate": 4.948149194296763e-05, "loss": 0.0452, "step": 22340 }, { "epoch": 0.10675, "grad_norm": 0.1523444652557373, "learning_rate": 4.9480654087865766e-05, "loss": 0.0463, "step": 22350 }, { "epoch": 0.1068, "grad_norm": 0.1556294709444046, "learning_rate": 4.947981556347201e-05, "loss": 0.0458, "step": 22360 }, { "epoch": 0.10685, "grad_norm": 0.19621415436267853, "learning_rate": 4.9478976369809305e-05, "loss": 0.0453, "step": 22370 }, { "epoch": 0.1069, "grad_norm": 0.2060937136411667, "learning_rate": 4.9478136506900574e-05, "loss": 0.0488, "step": 22380 }, { "epoch": 0.10695, "grad_norm": 0.18328139185905457, "learning_rate": 4.947729597476879e-05, "loss": 0.0487, "step": 22390 }, { "epoch": 0.107, "grad_norm": 0.15908265113830566, "learning_rate": 4.9476454773436925e-05, "loss": 0.0461, "step": 22400 }, { "epoch": 0.10705, "grad_norm": 0.16934293508529663, "learning_rate": 4.9475612902927985e-05, "loss": 0.0459, "step": 22410 }, { "epoch": 0.1071, "grad_norm": 0.15121278166770935, "learning_rate": 4.947477036326498e-05, "loss": 0.0459, "step": 22420 }, { "epoch": 0.10715, "grad_norm": 0.1772674322128296, "learning_rate": 4.9473927154470954e-05, "loss": 0.0467, "step": 22430 }, { "epoch": 0.1072, "grad_norm": 0.17032106220722198, "learning_rate": 4.9473083276568955e-05, "loss": 0.0486, "step": 22440 }, { "epoch": 0.10725, "grad_norm": 0.16720731556415558, "learning_rate": 4.9472238729582045e-05, "loss": 0.0484, "step": 22450 }, { "epoch": 0.1073, "grad_norm": 0.19819645583629608, "learning_rate": 4.9471393513533324e-05, "loss": 0.046, "step": 22460 }, { "epoch": 0.10735, "grad_norm": 0.16138656437397003, "learning_rate": 4.94705476284459e-05, "loss": 0.0461, "step": 22470 }, { "epoch": 0.1074, "grad_norm": 0.1695665568113327, "learning_rate": 4.94697010743429e-05, "loss": 0.0474, "step": 22480 }, { "epoch": 0.10745, "grad_norm": 0.13845255970954895, "learning_rate": 4.9468853851247466e-05, "loss": 0.0442, "step": 22490 }, { "epoch": 0.1075, "grad_norm": 0.1434382200241089, "learning_rate": 4.946800595918275e-05, "loss": 0.0445, "step": 22500 }, { "epoch": 0.10755, "grad_norm": 0.14327076077461243, "learning_rate": 4.9467157398171956e-05, "loss": 0.0495, "step": 22510 }, { "epoch": 0.1076, "grad_norm": 0.12793277204036713, "learning_rate": 4.946630816823826e-05, "loss": 0.0462, "step": 22520 }, { "epoch": 0.10765, "grad_norm": 0.17494246363639832, "learning_rate": 4.9465458269404895e-05, "loss": 0.0476, "step": 22530 }, { "epoch": 0.1077, "grad_norm": 0.17591674625873566, "learning_rate": 4.94646077016951e-05, "loss": 0.0453, "step": 22540 }, { "epoch": 0.10775, "grad_norm": 0.15116706490516663, "learning_rate": 4.94637564651321e-05, "loss": 0.0468, "step": 22550 }, { "epoch": 0.1078, "grad_norm": 0.17800773680210114, "learning_rate": 4.946290455973921e-05, "loss": 0.0471, "step": 22560 }, { "epoch": 0.10785, "grad_norm": 0.15833675861358643, "learning_rate": 4.9462051985539695e-05, "loss": 0.0458, "step": 22570 }, { "epoch": 0.1079, "grad_norm": 0.17471368610858917, "learning_rate": 4.946119874255686e-05, "loss": 0.0491, "step": 22580 }, { "epoch": 0.10795, "grad_norm": 0.18584364652633667, "learning_rate": 4.946034483081405e-05, "loss": 0.0482, "step": 22590 }, { "epoch": 0.108, "grad_norm": 0.17959827184677124, "learning_rate": 4.945949025033459e-05, "loss": 0.0473, "step": 22600 }, { "epoch": 0.10805, "grad_norm": 0.13268496096134186, "learning_rate": 4.945863500114187e-05, "loss": 0.0467, "step": 22610 }, { "epoch": 0.1081, "grad_norm": 0.19379787147045135, "learning_rate": 4.9457779083259255e-05, "loss": 0.0461, "step": 22620 }, { "epoch": 0.10815, "grad_norm": 0.1832592785358429, "learning_rate": 4.945692249671015e-05, "loss": 0.0469, "step": 22630 }, { "epoch": 0.1082, "grad_norm": 0.14402779936790466, "learning_rate": 4.945606524151796e-05, "loss": 0.0471, "step": 22640 }, { "epoch": 0.10825, "grad_norm": 0.15217886865139008, "learning_rate": 4.945520731770614e-05, "loss": 0.0475, "step": 22650 }, { "epoch": 0.1083, "grad_norm": 0.15967465937137604, "learning_rate": 4.945434872529814e-05, "loss": 0.0491, "step": 22660 }, { "epoch": 0.10835, "grad_norm": 0.13702714443206787, "learning_rate": 4.9453489464317434e-05, "loss": 0.0459, "step": 22670 }, { "epoch": 0.1084, "grad_norm": 0.19126775860786438, "learning_rate": 4.9452629534787506e-05, "loss": 0.0499, "step": 22680 }, { "epoch": 0.10845, "grad_norm": 0.16599732637405396, "learning_rate": 4.9451768936731885e-05, "loss": 0.0465, "step": 22690 }, { "epoch": 0.1085, "grad_norm": 0.1632414609193802, "learning_rate": 4.9450907670174084e-05, "loss": 0.0469, "step": 22700 }, { "epoch": 0.10855, "grad_norm": 0.16432011127471924, "learning_rate": 4.945004573513765e-05, "loss": 0.0489, "step": 22710 }, { "epoch": 0.1086, "grad_norm": 0.16096735000610352, "learning_rate": 4.9449183131646146e-05, "loss": 0.0483, "step": 22720 }, { "epoch": 0.10865, "grad_norm": 0.18021048605442047, "learning_rate": 4.944831985972317e-05, "loss": 0.047, "step": 22730 }, { "epoch": 0.1087, "grad_norm": 0.155808225274086, "learning_rate": 4.94474559193923e-05, "loss": 0.0478, "step": 22740 }, { "epoch": 0.10875, "grad_norm": 0.1347564309835434, "learning_rate": 4.944659131067719e-05, "loss": 0.046, "step": 22750 }, { "epoch": 0.1088, "grad_norm": 0.1773679256439209, "learning_rate": 4.944572603360145e-05, "loss": 0.0458, "step": 22760 }, { "epoch": 0.10885, "grad_norm": 0.14633294939994812, "learning_rate": 4.9444860088188736e-05, "loss": 0.0457, "step": 22770 }, { "epoch": 0.1089, "grad_norm": 0.14980213344097137, "learning_rate": 4.944399347446274e-05, "loss": 0.0459, "step": 22780 }, { "epoch": 0.10895, "grad_norm": 0.16055136919021606, "learning_rate": 4.944312619244714e-05, "loss": 0.0471, "step": 22790 }, { "epoch": 0.109, "grad_norm": 0.16401828825473785, "learning_rate": 4.9442258242165653e-05, "loss": 0.0452, "step": 22800 }, { "epoch": 0.10905, "grad_norm": 0.1338483691215515, "learning_rate": 4.944138962364201e-05, "loss": 0.0488, "step": 22810 }, { "epoch": 0.1091, "grad_norm": 0.16888396441936493, "learning_rate": 4.944052033689995e-05, "loss": 0.0451, "step": 22820 }, { "epoch": 0.10915, "grad_norm": 0.1720247119665146, "learning_rate": 4.943965038196326e-05, "loss": 0.0456, "step": 22830 }, { "epoch": 0.1092, "grad_norm": 0.13762693107128143, "learning_rate": 4.9438779758855694e-05, "loss": 0.0467, "step": 22840 }, { "epoch": 0.10925, "grad_norm": 0.11534810066223145, "learning_rate": 4.943790846760108e-05, "loss": 0.0441, "step": 22850 }, { "epoch": 0.1093, "grad_norm": 0.1553107500076294, "learning_rate": 4.943703650822323e-05, "loss": 0.0481, "step": 22860 }, { "epoch": 0.10935, "grad_norm": 0.1464255452156067, "learning_rate": 4.9436163880745975e-05, "loss": 0.0444, "step": 22870 }, { "epoch": 0.1094, "grad_norm": 0.167152538895607, "learning_rate": 4.9435290585193186e-05, "loss": 0.0435, "step": 22880 }, { "epoch": 0.10945, "grad_norm": 0.14588108658790588, "learning_rate": 4.943441662158874e-05, "loss": 0.0433, "step": 22890 }, { "epoch": 0.1095, "grad_norm": 0.14812380075454712, "learning_rate": 4.943354198995651e-05, "loss": 0.0447, "step": 22900 }, { "epoch": 0.10955, "grad_norm": 0.2095862329006195, "learning_rate": 4.9432666690320426e-05, "loss": 0.0462, "step": 22910 }, { "epoch": 0.1096, "grad_norm": 0.1407693475484848, "learning_rate": 4.943179072270441e-05, "loss": 0.0461, "step": 22920 }, { "epoch": 0.10965, "grad_norm": 0.15722720324993134, "learning_rate": 4.9430914087132415e-05, "loss": 0.0452, "step": 22930 }, { "epoch": 0.1097, "grad_norm": 0.15718896687030792, "learning_rate": 4.943003678362842e-05, "loss": 0.0457, "step": 22940 }, { "epoch": 0.10975, "grad_norm": 0.13286322355270386, "learning_rate": 4.942915881221638e-05, "loss": 0.0441, "step": 22950 }, { "epoch": 0.1098, "grad_norm": 0.16470444202423096, "learning_rate": 4.942828017292033e-05, "loss": 0.0468, "step": 22960 }, { "epoch": 0.10985, "grad_norm": 0.12710019946098328, "learning_rate": 4.942740086576427e-05, "loss": 0.0452, "step": 22970 }, { "epoch": 0.1099, "grad_norm": 0.18647213280200958, "learning_rate": 4.9426520890772245e-05, "loss": 0.0507, "step": 22980 }, { "epoch": 0.10995, "grad_norm": 0.24347060918807983, "learning_rate": 4.942564024796832e-05, "loss": 0.046, "step": 22990 }, { "epoch": 0.11, "grad_norm": 0.1583353877067566, "learning_rate": 4.9424758937376567e-05, "loss": 0.0475, "step": 23000 }, { "epoch": 0.11005, "grad_norm": 0.16557100415229797, "learning_rate": 4.942387695902108e-05, "loss": 0.0466, "step": 23010 }, { "epoch": 0.1101, "grad_norm": 0.1670871376991272, "learning_rate": 4.942299431292596e-05, "loss": 0.0463, "step": 23020 }, { "epoch": 0.11015, "grad_norm": 0.14393047988414764, "learning_rate": 4.9422110999115365e-05, "loss": 0.0445, "step": 23030 }, { "epoch": 0.1102, "grad_norm": 0.15277686715126038, "learning_rate": 4.942122701761343e-05, "loss": 0.0491, "step": 23040 }, { "epoch": 0.11025, "grad_norm": 0.14930570125579834, "learning_rate": 4.9420342368444316e-05, "loss": 0.0453, "step": 23050 }, { "epoch": 0.1103, "grad_norm": 0.12654705345630646, "learning_rate": 4.941945705163222e-05, "loss": 0.048, "step": 23060 }, { "epoch": 0.11035, "grad_norm": 0.17418532073497772, "learning_rate": 4.9418571067201346e-05, "loss": 0.0463, "step": 23070 }, { "epoch": 0.1104, "grad_norm": 0.14700326323509216, "learning_rate": 4.9417684415175905e-05, "loss": 0.0509, "step": 23080 }, { "epoch": 0.11045, "grad_norm": 0.17009063065052032, "learning_rate": 4.9416797095580156e-05, "loss": 0.0495, "step": 23090 }, { "epoch": 0.1105, "grad_norm": 0.15119709074497223, "learning_rate": 4.9415909108438344e-05, "loss": 0.048, "step": 23100 }, { "epoch": 0.11055, "grad_norm": 0.16565102338790894, "learning_rate": 4.941502045377474e-05, "loss": 0.045, "step": 23110 }, { "epoch": 0.1106, "grad_norm": 0.17975634336471558, "learning_rate": 4.9414131131613656e-05, "loss": 0.0494, "step": 23120 }, { "epoch": 0.11065, "grad_norm": 0.15956923365592957, "learning_rate": 4.9413241141979394e-05, "loss": 0.0459, "step": 23130 }, { "epoch": 0.1107, "grad_norm": 0.13292647898197174, "learning_rate": 4.9412350484896294e-05, "loss": 0.0451, "step": 23140 }, { "epoch": 0.11075, "grad_norm": 0.15606789290905, "learning_rate": 4.9411459160388705e-05, "loss": 0.0485, "step": 23150 }, { "epoch": 0.1108, "grad_norm": 0.20483602583408356, "learning_rate": 4.941056716848099e-05, "loss": 0.0503, "step": 23160 }, { "epoch": 0.11085, "grad_norm": 0.167527973651886, "learning_rate": 4.940967450919755e-05, "loss": 0.047, "step": 23170 }, { "epoch": 0.1109, "grad_norm": 0.16891071200370789, "learning_rate": 4.940878118256277e-05, "loss": 0.0488, "step": 23180 }, { "epoch": 0.11095, "grad_norm": 0.15802857279777527, "learning_rate": 4.9407887188601084e-05, "loss": 0.048, "step": 23190 }, { "epoch": 0.111, "grad_norm": 0.1566019505262375, "learning_rate": 4.9406992527336924e-05, "loss": 0.0493, "step": 23200 }, { "epoch": 0.11105, "grad_norm": 0.1614404320716858, "learning_rate": 4.940609719879477e-05, "loss": 0.0458, "step": 23210 }, { "epoch": 0.1111, "grad_norm": 0.17096981406211853, "learning_rate": 4.9405201202999085e-05, "loss": 0.0499, "step": 23220 }, { "epoch": 0.11115, "grad_norm": 0.14519457519054413, "learning_rate": 4.9404304539974364e-05, "loss": 0.0445, "step": 23230 }, { "epoch": 0.1112, "grad_norm": 0.1501145362854004, "learning_rate": 4.9403407209745125e-05, "loss": 0.0479, "step": 23240 }, { "epoch": 0.11125, "grad_norm": 0.1739199012517929, "learning_rate": 4.9402509212335904e-05, "loss": 0.0486, "step": 23250 }, { "epoch": 0.1113, "grad_norm": 0.1648600697517395, "learning_rate": 4.9401610547771246e-05, "loss": 0.0467, "step": 23260 }, { "epoch": 0.11135, "grad_norm": 0.15994058549404144, "learning_rate": 4.9400711216075726e-05, "loss": 0.0492, "step": 23270 }, { "epoch": 0.1114, "grad_norm": 0.14188778400421143, "learning_rate": 4.9399811217273916e-05, "loss": 0.0483, "step": 23280 }, { "epoch": 0.11145, "grad_norm": 0.14371943473815918, "learning_rate": 4.939891055139045e-05, "loss": 0.0466, "step": 23290 }, { "epoch": 0.1115, "grad_norm": 0.1529754251241684, "learning_rate": 4.939800921844993e-05, "loss": 0.0491, "step": 23300 }, { "epoch": 0.11155, "grad_norm": 0.18079929053783417, "learning_rate": 4.9397107218477e-05, "loss": 0.0471, "step": 23310 }, { "epoch": 0.1116, "grad_norm": 0.16525928676128387, "learning_rate": 4.9396204551496326e-05, "loss": 0.0452, "step": 23320 }, { "epoch": 0.11165, "grad_norm": 0.14274610579013824, "learning_rate": 4.939530121753259e-05, "loss": 0.0465, "step": 23330 }, { "epoch": 0.1117, "grad_norm": 0.15236027538776398, "learning_rate": 4.9394397216610476e-05, "loss": 0.0474, "step": 23340 }, { "epoch": 0.11175, "grad_norm": 0.12764261662960052, "learning_rate": 4.939349254875472e-05, "loss": 0.0449, "step": 23350 }, { "epoch": 0.1118, "grad_norm": 0.13034336268901825, "learning_rate": 4.939258721399003e-05, "loss": 0.0468, "step": 23360 }, { "epoch": 0.11185, "grad_norm": 0.1476508378982544, "learning_rate": 4.939168121234117e-05, "loss": 0.0496, "step": 23370 }, { "epoch": 0.1119, "grad_norm": 0.166147843003273, "learning_rate": 4.9390774543832906e-05, "loss": 0.0507, "step": 23380 }, { "epoch": 0.11195, "grad_norm": 0.17686863243579865, "learning_rate": 4.9389867208490034e-05, "loss": 0.048, "step": 23390 }, { "epoch": 0.112, "grad_norm": 0.17661166191101074, "learning_rate": 4.938895920633736e-05, "loss": 0.0457, "step": 23400 }, { "epoch": 0.11205, "grad_norm": 0.16418366134166718, "learning_rate": 4.93880505373997e-05, "loss": 0.0461, "step": 23410 }, { "epoch": 0.1121, "grad_norm": 0.14816594123840332, "learning_rate": 4.93871412017019e-05, "loss": 0.0489, "step": 23420 }, { "epoch": 0.11215, "grad_norm": 0.15972666442394257, "learning_rate": 4.938623119926882e-05, "loss": 0.0459, "step": 23430 }, { "epoch": 0.1122, "grad_norm": 0.1464952528476715, "learning_rate": 4.9385320530125346e-05, "loss": 0.0472, "step": 23440 }, { "epoch": 0.11225, "grad_norm": 0.15542052686214447, "learning_rate": 4.938440919429637e-05, "loss": 0.0467, "step": 23450 }, { "epoch": 0.1123, "grad_norm": 0.16439156234264374, "learning_rate": 4.938349719180679e-05, "loss": 0.0481, "step": 23460 }, { "epoch": 0.11235, "grad_norm": 0.18987365067005157, "learning_rate": 4.9382584522681575e-05, "loss": 0.0453, "step": 23470 }, { "epoch": 0.1124, "grad_norm": 0.15060585737228394, "learning_rate": 4.9381671186945656e-05, "loss": 0.0447, "step": 23480 }, { "epoch": 0.11245, "grad_norm": 0.21386857330799103, "learning_rate": 4.9380757184624006e-05, "loss": 0.0464, "step": 23490 }, { "epoch": 0.1125, "grad_norm": 0.16736474633216858, "learning_rate": 4.937984251574162e-05, "loss": 0.0467, "step": 23500 }, { "epoch": 0.11255, "grad_norm": 0.17925432324409485, "learning_rate": 4.9378927180323485e-05, "loss": 0.0465, "step": 23510 }, { "epoch": 0.1126, "grad_norm": 0.1393420398235321, "learning_rate": 4.937801117839464e-05, "loss": 0.0447, "step": 23520 }, { "epoch": 0.11265, "grad_norm": 0.191842183470726, "learning_rate": 4.9377094509980135e-05, "loss": 0.0468, "step": 23530 }, { "epoch": 0.1127, "grad_norm": 0.14526723325252533, "learning_rate": 4.9376177175105035e-05, "loss": 0.0466, "step": 23540 }, { "epoch": 0.11275, "grad_norm": 0.17123140394687653, "learning_rate": 4.937525917379439e-05, "loss": 0.0487, "step": 23550 }, { "epoch": 0.1128, "grad_norm": 0.1246662586927414, "learning_rate": 4.937434050607332e-05, "loss": 0.0452, "step": 23560 }, { "epoch": 0.11285, "grad_norm": 0.16733843088150024, "learning_rate": 4.937342117196695e-05, "loss": 0.0461, "step": 23570 }, { "epoch": 0.1129, "grad_norm": 0.15778127312660217, "learning_rate": 4.937250117150039e-05, "loss": 0.0459, "step": 23580 }, { "epoch": 0.11295, "grad_norm": 0.1407967209815979, "learning_rate": 4.9371580504698814e-05, "loss": 0.0482, "step": 23590 }, { "epoch": 0.113, "grad_norm": 0.15934114158153534, "learning_rate": 4.9370659171587367e-05, "loss": 0.0485, "step": 23600 }, { "epoch": 0.11305, "grad_norm": 0.18262344598770142, "learning_rate": 4.9369737172191265e-05, "loss": 0.0468, "step": 23610 }, { "epoch": 0.1131, "grad_norm": 0.15956884622573853, "learning_rate": 4.93688145065357e-05, "loss": 0.0465, "step": 23620 }, { "epoch": 0.11315, "grad_norm": 0.16960874199867249, "learning_rate": 4.936789117464591e-05, "loss": 0.0449, "step": 23630 }, { "epoch": 0.1132, "grad_norm": 0.1442752331495285, "learning_rate": 4.936696717654712e-05, "loss": 0.0473, "step": 23640 }, { "epoch": 0.11325, "grad_norm": 0.14218948781490326, "learning_rate": 4.9366042512264604e-05, "loss": 0.0468, "step": 23650 }, { "epoch": 0.1133, "grad_norm": 0.14883334934711456, "learning_rate": 4.936511718182364e-05, "loss": 0.0464, "step": 23660 }, { "epoch": 0.11335, "grad_norm": 0.14746984839439392, "learning_rate": 4.9364191185249534e-05, "loss": 0.0443, "step": 23670 }, { "epoch": 0.1134, "grad_norm": 0.17093515396118164, "learning_rate": 4.936326452256758e-05, "loss": 0.0454, "step": 23680 }, { "epoch": 0.11345, "grad_norm": 0.15980379283428192, "learning_rate": 4.936233719380313e-05, "loss": 0.0472, "step": 23690 }, { "epoch": 0.1135, "grad_norm": 0.16157618165016174, "learning_rate": 4.936140919898155e-05, "loss": 0.0467, "step": 23700 }, { "epoch": 0.11355, "grad_norm": 0.17927232384681702, "learning_rate": 4.936048053812817e-05, "loss": 0.0488, "step": 23710 }, { "epoch": 0.1136, "grad_norm": 0.19343438744544983, "learning_rate": 4.9359551211268415e-05, "loss": 0.0456, "step": 23720 }, { "epoch": 0.11365, "grad_norm": 0.1483830362558365, "learning_rate": 4.935862121842769e-05, "loss": 0.047, "step": 23730 }, { "epoch": 0.1137, "grad_norm": 0.16596491634845734, "learning_rate": 4.93576905596314e-05, "loss": 0.0438, "step": 23740 }, { "epoch": 0.11375, "grad_norm": 0.15183082222938538, "learning_rate": 4.9356759234905e-05, "loss": 0.044, "step": 23750 }, { "epoch": 0.1138, "grad_norm": 0.15099778771400452, "learning_rate": 4.935582724427397e-05, "loss": 0.0455, "step": 23760 }, { "epoch": 0.11385, "grad_norm": 0.19475258886814117, "learning_rate": 4.935489458776375e-05, "loss": 0.0448, "step": 23770 }, { "epoch": 0.1139, "grad_norm": 0.18847818672657013, "learning_rate": 4.935396126539988e-05, "loss": 0.0445, "step": 23780 }, { "epoch": 0.11395, "grad_norm": 0.15470103919506073, "learning_rate": 4.935302727720785e-05, "loss": 0.0448, "step": 23790 }, { "epoch": 0.114, "grad_norm": 0.13757477700710297, "learning_rate": 4.93520926232132e-05, "loss": 0.0449, "step": 23800 }, { "epoch": 0.11405, "grad_norm": 0.1728510856628418, "learning_rate": 4.9351157303441495e-05, "loss": 0.0441, "step": 23810 }, { "epoch": 0.1141, "grad_norm": 0.15007427334785461, "learning_rate": 4.935022131791829e-05, "loss": 0.0435, "step": 23820 }, { "epoch": 0.11415, "grad_norm": 0.1726987063884735, "learning_rate": 4.934928466666919e-05, "loss": 0.0461, "step": 23830 }, { "epoch": 0.1142, "grad_norm": 0.15952524542808533, "learning_rate": 4.9348347349719784e-05, "loss": 0.0432, "step": 23840 }, { "epoch": 0.11425, "grad_norm": 0.14444439113140106, "learning_rate": 4.934740936709572e-05, "loss": 0.0431, "step": 23850 }, { "epoch": 0.1143, "grad_norm": 0.1386149674654007, "learning_rate": 4.934647071882262e-05, "loss": 0.0475, "step": 23860 }, { "epoch": 0.11435, "grad_norm": 0.14660941064357758, "learning_rate": 4.934553140492617e-05, "loss": 0.0451, "step": 23870 }, { "epoch": 0.1144, "grad_norm": 0.1509007215499878, "learning_rate": 4.934459142543203e-05, "loss": 0.0458, "step": 23880 }, { "epoch": 0.11445, "grad_norm": 0.1373247653245926, "learning_rate": 4.93436507803659e-05, "loss": 0.0473, "step": 23890 }, { "epoch": 0.1145, "grad_norm": 0.1345268040895462, "learning_rate": 4.934270946975351e-05, "loss": 0.0441, "step": 23900 }, { "epoch": 0.11455, "grad_norm": 0.1353430151939392, "learning_rate": 4.934176749362059e-05, "loss": 0.045, "step": 23910 }, { "epoch": 0.1146, "grad_norm": 0.14987526834011078, "learning_rate": 4.9340824851992894e-05, "loss": 0.0447, "step": 23920 }, { "epoch": 0.11465, "grad_norm": 0.12962773442268372, "learning_rate": 4.933988154489618e-05, "loss": 0.0453, "step": 23930 }, { "epoch": 0.1147, "grad_norm": 0.1771841198205948, "learning_rate": 4.933893757235626e-05, "loss": 0.0461, "step": 23940 }, { "epoch": 0.11475, "grad_norm": 0.14618121087551117, "learning_rate": 4.9337992934398926e-05, "loss": 0.0484, "step": 23950 }, { "epoch": 0.1148, "grad_norm": 0.13878297805786133, "learning_rate": 4.933704763105e-05, "loss": 0.0494, "step": 23960 }, { "epoch": 0.11485, "grad_norm": 0.13771523535251617, "learning_rate": 4.9336101662335346e-05, "loss": 0.0458, "step": 23970 }, { "epoch": 0.1149, "grad_norm": 0.15850576758384705, "learning_rate": 4.9335155028280816e-05, "loss": 0.0484, "step": 23980 }, { "epoch": 0.11495, "grad_norm": 0.15937262773513794, "learning_rate": 4.9334207728912284e-05, "loss": 0.0465, "step": 23990 }, { "epoch": 0.115, "grad_norm": 0.1431184709072113, "learning_rate": 4.9333259764255655e-05, "loss": 0.0448, "step": 24000 }, { "epoch": 0.11505, "grad_norm": 0.1383841186761856, "learning_rate": 4.933231113433685e-05, "loss": 0.0438, "step": 24010 }, { "epoch": 0.1151, "grad_norm": 0.15110275149345398, "learning_rate": 4.93313618391818e-05, "loss": 0.0476, "step": 24020 }, { "epoch": 0.11515, "grad_norm": 0.1487853080034256, "learning_rate": 4.933041187881645e-05, "loss": 0.0462, "step": 24030 }, { "epoch": 0.1152, "grad_norm": 0.14043785631656647, "learning_rate": 4.9329461253266784e-05, "loss": 0.0462, "step": 24040 }, { "epoch": 0.11525, "grad_norm": 0.1876734346151352, "learning_rate": 4.932850996255879e-05, "loss": 0.0469, "step": 24050 }, { "epoch": 0.1153, "grad_norm": 0.13531990349292755, "learning_rate": 4.932755800671848e-05, "loss": 0.045, "step": 24060 }, { "epoch": 0.11535, "grad_norm": 0.17501574754714966, "learning_rate": 4.932660538577186e-05, "loss": 0.0453, "step": 24070 }, { "epoch": 0.1154, "grad_norm": 0.17578630149364471, "learning_rate": 4.932565209974499e-05, "loss": 0.0455, "step": 24080 }, { "epoch": 0.11545, "grad_norm": 0.16083061695098877, "learning_rate": 4.932469814866394e-05, "loss": 0.0452, "step": 24090 }, { "epoch": 0.1155, "grad_norm": 0.1421496570110321, "learning_rate": 4.932374353255477e-05, "loss": 0.045, "step": 24100 }, { "epoch": 0.11555, "grad_norm": 0.16017691791057587, "learning_rate": 4.9322788251443596e-05, "loss": 0.0463, "step": 24110 }, { "epoch": 0.1156, "grad_norm": 0.14793579280376434, "learning_rate": 4.932183230535653e-05, "loss": 0.0467, "step": 24120 }, { "epoch": 0.11565, "grad_norm": 0.14724218845367432, "learning_rate": 4.93208756943197e-05, "loss": 0.0447, "step": 24130 }, { "epoch": 0.1157, "grad_norm": 0.12646318972110748, "learning_rate": 4.931991841835927e-05, "loss": 0.0437, "step": 24140 }, { "epoch": 0.11575, "grad_norm": 0.17053678631782532, "learning_rate": 4.931896047750141e-05, "loss": 0.046, "step": 24150 }, { "epoch": 0.1158, "grad_norm": 0.16350778937339783, "learning_rate": 4.931800187177229e-05, "loss": 0.0492, "step": 24160 }, { "epoch": 0.11585, "grad_norm": 0.15240345895290375, "learning_rate": 4.931704260119815e-05, "loss": 0.0449, "step": 24170 }, { "epoch": 0.1159, "grad_norm": 0.1353459358215332, "learning_rate": 4.9316082665805204e-05, "loss": 0.0449, "step": 24180 }, { "epoch": 0.11595, "grad_norm": 0.16670545935630798, "learning_rate": 4.931512206561968e-05, "loss": 0.0451, "step": 24190 }, { "epoch": 0.116, "grad_norm": 0.12886999547481537, "learning_rate": 4.931416080066786e-05, "loss": 0.045, "step": 24200 }, { "epoch": 0.11605, "grad_norm": 0.134444460272789, "learning_rate": 4.931319887097602e-05, "loss": 0.0466, "step": 24210 }, { "epoch": 0.1161, "grad_norm": 0.12937898933887482, "learning_rate": 4.931223627657046e-05, "loss": 0.048, "step": 24220 }, { "epoch": 0.11615, "grad_norm": 0.17327240109443665, "learning_rate": 4.931127301747749e-05, "loss": 0.0468, "step": 24230 }, { "epoch": 0.1162, "grad_norm": 0.15150600671768188, "learning_rate": 4.931030909372345e-05, "loss": 0.0458, "step": 24240 }, { "epoch": 0.11625, "grad_norm": 0.17797043919563293, "learning_rate": 4.9309344505334685e-05, "loss": 0.0461, "step": 24250 }, { "epoch": 0.1163, "grad_norm": 0.13645793497562408, "learning_rate": 4.9308379252337586e-05, "loss": 0.0448, "step": 24260 }, { "epoch": 0.11635, "grad_norm": 0.13534113764762878, "learning_rate": 4.9307413334758524e-05, "loss": 0.0466, "step": 24270 }, { "epoch": 0.1164, "grad_norm": 0.1359132081270218, "learning_rate": 4.930644675262391e-05, "loss": 0.0464, "step": 24280 }, { "epoch": 0.11645, "grad_norm": 0.1498836725950241, "learning_rate": 4.9305479505960176e-05, "loss": 0.0451, "step": 24290 }, { "epoch": 0.1165, "grad_norm": 0.12932755053043365, "learning_rate": 4.9304511594793765e-05, "loss": 0.0454, "step": 24300 }, { "epoch": 0.11655, "grad_norm": 0.1804189831018448, "learning_rate": 4.9303543019151135e-05, "loss": 0.0481, "step": 24310 }, { "epoch": 0.1166, "grad_norm": 0.15709887444972992, "learning_rate": 4.930257377905877e-05, "loss": 0.0459, "step": 24320 }, { "epoch": 0.11665, "grad_norm": 0.16021974384784698, "learning_rate": 4.9301603874543165e-05, "loss": 0.0457, "step": 24330 }, { "epoch": 0.1167, "grad_norm": 0.16061711311340332, "learning_rate": 4.930063330563085e-05, "loss": 0.0451, "step": 24340 }, { "epoch": 0.11675, "grad_norm": 0.14702945947647095, "learning_rate": 4.929966207234834e-05, "loss": 0.0489, "step": 24350 }, { "epoch": 0.1168, "grad_norm": 0.14569586515426636, "learning_rate": 4.9298690174722204e-05, "loss": 0.047, "step": 24360 }, { "epoch": 0.11685, "grad_norm": 0.16680438816547394, "learning_rate": 4.929771761277901e-05, "loss": 0.0472, "step": 24370 }, { "epoch": 0.1169, "grad_norm": 0.16751612722873688, "learning_rate": 4.9296744386545334e-05, "loss": 0.0483, "step": 24380 }, { "epoch": 0.11695, "grad_norm": 0.15513846278190613, "learning_rate": 4.92957704960478e-05, "loss": 0.0484, "step": 24390 }, { "epoch": 0.117, "grad_norm": 0.17224083840847015, "learning_rate": 4.9294795941313034e-05, "loss": 0.0475, "step": 24400 }, { "epoch": 0.11705, "grad_norm": 0.15373164415359497, "learning_rate": 4.929382072236766e-05, "loss": 0.0454, "step": 24410 }, { "epoch": 0.1171, "grad_norm": 0.1522088199853897, "learning_rate": 4.9292844839238364e-05, "loss": 0.0444, "step": 24420 }, { "epoch": 0.11715, "grad_norm": 0.12994112074375153, "learning_rate": 4.9291868291951814e-05, "loss": 0.0457, "step": 24430 }, { "epoch": 0.1172, "grad_norm": 0.17007765173912048, "learning_rate": 4.9290891080534715e-05, "loss": 0.044, "step": 24440 }, { "epoch": 0.11725, "grad_norm": 0.1421523094177246, "learning_rate": 4.9289913205013774e-05, "loss": 0.0474, "step": 24450 }, { "epoch": 0.1173, "grad_norm": 0.12650936841964722, "learning_rate": 4.928893466541573e-05, "loss": 0.0456, "step": 24460 }, { "epoch": 0.11735, "grad_norm": 0.1637522131204605, "learning_rate": 4.9287955461767346e-05, "loss": 0.0472, "step": 24470 }, { "epoch": 0.1174, "grad_norm": 0.16588808596134186, "learning_rate": 4.928697559409537e-05, "loss": 0.0462, "step": 24480 }, { "epoch": 0.11745, "grad_norm": 0.1298874318599701, "learning_rate": 4.928599506242662e-05, "loss": 0.0455, "step": 24490 }, { "epoch": 0.1175, "grad_norm": 0.13552021980285645, "learning_rate": 4.9285013866787886e-05, "loss": 0.0458, "step": 24500 }, { "epoch": 0.11755, "grad_norm": 0.15007399022579193, "learning_rate": 4.928403200720599e-05, "loss": 0.0446, "step": 24510 }, { "epoch": 0.1176, "grad_norm": 0.14397795498371124, "learning_rate": 4.928304948370779e-05, "loss": 0.0432, "step": 24520 }, { "epoch": 0.11765, "grad_norm": 0.13422149419784546, "learning_rate": 4.9282066296320125e-05, "loss": 0.0454, "step": 24530 }, { "epoch": 0.1177, "grad_norm": 0.15637044608592987, "learning_rate": 4.928108244506991e-05, "loss": 0.0445, "step": 24540 }, { "epoch": 0.11775, "grad_norm": 0.1382390856742859, "learning_rate": 4.928009792998401e-05, "loss": 0.0452, "step": 24550 }, { "epoch": 0.1178, "grad_norm": 0.18540625274181366, "learning_rate": 4.9279112751089356e-05, "loss": 0.0481, "step": 24560 }, { "epoch": 0.11785, "grad_norm": 0.14658458530902863, "learning_rate": 4.927812690841288e-05, "loss": 0.0467, "step": 24570 }, { "epoch": 0.1179, "grad_norm": 0.16724786162376404, "learning_rate": 4.9277140401981534e-05, "loss": 0.0484, "step": 24580 }, { "epoch": 0.11795, "grad_norm": 0.15964344143867493, "learning_rate": 4.92761532318223e-05, "loss": 0.044, "step": 24590 }, { "epoch": 0.118, "grad_norm": 0.1390381157398224, "learning_rate": 4.927516539796215e-05, "loss": 0.0463, "step": 24600 }, { "epoch": 0.11805, "grad_norm": 0.16976219415664673, "learning_rate": 4.927417690042809e-05, "loss": 0.0475, "step": 24610 }, { "epoch": 0.1181, "grad_norm": 0.1623208075761795, "learning_rate": 4.927318773924717e-05, "loss": 0.0447, "step": 24620 }, { "epoch": 0.11815, "grad_norm": 0.1608884483575821, "learning_rate": 4.9272197914446406e-05, "loss": 0.0463, "step": 24630 }, { "epoch": 0.1182, "grad_norm": 0.16607420146465302, "learning_rate": 4.9271207426052866e-05, "loss": 0.0442, "step": 24640 }, { "epoch": 0.11825, "grad_norm": 0.16706447303295135, "learning_rate": 4.927021627409364e-05, "loss": 0.0455, "step": 24650 }, { "epoch": 0.1183, "grad_norm": 0.13739296793937683, "learning_rate": 4.926922445859581e-05, "loss": 0.0444, "step": 24660 }, { "epoch": 0.11835, "grad_norm": 0.12479973584413528, "learning_rate": 4.926823197958651e-05, "loss": 0.0461, "step": 24670 }, { "epoch": 0.1184, "grad_norm": 0.14369264245033264, "learning_rate": 4.9267238837092865e-05, "loss": 0.045, "step": 24680 }, { "epoch": 0.11845, "grad_norm": 0.17088323831558228, "learning_rate": 4.926624503114202e-05, "loss": 0.0471, "step": 24690 }, { "epoch": 0.1185, "grad_norm": 0.14353309571743011, "learning_rate": 4.9265250561761155e-05, "loss": 0.0459, "step": 24700 }, { "epoch": 0.11855, "grad_norm": 0.12303953617811203, "learning_rate": 4.926425542897746e-05, "loss": 0.0464, "step": 24710 }, { "epoch": 0.1186, "grad_norm": 0.1280302107334137, "learning_rate": 4.926325963281814e-05, "loss": 0.0454, "step": 24720 }, { "epoch": 0.11865, "grad_norm": 0.16849106550216675, "learning_rate": 4.9262263173310405e-05, "loss": 0.0488, "step": 24730 }, { "epoch": 0.1187, "grad_norm": 0.14628657698631287, "learning_rate": 4.926126605048152e-05, "loss": 0.046, "step": 24740 }, { "epoch": 0.11875, "grad_norm": 0.15758998692035675, "learning_rate": 4.926026826435873e-05, "loss": 0.0452, "step": 24750 }, { "epoch": 0.1188, "grad_norm": 0.12086597830057144, "learning_rate": 4.925926981496932e-05, "loss": 0.045, "step": 24760 }, { "epoch": 0.11885, "grad_norm": 0.14628925919532776, "learning_rate": 4.925827070234059e-05, "loss": 0.0453, "step": 24770 }, { "epoch": 0.1189, "grad_norm": 0.1637919396162033, "learning_rate": 4.9257270926499855e-05, "loss": 0.0437, "step": 24780 }, { "epoch": 0.11895, "grad_norm": 0.13955332338809967, "learning_rate": 4.9256270487474437e-05, "loss": 0.0469, "step": 24790 }, { "epoch": 0.119, "grad_norm": 0.14324098825454712, "learning_rate": 4.9255269385291704e-05, "loss": 0.0485, "step": 24800 }, { "epoch": 0.11905, "grad_norm": 0.13485878705978394, "learning_rate": 4.925426761997901e-05, "loss": 0.044, "step": 24810 }, { "epoch": 0.1191, "grad_norm": 0.14601413905620575, "learning_rate": 4.925326519156376e-05, "loss": 0.0458, "step": 24820 }, { "epoch": 0.11915, "grad_norm": 0.17952613532543182, "learning_rate": 4.925226210007335e-05, "loss": 0.0455, "step": 24830 }, { "epoch": 0.1192, "grad_norm": 0.19048964977264404, "learning_rate": 4.92512583455352e-05, "loss": 0.0462, "step": 24840 }, { "epoch": 0.11925, "grad_norm": 0.1312924027442932, "learning_rate": 4.925025392797676e-05, "loss": 0.0434, "step": 24850 }, { "epoch": 0.1193, "grad_norm": 0.160012885928154, "learning_rate": 4.924924884742549e-05, "loss": 0.0429, "step": 24860 }, { "epoch": 0.11935, "grad_norm": 0.15358127653598785, "learning_rate": 4.9248243103908864e-05, "loss": 0.0444, "step": 24870 }, { "epoch": 0.1194, "grad_norm": 0.14759136736392975, "learning_rate": 4.9247236697454386e-05, "loss": 0.0444, "step": 24880 }, { "epoch": 0.11945, "grad_norm": 0.13033431768417358, "learning_rate": 4.9246229628089556e-05, "loss": 0.0436, "step": 24890 }, { "epoch": 0.1195, "grad_norm": 0.1593874990940094, "learning_rate": 4.924522189584193e-05, "loss": 0.045, "step": 24900 }, { "epoch": 0.11955, "grad_norm": 0.1567847579717636, "learning_rate": 4.924421350073904e-05, "loss": 0.0451, "step": 24910 }, { "epoch": 0.1196, "grad_norm": 0.15744049847126007, "learning_rate": 4.9243204442808456e-05, "loss": 0.0455, "step": 24920 }, { "epoch": 0.11965, "grad_norm": 0.14594145119190216, "learning_rate": 4.924219472207778e-05, "loss": 0.0442, "step": 24930 }, { "epoch": 0.1197, "grad_norm": 0.1674627661705017, "learning_rate": 4.9241184338574595e-05, "loss": 0.0449, "step": 24940 }, { "epoch": 0.11975, "grad_norm": 0.15260593593120575, "learning_rate": 4.924017329232655e-05, "loss": 0.0422, "step": 24950 }, { "epoch": 0.1198, "grad_norm": 0.13431614637374878, "learning_rate": 4.923916158336127e-05, "loss": 0.0456, "step": 24960 }, { "epoch": 0.11985, "grad_norm": 0.11683829873800278, "learning_rate": 4.923814921170641e-05, "loss": 0.047, "step": 24970 }, { "epoch": 0.1199, "grad_norm": 0.12878383696079254, "learning_rate": 4.923713617738967e-05, "loss": 0.0444, "step": 24980 }, { "epoch": 0.11995, "grad_norm": 0.14457914233207703, "learning_rate": 4.923612248043872e-05, "loss": 0.0439, "step": 24990 }, { "epoch": 0.12, "grad_norm": 0.16910295188426971, "learning_rate": 4.92351081208813e-05, "loss": 0.0469, "step": 25000 }, { "epoch": 0.12005, "grad_norm": 0.13894687592983246, "learning_rate": 4.923409309874511e-05, "loss": 0.0467, "step": 25010 }, { "epoch": 0.1201, "grad_norm": 0.1838841736316681, "learning_rate": 4.923307741405794e-05, "loss": 0.0475, "step": 25020 }, { "epoch": 0.12015, "grad_norm": 0.1557500958442688, "learning_rate": 4.923206106684752e-05, "loss": 0.0459, "step": 25030 }, { "epoch": 0.1202, "grad_norm": 0.1988830864429474, "learning_rate": 4.923104405714166e-05, "loss": 0.0459, "step": 25040 }, { "epoch": 0.12025, "grad_norm": 0.16980133950710297, "learning_rate": 4.9230026384968166e-05, "loss": 0.0466, "step": 25050 }, { "epoch": 0.1203, "grad_norm": 0.13996870815753937, "learning_rate": 4.922900805035484e-05, "loss": 0.0446, "step": 25060 }, { "epoch": 0.12035, "grad_norm": 0.11587783694267273, "learning_rate": 4.922798905332955e-05, "loss": 0.0434, "step": 25070 }, { "epoch": 0.1204, "grad_norm": 0.2113139033317566, "learning_rate": 4.922696939392013e-05, "loss": 0.0464, "step": 25080 }, { "epoch": 0.12045, "grad_norm": 0.12333756685256958, "learning_rate": 4.9225949072154474e-05, "loss": 0.047, "step": 25090 }, { "epoch": 0.1205, "grad_norm": 0.1441316455602646, "learning_rate": 4.922492808806047e-05, "loss": 0.0457, "step": 25100 }, { "epoch": 0.12055, "grad_norm": 0.1563645601272583, "learning_rate": 4.9223906441666036e-05, "loss": 0.0473, "step": 25110 }, { "epoch": 0.1206, "grad_norm": 0.1893489956855774, "learning_rate": 4.92228841329991e-05, "loss": 0.0473, "step": 25120 }, { "epoch": 0.12065, "grad_norm": 0.15625514090061188, "learning_rate": 4.922186116208761e-05, "loss": 0.0486, "step": 25130 }, { "epoch": 0.1207, "grad_norm": 0.13410907983779907, "learning_rate": 4.9220837528959535e-05, "loss": 0.0466, "step": 25140 }, { "epoch": 0.12075, "grad_norm": 0.1733837127685547, "learning_rate": 4.9219813233642866e-05, "loss": 0.0453, "step": 25150 }, { "epoch": 0.1208, "grad_norm": 0.12287669628858566, "learning_rate": 4.9218788276165596e-05, "loss": 0.0443, "step": 25160 }, { "epoch": 0.12085, "grad_norm": 0.13462580740451813, "learning_rate": 4.9217762656555754e-05, "loss": 0.0452, "step": 25170 }, { "epoch": 0.1209, "grad_norm": 0.17285668849945068, "learning_rate": 4.921673637484138e-05, "loss": 0.0456, "step": 25180 }, { "epoch": 0.12095, "grad_norm": 0.1468110978603363, "learning_rate": 4.9215709431050535e-05, "loss": 0.047, "step": 25190 }, { "epoch": 0.121, "grad_norm": 0.15016555786132812, "learning_rate": 4.921468182521128e-05, "loss": 0.0468, "step": 25200 }, { "epoch": 0.12105, "grad_norm": 0.16271093487739563, "learning_rate": 4.9213653557351736e-05, "loss": 0.0462, "step": 25210 }, { "epoch": 0.1211, "grad_norm": 0.17403465509414673, "learning_rate": 4.9212624627499994e-05, "loss": 0.0442, "step": 25220 }, { "epoch": 0.12115, "grad_norm": 0.16199059784412384, "learning_rate": 4.921159503568419e-05, "loss": 0.0437, "step": 25230 }, { "epoch": 0.1212, "grad_norm": 0.14354346692562103, "learning_rate": 4.921056478193247e-05, "loss": 0.0459, "step": 25240 }, { "epoch": 0.12125, "grad_norm": 0.12012068182229996, "learning_rate": 4.920953386627301e-05, "loss": 0.0438, "step": 25250 }, { "epoch": 0.1213, "grad_norm": 0.1600666344165802, "learning_rate": 4.9208502288733996e-05, "loss": 0.045, "step": 25260 }, { "epoch": 0.12135, "grad_norm": 0.19848820567131042, "learning_rate": 4.920747004934361e-05, "loss": 0.0462, "step": 25270 }, { "epoch": 0.1214, "grad_norm": 0.14214332401752472, "learning_rate": 4.920643714813009e-05, "loss": 0.0461, "step": 25280 }, { "epoch": 0.12145, "grad_norm": 0.12816590070724487, "learning_rate": 4.9205403585121676e-05, "loss": 0.0442, "step": 25290 }, { "epoch": 0.1215, "grad_norm": 0.11601614207029343, "learning_rate": 4.920436936034663e-05, "loss": 0.045, "step": 25300 }, { "epoch": 0.12155, "grad_norm": 0.14862045645713806, "learning_rate": 4.920333447383321e-05, "loss": 0.0455, "step": 25310 }, { "epoch": 0.1216, "grad_norm": 0.14009587466716766, "learning_rate": 4.9202298925609716e-05, "loss": 0.0443, "step": 25320 }, { "epoch": 0.12165, "grad_norm": 0.1675596684217453, "learning_rate": 4.9201262715704455e-05, "loss": 0.0434, "step": 25330 }, { "epoch": 0.1217, "grad_norm": 0.1344049572944641, "learning_rate": 4.9200225844145777e-05, "loss": 0.0453, "step": 25340 }, { "epoch": 0.12175, "grad_norm": 0.13212081789970398, "learning_rate": 4.9199188310962006e-05, "loss": 0.0432, "step": 25350 }, { "epoch": 0.1218, "grad_norm": 0.11753246933221817, "learning_rate": 4.919815011618153e-05, "loss": 0.0427, "step": 25360 }, { "epoch": 0.12185, "grad_norm": 0.16912850737571716, "learning_rate": 4.9197111259832703e-05, "loss": 0.0443, "step": 25370 }, { "epoch": 0.1219, "grad_norm": 0.14177367091178894, "learning_rate": 4.9196071741943964e-05, "loss": 0.0438, "step": 25380 }, { "epoch": 0.12195, "grad_norm": 0.11950483918190002, "learning_rate": 4.91950315625437e-05, "loss": 0.0464, "step": 25390 }, { "epoch": 0.122, "grad_norm": 0.12239759415388107, "learning_rate": 4.919399072166037e-05, "loss": 0.0453, "step": 25400 }, { "epoch": 0.12205, "grad_norm": 0.13678602874279022, "learning_rate": 4.919294921932242e-05, "loss": 0.0441, "step": 25410 }, { "epoch": 0.1221, "grad_norm": 0.13878613710403442, "learning_rate": 4.9191907055558326e-05, "loss": 0.0445, "step": 25420 }, { "epoch": 0.12215, "grad_norm": 0.1621149778366089, "learning_rate": 4.9190864230396585e-05, "loss": 0.0464, "step": 25430 }, { "epoch": 0.1222, "grad_norm": 0.1646154224872589, "learning_rate": 4.91898207438657e-05, "loss": 0.0443, "step": 25440 }, { "epoch": 0.12225, "grad_norm": 0.13514678180217743, "learning_rate": 4.9188776595994215e-05, "loss": 0.0462, "step": 25450 }, { "epoch": 0.1223, "grad_norm": 0.15032826364040375, "learning_rate": 4.9187731786810654e-05, "loss": 0.0455, "step": 25460 }, { "epoch": 0.12235, "grad_norm": 0.1921292394399643, "learning_rate": 4.9186686316343586e-05, "loss": 0.0461, "step": 25470 }, { "epoch": 0.1224, "grad_norm": 0.1746305674314499, "learning_rate": 4.918564018462162e-05, "loss": 0.0472, "step": 25480 }, { "epoch": 0.12245, "grad_norm": 0.1615629643201828, "learning_rate": 4.9184593391673325e-05, "loss": 0.0451, "step": 25490 }, { "epoch": 0.1225, "grad_norm": 0.13830015063285828, "learning_rate": 4.918354593752733e-05, "loss": 0.0473, "step": 25500 }, { "epoch": 0.12255, "grad_norm": 0.16088566184043884, "learning_rate": 4.9182497822212284e-05, "loss": 0.0514, "step": 25510 }, { "epoch": 0.1226, "grad_norm": 0.1678658276796341, "learning_rate": 4.9181449045756825e-05, "loss": 0.0466, "step": 25520 }, { "epoch": 0.12265, "grad_norm": 0.21823139488697052, "learning_rate": 4.918039960818963e-05, "loss": 0.0472, "step": 25530 }, { "epoch": 0.1227, "grad_norm": 0.19542300701141357, "learning_rate": 4.9179349509539404e-05, "loss": 0.0481, "step": 25540 }, { "epoch": 0.12275, "grad_norm": 0.15862037241458893, "learning_rate": 4.917829874983484e-05, "loss": 0.0437, "step": 25550 }, { "epoch": 0.1228, "grad_norm": 0.1674732118844986, "learning_rate": 4.917724732910467e-05, "loss": 0.0475, "step": 25560 }, { "epoch": 0.12285, "grad_norm": 0.17866535484790802, "learning_rate": 4.917619524737765e-05, "loss": 0.0442, "step": 25570 }, { "epoch": 0.1229, "grad_norm": 0.15923969447612762, "learning_rate": 4.917514250468252e-05, "loss": 0.0445, "step": 25580 }, { "epoch": 0.12295, "grad_norm": 0.16598527133464813, "learning_rate": 4.9174089101048094e-05, "loss": 0.0437, "step": 25590 }, { "epoch": 0.123, "grad_norm": 0.17330916225910187, "learning_rate": 4.917303503650314e-05, "loss": 0.0479, "step": 25600 }, { "epoch": 0.12305, "grad_norm": 0.15099115669727325, "learning_rate": 4.91719803110765e-05, "loss": 0.0449, "step": 25610 }, { "epoch": 0.1231, "grad_norm": 0.16531068086624146, "learning_rate": 4.917092492479699e-05, "loss": 0.0466, "step": 25620 }, { "epoch": 0.12315, "grad_norm": 0.18818257749080658, "learning_rate": 4.9169868877693484e-05, "loss": 0.0456, "step": 25630 }, { "epoch": 0.1232, "grad_norm": 0.17593468725681305, "learning_rate": 4.916881216979483e-05, "loss": 0.0451, "step": 25640 }, { "epoch": 0.12325, "grad_norm": 0.17095914483070374, "learning_rate": 4.916775480112994e-05, "loss": 0.046, "step": 25650 }, { "epoch": 0.1233, "grad_norm": 0.1490679830312729, "learning_rate": 4.916669677172771e-05, "loss": 0.0454, "step": 25660 }, { "epoch": 0.12335, "grad_norm": 0.1847240924835205, "learning_rate": 4.9165638081617065e-05, "loss": 0.046, "step": 25670 }, { "epoch": 0.1234, "grad_norm": 0.16121570765972137, "learning_rate": 4.916457873082696e-05, "loss": 0.0473, "step": 25680 }, { "epoch": 0.12345, "grad_norm": 0.15476711094379425, "learning_rate": 4.916351871938635e-05, "loss": 0.044, "step": 25690 }, { "epoch": 0.1235, "grad_norm": 0.17886728048324585, "learning_rate": 4.916245804732421e-05, "loss": 0.047, "step": 25700 }, { "epoch": 0.12355, "grad_norm": 0.14230097830295563, "learning_rate": 4.916139671466955e-05, "loss": 0.0445, "step": 25710 }, { "epoch": 0.1236, "grad_norm": 0.2042488008737564, "learning_rate": 4.9160334721451386e-05, "loss": 0.0473, "step": 25720 }, { "epoch": 0.12365, "grad_norm": 0.17995183169841766, "learning_rate": 4.9159272067698734e-05, "loss": 0.0469, "step": 25730 }, { "epoch": 0.1237, "grad_norm": 0.15162166953086853, "learning_rate": 4.9158208753440674e-05, "loss": 0.0461, "step": 25740 }, { "epoch": 0.12375, "grad_norm": 0.13117767870426178, "learning_rate": 4.915714477870625e-05, "loss": 0.0443, "step": 25750 }, { "epoch": 0.1238, "grad_norm": 0.12508119642734528, "learning_rate": 4.915608014352457e-05, "loss": 0.0453, "step": 25760 }, { "epoch": 0.12385, "grad_norm": 0.14855307340621948, "learning_rate": 4.915501484792473e-05, "loss": 0.0442, "step": 25770 }, { "epoch": 0.1239, "grad_norm": 0.13553832471370697, "learning_rate": 4.9153948891935866e-05, "loss": 0.0463, "step": 25780 }, { "epoch": 0.12395, "grad_norm": 0.16864003241062164, "learning_rate": 4.915288227558711e-05, "loss": 0.0455, "step": 25790 }, { "epoch": 0.124, "grad_norm": 0.15773332118988037, "learning_rate": 4.915181499890762e-05, "loss": 0.0449, "step": 25800 }, { "epoch": 0.12405, "grad_norm": 0.16534097492694855, "learning_rate": 4.9150747061926584e-05, "loss": 0.0445, "step": 25810 }, { "epoch": 0.1241, "grad_norm": 0.13091982901096344, "learning_rate": 4.9149678464673196e-05, "loss": 0.045, "step": 25820 }, { "epoch": 0.12415, "grad_norm": 0.13769251108169556, "learning_rate": 4.914860920717668e-05, "loss": 0.0476, "step": 25830 }, { "epoch": 0.1242, "grad_norm": 0.14034955203533173, "learning_rate": 4.9147539289466256e-05, "loss": 0.0444, "step": 25840 }, { "epoch": 0.12425, "grad_norm": 0.13416755199432373, "learning_rate": 4.914646871157118e-05, "loss": 0.043, "step": 25850 }, { "epoch": 0.1243, "grad_norm": 0.14412455260753632, "learning_rate": 4.9145397473520715e-05, "loss": 0.0443, "step": 25860 }, { "epoch": 0.12435, "grad_norm": 0.14550353586673737, "learning_rate": 4.9144325575344166e-05, "loss": 0.0429, "step": 25870 }, { "epoch": 0.1244, "grad_norm": 0.1473642885684967, "learning_rate": 4.914325301707081e-05, "loss": 0.0471, "step": 25880 }, { "epoch": 0.12445, "grad_norm": 0.14089325070381165, "learning_rate": 4.914217979873e-05, "loss": 0.0477, "step": 25890 }, { "epoch": 0.1245, "grad_norm": 0.1616249531507492, "learning_rate": 4.914110592035106e-05, "loss": 0.0454, "step": 25900 }, { "epoch": 0.12455, "grad_norm": 0.14541175961494446, "learning_rate": 4.9140031381963347e-05, "loss": 0.0489, "step": 25910 }, { "epoch": 0.1246, "grad_norm": 0.21233685314655304, "learning_rate": 4.913895618359625e-05, "loss": 0.0451, "step": 25920 }, { "epoch": 0.12465, "grad_norm": 0.15763972699642181, "learning_rate": 4.913788032527916e-05, "loss": 0.0459, "step": 25930 }, { "epoch": 0.1247, "grad_norm": 0.1575399935245514, "learning_rate": 4.91368038070415e-05, "loss": 0.0458, "step": 25940 }, { "epoch": 0.12475, "grad_norm": 0.1579417586326599, "learning_rate": 4.9135726628912675e-05, "loss": 0.0481, "step": 25950 }, { "epoch": 0.1248, "grad_norm": 0.15620434284210205, "learning_rate": 4.913464879092216e-05, "loss": 0.0483, "step": 25960 }, { "epoch": 0.12485, "grad_norm": 0.20243607461452484, "learning_rate": 4.913357029309941e-05, "loss": 0.0456, "step": 25970 }, { "epoch": 0.1249, "grad_norm": 0.1301449090242386, "learning_rate": 4.913249113547392e-05, "loss": 0.0469, "step": 25980 }, { "epoch": 0.12495, "grad_norm": 0.14186380803585052, "learning_rate": 4.913141131807518e-05, "loss": 0.0471, "step": 25990 }, { "epoch": 0.125, "grad_norm": 0.13385401666164398, "learning_rate": 4.913033084093273e-05, "loss": 0.044, "step": 26000 }, { "epoch": 0.12505, "grad_norm": 0.1556403785943985, "learning_rate": 4.91292497040761e-05, "loss": 0.0481, "step": 26010 }, { "epoch": 0.1251, "grad_norm": 0.14860087633132935, "learning_rate": 4.912816790753484e-05, "loss": 0.0444, "step": 26020 }, { "epoch": 0.12515, "grad_norm": 0.1349562108516693, "learning_rate": 4.9127085451338536e-05, "loss": 0.044, "step": 26030 }, { "epoch": 0.1252, "grad_norm": 0.10741005092859268, "learning_rate": 4.9126002335516787e-05, "loss": 0.0458, "step": 26040 }, { "epoch": 0.12525, "grad_norm": 0.15222665667533875, "learning_rate": 4.912491856009919e-05, "loss": 0.0457, "step": 26050 }, { "epoch": 0.1253, "grad_norm": 0.14157378673553467, "learning_rate": 4.9123834125115384e-05, "loss": 0.0456, "step": 26060 }, { "epoch": 0.12535, "grad_norm": 0.15087658166885376, "learning_rate": 4.9122749030595024e-05, "loss": 0.0444, "step": 26070 }, { "epoch": 0.1254, "grad_norm": 0.16578349471092224, "learning_rate": 4.912166327656776e-05, "loss": 0.0454, "step": 26080 }, { "epoch": 0.12545, "grad_norm": 0.13934017717838287, "learning_rate": 4.912057686306328e-05, "loss": 0.0462, "step": 26090 }, { "epoch": 0.1255, "grad_norm": 0.141837477684021, "learning_rate": 4.91194897901113e-05, "loss": 0.0441, "step": 26100 }, { "epoch": 0.12555, "grad_norm": 0.15003222227096558, "learning_rate": 4.911840205774153e-05, "loss": 0.0459, "step": 26110 }, { "epoch": 0.1256, "grad_norm": 0.20069071650505066, "learning_rate": 4.911731366598371e-05, "loss": 0.045, "step": 26120 }, { "epoch": 0.12565, "grad_norm": 0.15775476396083832, "learning_rate": 4.911622461486759e-05, "loss": 0.0444, "step": 26130 }, { "epoch": 0.1257, "grad_norm": 0.14468349516391754, "learning_rate": 4.9115134904422946e-05, "loss": 0.0495, "step": 26140 }, { "epoch": 0.12575, "grad_norm": 0.1316310465335846, "learning_rate": 4.911404453467957e-05, "loss": 0.0445, "step": 26150 }, { "epoch": 0.1258, "grad_norm": 0.12668436765670776, "learning_rate": 4.9112953505667286e-05, "loss": 0.0454, "step": 26160 }, { "epoch": 0.12585, "grad_norm": 0.14326219260692596, "learning_rate": 4.9111861817415905e-05, "loss": 0.0459, "step": 26170 }, { "epoch": 0.1259, "grad_norm": 0.14999836683273315, "learning_rate": 4.9110769469955285e-05, "loss": 0.0446, "step": 26180 }, { "epoch": 0.12595, "grad_norm": 0.13554710149765015, "learning_rate": 4.910967646331528e-05, "loss": 0.0434, "step": 26190 }, { "epoch": 0.126, "grad_norm": 0.1860104203224182, "learning_rate": 4.9108582797525786e-05, "loss": 0.0459, "step": 26200 }, { "epoch": 0.12605, "grad_norm": 0.13717034459114075, "learning_rate": 4.9107488472616694e-05, "loss": 0.045, "step": 26210 }, { "epoch": 0.1261, "grad_norm": 0.15142101049423218, "learning_rate": 4.910639348861792e-05, "loss": 0.0478, "step": 26220 }, { "epoch": 0.12615, "grad_norm": 0.14934422075748444, "learning_rate": 4.9105297845559405e-05, "loss": 0.048, "step": 26230 }, { "epoch": 0.1262, "grad_norm": 0.1412418782711029, "learning_rate": 4.9104201543471104e-05, "loss": 0.0473, "step": 26240 }, { "epoch": 0.12625, "grad_norm": 0.1649186909198761, "learning_rate": 4.910310458238298e-05, "loss": 0.0452, "step": 26250 }, { "epoch": 0.1263, "grad_norm": 0.1373409926891327, "learning_rate": 4.9102006962325056e-05, "loss": 0.0442, "step": 26260 }, { "epoch": 0.12635, "grad_norm": 0.14176934957504272, "learning_rate": 4.9100908683327294e-05, "loss": 0.0446, "step": 26270 }, { "epoch": 0.1264, "grad_norm": 0.13973231613636017, "learning_rate": 4.909980974541975e-05, "loss": 0.0457, "step": 26280 }, { "epoch": 0.12645, "grad_norm": 0.14384910464286804, "learning_rate": 4.909871014863246e-05, "loss": 0.0446, "step": 26290 }, { "epoch": 0.1265, "grad_norm": 0.13749195635318756, "learning_rate": 4.909760989299549e-05, "loss": 0.0463, "step": 26300 }, { "epoch": 0.12655, "grad_norm": 0.1508299857378006, "learning_rate": 4.9096508978538914e-05, "loss": 0.0488, "step": 26310 }, { "epoch": 0.1266, "grad_norm": 0.1548951119184494, "learning_rate": 4.9095407405292834e-05, "loss": 0.0471, "step": 26320 }, { "epoch": 0.12665, "grad_norm": 0.150093212723732, "learning_rate": 4.909430517328738e-05, "loss": 0.0442, "step": 26330 }, { "epoch": 0.1267, "grad_norm": 0.15900050103664398, "learning_rate": 4.9093202282552666e-05, "loss": 0.0443, "step": 26340 }, { "epoch": 0.12675, "grad_norm": 0.15392284095287323, "learning_rate": 4.909209873311885e-05, "loss": 0.0431, "step": 26350 }, { "epoch": 0.1268, "grad_norm": 0.13655874133110046, "learning_rate": 4.909099452501611e-05, "loss": 0.0493, "step": 26360 }, { "epoch": 0.12685, "grad_norm": 0.1434694528579712, "learning_rate": 4.908988965827463e-05, "loss": 0.0451, "step": 26370 }, { "epoch": 0.1269, "grad_norm": 0.10418988764286041, "learning_rate": 4.9088784132924616e-05, "loss": 0.0446, "step": 26380 }, { "epoch": 0.12695, "grad_norm": 0.13806277513504028, "learning_rate": 4.908767794899629e-05, "loss": 0.0449, "step": 26390 }, { "epoch": 0.127, "grad_norm": 0.17036783695220947, "learning_rate": 4.908657110651991e-05, "loss": 0.0456, "step": 26400 }, { "epoch": 0.12705, "grad_norm": 0.1243971735239029, "learning_rate": 4.9085463605525716e-05, "loss": 0.0449, "step": 26410 }, { "epoch": 0.1271, "grad_norm": 0.1742110699415207, "learning_rate": 4.9084355446044e-05, "loss": 0.0468, "step": 26420 }, { "epoch": 0.12715, "grad_norm": 0.17756503820419312, "learning_rate": 4.908324662810505e-05, "loss": 0.0442, "step": 26430 }, { "epoch": 0.1272, "grad_norm": 0.13110657036304474, "learning_rate": 4.908213715173918e-05, "loss": 0.0464, "step": 26440 }, { "epoch": 0.12725, "grad_norm": 0.13226915895938873, "learning_rate": 4.9081027016976736e-05, "loss": 0.0449, "step": 26450 }, { "epoch": 0.1273, "grad_norm": 0.12369966506958008, "learning_rate": 4.9079916223848055e-05, "loss": 0.0453, "step": 26460 }, { "epoch": 0.12735, "grad_norm": 0.18580640852451324, "learning_rate": 4.907880477238351e-05, "loss": 0.0456, "step": 26470 }, { "epoch": 0.1274, "grad_norm": 0.17344515025615692, "learning_rate": 4.9077692662613496e-05, "loss": 0.0463, "step": 26480 }, { "epoch": 0.12745, "grad_norm": 0.17119541764259338, "learning_rate": 4.90765798945684e-05, "loss": 0.0495, "step": 26490 }, { "epoch": 0.1275, "grad_norm": 0.167933389544487, "learning_rate": 4.907546646827866e-05, "loss": 0.0475, "step": 26500 }, { "epoch": 0.12755, "grad_norm": 0.16994008421897888, "learning_rate": 4.907435238377471e-05, "loss": 0.0449, "step": 26510 }, { "epoch": 0.1276, "grad_norm": 0.1789732128381729, "learning_rate": 4.9073237641087014e-05, "loss": 0.0442, "step": 26520 }, { "epoch": 0.12765, "grad_norm": 0.19679789245128632, "learning_rate": 4.907212224024604e-05, "loss": 0.0473, "step": 26530 }, { "epoch": 0.1277, "grad_norm": 0.1414823681116104, "learning_rate": 4.90710061812823e-05, "loss": 0.0476, "step": 26540 }, { "epoch": 0.12775, "grad_norm": 0.14352194964885712, "learning_rate": 4.906988946422628e-05, "loss": 0.0489, "step": 26550 }, { "epoch": 0.1278, "grad_norm": 0.12210658937692642, "learning_rate": 4.906877208910853e-05, "loss": 0.0454, "step": 26560 }, { "epoch": 0.12785, "grad_norm": 0.15541048347949982, "learning_rate": 4.906765405595959e-05, "loss": 0.0472, "step": 26570 }, { "epoch": 0.1279, "grad_norm": 0.14378651976585388, "learning_rate": 4.9066535364810034e-05, "loss": 0.0456, "step": 26580 }, { "epoch": 0.12795, "grad_norm": 0.15418651700019836, "learning_rate": 4.906541601569044e-05, "loss": 0.0465, "step": 26590 }, { "epoch": 0.128, "grad_norm": 0.1363755464553833, "learning_rate": 4.9064296008631414e-05, "loss": 0.0468, "step": 26600 }, { "epoch": 0.12805, "grad_norm": 0.15140806138515472, "learning_rate": 4.9063175343663574e-05, "loss": 0.0467, "step": 26610 }, { "epoch": 0.1281, "grad_norm": 0.1380670815706253, "learning_rate": 4.906205402081756e-05, "loss": 0.0437, "step": 26620 }, { "epoch": 0.12815, "grad_norm": 0.13127738237380981, "learning_rate": 4.906093204012403e-05, "loss": 0.0439, "step": 26630 }, { "epoch": 0.1282, "grad_norm": 0.12910018861293793, "learning_rate": 4.905980940161366e-05, "loss": 0.0453, "step": 26640 }, { "epoch": 0.12825, "grad_norm": 0.1474410444498062, "learning_rate": 4.905868610531714e-05, "loss": 0.045, "step": 26650 }, { "epoch": 0.1283, "grad_norm": 0.11991109699010849, "learning_rate": 4.905756215126518e-05, "loss": 0.0447, "step": 26660 }, { "epoch": 0.12835, "grad_norm": 0.14297989010810852, "learning_rate": 4.9056437539488506e-05, "loss": 0.0445, "step": 26670 }, { "epoch": 0.1284, "grad_norm": 0.14665597677230835, "learning_rate": 4.905531227001786e-05, "loss": 0.0439, "step": 26680 }, { "epoch": 0.12845, "grad_norm": 0.14444920420646667, "learning_rate": 4.905418634288402e-05, "loss": 0.0458, "step": 26690 }, { "epoch": 0.1285, "grad_norm": 0.13890399038791656, "learning_rate": 4.905305975811777e-05, "loss": 0.0442, "step": 26700 }, { "epoch": 0.12855, "grad_norm": 0.14420518279075623, "learning_rate": 4.9051932515749896e-05, "loss": 0.0481, "step": 26710 }, { "epoch": 0.1286, "grad_norm": 0.1519700139760971, "learning_rate": 4.905080461581123e-05, "loss": 0.0449, "step": 26720 }, { "epoch": 0.12865, "grad_norm": 0.14807257056236267, "learning_rate": 4.904967605833259e-05, "loss": 0.0488, "step": 26730 }, { "epoch": 0.1287, "grad_norm": 0.16089566051959991, "learning_rate": 4.9048546843344846e-05, "loss": 0.0439, "step": 26740 }, { "epoch": 0.12875, "grad_norm": 0.1584918200969696, "learning_rate": 4.9047416970878866e-05, "loss": 0.0474, "step": 26750 }, { "epoch": 0.1288, "grad_norm": 0.1363014280796051, "learning_rate": 4.9046286440965535e-05, "loss": 0.0443, "step": 26760 }, { "epoch": 0.12885, "grad_norm": 0.13531547784805298, "learning_rate": 4.9045155253635776e-05, "loss": 0.0441, "step": 26770 }, { "epoch": 0.1289, "grad_norm": 0.14951872825622559, "learning_rate": 4.90440234089205e-05, "loss": 0.0437, "step": 26780 }, { "epoch": 0.12895, "grad_norm": 0.15993520617485046, "learning_rate": 4.9042890906850655e-05, "loss": 0.0494, "step": 26790 }, { "epoch": 0.129, "grad_norm": 0.13922452926635742, "learning_rate": 4.9041757747457215e-05, "loss": 0.0466, "step": 26800 }, { "epoch": 0.12905, "grad_norm": 0.15624390542507172, "learning_rate": 4.904062393077114e-05, "loss": 0.0479, "step": 26810 }, { "epoch": 0.1291, "grad_norm": 0.13383163511753082, "learning_rate": 4.903948945682344e-05, "loss": 0.0449, "step": 26820 }, { "epoch": 0.12915, "grad_norm": 0.15837447345256805, "learning_rate": 4.903835432564513e-05, "loss": 0.0456, "step": 26830 }, { "epoch": 0.1292, "grad_norm": 0.13015559315681458, "learning_rate": 4.903721853726725e-05, "loss": 0.0443, "step": 26840 }, { "epoch": 0.12925, "grad_norm": 0.12030153721570969, "learning_rate": 4.9036082091720834e-05, "loss": 0.0445, "step": 26850 }, { "epoch": 0.1293, "grad_norm": 0.12426728010177612, "learning_rate": 4.903494498903698e-05, "loss": 0.045, "step": 26860 }, { "epoch": 0.12935, "grad_norm": 0.1436558961868286, "learning_rate": 4.903380722924674e-05, "loss": 0.0435, "step": 26870 }, { "epoch": 0.1294, "grad_norm": 0.1367773413658142, "learning_rate": 4.903266881238126e-05, "loss": 0.0444, "step": 26880 }, { "epoch": 0.12945, "grad_norm": 0.14350031316280365, "learning_rate": 4.903152973847163e-05, "loss": 0.0469, "step": 26890 }, { "epoch": 0.1295, "grad_norm": 0.17975474894046783, "learning_rate": 4.9030390007549005e-05, "loss": 0.0482, "step": 26900 }, { "epoch": 0.12955, "grad_norm": 0.1254936158657074, "learning_rate": 4.902924961964455e-05, "loss": 0.0461, "step": 26910 }, { "epoch": 0.1296, "grad_norm": 0.16277149319648743, "learning_rate": 4.902810857478943e-05, "loss": 0.048, "step": 26920 }, { "epoch": 0.12965, "grad_norm": 0.16418753564357758, "learning_rate": 4.902696687301486e-05, "loss": 0.0451, "step": 26930 }, { "epoch": 0.1297, "grad_norm": 0.10756832361221313, "learning_rate": 4.902582451435203e-05, "loss": 0.0453, "step": 26940 }, { "epoch": 0.12975, "grad_norm": 0.12258588522672653, "learning_rate": 4.902468149883219e-05, "loss": 0.0464, "step": 26950 }, { "epoch": 0.1298, "grad_norm": 0.14724516868591309, "learning_rate": 4.902353782648659e-05, "loss": 0.0438, "step": 26960 }, { "epoch": 0.12985, "grad_norm": 0.1251763254404068, "learning_rate": 4.902239349734648e-05, "loss": 0.0456, "step": 26970 }, { "epoch": 0.1299, "grad_norm": 0.13004091382026672, "learning_rate": 4.9021248511443165e-05, "loss": 0.0431, "step": 26980 }, { "epoch": 0.12995, "grad_norm": 0.14965547621250153, "learning_rate": 4.9020102868807936e-05, "loss": 0.046, "step": 26990 }, { "epoch": 0.13, "grad_norm": 0.1758066564798355, "learning_rate": 4.9018956569472115e-05, "loss": 0.0449, "step": 27000 }, { "epoch": 0.13005, "grad_norm": 0.16269785165786743, "learning_rate": 4.901780961346705e-05, "loss": 0.0447, "step": 27010 }, { "epoch": 0.1301, "grad_norm": 0.1386614590883255, "learning_rate": 4.9016662000824086e-05, "loss": 0.0447, "step": 27020 }, { "epoch": 0.13015, "grad_norm": 0.15130813419818878, "learning_rate": 4.901551373157461e-05, "loss": 0.0459, "step": 27030 }, { "epoch": 0.1302, "grad_norm": 0.142473042011261, "learning_rate": 4.9014364805750016e-05, "loss": 0.0471, "step": 27040 }, { "epoch": 0.13025, "grad_norm": 0.1500493437051773, "learning_rate": 4.9013215223381705e-05, "loss": 0.0451, "step": 27050 }, { "epoch": 0.1303, "grad_norm": 0.1761331856250763, "learning_rate": 4.9012064984501115e-05, "loss": 0.0439, "step": 27060 }, { "epoch": 0.13035, "grad_norm": 0.22045297920703888, "learning_rate": 4.901091408913968e-05, "loss": 0.0475, "step": 27070 }, { "epoch": 0.1304, "grad_norm": 0.19645242393016815, "learning_rate": 4.9009762537328885e-05, "loss": 0.0474, "step": 27080 }, { "epoch": 0.13045, "grad_norm": 0.13204947113990784, "learning_rate": 4.90086103291002e-05, "loss": 0.0479, "step": 27090 }, { "epoch": 0.1305, "grad_norm": 0.1573173999786377, "learning_rate": 4.900745746448512e-05, "loss": 0.0456, "step": 27100 }, { "epoch": 0.13055, "grad_norm": 0.14312538504600525, "learning_rate": 4.9006303943515184e-05, "loss": 0.0455, "step": 27110 }, { "epoch": 0.1306, "grad_norm": 0.13176430761814117, "learning_rate": 4.9005149766221915e-05, "loss": 0.0445, "step": 27120 }, { "epoch": 0.13065, "grad_norm": 0.16872818768024445, "learning_rate": 4.900399493263686e-05, "loss": 0.0449, "step": 27130 }, { "epoch": 0.1307, "grad_norm": 0.1655738353729248, "learning_rate": 4.900283944279161e-05, "loss": 0.0439, "step": 27140 }, { "epoch": 0.13075, "grad_norm": 0.15001314878463745, "learning_rate": 4.9001683296717744e-05, "loss": 0.0442, "step": 27150 }, { "epoch": 0.1308, "grad_norm": 0.13303294777870178, "learning_rate": 4.9000526494446874e-05, "loss": 0.045, "step": 27160 }, { "epoch": 0.13085, "grad_norm": 0.13751111924648285, "learning_rate": 4.899936903601062e-05, "loss": 0.0446, "step": 27170 }, { "epoch": 0.1309, "grad_norm": 0.12845827639102936, "learning_rate": 4.8998210921440647e-05, "loss": 0.0457, "step": 27180 }, { "epoch": 0.13095, "grad_norm": 0.10752175748348236, "learning_rate": 4.899705215076859e-05, "loss": 0.0422, "step": 27190 }, { "epoch": 0.131, "grad_norm": 0.13293281197547913, "learning_rate": 4.8995892724026146e-05, "loss": 0.0424, "step": 27200 }, { "epoch": 0.13105, "grad_norm": 0.1374746561050415, "learning_rate": 4.899473264124501e-05, "loss": 0.0454, "step": 27210 }, { "epoch": 0.1311, "grad_norm": 0.12084544450044632, "learning_rate": 4.89935719024569e-05, "loss": 0.0422, "step": 27220 }, { "epoch": 0.13115, "grad_norm": 0.13153678178787231, "learning_rate": 4.8992410507693554e-05, "loss": 0.044, "step": 27230 }, { "epoch": 0.1312, "grad_norm": 0.17095345258712769, "learning_rate": 4.8991248456986714e-05, "loss": 0.0478, "step": 27240 }, { "epoch": 0.13125, "grad_norm": 0.1500340700149536, "learning_rate": 4.899008575036815e-05, "loss": 0.0442, "step": 27250 }, { "epoch": 0.1313, "grad_norm": 0.15221814811229706, "learning_rate": 4.898892238786965e-05, "loss": 0.0439, "step": 27260 }, { "epoch": 0.13135, "grad_norm": 0.1547854095697403, "learning_rate": 4.898775836952303e-05, "loss": 0.0457, "step": 27270 }, { "epoch": 0.1314, "grad_norm": 0.15095393359661102, "learning_rate": 4.8986593695360114e-05, "loss": 0.044, "step": 27280 }, { "epoch": 0.13145, "grad_norm": 0.1295640468597412, "learning_rate": 4.8985428365412734e-05, "loss": 0.0441, "step": 27290 }, { "epoch": 0.1315, "grad_norm": 0.1503930389881134, "learning_rate": 4.898426237971275e-05, "loss": 0.0432, "step": 27300 }, { "epoch": 0.13155, "grad_norm": 0.1323421448469162, "learning_rate": 4.898309573829204e-05, "loss": 0.0432, "step": 27310 }, { "epoch": 0.1316, "grad_norm": 0.1202586442232132, "learning_rate": 4.8981928441182514e-05, "loss": 0.0425, "step": 27320 }, { "epoch": 0.13165, "grad_norm": 0.10859017819166183, "learning_rate": 4.8980760488416064e-05, "loss": 0.0452, "step": 27330 }, { "epoch": 0.1317, "grad_norm": 0.15065470337867737, "learning_rate": 4.897959188002463e-05, "loss": 0.0454, "step": 27340 }, { "epoch": 0.13175, "grad_norm": 0.16167475283145905, "learning_rate": 4.897842261604017e-05, "loss": 0.0437, "step": 27350 }, { "epoch": 0.1318, "grad_norm": 0.17504888772964478, "learning_rate": 4.897725269649464e-05, "loss": 0.0452, "step": 27360 }, { "epoch": 0.13185, "grad_norm": 0.16473330557346344, "learning_rate": 4.897608212142003e-05, "loss": 0.0465, "step": 27370 }, { "epoch": 0.1319, "grad_norm": 0.15663087368011475, "learning_rate": 4.897491089084835e-05, "loss": 0.044, "step": 27380 }, { "epoch": 0.13195, "grad_norm": 0.16694441437721252, "learning_rate": 4.89737390048116e-05, "loss": 0.0479, "step": 27390 }, { "epoch": 0.132, "grad_norm": 0.15354514122009277, "learning_rate": 4.897256646334184e-05, "loss": 0.0429, "step": 27400 }, { "epoch": 0.13205, "grad_norm": 0.1537885069847107, "learning_rate": 4.897139326647111e-05, "loss": 0.0449, "step": 27410 }, { "epoch": 0.1321, "grad_norm": 0.18281573057174683, "learning_rate": 4.897021941423151e-05, "loss": 0.0442, "step": 27420 }, { "epoch": 0.13215, "grad_norm": 0.14189311861991882, "learning_rate": 4.896904490665511e-05, "loss": 0.0459, "step": 27430 }, { "epoch": 0.1322, "grad_norm": 0.13066235184669495, "learning_rate": 4.896786974377401e-05, "loss": 0.0434, "step": 27440 }, { "epoch": 0.13225, "grad_norm": 0.12698237597942352, "learning_rate": 4.896669392562038e-05, "loss": 0.0431, "step": 27450 }, { "epoch": 0.1323, "grad_norm": 0.1511726677417755, "learning_rate": 4.896551745222633e-05, "loss": 0.0453, "step": 27460 }, { "epoch": 0.13235, "grad_norm": 0.15052133798599243, "learning_rate": 4.896434032362404e-05, "loss": 0.0483, "step": 27470 }, { "epoch": 0.1324, "grad_norm": 0.14751042425632477, "learning_rate": 4.8963162539845676e-05, "loss": 0.0432, "step": 27480 }, { "epoch": 0.13245, "grad_norm": 0.130686953663826, "learning_rate": 4.896198410092347e-05, "loss": 0.0454, "step": 27490 }, { "epoch": 0.1325, "grad_norm": 0.11741486936807632, "learning_rate": 4.8960805006889604e-05, "loss": 0.0431, "step": 27500 }, { "epoch": 0.13255, "grad_norm": 0.12329643219709396, "learning_rate": 4.8959625257776344e-05, "loss": 0.0431, "step": 27510 }, { "epoch": 0.1326, "grad_norm": 0.129413902759552, "learning_rate": 4.895844485361592e-05, "loss": 0.0419, "step": 27520 }, { "epoch": 0.13265, "grad_norm": 0.12024693936109543, "learning_rate": 4.895726379444062e-05, "loss": 0.0432, "step": 27530 }, { "epoch": 0.1327, "grad_norm": 0.12062715739011765, "learning_rate": 4.8956082080282726e-05, "loss": 0.0425, "step": 27540 }, { "epoch": 0.13275, "grad_norm": 0.11159638315439224, "learning_rate": 4.895489971117455e-05, "loss": 0.042, "step": 27550 }, { "epoch": 0.1328, "grad_norm": 0.15674765408039093, "learning_rate": 4.895371668714841e-05, "loss": 0.0422, "step": 27560 }, { "epoch": 0.13285, "grad_norm": 0.1360040009021759, "learning_rate": 4.895253300823667e-05, "loss": 0.0452, "step": 27570 }, { "epoch": 0.1329, "grad_norm": 0.13582608103752136, "learning_rate": 4.8951348674471666e-05, "loss": 0.0452, "step": 27580 }, { "epoch": 0.13295, "grad_norm": 0.11164236068725586, "learning_rate": 4.8950163685885786e-05, "loss": 0.0439, "step": 27590 }, { "epoch": 0.133, "grad_norm": 0.11445189267396927, "learning_rate": 4.8948978042511426e-05, "loss": 0.0441, "step": 27600 }, { "epoch": 0.13305, "grad_norm": 0.10876807570457458, "learning_rate": 4.8947791744381005e-05, "loss": 0.041, "step": 27610 }, { "epoch": 0.1331, "grad_norm": 0.1405716985464096, "learning_rate": 4.894660479152696e-05, "loss": 0.0434, "step": 27620 }, { "epoch": 0.13315, "grad_norm": 0.11688645929098129, "learning_rate": 4.8945417183981737e-05, "loss": 0.0458, "step": 27630 }, { "epoch": 0.1332, "grad_norm": 0.14625436067581177, "learning_rate": 4.894422892177779e-05, "loss": 0.0431, "step": 27640 }, { "epoch": 0.13325, "grad_norm": 0.1444549858570099, "learning_rate": 4.894304000494764e-05, "loss": 0.042, "step": 27650 }, { "epoch": 0.1333, "grad_norm": 0.13313430547714233, "learning_rate": 4.894185043352375e-05, "loss": 0.0439, "step": 27660 }, { "epoch": 0.13335, "grad_norm": 0.11160176992416382, "learning_rate": 4.894066020753868e-05, "loss": 0.0434, "step": 27670 }, { "epoch": 0.1334, "grad_norm": 0.14903420209884644, "learning_rate": 4.893946932702494e-05, "loss": 0.0449, "step": 27680 }, { "epoch": 0.13345, "grad_norm": 0.13911889493465424, "learning_rate": 4.893827779201512e-05, "loss": 0.0425, "step": 27690 }, { "epoch": 0.1335, "grad_norm": 0.13201235234737396, "learning_rate": 4.893708560254177e-05, "loss": 0.0424, "step": 27700 }, { "epoch": 0.13355, "grad_norm": 0.14347508549690247, "learning_rate": 4.893589275863749e-05, "loss": 0.0432, "step": 27710 }, { "epoch": 0.1336, "grad_norm": 0.13755393028259277, "learning_rate": 4.8934699260334893e-05, "loss": 0.043, "step": 27720 }, { "epoch": 0.13365, "grad_norm": 0.1434353142976761, "learning_rate": 4.893350510766661e-05, "loss": 0.043, "step": 27730 }, { "epoch": 0.1337, "grad_norm": 0.21609467267990112, "learning_rate": 4.8932310300665295e-05, "loss": 0.0444, "step": 27740 }, { "epoch": 0.13375, "grad_norm": 0.15393678843975067, "learning_rate": 4.89311148393636e-05, "loss": 0.0476, "step": 27750 }, { "epoch": 0.1338, "grad_norm": 0.14801058173179626, "learning_rate": 4.8929918723794224e-05, "loss": 0.0447, "step": 27760 }, { "epoch": 0.13385, "grad_norm": 0.17422208189964294, "learning_rate": 4.892872195398985e-05, "loss": 0.0451, "step": 27770 }, { "epoch": 0.1339, "grad_norm": 0.14667102694511414, "learning_rate": 4.8927524529983224e-05, "loss": 0.0447, "step": 27780 }, { "epoch": 0.13395, "grad_norm": 0.1609266698360443, "learning_rate": 4.892632645180705e-05, "loss": 0.0449, "step": 27790 }, { "epoch": 0.134, "grad_norm": 0.15593993663787842, "learning_rate": 4.892512771949411e-05, "loss": 0.0447, "step": 27800 }, { "epoch": 0.13405, "grad_norm": 0.19725289940834045, "learning_rate": 4.8923928333077164e-05, "loss": 0.0444, "step": 27810 }, { "epoch": 0.1341, "grad_norm": 0.1465511918067932, "learning_rate": 4.8922728292589e-05, "loss": 0.0423, "step": 27820 }, { "epoch": 0.13415, "grad_norm": 0.15368938446044922, "learning_rate": 4.8921527598062435e-05, "loss": 0.0446, "step": 27830 }, { "epoch": 0.1342, "grad_norm": 0.1399412304162979, "learning_rate": 4.892032624953029e-05, "loss": 0.0435, "step": 27840 }, { "epoch": 0.13425, "grad_norm": 0.11387931555509567, "learning_rate": 4.891912424702542e-05, "loss": 0.0436, "step": 27850 }, { "epoch": 0.1343, "grad_norm": 0.17357230186462402, "learning_rate": 4.891792159058066e-05, "loss": 0.048, "step": 27860 }, { "epoch": 0.13435, "grad_norm": 0.15355756878852844, "learning_rate": 4.891671828022893e-05, "loss": 0.043, "step": 27870 }, { "epoch": 0.1344, "grad_norm": 0.17153047025203705, "learning_rate": 4.89155143160031e-05, "loss": 0.0445, "step": 27880 }, { "epoch": 0.13445, "grad_norm": 0.2016763985157013, "learning_rate": 4.891430969793609e-05, "loss": 0.046, "step": 27890 }, { "epoch": 0.1345, "grad_norm": 0.16488516330718994, "learning_rate": 4.891310442606084e-05, "loss": 0.0448, "step": 27900 }, { "epoch": 0.13455, "grad_norm": 0.15825265645980835, "learning_rate": 4.8911898500410304e-05, "loss": 0.0437, "step": 27910 }, { "epoch": 0.1346, "grad_norm": 0.1382274627685547, "learning_rate": 4.8910691921017434e-05, "loss": 0.0439, "step": 27920 }, { "epoch": 0.13465, "grad_norm": 0.15109901130199432, "learning_rate": 4.890948468791524e-05, "loss": 0.0433, "step": 27930 }, { "epoch": 0.1347, "grad_norm": 0.1563548445701599, "learning_rate": 4.890827680113671e-05, "loss": 0.0417, "step": 27940 }, { "epoch": 0.13475, "grad_norm": 0.15322072803974152, "learning_rate": 4.890706826071488e-05, "loss": 0.0429, "step": 27950 }, { "epoch": 0.1348, "grad_norm": 0.14017850160598755, "learning_rate": 4.890585906668278e-05, "loss": 0.0441, "step": 27960 }, { "epoch": 0.13485, "grad_norm": 0.12772151827812195, "learning_rate": 4.890464921907348e-05, "loss": 0.0444, "step": 27970 }, { "epoch": 0.1349, "grad_norm": 0.1149485856294632, "learning_rate": 4.890343871792005e-05, "loss": 0.0444, "step": 27980 }, { "epoch": 0.13495, "grad_norm": 0.1375674158334732, "learning_rate": 4.890222756325558e-05, "loss": 0.0453, "step": 27990 }, { "epoch": 0.135, "grad_norm": 0.1502043753862381, "learning_rate": 4.8901015755113195e-05, "loss": 0.0457, "step": 28000 }, { "epoch": 0.13505, "grad_norm": 0.1569092720746994, "learning_rate": 4.889980329352602e-05, "loss": 0.0467, "step": 28010 }, { "epoch": 0.1351, "grad_norm": 0.1355733722448349, "learning_rate": 4.8898590178527195e-05, "loss": 0.0441, "step": 28020 }, { "epoch": 0.13515, "grad_norm": 0.13171911239624023, "learning_rate": 4.8897376410149885e-05, "loss": 0.0468, "step": 28030 }, { "epoch": 0.1352, "grad_norm": 0.1389130800962448, "learning_rate": 4.889616198842729e-05, "loss": 0.0472, "step": 28040 }, { "epoch": 0.13525, "grad_norm": 0.14203593134880066, "learning_rate": 4.8894946913392616e-05, "loss": 0.0444, "step": 28050 }, { "epoch": 0.1353, "grad_norm": 0.13637006282806396, "learning_rate": 4.889373118507905e-05, "loss": 0.0422, "step": 28060 }, { "epoch": 0.13535, "grad_norm": 0.15632040798664093, "learning_rate": 4.889251480351986e-05, "loss": 0.0454, "step": 28070 }, { "epoch": 0.1354, "grad_norm": 0.14616303145885468, "learning_rate": 4.889129776874829e-05, "loss": 0.0458, "step": 28080 }, { "epoch": 0.13545, "grad_norm": 0.1338450163602829, "learning_rate": 4.889008008079762e-05, "loss": 0.0453, "step": 28090 }, { "epoch": 0.1355, "grad_norm": 0.1341494768857956, "learning_rate": 4.888886173970113e-05, "loss": 0.0444, "step": 28100 }, { "epoch": 0.13555, "grad_norm": 0.13778996467590332, "learning_rate": 4.888764274549213e-05, "loss": 0.0446, "step": 28110 }, { "epoch": 0.1356, "grad_norm": 0.12898795306682587, "learning_rate": 4.888642309820396e-05, "loss": 0.0434, "step": 28120 }, { "epoch": 0.13565, "grad_norm": 0.13595975935459137, "learning_rate": 4.888520279786996e-05, "loss": 0.0444, "step": 28130 }, { "epoch": 0.1357, "grad_norm": 0.1189492866396904, "learning_rate": 4.8883981844523476e-05, "loss": 0.0447, "step": 28140 }, { "epoch": 0.13575, "grad_norm": 0.15735140442848206, "learning_rate": 4.8882760238197906e-05, "loss": 0.0425, "step": 28150 }, { "epoch": 0.1358, "grad_norm": 0.1403653472661972, "learning_rate": 4.888153797892665e-05, "loss": 0.0424, "step": 28160 }, { "epoch": 0.13585, "grad_norm": 0.1440337896347046, "learning_rate": 4.888031506674311e-05, "loss": 0.0425, "step": 28170 }, { "epoch": 0.1359, "grad_norm": 0.13385984301567078, "learning_rate": 4.887909150168073e-05, "loss": 0.0423, "step": 28180 }, { "epoch": 0.13595, "grad_norm": 0.1268351823091507, "learning_rate": 4.8877867283772956e-05, "loss": 0.0411, "step": 28190 }, { "epoch": 0.136, "grad_norm": 0.1349794566631317, "learning_rate": 4.8876642413053266e-05, "loss": 0.0444, "step": 28200 }, { "epoch": 0.13605, "grad_norm": 0.13911347091197968, "learning_rate": 4.887541688955514e-05, "loss": 0.0418, "step": 28210 }, { "epoch": 0.1361, "grad_norm": 0.173640638589859, "learning_rate": 4.8874190713312086e-05, "loss": 0.0451, "step": 28220 }, { "epoch": 0.13615, "grad_norm": 0.1754017323255539, "learning_rate": 4.887296388435763e-05, "loss": 0.045, "step": 28230 }, { "epoch": 0.1362, "grad_norm": 0.1387481838464737, "learning_rate": 4.88717364027253e-05, "loss": 0.0438, "step": 28240 }, { "epoch": 0.13625, "grad_norm": 0.1442059576511383, "learning_rate": 4.8870508268448676e-05, "loss": 0.0443, "step": 28250 }, { "epoch": 0.1363, "grad_norm": 0.1649225652217865, "learning_rate": 4.8869279481561316e-05, "loss": 0.0437, "step": 28260 }, { "epoch": 0.13635, "grad_norm": 0.13748526573181152, "learning_rate": 4.886805004209682e-05, "loss": 0.0432, "step": 28270 }, { "epoch": 0.1364, "grad_norm": 0.12064554542303085, "learning_rate": 4.886681995008881e-05, "loss": 0.0422, "step": 28280 }, { "epoch": 0.13645, "grad_norm": 0.1495683491230011, "learning_rate": 4.886558920557091e-05, "loss": 0.0426, "step": 28290 }, { "epoch": 0.1365, "grad_norm": 0.1313278079032898, "learning_rate": 4.8864357808576765e-05, "loss": 0.0417, "step": 28300 }, { "epoch": 0.13655, "grad_norm": 0.11771807074546814, "learning_rate": 4.8863125759140036e-05, "loss": 0.044, "step": 28310 }, { "epoch": 0.1366, "grad_norm": 0.12219876050949097, "learning_rate": 4.886189305729443e-05, "loss": 0.0433, "step": 28320 }, { "epoch": 0.13665, "grad_norm": 0.13192905485630035, "learning_rate": 4.886065970307362e-05, "loss": 0.045, "step": 28330 }, { "epoch": 0.1367, "grad_norm": 0.13180845975875854, "learning_rate": 4.885942569651134e-05, "loss": 0.0421, "step": 28340 }, { "epoch": 0.13675, "grad_norm": 0.14230282604694366, "learning_rate": 4.885819103764132e-05, "loss": 0.0423, "step": 28350 }, { "epoch": 0.1368, "grad_norm": 0.14785164594650269, "learning_rate": 4.8856955726497327e-05, "loss": 0.043, "step": 28360 }, { "epoch": 0.13685, "grad_norm": 0.13133099675178528, "learning_rate": 4.885571976311313e-05, "loss": 0.0431, "step": 28370 }, { "epoch": 0.1369, "grad_norm": 0.12610967457294464, "learning_rate": 4.885448314752251e-05, "loss": 0.0447, "step": 28380 }, { "epoch": 0.13695, "grad_norm": 0.15596739947795868, "learning_rate": 4.885324587975928e-05, "loss": 0.044, "step": 28390 }, { "epoch": 0.137, "grad_norm": 0.1739337146282196, "learning_rate": 4.885200795985727e-05, "loss": 0.0476, "step": 28400 }, { "epoch": 0.13705, "grad_norm": 0.14911046624183655, "learning_rate": 4.8850769387850334e-05, "loss": 0.0464, "step": 28410 }, { "epoch": 0.1371, "grad_norm": 0.13654664158821106, "learning_rate": 4.884953016377232e-05, "loss": 0.0437, "step": 28420 }, { "epoch": 0.13715, "grad_norm": 0.11479999125003815, "learning_rate": 4.884829028765711e-05, "loss": 0.0435, "step": 28430 }, { "epoch": 0.1372, "grad_norm": 0.13781145215034485, "learning_rate": 4.884704975953859e-05, "loss": 0.0442, "step": 28440 }, { "epoch": 0.13725, "grad_norm": 0.12538376450538635, "learning_rate": 4.88458085794507e-05, "loss": 0.0446, "step": 28450 }, { "epoch": 0.1373, "grad_norm": 0.10659784823656082, "learning_rate": 4.884456674742736e-05, "loss": 0.0441, "step": 28460 }, { "epoch": 0.13735, "grad_norm": 0.12495476007461548, "learning_rate": 4.8843324263502523e-05, "loss": 0.044, "step": 28470 }, { "epoch": 0.1374, "grad_norm": 0.16172359883785248, "learning_rate": 4.884208112771016e-05, "loss": 0.0468, "step": 28480 }, { "epoch": 0.13745, "grad_norm": 0.1775505542755127, "learning_rate": 4.884083734008425e-05, "loss": 0.0446, "step": 28490 }, { "epoch": 0.1375, "grad_norm": 0.22093729674816132, "learning_rate": 4.883959290065882e-05, "loss": 0.0441, "step": 28500 }, { "epoch": 0.13755, "grad_norm": 0.16403494775295258, "learning_rate": 4.883834780946786e-05, "loss": 0.043, "step": 28510 }, { "epoch": 0.1376, "grad_norm": 0.13922278583049774, "learning_rate": 4.883710206654543e-05, "loss": 0.0466, "step": 28520 }, { "epoch": 0.13765, "grad_norm": 0.14404745399951935, "learning_rate": 4.883585567192559e-05, "loss": 0.0436, "step": 28530 }, { "epoch": 0.1377, "grad_norm": 0.12358921021223068, "learning_rate": 4.8834608625642404e-05, "loss": 0.0443, "step": 28540 }, { "epoch": 0.13775, "grad_norm": 0.14362011849880219, "learning_rate": 4.8833360927729976e-05, "loss": 0.0441, "step": 28550 }, { "epoch": 0.1378, "grad_norm": 0.17407487332820892, "learning_rate": 4.883211257822241e-05, "loss": 0.0433, "step": 28560 }, { "epoch": 0.13785, "grad_norm": 0.1505686640739441, "learning_rate": 4.883086357715384e-05, "loss": 0.044, "step": 28570 }, { "epoch": 0.1379, "grad_norm": 0.16092127561569214, "learning_rate": 4.882961392455842e-05, "loss": 0.0425, "step": 28580 }, { "epoch": 0.13795, "grad_norm": 0.1629003882408142, "learning_rate": 4.88283636204703e-05, "loss": 0.0444, "step": 28590 }, { "epoch": 0.138, "grad_norm": 0.13917267322540283, "learning_rate": 4.8827112664923674e-05, "loss": 0.0424, "step": 28600 }, { "epoch": 0.13805, "grad_norm": 0.14045603573322296, "learning_rate": 4.882586105795274e-05, "loss": 0.0424, "step": 28610 }, { "epoch": 0.1381, "grad_norm": 0.14074933528900146, "learning_rate": 4.882460879959171e-05, "loss": 0.0424, "step": 28620 }, { "epoch": 0.13815, "grad_norm": 0.14256353676319122, "learning_rate": 4.882335588987483e-05, "loss": 0.0436, "step": 28630 }, { "epoch": 0.1382, "grad_norm": 0.15993767976760864, "learning_rate": 4.882210232883635e-05, "loss": 0.044, "step": 28640 }, { "epoch": 0.13825, "grad_norm": 0.13555461168289185, "learning_rate": 4.8820848116510544e-05, "loss": 0.045, "step": 28650 }, { "epoch": 0.1383, "grad_norm": 0.14272552728652954, "learning_rate": 4.881959325293169e-05, "loss": 0.0437, "step": 28660 }, { "epoch": 0.13835, "grad_norm": 0.14286339282989502, "learning_rate": 4.8818337738134124e-05, "loss": 0.0444, "step": 28670 }, { "epoch": 0.1384, "grad_norm": 0.12127411365509033, "learning_rate": 4.881708157215213e-05, "loss": 0.0442, "step": 28680 }, { "epoch": 0.13845, "grad_norm": 0.132398322224617, "learning_rate": 4.881582475502009e-05, "loss": 0.0445, "step": 28690 }, { "epoch": 0.1385, "grad_norm": 0.13280871510505676, "learning_rate": 4.8814567286772344e-05, "loss": 0.0428, "step": 28700 }, { "epoch": 0.13855, "grad_norm": 0.11291223764419556, "learning_rate": 4.881330916744327e-05, "loss": 0.0451, "step": 28710 }, { "epoch": 0.1386, "grad_norm": 0.14127440750598907, "learning_rate": 4.8812050397067277e-05, "loss": 0.0457, "step": 28720 }, { "epoch": 0.13865, "grad_norm": 0.1400194764137268, "learning_rate": 4.881079097567877e-05, "loss": 0.0444, "step": 28730 }, { "epoch": 0.1387, "grad_norm": 0.1517164260149002, "learning_rate": 4.880953090331218e-05, "loss": 0.0425, "step": 28740 }, { "epoch": 0.13875, "grad_norm": 0.1363055408000946, "learning_rate": 4.880827018000196e-05, "loss": 0.0434, "step": 28750 }, { "epoch": 0.1388, "grad_norm": 0.13896967470645905, "learning_rate": 4.880700880578258e-05, "loss": 0.0464, "step": 28760 }, { "epoch": 0.13885, "grad_norm": 0.1326436996459961, "learning_rate": 4.880574678068852e-05, "loss": 0.0437, "step": 28770 }, { "epoch": 0.1389, "grad_norm": 0.11332570761442184, "learning_rate": 4.880448410475429e-05, "loss": 0.0413, "step": 28780 }, { "epoch": 0.13895, "grad_norm": 0.10437055677175522, "learning_rate": 4.88032207780144e-05, "loss": 0.044, "step": 28790 }, { "epoch": 0.139, "grad_norm": 0.11086023598909378, "learning_rate": 4.8801956800503406e-05, "loss": 0.0429, "step": 28800 }, { "epoch": 0.13905, "grad_norm": 0.12515541911125183, "learning_rate": 4.880069217225585e-05, "loss": 0.0459, "step": 28810 }, { "epoch": 0.1391, "grad_norm": 0.1383202075958252, "learning_rate": 4.879942689330631e-05, "loss": 0.0446, "step": 28820 }, { "epoch": 0.13915, "grad_norm": 0.17073068022727966, "learning_rate": 4.879816096368939e-05, "loss": 0.0488, "step": 28830 }, { "epoch": 0.1392, "grad_norm": 0.14519037306308746, "learning_rate": 4.879689438343968e-05, "loss": 0.0444, "step": 28840 }, { "epoch": 0.13925, "grad_norm": 0.12331254780292511, "learning_rate": 4.8795627152591825e-05, "loss": 0.0458, "step": 28850 }, { "epoch": 0.1393, "grad_norm": 0.13862133026123047, "learning_rate": 4.8794359271180454e-05, "loss": 0.0457, "step": 28860 }, { "epoch": 0.13935, "grad_norm": 0.147194504737854, "learning_rate": 4.8793090739240244e-05, "loss": 0.0444, "step": 28870 }, { "epoch": 0.1394, "grad_norm": 0.15450793504714966, "learning_rate": 4.879182155680587e-05, "loss": 0.0465, "step": 28880 }, { "epoch": 0.13945, "grad_norm": 0.14073385298252106, "learning_rate": 4.879055172391204e-05, "loss": 0.0441, "step": 28890 }, { "epoch": 0.1395, "grad_norm": 0.12063471972942352, "learning_rate": 4.878928124059345e-05, "loss": 0.044, "step": 28900 }, { "epoch": 0.13955, "grad_norm": 0.14246101677417755, "learning_rate": 4.878801010688486e-05, "loss": 0.0443, "step": 28910 }, { "epoch": 0.1396, "grad_norm": 0.12795647978782654, "learning_rate": 4.878673832282101e-05, "loss": 0.0455, "step": 28920 }, { "epoch": 0.13965, "grad_norm": 0.1561502069234848, "learning_rate": 4.878546588843666e-05, "loss": 0.0458, "step": 28930 }, { "epoch": 0.1397, "grad_norm": 0.15751421451568604, "learning_rate": 4.8784192803766624e-05, "loss": 0.0451, "step": 28940 }, { "epoch": 0.13975, "grad_norm": 0.1431007981300354, "learning_rate": 4.878291906884568e-05, "loss": 0.0424, "step": 28950 }, { "epoch": 0.1398, "grad_norm": 0.1989883929491043, "learning_rate": 4.878164468370867e-05, "loss": 0.0441, "step": 28960 }, { "epoch": 0.13985, "grad_norm": 0.14188385009765625, "learning_rate": 4.8780369648390426e-05, "loss": 0.0435, "step": 28970 }, { "epoch": 0.1399, "grad_norm": 0.13006633520126343, "learning_rate": 4.87790939629258e-05, "loss": 0.0436, "step": 28980 }, { "epoch": 0.13995, "grad_norm": 0.1398228257894516, "learning_rate": 4.87778176273497e-05, "loss": 0.0432, "step": 28990 }, { "epoch": 0.14, "grad_norm": 0.1444910317659378, "learning_rate": 4.877654064169698e-05, "loss": 0.0425, "step": 29000 }, { "epoch": 0.14005, "grad_norm": 0.17665322124958038, "learning_rate": 4.877526300600258e-05, "loss": 0.0468, "step": 29010 }, { "epoch": 0.1401, "grad_norm": 0.13634498417377472, "learning_rate": 4.877398472030142e-05, "loss": 0.0436, "step": 29020 }, { "epoch": 0.14015, "grad_norm": 0.12259041517972946, "learning_rate": 4.877270578462845e-05, "loss": 0.044, "step": 29030 }, { "epoch": 0.1402, "grad_norm": 0.19929362833499908, "learning_rate": 4.8771426199018634e-05, "loss": 0.0452, "step": 29040 }, { "epoch": 0.14025, "grad_norm": 0.15330860018730164, "learning_rate": 4.877014596350695e-05, "loss": 0.0425, "step": 29050 }, { "epoch": 0.1403, "grad_norm": 0.145126074552536, "learning_rate": 4.876886507812841e-05, "loss": 0.0442, "step": 29060 }, { "epoch": 0.14035, "grad_norm": 0.11339390277862549, "learning_rate": 4.8767583542918037e-05, "loss": 0.0421, "step": 29070 }, { "epoch": 0.1404, "grad_norm": 0.15190169215202332, "learning_rate": 4.876630135791085e-05, "loss": 0.0424, "step": 29080 }, { "epoch": 0.14045, "grad_norm": 0.12558703124523163, "learning_rate": 4.8765018523141915e-05, "loss": 0.0441, "step": 29090 }, { "epoch": 0.1405, "grad_norm": 0.14016909897327423, "learning_rate": 4.8763735038646296e-05, "loss": 0.0462, "step": 29100 }, { "epoch": 0.14055, "grad_norm": 0.1210547462105751, "learning_rate": 4.87624509044591e-05, "loss": 0.0436, "step": 29110 }, { "epoch": 0.1406, "grad_norm": 0.15262196958065033, "learning_rate": 4.8761166120615415e-05, "loss": 0.0445, "step": 29120 }, { "epoch": 0.14065, "grad_norm": 0.13301680982112885, "learning_rate": 4.8759880687150375e-05, "loss": 0.0479, "step": 29130 }, { "epoch": 0.1407, "grad_norm": 0.12849004566669464, "learning_rate": 4.875859460409913e-05, "loss": 0.0427, "step": 29140 }, { "epoch": 0.14075, "grad_norm": 0.1430385261774063, "learning_rate": 4.8757307871496825e-05, "loss": 0.0424, "step": 29150 }, { "epoch": 0.1408, "grad_norm": 0.1357356458902359, "learning_rate": 4.875602048937865e-05, "loss": 0.0418, "step": 29160 }, { "epoch": 0.14085, "grad_norm": 0.12177083641290665, "learning_rate": 4.875473245777981e-05, "loss": 0.0436, "step": 29170 }, { "epoch": 0.1409, "grad_norm": 0.12450092285871506, "learning_rate": 4.87534437767355e-05, "loss": 0.0457, "step": 29180 }, { "epoch": 0.14095, "grad_norm": 0.1108444556593895, "learning_rate": 4.875215444628095e-05, "loss": 0.0436, "step": 29190 }, { "epoch": 0.141, "grad_norm": 0.14912369847297668, "learning_rate": 4.875086446645144e-05, "loss": 0.0437, "step": 29200 }, { "epoch": 0.14105, "grad_norm": 0.1382071077823639, "learning_rate": 4.8749573837282207e-05, "loss": 0.0452, "step": 29210 }, { "epoch": 0.1411, "grad_norm": 0.1486228108406067, "learning_rate": 4.874828255880855e-05, "loss": 0.0465, "step": 29220 }, { "epoch": 0.14115, "grad_norm": 0.12856066226959229, "learning_rate": 4.874699063106577e-05, "loss": 0.0428, "step": 29230 }, { "epoch": 0.1412, "grad_norm": 0.16187553107738495, "learning_rate": 4.874569805408919e-05, "loss": 0.0439, "step": 29240 }, { "epoch": 0.14125, "grad_norm": 0.1266612708568573, "learning_rate": 4.8744404827914144e-05, "loss": 0.0437, "step": 29250 }, { "epoch": 0.1413, "grad_norm": 0.14293614029884338, "learning_rate": 4.874311095257599e-05, "loss": 0.0431, "step": 29260 }, { "epoch": 0.14135, "grad_norm": 0.16605037450790405, "learning_rate": 4.87418164281101e-05, "loss": 0.0438, "step": 29270 }, { "epoch": 0.1414, "grad_norm": 0.12890055775642395, "learning_rate": 4.8740521254551876e-05, "loss": 0.0463, "step": 29280 }, { "epoch": 0.14145, "grad_norm": 0.14314982295036316, "learning_rate": 4.873922543193671e-05, "loss": 0.0436, "step": 29290 }, { "epoch": 0.1415, "grad_norm": 0.1430131494998932, "learning_rate": 4.873792896030005e-05, "loss": 0.0434, "step": 29300 }, { "epoch": 0.14155, "grad_norm": 0.19689872860908508, "learning_rate": 4.873663183967732e-05, "loss": 0.0447, "step": 29310 }, { "epoch": 0.1416, "grad_norm": 0.19659042358398438, "learning_rate": 4.8735334070104e-05, "loss": 0.0464, "step": 29320 }, { "epoch": 0.14165, "grad_norm": 0.15170709788799286, "learning_rate": 4.873403565161556e-05, "loss": 0.0429, "step": 29330 }, { "epoch": 0.1417, "grad_norm": 0.1255590319633484, "learning_rate": 4.873273658424751e-05, "loss": 0.0446, "step": 29340 }, { "epoch": 0.14175, "grad_norm": 0.13071368634700775, "learning_rate": 4.8731436868035343e-05, "loss": 0.0472, "step": 29350 }, { "epoch": 0.1418, "grad_norm": 0.1392545998096466, "learning_rate": 4.873013650301461e-05, "loss": 0.0444, "step": 29360 }, { "epoch": 0.14185, "grad_norm": 0.1398293524980545, "learning_rate": 4.872883548922087e-05, "loss": 0.045, "step": 29370 }, { "epoch": 0.1419, "grad_norm": 0.12076738476753235, "learning_rate": 4.8727533826689677e-05, "loss": 0.0431, "step": 29380 }, { "epoch": 0.14195, "grad_norm": 0.12129142880439758, "learning_rate": 4.872623151545662e-05, "loss": 0.0442, "step": 29390 }, { "epoch": 0.142, "grad_norm": 0.13107934594154358, "learning_rate": 4.872492855555732e-05, "loss": 0.0474, "step": 29400 }, { "epoch": 0.14205, "grad_norm": 0.13777202367782593, "learning_rate": 4.872362494702737e-05, "loss": 0.0514, "step": 29410 }, { "epoch": 0.1421, "grad_norm": 0.1417369246482849, "learning_rate": 4.8722320689902434e-05, "loss": 0.0447, "step": 29420 }, { "epoch": 0.14215, "grad_norm": 0.16407637298107147, "learning_rate": 4.872101578421816e-05, "loss": 0.0492, "step": 29430 }, { "epoch": 0.1422, "grad_norm": 0.16390691697597504, "learning_rate": 4.871971023001023e-05, "loss": 0.0459, "step": 29440 }, { "epoch": 0.14225, "grad_norm": 0.1474514603614807, "learning_rate": 4.871840402731432e-05, "loss": 0.0458, "step": 29450 }, { "epoch": 0.1423, "grad_norm": 0.14336465299129486, "learning_rate": 4.871709717616617e-05, "loss": 0.047, "step": 29460 }, { "epoch": 0.14235, "grad_norm": 0.1408020406961441, "learning_rate": 4.8715789676601484e-05, "loss": 0.0437, "step": 29470 }, { "epoch": 0.1424, "grad_norm": 0.14137528836727142, "learning_rate": 4.871448152865603e-05, "loss": 0.0437, "step": 29480 }, { "epoch": 0.14245, "grad_norm": 0.14283783733844757, "learning_rate": 4.8713172732365554e-05, "loss": 0.0424, "step": 29490 }, { "epoch": 0.1425, "grad_norm": 0.11966720223426819, "learning_rate": 4.871186328776583e-05, "loss": 0.0441, "step": 29500 }, { "epoch": 0.14255, "grad_norm": 0.12390667200088501, "learning_rate": 4.871055319489269e-05, "loss": 0.0433, "step": 29510 }, { "epoch": 0.1426, "grad_norm": 0.13050583004951477, "learning_rate": 4.8709242453781936e-05, "loss": 0.0425, "step": 29520 }, { "epoch": 0.14265, "grad_norm": 0.12697172164916992, "learning_rate": 4.8707931064469385e-05, "loss": 0.0456, "step": 29530 }, { "epoch": 0.1427, "grad_norm": 0.12325213849544525, "learning_rate": 4.870661902699092e-05, "loss": 0.0431, "step": 29540 }, { "epoch": 0.14275, "grad_norm": 0.11337971687316895, "learning_rate": 4.8705306341382385e-05, "loss": 0.0419, "step": 29550 }, { "epoch": 0.1428, "grad_norm": 0.1135648712515831, "learning_rate": 4.870399300767968e-05, "loss": 0.0419, "step": 29560 }, { "epoch": 0.14285, "grad_norm": 0.16290436685085297, "learning_rate": 4.870267902591872e-05, "loss": 0.0449, "step": 29570 }, { "epoch": 0.1429, "grad_norm": 0.1278180629014969, "learning_rate": 4.870136439613542e-05, "loss": 0.0412, "step": 29580 }, { "epoch": 0.14295, "grad_norm": 0.15755663812160492, "learning_rate": 4.870004911836572e-05, "loss": 0.0421, "step": 29590 }, { "epoch": 0.143, "grad_norm": 0.16184493899345398, "learning_rate": 4.8698733192645574e-05, "loss": 0.0428, "step": 29600 }, { "epoch": 0.14305, "grad_norm": 0.12744948267936707, "learning_rate": 4.869741661901097e-05, "loss": 0.0442, "step": 29610 }, { "epoch": 0.1431, "grad_norm": 0.1351950615644455, "learning_rate": 4.86960993974979e-05, "loss": 0.0475, "step": 29620 }, { "epoch": 0.14315, "grad_norm": 0.12647458910942078, "learning_rate": 4.869478152814238e-05, "loss": 0.0445, "step": 29630 }, { "epoch": 0.1432, "grad_norm": 0.13030609488487244, "learning_rate": 4.869346301098042e-05, "loss": 0.049, "step": 29640 }, { "epoch": 0.14325, "grad_norm": 0.15038636326789856, "learning_rate": 4.869214384604809e-05, "loss": 0.0433, "step": 29650 }, { "epoch": 0.1433, "grad_norm": 0.1297498345375061, "learning_rate": 4.869082403338145e-05, "loss": 0.0445, "step": 29660 }, { "epoch": 0.14335, "grad_norm": 0.16710205376148224, "learning_rate": 4.868950357301658e-05, "loss": 0.0456, "step": 29670 }, { "epoch": 0.1434, "grad_norm": 0.11809530109167099, "learning_rate": 4.868818246498958e-05, "loss": 0.0441, "step": 29680 }, { "epoch": 0.14345, "grad_norm": 0.14866682887077332, "learning_rate": 4.8686860709336575e-05, "loss": 0.0444, "step": 29690 }, { "epoch": 0.1435, "grad_norm": 0.15026448667049408, "learning_rate": 4.868553830609369e-05, "loss": 0.0447, "step": 29700 }, { "epoch": 0.14355, "grad_norm": 0.11738848686218262, "learning_rate": 4.86842152552971e-05, "loss": 0.0429, "step": 29710 }, { "epoch": 0.1436, "grad_norm": 0.15006989240646362, "learning_rate": 4.868289155698294e-05, "loss": 0.0443, "step": 29720 }, { "epoch": 0.14365, "grad_norm": 0.12709090113639832, "learning_rate": 4.868156721118744e-05, "loss": 0.0426, "step": 29730 }, { "epoch": 0.1437, "grad_norm": 0.1313161700963974, "learning_rate": 4.868024221794678e-05, "loss": 0.0434, "step": 29740 }, { "epoch": 0.14375, "grad_norm": 0.1490878313779831, "learning_rate": 4.8678916577297205e-05, "loss": 0.0433, "step": 29750 }, { "epoch": 0.1438, "grad_norm": 0.13998951017856598, "learning_rate": 4.867759028927494e-05, "loss": 0.0424, "step": 29760 }, { "epoch": 0.14385, "grad_norm": 0.14876703917980194, "learning_rate": 4.867626335391625e-05, "loss": 0.0465, "step": 29770 }, { "epoch": 0.1439, "grad_norm": 0.17557351291179657, "learning_rate": 4.867493577125741e-05, "loss": 0.0463, "step": 29780 }, { "epoch": 0.14395, "grad_norm": 0.205479234457016, "learning_rate": 4.867360754133473e-05, "loss": 0.0476, "step": 29790 }, { "epoch": 0.144, "grad_norm": 0.12718096375465393, "learning_rate": 4.867227866418451e-05, "loss": 0.045, "step": 29800 }, { "epoch": 0.14405, "grad_norm": 0.12666594982147217, "learning_rate": 4.867094913984309e-05, "loss": 0.0461, "step": 29810 }, { "epoch": 0.1441, "grad_norm": 0.15450750291347504, "learning_rate": 4.866961896834681e-05, "loss": 0.0441, "step": 29820 }, { "epoch": 0.14415, "grad_norm": 0.15755434334278107, "learning_rate": 4.866828814973203e-05, "loss": 0.0436, "step": 29830 }, { "epoch": 0.1442, "grad_norm": 0.14265063405036926, "learning_rate": 4.866695668403515e-05, "loss": 0.0444, "step": 29840 }, { "epoch": 0.14425, "grad_norm": 0.12945078313350677, "learning_rate": 4.866562457129257e-05, "loss": 0.0435, "step": 29850 }, { "epoch": 0.1443, "grad_norm": 0.13932296633720398, "learning_rate": 4.8664291811540704e-05, "loss": 0.0431, "step": 29860 }, { "epoch": 0.14435, "grad_norm": 0.13899292051792145, "learning_rate": 4.866295840481598e-05, "loss": 0.0439, "step": 29870 }, { "epoch": 0.1444, "grad_norm": 0.14892756938934326, "learning_rate": 4.8661624351154877e-05, "loss": 0.043, "step": 29880 }, { "epoch": 0.14445, "grad_norm": 0.14375853538513184, "learning_rate": 4.8660289650593846e-05, "loss": 0.0423, "step": 29890 }, { "epoch": 0.1445, "grad_norm": 0.1570858210325241, "learning_rate": 4.865895430316939e-05, "loss": 0.0421, "step": 29900 }, { "epoch": 0.14455, "grad_norm": 0.11134622991085052, "learning_rate": 4.865761830891801e-05, "loss": 0.043, "step": 29910 }, { "epoch": 0.1446, "grad_norm": 0.13085021078586578, "learning_rate": 4.865628166787623e-05, "loss": 0.0436, "step": 29920 }, { "epoch": 0.14465, "grad_norm": 0.14486683905124664, "learning_rate": 4.865494438008059e-05, "loss": 0.0446, "step": 29930 }, { "epoch": 0.1447, "grad_norm": 0.17880284786224365, "learning_rate": 4.865360644556767e-05, "loss": 0.0456, "step": 29940 }, { "epoch": 0.14475, "grad_norm": 0.15188609063625336, "learning_rate": 4.865226786437403e-05, "loss": 0.043, "step": 29950 }, { "epoch": 0.1448, "grad_norm": 0.11204826831817627, "learning_rate": 4.8650928636536277e-05, "loss": 0.0406, "step": 29960 }, { "epoch": 0.14485, "grad_norm": 0.1512330174446106, "learning_rate": 4.8649588762091016e-05, "loss": 0.0413, "step": 29970 }, { "epoch": 0.1449, "grad_norm": 0.1452956199645996, "learning_rate": 4.864824824107488e-05, "loss": 0.0434, "step": 29980 }, { "epoch": 0.14495, "grad_norm": 0.16116859018802643, "learning_rate": 4.864690707352453e-05, "loss": 0.0427, "step": 29990 }, { "epoch": 0.145, "grad_norm": 0.1275971531867981, "learning_rate": 4.864556525947661e-05, "loss": 0.0462, "step": 30000 }, { "epoch": 0.14505, "grad_norm": 0.142277330160141, "learning_rate": 4.864422279896783e-05, "loss": 0.0414, "step": 30010 }, { "epoch": 0.1451, "grad_norm": 0.13965527713298798, "learning_rate": 4.864287969203488e-05, "loss": 0.0453, "step": 30020 }, { "epoch": 0.14515, "grad_norm": 0.14702655375003815, "learning_rate": 4.8641535938714486e-05, "loss": 0.0491, "step": 30030 }, { "epoch": 0.1452, "grad_norm": 0.14110992848873138, "learning_rate": 4.864019153904337e-05, "loss": 0.0435, "step": 30040 }, { "epoch": 0.14525, "grad_norm": 0.12232547998428345, "learning_rate": 4.863884649305831e-05, "loss": 0.0414, "step": 30050 }, { "epoch": 0.1453, "grad_norm": 0.12205258011817932, "learning_rate": 4.863750080079606e-05, "loss": 0.0427, "step": 30060 }, { "epoch": 0.14535, "grad_norm": 0.1372302770614624, "learning_rate": 4.863615446229342e-05, "loss": 0.0414, "step": 30070 }, { "epoch": 0.1454, "grad_norm": 0.13404032588005066, "learning_rate": 4.86348074775872e-05, "loss": 0.0465, "step": 30080 }, { "epoch": 0.14545, "grad_norm": 0.13370977342128754, "learning_rate": 4.863345984671422e-05, "loss": 0.043, "step": 30090 }, { "epoch": 0.1455, "grad_norm": 0.1394168585538864, "learning_rate": 4.8632111569711326e-05, "loss": 0.042, "step": 30100 }, { "epoch": 0.14555, "grad_norm": 0.15278661251068115, "learning_rate": 4.863076264661538e-05, "loss": 0.0459, "step": 30110 }, { "epoch": 0.1456, "grad_norm": 0.15998971462249756, "learning_rate": 4.862941307746326e-05, "loss": 0.0436, "step": 30120 }, { "epoch": 0.14565, "grad_norm": 0.1253858506679535, "learning_rate": 4.8628062862291865e-05, "loss": 0.0484, "step": 30130 }, { "epoch": 0.1457, "grad_norm": 0.11512526869773865, "learning_rate": 4.862671200113811e-05, "loss": 0.0433, "step": 30140 }, { "epoch": 0.14575, "grad_norm": 0.13237643241882324, "learning_rate": 4.862536049403892e-05, "loss": 0.0434, "step": 30150 }, { "epoch": 0.1458, "grad_norm": 0.14471276104450226, "learning_rate": 4.862400834103125e-05, "loss": 0.0473, "step": 30160 }, { "epoch": 0.14585, "grad_norm": 0.14546695351600647, "learning_rate": 4.862265554215207e-05, "loss": 0.0445, "step": 30170 }, { "epoch": 0.1459, "grad_norm": 0.132734015583992, "learning_rate": 4.862130209743837e-05, "loss": 0.0443, "step": 30180 }, { "epoch": 0.14595, "grad_norm": 0.12827634811401367, "learning_rate": 4.861994800692713e-05, "loss": 0.0451, "step": 30190 }, { "epoch": 0.146, "grad_norm": 0.1215103417634964, "learning_rate": 4.861859327065539e-05, "loss": 0.0444, "step": 30200 }, { "epoch": 0.14605, "grad_norm": 0.1259389966726303, "learning_rate": 4.8617237888660185e-05, "loss": 0.0427, "step": 30210 }, { "epoch": 0.1461, "grad_norm": 0.14394418895244598, "learning_rate": 4.861588186097858e-05, "loss": 0.0435, "step": 30220 }, { "epoch": 0.14615, "grad_norm": 0.13755978643894196, "learning_rate": 4.861452518764762e-05, "loss": 0.0436, "step": 30230 }, { "epoch": 0.1462, "grad_norm": 0.16422000527381897, "learning_rate": 4.8613167868704414e-05, "loss": 0.0451, "step": 30240 }, { "epoch": 0.14625, "grad_norm": 0.15230846405029297, "learning_rate": 4.8611809904186074e-05, "loss": 0.0438, "step": 30250 }, { "epoch": 0.1463, "grad_norm": 0.14470531046390533, "learning_rate": 4.861045129412972e-05, "loss": 0.0425, "step": 30260 }, { "epoch": 0.14635, "grad_norm": 0.151280477643013, "learning_rate": 4.86090920385725e-05, "loss": 0.0436, "step": 30270 }, { "epoch": 0.1464, "grad_norm": 0.15169380605220795, "learning_rate": 4.860773213755158e-05, "loss": 0.0443, "step": 30280 }, { "epoch": 0.14645, "grad_norm": 0.19994564354419708, "learning_rate": 4.8606371591104114e-05, "loss": 0.0429, "step": 30290 }, { "epoch": 0.1465, "grad_norm": 0.16692997515201569, "learning_rate": 4.860501039926734e-05, "loss": 0.0418, "step": 30300 }, { "epoch": 0.14655, "grad_norm": 0.17754624783992767, "learning_rate": 4.860364856207843e-05, "loss": 0.0448, "step": 30310 }, { "epoch": 0.1466, "grad_norm": 0.148456409573555, "learning_rate": 4.860228607957464e-05, "loss": 0.0429, "step": 30320 }, { "epoch": 0.14665, "grad_norm": 0.14688019454479218, "learning_rate": 4.860092295179323e-05, "loss": 0.0421, "step": 30330 }, { "epoch": 0.1467, "grad_norm": 0.2008282095193863, "learning_rate": 4.8599559178771436e-05, "loss": 0.0455, "step": 30340 }, { "epoch": 0.14675, "grad_norm": 0.21322788298130035, "learning_rate": 4.859819476054657e-05, "loss": 0.0446, "step": 30350 }, { "epoch": 0.1468, "grad_norm": 0.1635725349187851, "learning_rate": 4.859682969715592e-05, "loss": 0.0434, "step": 30360 }, { "epoch": 0.14685, "grad_norm": 0.1512279212474823, "learning_rate": 4.859546398863681e-05, "loss": 0.0436, "step": 30370 }, { "epoch": 0.1469, "grad_norm": 0.13886789977550507, "learning_rate": 4.859409763502658e-05, "loss": 0.0431, "step": 30380 }, { "epoch": 0.14695, "grad_norm": 0.15730249881744385, "learning_rate": 4.859273063636258e-05, "loss": 0.0466, "step": 30390 }, { "epoch": 0.147, "grad_norm": 0.13086619973182678, "learning_rate": 4.85913629926822e-05, "loss": 0.0429, "step": 30400 }, { "epoch": 0.14705, "grad_norm": 0.13605093955993652, "learning_rate": 4.858999470402281e-05, "loss": 0.0447, "step": 30410 }, { "epoch": 0.1471, "grad_norm": 0.13402029871940613, "learning_rate": 4.8588625770421825e-05, "loss": 0.0453, "step": 30420 }, { "epoch": 0.14715, "grad_norm": 0.1320749968290329, "learning_rate": 4.8587256191916674e-05, "loss": 0.0447, "step": 30430 }, { "epoch": 0.1472, "grad_norm": 0.11509065330028534, "learning_rate": 4.858588596854481e-05, "loss": 0.0461, "step": 30440 }, { "epoch": 0.14725, "grad_norm": 0.14351004362106323, "learning_rate": 4.858451510034367e-05, "loss": 0.0456, "step": 30450 }, { "epoch": 0.1473, "grad_norm": 0.12643565237522125, "learning_rate": 4.858314358735076e-05, "loss": 0.0435, "step": 30460 }, { "epoch": 0.14735, "grad_norm": 0.14670643210411072, "learning_rate": 4.858177142960356e-05, "loss": 0.0436, "step": 30470 }, { "epoch": 0.1474, "grad_norm": 0.14714765548706055, "learning_rate": 4.858039862713959e-05, "loss": 0.0443, "step": 30480 }, { "epoch": 0.14745, "grad_norm": 0.12171582132577896, "learning_rate": 4.857902517999638e-05, "loss": 0.0429, "step": 30490 }, { "epoch": 0.1475, "grad_norm": 0.13254272937774658, "learning_rate": 4.8577651088211475e-05, "loss": 0.044, "step": 30500 }, { "epoch": 0.14755, "grad_norm": 0.11033543199300766, "learning_rate": 4.8576276351822445e-05, "loss": 0.0441, "step": 30510 }, { "epoch": 0.1476, "grad_norm": 0.13831742107868195, "learning_rate": 4.857490097086688e-05, "loss": 0.0423, "step": 30520 }, { "epoch": 0.14765, "grad_norm": 0.11719835549592972, "learning_rate": 4.857352494538239e-05, "loss": 0.0446, "step": 30530 }, { "epoch": 0.1477, "grad_norm": 0.11771178245544434, "learning_rate": 4.857214827540657e-05, "loss": 0.0416, "step": 30540 }, { "epoch": 0.14775, "grad_norm": 0.10563771426677704, "learning_rate": 4.857077096097708e-05, "loss": 0.0425, "step": 30550 }, { "epoch": 0.1478, "grad_norm": 0.13564516603946686, "learning_rate": 4.856939300213156e-05, "loss": 0.0435, "step": 30560 }, { "epoch": 0.14785, "grad_norm": 0.12499434500932693, "learning_rate": 4.856801439890769e-05, "loss": 0.0429, "step": 30570 }, { "epoch": 0.1479, "grad_norm": 0.12963660061359406, "learning_rate": 4.8566635151343164e-05, "loss": 0.0447, "step": 30580 }, { "epoch": 0.14795, "grad_norm": 0.14376573264598846, "learning_rate": 4.8565255259475686e-05, "loss": 0.0418, "step": 30590 }, { "epoch": 0.148, "grad_norm": 0.14322718977928162, "learning_rate": 4.856387472334298e-05, "loss": 0.0416, "step": 30600 }, { "epoch": 0.14805, "grad_norm": 0.11483057588338852, "learning_rate": 4.8562493542982796e-05, "loss": 0.0408, "step": 30610 }, { "epoch": 0.1481, "grad_norm": 0.12188133597373962, "learning_rate": 4.856111171843289e-05, "loss": 0.042, "step": 30620 }, { "epoch": 0.14815, "grad_norm": 0.13449354469776154, "learning_rate": 4.855972924973104e-05, "loss": 0.0422, "step": 30630 }, { "epoch": 0.1482, "grad_norm": 0.1258065402507782, "learning_rate": 4.855834613691505e-05, "loss": 0.0403, "step": 30640 }, { "epoch": 0.14825, "grad_norm": 0.12190357595682144, "learning_rate": 4.855696238002271e-05, "loss": 0.0412, "step": 30650 }, { "epoch": 0.1483, "grad_norm": 0.12858296930789948, "learning_rate": 4.855557797909188e-05, "loss": 0.0423, "step": 30660 }, { "epoch": 0.14835, "grad_norm": 0.11363779753446579, "learning_rate": 4.85541929341604e-05, "loss": 0.0433, "step": 30670 }, { "epoch": 0.1484, "grad_norm": 0.1235252171754837, "learning_rate": 4.855280724526613e-05, "loss": 0.0422, "step": 30680 }, { "epoch": 0.14845, "grad_norm": 0.1475268006324768, "learning_rate": 4.8551420912446956e-05, "loss": 0.0411, "step": 30690 }, { "epoch": 0.1485, "grad_norm": 0.1395253837108612, "learning_rate": 4.855003393574079e-05, "loss": 0.0429, "step": 30700 }, { "epoch": 0.14855, "grad_norm": 0.13783404231071472, "learning_rate": 4.854864631518553e-05, "loss": 0.0435, "step": 30710 }, { "epoch": 0.1486, "grad_norm": 0.12407044321298599, "learning_rate": 4.854725805081913e-05, "loss": 0.0442, "step": 30720 }, { "epoch": 0.14865, "grad_norm": 0.11461728811264038, "learning_rate": 4.8545869142679556e-05, "loss": 0.0418, "step": 30730 }, { "epoch": 0.1487, "grad_norm": 0.15319527685642242, "learning_rate": 4.8544479590804754e-05, "loss": 0.0417, "step": 30740 }, { "epoch": 0.14875, "grad_norm": 0.10197413712739944, "learning_rate": 4.854308939523272e-05, "loss": 0.0422, "step": 30750 }, { "epoch": 0.1488, "grad_norm": 0.11823474615812302, "learning_rate": 4.854169855600148e-05, "loss": 0.0416, "step": 30760 }, { "epoch": 0.14885, "grad_norm": 0.1088530421257019, "learning_rate": 4.854030707314904e-05, "loss": 0.0416, "step": 30770 }, { "epoch": 0.1489, "grad_norm": 0.12142953276634216, "learning_rate": 4.853891494671344e-05, "loss": 0.0436, "step": 30780 }, { "epoch": 0.14895, "grad_norm": 0.1265615075826645, "learning_rate": 4.853752217673276e-05, "loss": 0.0422, "step": 30790 }, { "epoch": 0.149, "grad_norm": 0.1514863669872284, "learning_rate": 4.853612876324506e-05, "loss": 0.0458, "step": 30800 }, { "epoch": 0.14905, "grad_norm": 0.14438602328300476, "learning_rate": 4.853473470628844e-05, "loss": 0.0461, "step": 30810 }, { "epoch": 0.1491, "grad_norm": 0.12658219039440155, "learning_rate": 4.853334000590102e-05, "loss": 0.0449, "step": 30820 }, { "epoch": 0.14915, "grad_norm": 0.10492241382598877, "learning_rate": 4.853194466212093e-05, "loss": 0.0422, "step": 30830 }, { "epoch": 0.1492, "grad_norm": 0.11288110911846161, "learning_rate": 4.85305486749863e-05, "loss": 0.0431, "step": 30840 }, { "epoch": 0.14925, "grad_norm": 0.1231205090880394, "learning_rate": 4.852915204453532e-05, "loss": 0.0439, "step": 30850 }, { "epoch": 0.1493, "grad_norm": 0.13926143944263458, "learning_rate": 4.852775477080616e-05, "loss": 0.0418, "step": 30860 }, { "epoch": 0.14935, "grad_norm": 0.12145378440618515, "learning_rate": 4.852635685383702e-05, "loss": 0.043, "step": 30870 }, { "epoch": 0.1494, "grad_norm": 0.13048270344734192, "learning_rate": 4.8524958293666125e-05, "loss": 0.0447, "step": 30880 }, { "epoch": 0.14945, "grad_norm": 0.13639546930789948, "learning_rate": 4.852355909033171e-05, "loss": 0.0449, "step": 30890 }, { "epoch": 0.1495, "grad_norm": 0.12359599024057388, "learning_rate": 4.852215924387202e-05, "loss": 0.043, "step": 30900 }, { "epoch": 0.14955, "grad_norm": 0.1215788796544075, "learning_rate": 4.8520758754325343e-05, "loss": 0.0432, "step": 30910 }, { "epoch": 0.1496, "grad_norm": 0.13396112620830536, "learning_rate": 4.851935762172995e-05, "loss": 0.0435, "step": 30920 }, { "epoch": 0.14965, "grad_norm": 0.10398834198713303, "learning_rate": 4.8517955846124164e-05, "loss": 0.0428, "step": 30930 }, { "epoch": 0.1497, "grad_norm": 0.13041123747825623, "learning_rate": 4.851655342754629e-05, "loss": 0.0436, "step": 30940 }, { "epoch": 0.14975, "grad_norm": 0.13264279067516327, "learning_rate": 4.851515036603469e-05, "loss": 0.0433, "step": 30950 }, { "epoch": 0.1498, "grad_norm": 0.12627571821212769, "learning_rate": 4.85137466616277e-05, "loss": 0.0426, "step": 30960 }, { "epoch": 0.14985, "grad_norm": 0.11815209686756134, "learning_rate": 4.851234231436372e-05, "loss": 0.0429, "step": 30970 }, { "epoch": 0.1499, "grad_norm": 0.13062144815921783, "learning_rate": 4.8510937324281134e-05, "loss": 0.0441, "step": 30980 }, { "epoch": 0.14995, "grad_norm": 0.11060670763254166, "learning_rate": 4.850953169141835e-05, "loss": 0.0422, "step": 30990 }, { "epoch": 0.15, "grad_norm": 0.11919166892766953, "learning_rate": 4.850812541581381e-05, "loss": 0.0419, "step": 31000 }, { "epoch": 0.15005, "grad_norm": 0.1286785900592804, "learning_rate": 4.8506718497505944e-05, "loss": 0.0435, "step": 31010 }, { "epoch": 0.1501, "grad_norm": 0.1418483853340149, "learning_rate": 4.8505310936533225e-05, "loss": 0.0417, "step": 31020 }, { "epoch": 0.15015, "grad_norm": 0.14956867694854736, "learning_rate": 4.8503902732934133e-05, "loss": 0.0432, "step": 31030 }, { "epoch": 0.1502, "grad_norm": 0.12980765104293823, "learning_rate": 4.850249388674718e-05, "loss": 0.0424, "step": 31040 }, { "epoch": 0.15025, "grad_norm": 0.12441367655992508, "learning_rate": 4.8501084398010873e-05, "loss": 0.0417, "step": 31050 }, { "epoch": 0.1503, "grad_norm": 0.14082591235637665, "learning_rate": 4.8499674266763745e-05, "loss": 0.0417, "step": 31060 }, { "epoch": 0.15035, "grad_norm": 0.12361367046833038, "learning_rate": 4.849826349304435e-05, "loss": 0.0429, "step": 31070 }, { "epoch": 0.1504, "grad_norm": 0.12353827059268951, "learning_rate": 4.849685207689126e-05, "loss": 0.0445, "step": 31080 }, { "epoch": 0.15045, "grad_norm": 0.12288283556699753, "learning_rate": 4.849544001834306e-05, "loss": 0.0426, "step": 31090 }, { "epoch": 0.1505, "grad_norm": 0.1343759000301361, "learning_rate": 4.849402731743836e-05, "loss": 0.0443, "step": 31100 }, { "epoch": 0.15055, "grad_norm": 0.12144505977630615, "learning_rate": 4.849261397421577e-05, "loss": 0.0427, "step": 31110 }, { "epoch": 0.1506, "grad_norm": 0.13607613742351532, "learning_rate": 4.849119998871395e-05, "loss": 0.0452, "step": 31120 }, { "epoch": 0.15065, "grad_norm": 0.15730790793895721, "learning_rate": 4.848978536097154e-05, "loss": 0.0449, "step": 31130 }, { "epoch": 0.1507, "grad_norm": 0.1263347566127777, "learning_rate": 4.848837009102723e-05, "loss": 0.0448, "step": 31140 }, { "epoch": 0.15075, "grad_norm": 0.16018787026405334, "learning_rate": 4.8486954178919704e-05, "loss": 0.0455, "step": 31150 }, { "epoch": 0.1508, "grad_norm": 0.14352253079414368, "learning_rate": 4.848553762468767e-05, "loss": 0.0434, "step": 31160 }, { "epoch": 0.15085, "grad_norm": 0.138255774974823, "learning_rate": 4.8484120428369864e-05, "loss": 0.0437, "step": 31170 }, { "epoch": 0.1509, "grad_norm": 0.15098395943641663, "learning_rate": 4.848270259000503e-05, "loss": 0.0439, "step": 31180 }, { "epoch": 0.15095, "grad_norm": 0.1253850758075714, "learning_rate": 4.848128410963193e-05, "loss": 0.0436, "step": 31190 }, { "epoch": 0.151, "grad_norm": 0.13632526993751526, "learning_rate": 4.8479864987289336e-05, "loss": 0.0427, "step": 31200 }, { "epoch": 0.15105, "grad_norm": 0.12763665616512299, "learning_rate": 4.847844522301606e-05, "loss": 0.0422, "step": 31210 }, { "epoch": 0.1511, "grad_norm": 0.12563873827457428, "learning_rate": 4.8477024816850916e-05, "loss": 0.045, "step": 31220 }, { "epoch": 0.15115, "grad_norm": 0.1258392035961151, "learning_rate": 4.847560376883272e-05, "loss": 0.0422, "step": 31230 }, { "epoch": 0.1512, "grad_norm": 0.1221856102347374, "learning_rate": 4.847418207900035e-05, "loss": 0.0423, "step": 31240 }, { "epoch": 0.15125, "grad_norm": 0.1283620297908783, "learning_rate": 4.847275974739266e-05, "loss": 0.0414, "step": 31250 }, { "epoch": 0.1513, "grad_norm": 0.13173000514507294, "learning_rate": 4.8471336774048526e-05, "loss": 0.0446, "step": 31260 }, { "epoch": 0.15135, "grad_norm": 0.12331975251436234, "learning_rate": 4.846991315900687e-05, "loss": 0.0446, "step": 31270 }, { "epoch": 0.1514, "grad_norm": 0.12383377552032471, "learning_rate": 4.846848890230661e-05, "loss": 0.0417, "step": 31280 }, { "epoch": 0.15145, "grad_norm": 0.14767709374427795, "learning_rate": 4.8467064003986676e-05, "loss": 0.043, "step": 31290 }, { "epoch": 0.1515, "grad_norm": 0.13305449485778809, "learning_rate": 4.846563846408602e-05, "loss": 0.041, "step": 31300 }, { "epoch": 0.15155, "grad_norm": 0.1448502540588379, "learning_rate": 4.846421228264363e-05, "loss": 0.0448, "step": 31310 }, { "epoch": 0.1516, "grad_norm": 0.14288462698459625, "learning_rate": 4.846278545969849e-05, "loss": 0.0413, "step": 31320 }, { "epoch": 0.15165, "grad_norm": 0.15707068145275116, "learning_rate": 4.846135799528961e-05, "loss": 0.0411, "step": 31330 }, { "epoch": 0.1517, "grad_norm": 0.13953132927417755, "learning_rate": 4.845992988945602e-05, "loss": 0.0427, "step": 31340 }, { "epoch": 0.15175, "grad_norm": 0.14889703691005707, "learning_rate": 4.845850114223677e-05, "loss": 0.0442, "step": 31350 }, { "epoch": 0.1518, "grad_norm": 0.15186628699302673, "learning_rate": 4.845707175367089e-05, "loss": 0.0453, "step": 31360 }, { "epoch": 0.15185, "grad_norm": 0.14371851086616516, "learning_rate": 4.8455641723797496e-05, "loss": 0.0428, "step": 31370 }, { "epoch": 0.1519, "grad_norm": 0.1377260982990265, "learning_rate": 4.8454211052655665e-05, "loss": 0.044, "step": 31380 }, { "epoch": 0.15195, "grad_norm": 0.12451760470867157, "learning_rate": 4.8452779740284516e-05, "loss": 0.0416, "step": 31390 }, { "epoch": 0.152, "grad_norm": 0.15763983130455017, "learning_rate": 4.8451347786723175e-05, "loss": 0.0421, "step": 31400 }, { "epoch": 0.15205, "grad_norm": 0.15236034989356995, "learning_rate": 4.8449915192010795e-05, "loss": 0.0418, "step": 31410 }, { "epoch": 0.1521, "grad_norm": 0.14867082238197327, "learning_rate": 4.8448481956186556e-05, "loss": 0.0423, "step": 31420 }, { "epoch": 0.15215, "grad_norm": 0.13444784283638, "learning_rate": 4.844704807928961e-05, "loss": 0.0432, "step": 31430 }, { "epoch": 0.1522, "grad_norm": 0.13199631869792938, "learning_rate": 4.844561356135919e-05, "loss": 0.0413, "step": 31440 }, { "epoch": 0.15225, "grad_norm": 0.14558719098567963, "learning_rate": 4.844417840243451e-05, "loss": 0.0411, "step": 31450 }, { "epoch": 0.1523, "grad_norm": 0.1309782713651657, "learning_rate": 4.8442742602554794e-05, "loss": 0.0422, "step": 31460 }, { "epoch": 0.15235, "grad_norm": 0.11371913552284241, "learning_rate": 4.84413061617593e-05, "loss": 0.0422, "step": 31470 }, { "epoch": 0.1524, "grad_norm": 0.12032821029424667, "learning_rate": 4.84398690800873e-05, "loss": 0.0404, "step": 31480 }, { "epoch": 0.15245, "grad_norm": 0.13665400445461273, "learning_rate": 4.843843135757809e-05, "loss": 0.042, "step": 31490 }, { "epoch": 0.1525, "grad_norm": 0.1554425209760666, "learning_rate": 4.843699299427097e-05, "loss": 0.0441, "step": 31500 }, { "epoch": 0.15255, "grad_norm": 0.1209154799580574, "learning_rate": 4.8435553990205265e-05, "loss": 0.0427, "step": 31510 }, { "epoch": 0.1526, "grad_norm": 0.1109633594751358, "learning_rate": 4.843411434542032e-05, "loss": 0.0415, "step": 31520 }, { "epoch": 0.15265, "grad_norm": 0.10953951627016068, "learning_rate": 4.8432674059955496e-05, "loss": 0.0416, "step": 31530 }, { "epoch": 0.1527, "grad_norm": 0.10354560613632202, "learning_rate": 4.843123313385016e-05, "loss": 0.0416, "step": 31540 }, { "epoch": 0.15275, "grad_norm": 0.08530950546264648, "learning_rate": 4.842979156714372e-05, "loss": 0.0413, "step": 31550 }, { "epoch": 0.1528, "grad_norm": 0.10561627149581909, "learning_rate": 4.842834935987557e-05, "loss": 0.0432, "step": 31560 }, { "epoch": 0.15285, "grad_norm": 0.11676881462335587, "learning_rate": 4.842690651208516e-05, "loss": 0.0417, "step": 31570 }, { "epoch": 0.1529, "grad_norm": 0.11251876503229141, "learning_rate": 4.8425463023811924e-05, "loss": 0.0417, "step": 31580 }, { "epoch": 0.15295, "grad_norm": 0.15875111520290375, "learning_rate": 4.842401889509532e-05, "loss": 0.0459, "step": 31590 }, { "epoch": 0.153, "grad_norm": 0.1364508420228958, "learning_rate": 4.8422574125974855e-05, "loss": 0.0429, "step": 31600 }, { "epoch": 0.15305, "grad_norm": 0.13057385385036469, "learning_rate": 4.8421128716490004e-05, "loss": 0.0432, "step": 31610 }, { "epoch": 0.1531, "grad_norm": 0.12634187936782837, "learning_rate": 4.84196826666803e-05, "loss": 0.0414, "step": 31620 }, { "epoch": 0.15315, "grad_norm": 0.11812356114387512, "learning_rate": 4.841823597658527e-05, "loss": 0.0414, "step": 31630 }, { "epoch": 0.1532, "grad_norm": 0.12652729451656342, "learning_rate": 4.841678864624446e-05, "loss": 0.0421, "step": 31640 }, { "epoch": 0.15325, "grad_norm": 0.11509863287210464, "learning_rate": 4.841534067569744e-05, "loss": 0.0451, "step": 31650 }, { "epoch": 0.1533, "grad_norm": 0.16723394393920898, "learning_rate": 4.841389206498381e-05, "loss": 0.0462, "step": 31660 }, { "epoch": 0.15335, "grad_norm": 0.15288612246513367, "learning_rate": 4.841244281414317e-05, "loss": 0.0426, "step": 31670 }, { "epoch": 0.1534, "grad_norm": 0.13192178308963776, "learning_rate": 4.841099292321514e-05, "loss": 0.0443, "step": 31680 }, { "epoch": 0.15345, "grad_norm": 0.13673368096351624, "learning_rate": 4.840954239223935e-05, "loss": 0.0424, "step": 31690 }, { "epoch": 0.1535, "grad_norm": 0.12678951025009155, "learning_rate": 4.840809122125547e-05, "loss": 0.0422, "step": 31700 }, { "epoch": 0.15355, "grad_norm": 0.11920646578073502, "learning_rate": 4.840663941030317e-05, "loss": 0.041, "step": 31710 }, { "epoch": 0.1536, "grad_norm": 0.12292204797267914, "learning_rate": 4.840518695942214e-05, "loss": 0.0433, "step": 31720 }, { "epoch": 0.15365, "grad_norm": 0.14412416517734528, "learning_rate": 4.8403733868652104e-05, "loss": 0.044, "step": 31730 }, { "epoch": 0.1537, "grad_norm": 0.13721759617328644, "learning_rate": 4.840228013803276e-05, "loss": 0.0435, "step": 31740 }, { "epoch": 0.15375, "grad_norm": 0.13853123784065247, "learning_rate": 4.840082576760388e-05, "loss": 0.0429, "step": 31750 }, { "epoch": 0.1538, "grad_norm": 0.1147557869553566, "learning_rate": 4.839937075740521e-05, "loss": 0.0429, "step": 31760 }, { "epoch": 0.15385, "grad_norm": 0.15210305154323578, "learning_rate": 4.8397915107476535e-05, "loss": 0.043, "step": 31770 }, { "epoch": 0.1539, "grad_norm": 0.1328851878643036, "learning_rate": 4.839645881785765e-05, "loss": 0.0417, "step": 31780 }, { "epoch": 0.15395, "grad_norm": 0.10255947709083557, "learning_rate": 4.8395001888588366e-05, "loss": 0.0408, "step": 31790 }, { "epoch": 0.154, "grad_norm": 0.13945145905017853, "learning_rate": 4.8393544319708524e-05, "loss": 0.043, "step": 31800 }, { "epoch": 0.15405, "grad_norm": 0.13556864857673645, "learning_rate": 4.839208611125797e-05, "loss": 0.0419, "step": 31810 }, { "epoch": 0.1541, "grad_norm": 0.12402399629354477, "learning_rate": 4.839062726327657e-05, "loss": 0.0444, "step": 31820 }, { "epoch": 0.15415, "grad_norm": 0.13532769680023193, "learning_rate": 4.83891677758042e-05, "loss": 0.0425, "step": 31830 }, { "epoch": 0.1542, "grad_norm": 0.13559764623641968, "learning_rate": 4.838770764888078e-05, "loss": 0.0431, "step": 31840 }, { "epoch": 0.15425, "grad_norm": 0.12736962735652924, "learning_rate": 4.838624688254621e-05, "loss": 0.0422, "step": 31850 }, { "epoch": 0.1543, "grad_norm": 0.11483705788850784, "learning_rate": 4.838478547684045e-05, "loss": 0.0415, "step": 31860 }, { "epoch": 0.15435, "grad_norm": 0.14374835789203644, "learning_rate": 4.838332343180343e-05, "loss": 0.0416, "step": 31870 }, { "epoch": 0.1544, "grad_norm": 0.14173774421215057, "learning_rate": 4.8381860747475136e-05, "loss": 0.0442, "step": 31880 }, { "epoch": 0.15445, "grad_norm": 0.1337192803621292, "learning_rate": 4.838039742389555e-05, "loss": 0.0448, "step": 31890 }, { "epoch": 0.1545, "grad_norm": 0.13194599747657776, "learning_rate": 4.837893346110469e-05, "loss": 0.0446, "step": 31900 }, { "epoch": 0.15455, "grad_norm": 0.11847416311502457, "learning_rate": 4.837746885914256e-05, "loss": 0.0453, "step": 31910 }, { "epoch": 0.1546, "grad_norm": 0.15355290472507477, "learning_rate": 4.8376003618049225e-05, "loss": 0.042, "step": 31920 }, { "epoch": 0.15465, "grad_norm": 0.1322755068540573, "learning_rate": 4.837453773786472e-05, "loss": 0.0411, "step": 31930 }, { "epoch": 0.1547, "grad_norm": 0.12431639432907104, "learning_rate": 4.837307121862915e-05, "loss": 0.042, "step": 31940 }, { "epoch": 0.15475, "grad_norm": 0.14812661707401276, "learning_rate": 4.837160406038258e-05, "loss": 0.0435, "step": 31950 }, { "epoch": 0.1548, "grad_norm": 0.1294979453086853, "learning_rate": 4.8370136263165146e-05, "loss": 0.0425, "step": 31960 }, { "epoch": 0.15485, "grad_norm": 0.12444666028022766, "learning_rate": 4.836866782701696e-05, "loss": 0.0429, "step": 31970 }, { "epoch": 0.1549, "grad_norm": 0.1274973303079605, "learning_rate": 4.836719875197818e-05, "loss": 0.0446, "step": 31980 }, { "epoch": 0.15495, "grad_norm": 0.13861462473869324, "learning_rate": 4.836572903808896e-05, "loss": 0.0428, "step": 31990 }, { "epoch": 0.155, "grad_norm": 0.13576054573059082, "learning_rate": 4.836425868538949e-05, "loss": 0.0446, "step": 32000 }, { "epoch": 0.15505, "grad_norm": 0.19204431772232056, "learning_rate": 4.8362787693919967e-05, "loss": 0.0435, "step": 32010 }, { "epoch": 0.1551, "grad_norm": 0.11911473423242569, "learning_rate": 4.83613160637206e-05, "loss": 0.0441, "step": 32020 }, { "epoch": 0.15515, "grad_norm": 0.1512276828289032, "learning_rate": 4.835984379483163e-05, "loss": 0.0443, "step": 32030 }, { "epoch": 0.1552, "grad_norm": 0.1160498857498169, "learning_rate": 4.83583708872933e-05, "loss": 0.0431, "step": 32040 }, { "epoch": 0.15525, "grad_norm": 0.11876894533634186, "learning_rate": 4.835689734114589e-05, "loss": 0.043, "step": 32050 }, { "epoch": 0.1553, "grad_norm": 0.12058950960636139, "learning_rate": 4.835542315642968e-05, "loss": 0.0414, "step": 32060 }, { "epoch": 0.15535, "grad_norm": 0.1484060287475586, "learning_rate": 4.8353948333184986e-05, "loss": 0.0445, "step": 32070 }, { "epoch": 0.1554, "grad_norm": 0.12514857947826385, "learning_rate": 4.8352472871452106e-05, "loss": 0.0443, "step": 32080 }, { "epoch": 0.15545, "grad_norm": 0.12500616908073425, "learning_rate": 4.8350996771271394e-05, "loss": 0.0467, "step": 32090 }, { "epoch": 0.1555, "grad_norm": 0.1326914280653, "learning_rate": 4.83495200326832e-05, "loss": 0.0443, "step": 32100 }, { "epoch": 0.15555, "grad_norm": 0.1272750347852707, "learning_rate": 4.834804265572791e-05, "loss": 0.0423, "step": 32110 }, { "epoch": 0.1556, "grad_norm": 0.14041025936603546, "learning_rate": 4.8346564640445905e-05, "loss": 0.0421, "step": 32120 }, { "epoch": 0.15565, "grad_norm": 0.13470202684402466, "learning_rate": 4.834508598687758e-05, "loss": 0.0446, "step": 32130 }, { "epoch": 0.1557, "grad_norm": 0.16813085973262787, "learning_rate": 4.8343606695063384e-05, "loss": 0.046, "step": 32140 }, { "epoch": 0.15575, "grad_norm": 0.1304176151752472, "learning_rate": 4.8342126765043746e-05, "loss": 0.0429, "step": 32150 }, { "epoch": 0.1558, "grad_norm": 0.11898932605981827, "learning_rate": 4.834064619685914e-05, "loss": 0.0412, "step": 32160 }, { "epoch": 0.15585, "grad_norm": 0.11131195724010468, "learning_rate": 4.833916499055003e-05, "loss": 0.042, "step": 32170 }, { "epoch": 0.1559, "grad_norm": 0.12093434482812881, "learning_rate": 4.833768314615692e-05, "loss": 0.0413, "step": 32180 }, { "epoch": 0.15595, "grad_norm": 0.13842767477035522, "learning_rate": 4.833620066372031e-05, "loss": 0.0423, "step": 32190 }, { "epoch": 0.156, "grad_norm": 0.1440475434064865, "learning_rate": 4.833471754328075e-05, "loss": 0.0424, "step": 32200 }, { "epoch": 0.15605, "grad_norm": 0.13751320540905, "learning_rate": 4.8333233784878785e-05, "loss": 0.0416, "step": 32210 }, { "epoch": 0.1561, "grad_norm": 0.15234899520874023, "learning_rate": 4.8331749388554956e-05, "loss": 0.0426, "step": 32220 }, { "epoch": 0.15615, "grad_norm": 0.1456148475408554, "learning_rate": 4.8330264354349886e-05, "loss": 0.0444, "step": 32230 }, { "epoch": 0.1562, "grad_norm": 0.14833270013332367, "learning_rate": 4.832877868230414e-05, "loss": 0.0444, "step": 32240 }, { "epoch": 0.15625, "grad_norm": 0.14564715325832367, "learning_rate": 4.832729237245835e-05, "loss": 0.0434, "step": 32250 }, { "epoch": 0.1563, "grad_norm": 0.13104142248630524, "learning_rate": 4.832580542485316e-05, "loss": 0.0433, "step": 32260 }, { "epoch": 0.15635, "grad_norm": 0.12680700421333313, "learning_rate": 4.83243178395292e-05, "loss": 0.0432, "step": 32270 }, { "epoch": 0.1564, "grad_norm": 0.1314367651939392, "learning_rate": 4.832282961652716e-05, "loss": 0.043, "step": 32280 }, { "epoch": 0.15645, "grad_norm": 0.13626080751419067, "learning_rate": 4.832134075588771e-05, "loss": 0.0437, "step": 32290 }, { "epoch": 0.1565, "grad_norm": 0.16060654819011688, "learning_rate": 4.831985125765157e-05, "loss": 0.0452, "step": 32300 }, { "epoch": 0.15655, "grad_norm": 0.13280725479125977, "learning_rate": 4.831836112185946e-05, "loss": 0.0461, "step": 32310 }, { "epoch": 0.1566, "grad_norm": 0.14577241241931915, "learning_rate": 4.8316870348552116e-05, "loss": 0.044, "step": 32320 }, { "epoch": 0.15665, "grad_norm": 0.12814298272132874, "learning_rate": 4.83153789377703e-05, "loss": 0.0439, "step": 32330 }, { "epoch": 0.1567, "grad_norm": 0.1262589991092682, "learning_rate": 4.831388688955478e-05, "loss": 0.044, "step": 32340 }, { "epoch": 0.15675, "grad_norm": 0.13185927271842957, "learning_rate": 4.8312394203946356e-05, "loss": 0.0477, "step": 32350 }, { "epoch": 0.1568, "grad_norm": 0.11433325707912445, "learning_rate": 4.831090088098582e-05, "loss": 0.0425, "step": 32360 }, { "epoch": 0.15685, "grad_norm": 0.12537720799446106, "learning_rate": 4.8309406920714024e-05, "loss": 0.0455, "step": 32370 }, { "epoch": 0.1569, "grad_norm": 0.12167773395776749, "learning_rate": 4.83079123231718e-05, "loss": 0.0438, "step": 32380 }, { "epoch": 0.15695, "grad_norm": 0.1502913534641266, "learning_rate": 4.83064170884e-05, "loss": 0.0436, "step": 32390 }, { "epoch": 0.157, "grad_norm": 0.13366125524044037, "learning_rate": 4.830492121643951e-05, "loss": 0.0464, "step": 32400 }, { "epoch": 0.15705, "grad_norm": 0.10904904454946518, "learning_rate": 4.830342470733125e-05, "loss": 0.0436, "step": 32410 }, { "epoch": 0.1571, "grad_norm": 0.13437418639659882, "learning_rate": 4.8301927561116095e-05, "loss": 0.0448, "step": 32420 }, { "epoch": 0.15715, "grad_norm": 0.1309521645307541, "learning_rate": 4.8300429777835e-05, "loss": 0.0441, "step": 32430 }, { "epoch": 0.1572, "grad_norm": 0.11571266502141953, "learning_rate": 4.829893135752891e-05, "loss": 0.0441, "step": 32440 }, { "epoch": 0.15725, "grad_norm": 0.13288447260856628, "learning_rate": 4.829743230023879e-05, "loss": 0.0431, "step": 32450 }, { "epoch": 0.1573, "grad_norm": 0.10984359681606293, "learning_rate": 4.829593260600561e-05, "loss": 0.0422, "step": 32460 }, { "epoch": 0.15735, "grad_norm": 0.12429521977901459, "learning_rate": 4.82944322748704e-05, "loss": 0.0438, "step": 32470 }, { "epoch": 0.1574, "grad_norm": 0.11131302267313004, "learning_rate": 4.829293130687416e-05, "loss": 0.045, "step": 32480 }, { "epoch": 0.15745, "grad_norm": 0.1384342461824417, "learning_rate": 4.829142970205792e-05, "loss": 0.0446, "step": 32490 }, { "epoch": 0.1575, "grad_norm": 0.12439969182014465, "learning_rate": 4.828992746046276e-05, "loss": 0.0443, "step": 32500 }, { "epoch": 0.15755, "grad_norm": 0.14163966476917267, "learning_rate": 4.828842458212972e-05, "loss": 0.0451, "step": 32510 }, { "epoch": 0.1576, "grad_norm": 0.1380494385957718, "learning_rate": 4.82869210670999e-05, "loss": 0.044, "step": 32520 }, { "epoch": 0.15765, "grad_norm": 0.13433505594730377, "learning_rate": 4.8285416915414406e-05, "loss": 0.0427, "step": 32530 }, { "epoch": 0.1577, "grad_norm": 0.12882503867149353, "learning_rate": 4.828391212711437e-05, "loss": 0.0415, "step": 32540 }, { "epoch": 0.15775, "grad_norm": 0.1770928055047989, "learning_rate": 4.828240670224092e-05, "loss": 0.0439, "step": 32550 }, { "epoch": 0.1578, "grad_norm": 0.1492464393377304, "learning_rate": 4.828090064083521e-05, "loss": 0.0416, "step": 32560 }, { "epoch": 0.15785, "grad_norm": 0.1461704522371292, "learning_rate": 4.8279393942938434e-05, "loss": 0.0427, "step": 32570 }, { "epoch": 0.1579, "grad_norm": 0.14312250912189484, "learning_rate": 4.8277886608591766e-05, "loss": 0.0421, "step": 32580 }, { "epoch": 0.15795, "grad_norm": 0.13308829069137573, "learning_rate": 4.827637863783643e-05, "loss": 0.0445, "step": 32590 }, { "epoch": 0.158, "grad_norm": 0.13714931905269623, "learning_rate": 4.827487003071364e-05, "loss": 0.0423, "step": 32600 }, { "epoch": 0.15805, "grad_norm": 0.1418546587228775, "learning_rate": 4.8273360787264644e-05, "loss": 0.043, "step": 32610 }, { "epoch": 0.1581, "grad_norm": 0.13813413679599762, "learning_rate": 4.8271850907530715e-05, "loss": 0.0421, "step": 32620 }, { "epoch": 0.15815, "grad_norm": 0.1461115926504135, "learning_rate": 4.827034039155312e-05, "loss": 0.0423, "step": 32630 }, { "epoch": 0.1582, "grad_norm": 0.15559233725070953, "learning_rate": 4.826882923937317e-05, "loss": 0.0451, "step": 32640 }, { "epoch": 0.15825, "grad_norm": 0.12452242523431778, "learning_rate": 4.826731745103216e-05, "loss": 0.0427, "step": 32650 }, { "epoch": 0.1583, "grad_norm": 0.11743653565645218, "learning_rate": 4.826580502657144e-05, "loss": 0.0429, "step": 32660 }, { "epoch": 0.15835, "grad_norm": 0.12276814877986908, "learning_rate": 4.826429196603235e-05, "loss": 0.0436, "step": 32670 }, { "epoch": 0.1584, "grad_norm": 0.13034766912460327, "learning_rate": 4.826277826945625e-05, "loss": 0.0398, "step": 32680 }, { "epoch": 0.15845, "grad_norm": 0.1436816155910492, "learning_rate": 4.826126393688454e-05, "loss": 0.0417, "step": 32690 }, { "epoch": 0.1585, "grad_norm": 0.14281252026557922, "learning_rate": 4.825974896835861e-05, "loss": 0.0448, "step": 32700 }, { "epoch": 0.15855, "grad_norm": 0.133723646402359, "learning_rate": 4.825823336391988e-05, "loss": 0.0445, "step": 32710 }, { "epoch": 0.1586, "grad_norm": 0.12675301730632782, "learning_rate": 4.825671712360978e-05, "loss": 0.0416, "step": 32720 }, { "epoch": 0.15865, "grad_norm": 0.12376851588487625, "learning_rate": 4.825520024746978e-05, "loss": 0.0412, "step": 32730 }, { "epoch": 0.1587, "grad_norm": 0.13101379573345184, "learning_rate": 4.825368273554135e-05, "loss": 0.0419, "step": 32740 }, { "epoch": 0.15875, "grad_norm": 0.12889224290847778, "learning_rate": 4.825216458786596e-05, "loss": 0.041, "step": 32750 }, { "epoch": 0.1588, "grad_norm": 0.11569836735725403, "learning_rate": 4.8250645804485125e-05, "loss": 0.0406, "step": 32760 }, { "epoch": 0.15885, "grad_norm": 0.1261492222547531, "learning_rate": 4.824912638544037e-05, "loss": 0.0427, "step": 32770 }, { "epoch": 0.1589, "grad_norm": 0.10232339799404144, "learning_rate": 4.824760633077323e-05, "loss": 0.0427, "step": 32780 }, { "epoch": 0.15895, "grad_norm": 0.10576627403497696, "learning_rate": 4.8246085640525276e-05, "loss": 0.0413, "step": 32790 }, { "epoch": 0.159, "grad_norm": 0.11924322694540024, "learning_rate": 4.824456431473807e-05, "loss": 0.0413, "step": 32800 }, { "epoch": 0.15905, "grad_norm": 0.13449467718601227, "learning_rate": 4.82430423534532e-05, "loss": 0.0421, "step": 32810 }, { "epoch": 0.1591, "grad_norm": 0.13242179155349731, "learning_rate": 4.8241519756712293e-05, "loss": 0.0422, "step": 32820 }, { "epoch": 0.15915, "grad_norm": 0.11021512001752853, "learning_rate": 4.823999652455696e-05, "loss": 0.0428, "step": 32830 }, { "epoch": 0.1592, "grad_norm": 0.1300233155488968, "learning_rate": 4.823847265702887e-05, "loss": 0.0448, "step": 32840 }, { "epoch": 0.15925, "grad_norm": 0.1332443505525589, "learning_rate": 4.823694815416965e-05, "loss": 0.0426, "step": 32850 }, { "epoch": 0.1593, "grad_norm": 0.11915509402751923, "learning_rate": 4.8235423016021e-05, "loss": 0.0433, "step": 32860 }, { "epoch": 0.15935, "grad_norm": 0.1483246237039566, "learning_rate": 4.8233897242624616e-05, "loss": 0.044, "step": 32870 }, { "epoch": 0.1594, "grad_norm": 0.10429392009973526, "learning_rate": 4.823237083402221e-05, "loss": 0.0404, "step": 32880 }, { "epoch": 0.15945, "grad_norm": 0.10859861224889755, "learning_rate": 4.823084379025552e-05, "loss": 0.0409, "step": 32890 }, { "epoch": 0.1595, "grad_norm": 0.12180684506893158, "learning_rate": 4.822931611136628e-05, "loss": 0.0414, "step": 32900 }, { "epoch": 0.15955, "grad_norm": 0.10532913357019424, "learning_rate": 4.8227787797396265e-05, "loss": 0.0421, "step": 32910 }, { "epoch": 0.1596, "grad_norm": 0.11721262335777283, "learning_rate": 4.822625884838726e-05, "loss": 0.0426, "step": 32920 }, { "epoch": 0.15965, "grad_norm": 0.1321670413017273, "learning_rate": 4.8224729264381065e-05, "loss": 0.0426, "step": 32930 }, { "epoch": 0.1597, "grad_norm": 0.13177931308746338, "learning_rate": 4.82231990454195e-05, "loss": 0.0419, "step": 32940 }, { "epoch": 0.15975, "grad_norm": 0.1313125044107437, "learning_rate": 4.822166819154439e-05, "loss": 0.0429, "step": 32950 }, { "epoch": 0.1598, "grad_norm": 0.12595787644386292, "learning_rate": 4.8220136702797596e-05, "loss": 0.0434, "step": 32960 }, { "epoch": 0.15985, "grad_norm": 0.13454866409301758, "learning_rate": 4.8218604579220994e-05, "loss": 0.0443, "step": 32970 }, { "epoch": 0.1599, "grad_norm": 0.12417369335889816, "learning_rate": 4.821707182085646e-05, "loss": 0.0434, "step": 32980 }, { "epoch": 0.15995, "grad_norm": 0.12248330563306808, "learning_rate": 4.821553842774591e-05, "loss": 0.0425, "step": 32990 }, { "epoch": 0.16, "grad_norm": 0.12675760686397552, "learning_rate": 4.8214004399931255e-05, "loss": 0.0419, "step": 33000 }, { "epoch": 0.16005, "grad_norm": 0.13656532764434814, "learning_rate": 4.8212469737454444e-05, "loss": 0.0417, "step": 33010 }, { "epoch": 0.1601, "grad_norm": 0.12864769995212555, "learning_rate": 4.821093444035743e-05, "loss": 0.0426, "step": 33020 }, { "epoch": 0.16015, "grad_norm": 0.12323490530252457, "learning_rate": 4.820939850868219e-05, "loss": 0.0431, "step": 33030 }, { "epoch": 0.1602, "grad_norm": 0.1335224062204361, "learning_rate": 4.8207861942470714e-05, "loss": 0.0427, "step": 33040 }, { "epoch": 0.16025, "grad_norm": 0.14590322971343994, "learning_rate": 4.8206324741765006e-05, "loss": 0.0427, "step": 33050 }, { "epoch": 0.1603, "grad_norm": 0.1478782743215561, "learning_rate": 4.820478690660711e-05, "loss": 0.0428, "step": 33060 }, { "epoch": 0.16035, "grad_norm": 0.15049858391284943, "learning_rate": 4.820324843703905e-05, "loss": 0.0451, "step": 33070 }, { "epoch": 0.1604, "grad_norm": 0.14486844837665558, "learning_rate": 4.82017093331029e-05, "loss": 0.0427, "step": 33080 }, { "epoch": 0.16045, "grad_norm": 0.14537589251995087, "learning_rate": 4.8200169594840713e-05, "loss": 0.0448, "step": 33090 }, { "epoch": 0.1605, "grad_norm": 0.1660872846841812, "learning_rate": 4.819862922229463e-05, "loss": 0.0424, "step": 33100 }, { "epoch": 0.16055, "grad_norm": 0.12716545164585114, "learning_rate": 4.8197088215506724e-05, "loss": 0.0424, "step": 33110 }, { "epoch": 0.1606, "grad_norm": 0.13636474311351776, "learning_rate": 4.819554657451915e-05, "loss": 0.0434, "step": 33120 }, { "epoch": 0.16065, "grad_norm": 0.14646953344345093, "learning_rate": 4.819400429937404e-05, "loss": 0.0422, "step": 33130 }, { "epoch": 0.1607, "grad_norm": 0.14615385234355927, "learning_rate": 4.819246139011358e-05, "loss": 0.0444, "step": 33140 }, { "epoch": 0.16075, "grad_norm": 0.13825707137584686, "learning_rate": 4.819091784677992e-05, "loss": 0.0434, "step": 33150 }, { "epoch": 0.1608, "grad_norm": 0.11570119857788086, "learning_rate": 4.8189373669415284e-05, "loss": 0.0407, "step": 33160 }, { "epoch": 0.16085, "grad_norm": 0.09911048412322998, "learning_rate": 4.818782885806189e-05, "loss": 0.0422, "step": 33170 }, { "epoch": 0.1609, "grad_norm": 0.15140579640865326, "learning_rate": 4.818628341276196e-05, "loss": 0.046, "step": 33180 }, { "epoch": 0.16095, "grad_norm": 0.14800402522087097, "learning_rate": 4.8184737333557754e-05, "loss": 0.0451, "step": 33190 }, { "epoch": 0.161, "grad_norm": 0.18855224549770355, "learning_rate": 4.818319062049154e-05, "loss": 0.0445, "step": 33200 }, { "epoch": 0.16105, "grad_norm": 0.15011833608150482, "learning_rate": 4.8181643273605605e-05, "loss": 0.048, "step": 33210 }, { "epoch": 0.1611, "grad_norm": 0.1251029670238495, "learning_rate": 4.818009529294225e-05, "loss": 0.0449, "step": 33220 }, { "epoch": 0.16115, "grad_norm": 0.1206798106431961, "learning_rate": 4.81785466785438e-05, "loss": 0.0429, "step": 33230 }, { "epoch": 0.1612, "grad_norm": 0.1255817860364914, "learning_rate": 4.817699743045259e-05, "loss": 0.0415, "step": 33240 }, { "epoch": 0.16125, "grad_norm": 0.15104557573795319, "learning_rate": 4.817544754871098e-05, "loss": 0.0458, "step": 33250 }, { "epoch": 0.1613, "grad_norm": 0.1329016238451004, "learning_rate": 4.8173897033361336e-05, "loss": 0.0428, "step": 33260 }, { "epoch": 0.16135, "grad_norm": 0.12674003839492798, "learning_rate": 4.8172345884446056e-05, "loss": 0.0429, "step": 33270 }, { "epoch": 0.1614, "grad_norm": 0.12564903497695923, "learning_rate": 4.817079410200754e-05, "loss": 0.0431, "step": 33280 }, { "epoch": 0.16145, "grad_norm": 0.13768647611141205, "learning_rate": 4.816924168608823e-05, "loss": 0.0435, "step": 33290 }, { "epoch": 0.1615, "grad_norm": 0.10589944571256638, "learning_rate": 4.816768863673055e-05, "loss": 0.0427, "step": 33300 }, { "epoch": 0.16155, "grad_norm": 0.11592059582471848, "learning_rate": 4.816613495397696e-05, "loss": 0.0417, "step": 33310 }, { "epoch": 0.1616, "grad_norm": 0.10585790127515793, "learning_rate": 4.8164580637869946e-05, "loss": 0.041, "step": 33320 }, { "epoch": 0.16165, "grad_norm": 0.12283612787723541, "learning_rate": 4.816302568845201e-05, "loss": 0.0421, "step": 33330 }, { "epoch": 0.1617, "grad_norm": 0.1281306892633438, "learning_rate": 4.816147010576565e-05, "loss": 0.0412, "step": 33340 }, { "epoch": 0.16175, "grad_norm": 0.1307964026927948, "learning_rate": 4.815991388985339e-05, "loss": 0.0421, "step": 33350 }, { "epoch": 0.1618, "grad_norm": 0.12150103598833084, "learning_rate": 4.8158357040757794e-05, "loss": 0.044, "step": 33360 }, { "epoch": 0.16185, "grad_norm": 0.14208216965198517, "learning_rate": 4.8156799558521406e-05, "loss": 0.0451, "step": 33370 }, { "epoch": 0.1619, "grad_norm": 0.12786491215229034, "learning_rate": 4.815524144318683e-05, "loss": 0.0432, "step": 33380 }, { "epoch": 0.16195, "grad_norm": 0.1281542181968689, "learning_rate": 4.815368269479664e-05, "loss": 0.0424, "step": 33390 }, { "epoch": 0.162, "grad_norm": 0.13442762196063995, "learning_rate": 4.8152123313393475e-05, "loss": 0.0443, "step": 33400 }, { "epoch": 0.16205, "grad_norm": 0.11269143223762512, "learning_rate": 4.8150563299019955e-05, "loss": 0.0434, "step": 33410 }, { "epoch": 0.1621, "grad_norm": 0.11426067352294922, "learning_rate": 4.8149002651718725e-05, "loss": 0.0426, "step": 33420 }, { "epoch": 0.16215, "grad_norm": 0.10544957965612411, "learning_rate": 4.814744137153247e-05, "loss": 0.0444, "step": 33430 }, { "epoch": 0.1622, "grad_norm": 0.15541492402553558, "learning_rate": 4.814587945850385e-05, "loss": 0.0466, "step": 33440 }, { "epoch": 0.16225, "grad_norm": 0.14596639573574066, "learning_rate": 4.814431691267559e-05, "loss": 0.0422, "step": 33450 }, { "epoch": 0.1623, "grad_norm": 0.11651241034269333, "learning_rate": 4.8142753734090395e-05, "loss": 0.0419, "step": 33460 }, { "epoch": 0.16235, "grad_norm": 0.11221083253622055, "learning_rate": 4.8141189922791014e-05, "loss": 0.0416, "step": 33470 }, { "epoch": 0.1624, "grad_norm": 0.10135234147310257, "learning_rate": 4.813962547882019e-05, "loss": 0.0417, "step": 33480 }, { "epoch": 0.16245, "grad_norm": 0.132141575217247, "learning_rate": 4.81380604022207e-05, "loss": 0.0405, "step": 33490 }, { "epoch": 0.1625, "grad_norm": 0.13567662239074707, "learning_rate": 4.813649469303533e-05, "loss": 0.0421, "step": 33500 }, { "epoch": 0.16255, "grad_norm": 0.13327448070049286, "learning_rate": 4.813492835130688e-05, "loss": 0.0421, "step": 33510 }, { "epoch": 0.1626, "grad_norm": 0.10723388195037842, "learning_rate": 4.813336137707819e-05, "loss": 0.0421, "step": 33520 }, { "epoch": 0.16265, "grad_norm": 0.1065554991364479, "learning_rate": 4.813179377039209e-05, "loss": 0.0417, "step": 33530 }, { "epoch": 0.1627, "grad_norm": 0.11611859500408173, "learning_rate": 4.813022553129144e-05, "loss": 0.0405, "step": 33540 }, { "epoch": 0.16275, "grad_norm": 0.13964098691940308, "learning_rate": 4.812865665981911e-05, "loss": 0.0444, "step": 33550 }, { "epoch": 0.1628, "grad_norm": 0.1278601586818695, "learning_rate": 4.8127087156018e-05, "loss": 0.0429, "step": 33560 }, { "epoch": 0.16285, "grad_norm": 0.14337113499641418, "learning_rate": 4.812551701993101e-05, "loss": 0.0407, "step": 33570 }, { "epoch": 0.1629, "grad_norm": 0.1198313906788826, "learning_rate": 4.812394625160107e-05, "loss": 0.044, "step": 33580 }, { "epoch": 0.16295, "grad_norm": 0.13165201246738434, "learning_rate": 4.8122374851071134e-05, "loss": 0.0434, "step": 33590 }, { "epoch": 0.163, "grad_norm": 0.12109248340129852, "learning_rate": 4.812080281838415e-05, "loss": 0.0418, "step": 33600 }, { "epoch": 0.16305, "grad_norm": 0.11725418269634247, "learning_rate": 4.811923015358311e-05, "loss": 0.044, "step": 33610 }, { "epoch": 0.1631, "grad_norm": 0.12260973453521729, "learning_rate": 4.8117656856711005e-05, "loss": 0.0429, "step": 33620 }, { "epoch": 0.16315, "grad_norm": 0.14538836479187012, "learning_rate": 4.8116082927810836e-05, "loss": 0.0427, "step": 33630 }, { "epoch": 0.1632, "grad_norm": 0.12341050058603287, "learning_rate": 4.811450836692565e-05, "loss": 0.0411, "step": 33640 }, { "epoch": 0.16325, "grad_norm": 0.12862537801265717, "learning_rate": 4.811293317409848e-05, "loss": 0.043, "step": 33650 }, { "epoch": 0.1633, "grad_norm": 0.13257691264152527, "learning_rate": 4.811135734937242e-05, "loss": 0.044, "step": 33660 }, { "epoch": 0.16335, "grad_norm": 0.1441202312707901, "learning_rate": 4.810978089279052e-05, "loss": 0.0436, "step": 33670 }, { "epoch": 0.1634, "grad_norm": 0.12990273535251617, "learning_rate": 4.810820380439589e-05, "loss": 0.0475, "step": 33680 }, { "epoch": 0.16345, "grad_norm": 0.13740640878677368, "learning_rate": 4.8106626084231656e-05, "loss": 0.0451, "step": 33690 }, { "epoch": 0.1635, "grad_norm": 0.1576330065727234, "learning_rate": 4.810504773234094e-05, "loss": 0.0428, "step": 33700 }, { "epoch": 0.16355, "grad_norm": 0.13011543452739716, "learning_rate": 4.81034687487669e-05, "loss": 0.0445, "step": 33710 }, { "epoch": 0.1636, "grad_norm": 0.129585400223732, "learning_rate": 4.8101889133552706e-05, "loss": 0.0425, "step": 33720 }, { "epoch": 0.16365, "grad_norm": 0.13436222076416016, "learning_rate": 4.810030888674154e-05, "loss": 0.0418, "step": 33730 }, { "epoch": 0.1637, "grad_norm": 0.11319204419851303, "learning_rate": 4.809872800837662e-05, "loss": 0.0415, "step": 33740 }, { "epoch": 0.16375, "grad_norm": 0.09980058670043945, "learning_rate": 4.809714649850113e-05, "loss": 0.0424, "step": 33750 }, { "epoch": 0.1638, "grad_norm": 0.13024428486824036, "learning_rate": 4.809556435715835e-05, "loss": 0.0449, "step": 33760 }, { "epoch": 0.16385, "grad_norm": 0.13720740377902985, "learning_rate": 4.809398158439151e-05, "loss": 0.0429, "step": 33770 }, { "epoch": 0.1639, "grad_norm": 0.1280098259449005, "learning_rate": 4.809239818024389e-05, "loss": 0.0418, "step": 33780 }, { "epoch": 0.16395, "grad_norm": 0.13328638672828674, "learning_rate": 4.8090814144758787e-05, "loss": 0.0434, "step": 33790 }, { "epoch": 0.164, "grad_norm": 0.1063094437122345, "learning_rate": 4.808922947797949e-05, "loss": 0.0433, "step": 33800 }, { "epoch": 0.16405, "grad_norm": 0.10085838288068771, "learning_rate": 4.8087644179949335e-05, "loss": 0.044, "step": 33810 }, { "epoch": 0.1641, "grad_norm": 0.1739586889743805, "learning_rate": 4.808605825071166e-05, "loss": 0.0462, "step": 33820 }, { "epoch": 0.16415, "grad_norm": 0.12872996926307678, "learning_rate": 4.808447169030983e-05, "loss": 0.043, "step": 33830 }, { "epoch": 0.1642, "grad_norm": 0.13191847503185272, "learning_rate": 4.808288449878722e-05, "loss": 0.0444, "step": 33840 }, { "epoch": 0.16425, "grad_norm": 0.13148938119411469, "learning_rate": 4.8081296676187214e-05, "loss": 0.0437, "step": 33850 }, { "epoch": 0.1643, "grad_norm": 0.11032675951719284, "learning_rate": 4.807970822255323e-05, "loss": 0.0432, "step": 33860 }, { "epoch": 0.16435, "grad_norm": 0.11404123902320862, "learning_rate": 4.807811913792869e-05, "loss": 0.0422, "step": 33870 }, { "epoch": 0.1644, "grad_norm": 0.11356104910373688, "learning_rate": 4.8076529422357054e-05, "loss": 0.0438, "step": 33880 }, { "epoch": 0.16445, "grad_norm": 0.10598156601190567, "learning_rate": 4.807493907588176e-05, "loss": 0.043, "step": 33890 }, { "epoch": 0.1645, "grad_norm": 0.11783069372177124, "learning_rate": 4.807334809854631e-05, "loss": 0.0425, "step": 33900 }, { "epoch": 0.16455, "grad_norm": 0.1382354497909546, "learning_rate": 4.807175649039418e-05, "loss": 0.0437, "step": 33910 }, { "epoch": 0.1646, "grad_norm": 0.11479967087507248, "learning_rate": 4.807016425146891e-05, "loss": 0.0424, "step": 33920 }, { "epoch": 0.16465, "grad_norm": 0.11492490768432617, "learning_rate": 4.806857138181401e-05, "loss": 0.0423, "step": 33930 }, { "epoch": 0.1647, "grad_norm": 0.10321938991546631, "learning_rate": 4.806697788147303e-05, "loss": 0.0429, "step": 33940 }, { "epoch": 0.16475, "grad_norm": 0.13526004552841187, "learning_rate": 4.8065383750489544e-05, "loss": 0.0416, "step": 33950 }, { "epoch": 0.1648, "grad_norm": 0.14217951893806458, "learning_rate": 4.806378898890713e-05, "loss": 0.0429, "step": 33960 }, { "epoch": 0.16485, "grad_norm": 0.12112054973840714, "learning_rate": 4.806219359676939e-05, "loss": 0.0428, "step": 33970 }, { "epoch": 0.1649, "grad_norm": 0.13174232840538025, "learning_rate": 4.8060597574119945e-05, "loss": 0.0423, "step": 33980 }, { "epoch": 0.16495, "grad_norm": 0.1228984072804451, "learning_rate": 4.805900092100242e-05, "loss": 0.0422, "step": 33990 }, { "epoch": 0.165, "grad_norm": 0.14152731001377106, "learning_rate": 4.8057403637460475e-05, "loss": 0.0421, "step": 34000 }, { "epoch": 0.16505, "grad_norm": 0.10901544243097305, "learning_rate": 4.8055805723537775e-05, "loss": 0.0439, "step": 34010 }, { "epoch": 0.1651, "grad_norm": 0.12598423659801483, "learning_rate": 4.805420717927802e-05, "loss": 0.0425, "step": 34020 }, { "epoch": 0.16515, "grad_norm": 0.13269755244255066, "learning_rate": 4.805260800472489e-05, "loss": 0.0439, "step": 34030 }, { "epoch": 0.1652, "grad_norm": 0.11629320681095123, "learning_rate": 4.8051008199922123e-05, "loss": 0.0426, "step": 34040 }, { "epoch": 0.16525, "grad_norm": 0.1244744211435318, "learning_rate": 4.804940776491345e-05, "loss": 0.042, "step": 34050 }, { "epoch": 0.1653, "grad_norm": 0.11696765571832657, "learning_rate": 4.804780669974262e-05, "loss": 0.0424, "step": 34060 }, { "epoch": 0.16535, "grad_norm": 0.12034659087657928, "learning_rate": 4.804620500445342e-05, "loss": 0.0424, "step": 34070 }, { "epoch": 0.1654, "grad_norm": 0.11395153403282166, "learning_rate": 4.8044602679089634e-05, "loss": 0.041, "step": 34080 }, { "epoch": 0.16545, "grad_norm": 0.12198033183813095, "learning_rate": 4.804299972369507e-05, "loss": 0.0428, "step": 34090 }, { "epoch": 0.1655, "grad_norm": 0.11845625191926956, "learning_rate": 4.804139613831354e-05, "loss": 0.0435, "step": 34100 }, { "epoch": 0.16555, "grad_norm": 0.1283753514289856, "learning_rate": 4.803979192298891e-05, "loss": 0.0432, "step": 34110 }, { "epoch": 0.1656, "grad_norm": 0.1280210018157959, "learning_rate": 4.803818707776502e-05, "loss": 0.0448, "step": 34120 }, { "epoch": 0.16565, "grad_norm": 0.165738046169281, "learning_rate": 4.803658160268575e-05, "loss": 0.0456, "step": 34130 }, { "epoch": 0.1657, "grad_norm": 0.134948268532753, "learning_rate": 4.803497549779499e-05, "loss": 0.047, "step": 34140 }, { "epoch": 0.16575, "grad_norm": 0.16082508862018585, "learning_rate": 4.803336876313666e-05, "loss": 0.0425, "step": 34150 }, { "epoch": 0.1658, "grad_norm": 0.12395468354225159, "learning_rate": 4.803176139875467e-05, "loss": 0.0436, "step": 34160 }, { "epoch": 0.16585, "grad_norm": 0.15877650678157806, "learning_rate": 4.803015340469299e-05, "loss": 0.0419, "step": 34170 }, { "epoch": 0.1659, "grad_norm": 0.1441933810710907, "learning_rate": 4.802854478099555e-05, "loss": 0.0442, "step": 34180 }, { "epoch": 0.16595, "grad_norm": 0.11970805376768112, "learning_rate": 4.802693552770636e-05, "loss": 0.0432, "step": 34190 }, { "epoch": 0.166, "grad_norm": 0.154872864484787, "learning_rate": 4.802532564486941e-05, "loss": 0.0426, "step": 34200 }, { "epoch": 0.16605, "grad_norm": 0.133418008685112, "learning_rate": 4.80237151325287e-05, "loss": 0.0424, "step": 34210 }, { "epoch": 0.1661, "grad_norm": 0.12561088800430298, "learning_rate": 4.802210399072826e-05, "loss": 0.0421, "step": 34220 }, { "epoch": 0.16615, "grad_norm": 0.1357862651348114, "learning_rate": 4.8020492219512156e-05, "loss": 0.0431, "step": 34230 }, { "epoch": 0.1662, "grad_norm": 0.12643086910247803, "learning_rate": 4.801887981892444e-05, "loss": 0.0436, "step": 34240 }, { "epoch": 0.16625, "grad_norm": 0.13576337695121765, "learning_rate": 4.80172667890092e-05, "loss": 0.0434, "step": 34250 }, { "epoch": 0.1663, "grad_norm": 0.10537552833557129, "learning_rate": 4.801565312981052e-05, "loss": 0.0419, "step": 34260 }, { "epoch": 0.16635, "grad_norm": 0.13234888017177582, "learning_rate": 4.8014038841372547e-05, "loss": 0.0417, "step": 34270 }, { "epoch": 0.1664, "grad_norm": 0.1402786672115326, "learning_rate": 4.8012423923739395e-05, "loss": 0.0425, "step": 34280 }, { "epoch": 0.16645, "grad_norm": 0.13282206654548645, "learning_rate": 4.801080837695521e-05, "loss": 0.0439, "step": 34290 }, { "epoch": 0.1665, "grad_norm": 0.12814287841320038, "learning_rate": 4.8009192201064174e-05, "loss": 0.0434, "step": 34300 }, { "epoch": 0.16655, "grad_norm": 0.10998068004846573, "learning_rate": 4.800757539611047e-05, "loss": 0.0425, "step": 34310 }, { "epoch": 0.1666, "grad_norm": 0.132798969745636, "learning_rate": 4.800595796213829e-05, "loss": 0.0414, "step": 34320 }, { "epoch": 0.16665, "grad_norm": 0.1641465574502945, "learning_rate": 4.800433989919187e-05, "loss": 0.0437, "step": 34330 }, { "epoch": 0.1667, "grad_norm": 0.1456586867570877, "learning_rate": 4.800272120731544e-05, "loss": 0.0423, "step": 34340 }, { "epoch": 0.16675, "grad_norm": 0.13222168385982513, "learning_rate": 4.800110188655325e-05, "loss": 0.0413, "step": 34350 }, { "epoch": 0.1668, "grad_norm": 0.14294667541980743, "learning_rate": 4.7999481936949575e-05, "loss": 0.0419, "step": 34360 }, { "epoch": 0.16685, "grad_norm": 0.1279636025428772, "learning_rate": 4.7997861358548704e-05, "loss": 0.0421, "step": 34370 }, { "epoch": 0.1669, "grad_norm": 0.12441389262676239, "learning_rate": 4.7996240151394945e-05, "loss": 0.0418, "step": 34380 }, { "epoch": 0.16695, "grad_norm": 0.11313042789697647, "learning_rate": 4.799461831553262e-05, "loss": 0.0401, "step": 34390 }, { "epoch": 0.167, "grad_norm": 0.11300710588693619, "learning_rate": 4.7992995851006066e-05, "loss": 0.042, "step": 34400 }, { "epoch": 0.16705, "grad_norm": 0.13788188993930817, "learning_rate": 4.799137275785965e-05, "loss": 0.0407, "step": 34410 }, { "epoch": 0.1671, "grad_norm": 0.12407025694847107, "learning_rate": 4.798974903613773e-05, "loss": 0.0386, "step": 34420 }, { "epoch": 0.16715, "grad_norm": 0.13035129010677338, "learning_rate": 4.7988124685884713e-05, "loss": 0.0419, "step": 34430 }, { "epoch": 0.1672, "grad_norm": 0.13006633520126343, "learning_rate": 4.798649970714501e-05, "loss": 0.04, "step": 34440 }, { "epoch": 0.16725, "grad_norm": 0.14761589467525482, "learning_rate": 4.798487409996303e-05, "loss": 0.0404, "step": 34450 }, { "epoch": 0.1673, "grad_norm": 0.1414937973022461, "learning_rate": 4.798324786438324e-05, "loss": 0.0404, "step": 34460 }, { "epoch": 0.16735, "grad_norm": 0.12699708342552185, "learning_rate": 4.798162100045008e-05, "loss": 0.0403, "step": 34470 }, { "epoch": 0.1674, "grad_norm": 0.1410609930753708, "learning_rate": 4.797999350820803e-05, "loss": 0.0419, "step": 34480 }, { "epoch": 0.16745, "grad_norm": 0.14010128378868103, "learning_rate": 4.7978365387701595e-05, "loss": 0.0416, "step": 34490 }, { "epoch": 0.1675, "grad_norm": 0.11545085906982422, "learning_rate": 4.797673663897529e-05, "loss": 0.0431, "step": 34500 }, { "epoch": 0.16755, "grad_norm": 0.12026017159223557, "learning_rate": 4.7975107262073634e-05, "loss": 0.0445, "step": 34510 }, { "epoch": 0.1676, "grad_norm": 0.13077577948570251, "learning_rate": 4.7973477257041175e-05, "loss": 0.0426, "step": 34520 }, { "epoch": 0.16765, "grad_norm": 0.1372055560350418, "learning_rate": 4.7971846623922476e-05, "loss": 0.0431, "step": 34530 }, { "epoch": 0.1677, "grad_norm": 0.1372019201517105, "learning_rate": 4.7970215362762125e-05, "loss": 0.0439, "step": 34540 }, { "epoch": 0.16775, "grad_norm": 0.13161469995975494, "learning_rate": 4.796858347360471e-05, "loss": 0.0417, "step": 34550 }, { "epoch": 0.1678, "grad_norm": 0.14171482622623444, "learning_rate": 4.796695095649485e-05, "loss": 0.0419, "step": 34560 }, { "epoch": 0.16785, "grad_norm": 0.10291971266269684, "learning_rate": 4.796531781147719e-05, "loss": 0.0435, "step": 34570 }, { "epoch": 0.1679, "grad_norm": 0.11898507177829742, "learning_rate": 4.7963684038596356e-05, "loss": 0.0431, "step": 34580 }, { "epoch": 0.16795, "grad_norm": 0.13572126626968384, "learning_rate": 4.7962049637897036e-05, "loss": 0.0417, "step": 34590 }, { "epoch": 0.168, "grad_norm": 0.1346631646156311, "learning_rate": 4.796041460942391e-05, "loss": 0.045, "step": 34600 }, { "epoch": 0.16805, "grad_norm": 0.11369791626930237, "learning_rate": 4.795877895322166e-05, "loss": 0.0408, "step": 34610 }, { "epoch": 0.1681, "grad_norm": 0.20911628007888794, "learning_rate": 4.7957142669335034e-05, "loss": 0.0435, "step": 34620 }, { "epoch": 0.16815, "grad_norm": 0.1218675747513771, "learning_rate": 4.795550575780874e-05, "loss": 0.0414, "step": 34630 }, { "epoch": 0.1682, "grad_norm": 0.15563416481018066, "learning_rate": 4.795386821868755e-05, "loss": 0.0425, "step": 34640 }, { "epoch": 0.16825, "grad_norm": 0.12554652988910675, "learning_rate": 4.7952230052016226e-05, "loss": 0.0412, "step": 34650 }, { "epoch": 0.1683, "grad_norm": 0.1206367164850235, "learning_rate": 4.795059125783955e-05, "loss": 0.0444, "step": 34660 }, { "epoch": 0.16835, "grad_norm": 0.1299329549074173, "learning_rate": 4.794895183620233e-05, "loss": 0.043, "step": 34670 }, { "epoch": 0.1684, "grad_norm": 0.17082592844963074, "learning_rate": 4.794731178714939e-05, "loss": 0.046, "step": 34680 }, { "epoch": 0.16845, "grad_norm": 0.13465429842472076, "learning_rate": 4.794567111072557e-05, "loss": 0.0422, "step": 34690 }, { "epoch": 0.1685, "grad_norm": 0.12303932756185532, "learning_rate": 4.794402980697572e-05, "loss": 0.0416, "step": 34700 }, { "epoch": 0.16855, "grad_norm": 0.10682618618011475, "learning_rate": 4.794238787594472e-05, "loss": 0.042, "step": 34710 }, { "epoch": 0.1686, "grad_norm": 0.11872848123311996, "learning_rate": 4.794074531767745e-05, "loss": 0.0421, "step": 34720 }, { "epoch": 0.16865, "grad_norm": 0.12520605325698853, "learning_rate": 4.7939102132218816e-05, "loss": 0.0433, "step": 34730 }, { "epoch": 0.1687, "grad_norm": 0.13752523064613342, "learning_rate": 4.793745831961375e-05, "loss": 0.0421, "step": 34740 }, { "epoch": 0.16875, "grad_norm": 0.1419668048620224, "learning_rate": 4.7935813879907195e-05, "loss": 0.0436, "step": 34750 }, { "epoch": 0.1688, "grad_norm": 0.12771490216255188, "learning_rate": 4.7934168813144106e-05, "loss": 0.0418, "step": 34760 }, { "epoch": 0.16885, "grad_norm": 0.11459469795227051, "learning_rate": 4.793252311936945e-05, "loss": 0.0411, "step": 34770 }, { "epoch": 0.1689, "grad_norm": 0.13765087723731995, "learning_rate": 4.793087679862824e-05, "loss": 0.0416, "step": 34780 }, { "epoch": 0.16895, "grad_norm": 0.11228399723768234, "learning_rate": 4.7929229850965465e-05, "loss": 0.0423, "step": 34790 }, { "epoch": 0.169, "grad_norm": 0.13364118337631226, "learning_rate": 4.7927582276426155e-05, "loss": 0.0412, "step": 34800 }, { "epoch": 0.16905, "grad_norm": 0.12470797449350357, "learning_rate": 4.792593407505537e-05, "loss": 0.0435, "step": 34810 }, { "epoch": 0.1691, "grad_norm": 0.16428443789482117, "learning_rate": 4.7924285246898154e-05, "loss": 0.0428, "step": 34820 }, { "epoch": 0.16915, "grad_norm": 0.13110315799713135, "learning_rate": 4.7922635791999594e-05, "loss": 0.0429, "step": 34830 }, { "epoch": 0.1692, "grad_norm": 0.13065670430660248, "learning_rate": 4.7920985710404774e-05, "loss": 0.042, "step": 34840 }, { "epoch": 0.16925, "grad_norm": 0.15915274620056152, "learning_rate": 4.7919335002158826e-05, "loss": 0.0442, "step": 34850 }, { "epoch": 0.1693, "grad_norm": 0.11836789548397064, "learning_rate": 4.7917683667306864e-05, "loss": 0.043, "step": 34860 }, { "epoch": 0.16935, "grad_norm": 0.125947505235672, "learning_rate": 4.791603170589405e-05, "loss": 0.0423, "step": 34870 }, { "epoch": 0.1694, "grad_norm": 0.14417004585266113, "learning_rate": 4.791437911796553e-05, "loss": 0.0441, "step": 34880 }, { "epoch": 0.16945, "grad_norm": 0.1497897356748581, "learning_rate": 4.791272590356649e-05, "loss": 0.0429, "step": 34890 }, { "epoch": 0.1695, "grad_norm": 0.1332363337278366, "learning_rate": 4.791107206274214e-05, "loss": 0.0447, "step": 34900 }, { "epoch": 0.16955, "grad_norm": 0.1140265166759491, "learning_rate": 4.790941759553769e-05, "loss": 0.0434, "step": 34910 }, { "epoch": 0.1696, "grad_norm": 0.1542254090309143, "learning_rate": 4.790776250199836e-05, "loss": 0.0438, "step": 34920 }, { "epoch": 0.16965, "grad_norm": 0.13593624532222748, "learning_rate": 4.790610678216941e-05, "loss": 0.0459, "step": 34930 }, { "epoch": 0.1697, "grad_norm": 0.11747143417596817, "learning_rate": 4.7904450436096104e-05, "loss": 0.0413, "step": 34940 }, { "epoch": 0.16975, "grad_norm": 0.10711923241615295, "learning_rate": 4.7902793463823735e-05, "loss": 0.0413, "step": 34950 }, { "epoch": 0.1698, "grad_norm": 0.11600740998983383, "learning_rate": 4.7901135865397594e-05, "loss": 0.0445, "step": 34960 }, { "epoch": 0.16985, "grad_norm": 0.12567615509033203, "learning_rate": 4.7899477640863e-05, "loss": 0.042, "step": 34970 }, { "epoch": 0.1699, "grad_norm": 0.12033237516880035, "learning_rate": 4.789781879026529e-05, "loss": 0.0408, "step": 34980 }, { "epoch": 0.16995, "grad_norm": 0.1295735239982605, "learning_rate": 4.789615931364983e-05, "loss": 0.0429, "step": 34990 }, { "epoch": 0.17, "grad_norm": 0.13830392062664032, "learning_rate": 4.789449921106196e-05, "loss": 0.0452, "step": 35000 }, { "epoch": 0.17005, "grad_norm": 0.14405027031898499, "learning_rate": 4.789283848254709e-05, "loss": 0.0408, "step": 35010 }, { "epoch": 0.1701, "grad_norm": 0.10081218183040619, "learning_rate": 4.789117712815061e-05, "loss": 0.04, "step": 35020 }, { "epoch": 0.17015, "grad_norm": 0.09701535850763321, "learning_rate": 4.788951514791795e-05, "loss": 0.0409, "step": 35030 }, { "epoch": 0.1702, "grad_norm": 0.11014352738857269, "learning_rate": 4.7887852541894554e-05, "loss": 0.0415, "step": 35040 }, { "epoch": 0.17025, "grad_norm": 0.11594094336032867, "learning_rate": 4.788618931012585e-05, "loss": 0.0409, "step": 35050 }, { "epoch": 0.1703, "grad_norm": 0.09745623916387558, "learning_rate": 4.788452545265734e-05, "loss": 0.04, "step": 35060 }, { "epoch": 0.17035, "grad_norm": 0.1125708594918251, "learning_rate": 4.7882860969534504e-05, "loss": 0.0401, "step": 35070 }, { "epoch": 0.1704, "grad_norm": 0.10175339877605438, "learning_rate": 4.788119586080284e-05, "loss": 0.0403, "step": 35080 }, { "epoch": 0.17045, "grad_norm": 0.15936651825904846, "learning_rate": 4.787953012650788e-05, "loss": 0.0435, "step": 35090 }, { "epoch": 0.1705, "grad_norm": 0.1685941219329834, "learning_rate": 4.787786376669516e-05, "loss": 0.0415, "step": 35100 }, { "epoch": 0.17055, "grad_norm": 0.14762447774410248, "learning_rate": 4.7876196781410245e-05, "loss": 0.0436, "step": 35110 }, { "epoch": 0.1706, "grad_norm": 0.13409827649593353, "learning_rate": 4.78745291706987e-05, "loss": 0.0439, "step": 35120 }, { "epoch": 0.17065, "grad_norm": 0.1589771956205368, "learning_rate": 4.787286093460611e-05, "loss": 0.0427, "step": 35130 }, { "epoch": 0.1707, "grad_norm": 0.12625166773796082, "learning_rate": 4.7871192073178105e-05, "loss": 0.0433, "step": 35140 }, { "epoch": 0.17075, "grad_norm": 0.1295214742422104, "learning_rate": 4.78695225864603e-05, "loss": 0.044, "step": 35150 }, { "epoch": 0.1708, "grad_norm": 0.11780333518981934, "learning_rate": 4.786785247449834e-05, "loss": 0.043, "step": 35160 }, { "epoch": 0.17085, "grad_norm": 0.10930639505386353, "learning_rate": 4.786618173733789e-05, "loss": 0.0426, "step": 35170 }, { "epoch": 0.1709, "grad_norm": 0.10027613490819931, "learning_rate": 4.786451037502462e-05, "loss": 0.0421, "step": 35180 }, { "epoch": 0.17095, "grad_norm": 0.13434745371341705, "learning_rate": 4.786283838760422e-05, "loss": 0.0443, "step": 35190 }, { "epoch": 0.171, "grad_norm": 0.13159210979938507, "learning_rate": 4.786116577512241e-05, "loss": 0.0438, "step": 35200 }, { "epoch": 0.17105, "grad_norm": 0.1493593007326126, "learning_rate": 4.785949253762492e-05, "loss": 0.0427, "step": 35210 }, { "epoch": 0.1711, "grad_norm": 0.11078088730573654, "learning_rate": 4.7857818675157484e-05, "loss": 0.041, "step": 35220 }, { "epoch": 0.17115, "grad_norm": 0.10402943938970566, "learning_rate": 4.7856144187765884e-05, "loss": 0.0434, "step": 35230 }, { "epoch": 0.1712, "grad_norm": 0.11463809758424759, "learning_rate": 4.785446907549588e-05, "loss": 0.0412, "step": 35240 }, { "epoch": 0.17125, "grad_norm": 0.12795351445674896, "learning_rate": 4.785279333839328e-05, "loss": 0.041, "step": 35250 }, { "epoch": 0.1713, "grad_norm": 0.09924820065498352, "learning_rate": 4.7851116976503895e-05, "loss": 0.0412, "step": 35260 }, { "epoch": 0.17135, "grad_norm": 0.10971652716398239, "learning_rate": 4.784943998987356e-05, "loss": 0.0443, "step": 35270 }, { "epoch": 0.1714, "grad_norm": 0.09839256852865219, "learning_rate": 4.784776237854811e-05, "loss": 0.041, "step": 35280 }, { "epoch": 0.17145, "grad_norm": 0.13040214776992798, "learning_rate": 4.7846084142573425e-05, "loss": 0.0418, "step": 35290 }, { "epoch": 0.1715, "grad_norm": 0.12357919663190842, "learning_rate": 4.784440528199539e-05, "loss": 0.0414, "step": 35300 }, { "epoch": 0.17155, "grad_norm": 0.13220131397247314, "learning_rate": 4.784272579685989e-05, "loss": 0.042, "step": 35310 }, { "epoch": 0.1716, "grad_norm": 0.1541513353586197, "learning_rate": 4.784104568721285e-05, "loss": 0.0434, "step": 35320 }, { "epoch": 0.17165, "grad_norm": 0.1281459629535675, "learning_rate": 4.78393649531002e-05, "loss": 0.0413, "step": 35330 }, { "epoch": 0.1717, "grad_norm": 0.11172624677419662, "learning_rate": 4.783768359456789e-05, "loss": 0.0418, "step": 35340 }, { "epoch": 0.17175, "grad_norm": 0.10488735139369965, "learning_rate": 4.7836001611661895e-05, "loss": 0.0416, "step": 35350 }, { "epoch": 0.1718, "grad_norm": 0.14712375402450562, "learning_rate": 4.783431900442819e-05, "loss": 0.0412, "step": 35360 }, { "epoch": 0.17185, "grad_norm": 0.12036072462797165, "learning_rate": 4.783263577291279e-05, "loss": 0.041, "step": 35370 }, { "epoch": 0.1719, "grad_norm": 0.11746695637702942, "learning_rate": 4.78309519171617e-05, "loss": 0.0438, "step": 35380 }, { "epoch": 0.17195, "grad_norm": 0.1313781887292862, "learning_rate": 4.782926743722096e-05, "loss": 0.0436, "step": 35390 }, { "epoch": 0.172, "grad_norm": 0.12686990201473236, "learning_rate": 4.7827582333136635e-05, "loss": 0.0424, "step": 35400 }, { "epoch": 0.17205, "grad_norm": 0.11555877327919006, "learning_rate": 4.7825896604954774e-05, "loss": 0.0419, "step": 35410 }, { "epoch": 0.1721, "grad_norm": 0.16383430361747742, "learning_rate": 4.782421025272148e-05, "loss": 0.0401, "step": 35420 }, { "epoch": 0.17215, "grad_norm": 0.14573214948177338, "learning_rate": 4.7822523276482844e-05, "loss": 0.0419, "step": 35430 }, { "epoch": 0.1722, "grad_norm": 0.1528214067220688, "learning_rate": 4.782083567628501e-05, "loss": 0.0407, "step": 35440 }, { "epoch": 0.17225, "grad_norm": 0.12795792520046234, "learning_rate": 4.78191474521741e-05, "loss": 0.0424, "step": 35450 }, { "epoch": 0.1723, "grad_norm": 0.12186912447214127, "learning_rate": 4.7817458604196264e-05, "loss": 0.0401, "step": 35460 }, { "epoch": 0.17235, "grad_norm": 0.12507309019565582, "learning_rate": 4.781576913239768e-05, "loss": 0.0425, "step": 35470 }, { "epoch": 0.1724, "grad_norm": 0.11313261091709137, "learning_rate": 4.781407903682454e-05, "loss": 0.0417, "step": 35480 }, { "epoch": 0.17245, "grad_norm": 0.1325627565383911, "learning_rate": 4.781238831752306e-05, "loss": 0.0437, "step": 35490 }, { "epoch": 0.1725, "grad_norm": 0.14015568792819977, "learning_rate": 4.781069697453944e-05, "loss": 0.0456, "step": 35500 }, { "epoch": 0.17255, "grad_norm": 0.1312084048986435, "learning_rate": 4.780900500791995e-05, "loss": 0.0416, "step": 35510 }, { "epoch": 0.1726, "grad_norm": 0.12628820538520813, "learning_rate": 4.780731241771082e-05, "loss": 0.0422, "step": 35520 }, { "epoch": 0.17265, "grad_norm": 0.10066360980272293, "learning_rate": 4.780561920395834e-05, "loss": 0.0419, "step": 35530 }, { "epoch": 0.1727, "grad_norm": 0.129759281873703, "learning_rate": 4.78039253667088e-05, "loss": 0.0422, "step": 35540 }, { "epoch": 0.17275, "grad_norm": 0.11697951704263687, "learning_rate": 4.780223090600851e-05, "loss": 0.0419, "step": 35550 }, { "epoch": 0.1728, "grad_norm": 0.101000115275383, "learning_rate": 4.7800535821903784e-05, "loss": 0.0398, "step": 35560 }, { "epoch": 0.17285, "grad_norm": 0.14083412289619446, "learning_rate": 4.779884011444098e-05, "loss": 0.0416, "step": 35570 }, { "epoch": 0.1729, "grad_norm": 0.12678256630897522, "learning_rate": 4.7797143783666454e-05, "loss": 0.0418, "step": 35580 }, { "epoch": 0.17295, "grad_norm": 0.11358196288347244, "learning_rate": 4.7795446829626575e-05, "loss": 0.0432, "step": 35590 }, { "epoch": 0.173, "grad_norm": 0.1325872242450714, "learning_rate": 4.779374925236775e-05, "loss": 0.0436, "step": 35600 }, { "epoch": 0.17305, "grad_norm": 0.11626822501420975, "learning_rate": 4.7792051051936386e-05, "loss": 0.0432, "step": 35610 }, { "epoch": 0.1731, "grad_norm": 0.13823387026786804, "learning_rate": 4.779035222837891e-05, "loss": 0.043, "step": 35620 }, { "epoch": 0.17315, "grad_norm": 0.13054156303405762, "learning_rate": 4.778865278174176e-05, "loss": 0.042, "step": 35630 }, { "epoch": 0.1732, "grad_norm": 0.19842328131198883, "learning_rate": 4.7786952712071404e-05, "loss": 0.045, "step": 35640 }, { "epoch": 0.17325, "grad_norm": 0.1172667071223259, "learning_rate": 4.778525201941432e-05, "loss": 0.0428, "step": 35650 }, { "epoch": 0.1733, "grad_norm": 0.11237437278032303, "learning_rate": 4.778355070381701e-05, "loss": 0.0424, "step": 35660 }, { "epoch": 0.17335, "grad_norm": 0.1331014186143875, "learning_rate": 4.778184876532598e-05, "loss": 0.0406, "step": 35670 }, { "epoch": 0.1734, "grad_norm": 0.12321636825799942, "learning_rate": 4.7780146203987766e-05, "loss": 0.0407, "step": 35680 }, { "epoch": 0.17345, "grad_norm": 0.1291119009256363, "learning_rate": 4.777844301984892e-05, "loss": 0.0433, "step": 35690 }, { "epoch": 0.1735, "grad_norm": 0.12867674231529236, "learning_rate": 4.777673921295599e-05, "loss": 0.0403, "step": 35700 }, { "epoch": 0.17355, "grad_norm": 0.10945740342140198, "learning_rate": 4.777503478335557e-05, "loss": 0.0419, "step": 35710 }, { "epoch": 0.1736, "grad_norm": 0.14428310096263885, "learning_rate": 4.777332973109425e-05, "loss": 0.0463, "step": 35720 }, { "epoch": 0.17365, "grad_norm": 0.09689648449420929, "learning_rate": 4.7771624056218655e-05, "loss": 0.0412, "step": 35730 }, { "epoch": 0.1737, "grad_norm": 0.10113327950239182, "learning_rate": 4.7769917758775405e-05, "loss": 0.0405, "step": 35740 }, { "epoch": 0.17375, "grad_norm": 0.1302248239517212, "learning_rate": 4.7768210838811166e-05, "loss": 0.0423, "step": 35750 }, { "epoch": 0.1738, "grad_norm": 0.14287564158439636, "learning_rate": 4.77665032963726e-05, "loss": 0.0421, "step": 35760 }, { "epoch": 0.17385, "grad_norm": 0.11934467405080795, "learning_rate": 4.776479513150638e-05, "loss": 0.042, "step": 35770 }, { "epoch": 0.1739, "grad_norm": 0.13890060782432556, "learning_rate": 4.776308634425922e-05, "loss": 0.0445, "step": 35780 }, { "epoch": 0.17395, "grad_norm": 0.10712046176195145, "learning_rate": 4.7761376934677826e-05, "loss": 0.0403, "step": 35790 }, { "epoch": 0.174, "grad_norm": 0.11675825715065002, "learning_rate": 4.775966690280894e-05, "loss": 0.0408, "step": 35800 }, { "epoch": 0.17405, "grad_norm": 0.12506303191184998, "learning_rate": 4.775795624869931e-05, "loss": 0.0418, "step": 35810 }, { "epoch": 0.1741, "grad_norm": 0.11785845458507538, "learning_rate": 4.77562449723957e-05, "loss": 0.0444, "step": 35820 }, { "epoch": 0.17415, "grad_norm": 0.1276514232158661, "learning_rate": 4.775453307394491e-05, "loss": 0.0427, "step": 35830 }, { "epoch": 0.1742, "grad_norm": 0.12888126075267792, "learning_rate": 4.775282055339373e-05, "loss": 0.0409, "step": 35840 }, { "epoch": 0.17425, "grad_norm": 0.12849290668964386, "learning_rate": 4.775110741078899e-05, "loss": 0.0415, "step": 35850 }, { "epoch": 0.1743, "grad_norm": 0.1092122346162796, "learning_rate": 4.774939364617751e-05, "loss": 0.0416, "step": 35860 }, { "epoch": 0.17435, "grad_norm": 0.10579238086938858, "learning_rate": 4.774767925960616e-05, "loss": 0.0404, "step": 35870 }, { "epoch": 0.1744, "grad_norm": 0.10392007976770401, "learning_rate": 4.774596425112181e-05, "loss": 0.0424, "step": 35880 }, { "epoch": 0.17445, "grad_norm": 0.13740697503089905, "learning_rate": 4.774424862077134e-05, "loss": 0.0418, "step": 35890 }, { "epoch": 0.1745, "grad_norm": 0.21001854538917542, "learning_rate": 4.774253236860165e-05, "loss": 0.041, "step": 35900 }, { "epoch": 0.17455, "grad_norm": 0.15767604112625122, "learning_rate": 4.7740815494659675e-05, "loss": 0.0419, "step": 35910 }, { "epoch": 0.1746, "grad_norm": 0.15760114789009094, "learning_rate": 4.773909799899234e-05, "loss": 0.0407, "step": 35920 }, { "epoch": 0.17465, "grad_norm": 0.12130020558834076, "learning_rate": 4.773737988164663e-05, "loss": 0.0402, "step": 35930 }, { "epoch": 0.1747, "grad_norm": 0.11412428319454193, "learning_rate": 4.773566114266948e-05, "loss": 0.0403, "step": 35940 }, { "epoch": 0.17475, "grad_norm": 0.10798191279172897, "learning_rate": 4.773394178210789e-05, "loss": 0.0413, "step": 35950 }, { "epoch": 0.1748, "grad_norm": 0.11256234347820282, "learning_rate": 4.773222180000888e-05, "loss": 0.0414, "step": 35960 }, { "epoch": 0.17485, "grad_norm": 0.10670135915279388, "learning_rate": 4.773050119641946e-05, "loss": 0.0421, "step": 35970 }, { "epoch": 0.1749, "grad_norm": 0.1281348466873169, "learning_rate": 4.7728779971386686e-05, "loss": 0.0406, "step": 35980 }, { "epoch": 0.17495, "grad_norm": 0.14552175998687744, "learning_rate": 4.7727058124957605e-05, "loss": 0.0429, "step": 35990 }, { "epoch": 0.175, "grad_norm": 0.1336289942264557, "learning_rate": 4.772533565717929e-05, "loss": 0.0414, "step": 36000 }, { "epoch": 0.17505, "grad_norm": 0.1263270229101181, "learning_rate": 4.772361256809884e-05, "loss": 0.0436, "step": 36010 }, { "epoch": 0.1751, "grad_norm": 0.11949208378791809, "learning_rate": 4.772188885776335e-05, "loss": 0.0445, "step": 36020 }, { "epoch": 0.17515, "grad_norm": 0.12418423593044281, "learning_rate": 4.772016452621997e-05, "loss": 0.0418, "step": 36030 }, { "epoch": 0.1752, "grad_norm": 0.11824215948581696, "learning_rate": 4.771843957351581e-05, "loss": 0.0415, "step": 36040 }, { "epoch": 0.17525, "grad_norm": 0.13300898671150208, "learning_rate": 4.771671399969806e-05, "loss": 0.0433, "step": 36050 }, { "epoch": 0.1753, "grad_norm": 0.1046031191945076, "learning_rate": 4.7714987804813875e-05, "loss": 0.0433, "step": 36060 }, { "epoch": 0.17535, "grad_norm": 0.12247949093580246, "learning_rate": 4.771326098891046e-05, "loss": 0.0425, "step": 36070 }, { "epoch": 0.1754, "grad_norm": 0.1275743842124939, "learning_rate": 4.771153355203502e-05, "loss": 0.0449, "step": 36080 }, { "epoch": 0.17545, "grad_norm": 0.11374276876449585, "learning_rate": 4.7709805494234796e-05, "loss": 0.041, "step": 36090 }, { "epoch": 0.1755, "grad_norm": 0.14593404531478882, "learning_rate": 4.770807681555701e-05, "loss": 0.0426, "step": 36100 }, { "epoch": 0.17555, "grad_norm": 0.14208447933197021, "learning_rate": 4.770634751604894e-05, "loss": 0.0407, "step": 36110 }, { "epoch": 0.1756, "grad_norm": 0.131479874253273, "learning_rate": 4.7704617595757856e-05, "loss": 0.0415, "step": 36120 }, { "epoch": 0.17565, "grad_norm": 0.11647072434425354, "learning_rate": 4.770288705473106e-05, "loss": 0.041, "step": 36130 }, { "epoch": 0.1757, "grad_norm": 0.11759953945875168, "learning_rate": 4.770115589301586e-05, "loss": 0.0411, "step": 36140 }, { "epoch": 0.17575, "grad_norm": 0.10574915260076523, "learning_rate": 4.769942411065959e-05, "loss": 0.0422, "step": 36150 }, { "epoch": 0.1758, "grad_norm": 0.10825838148593903, "learning_rate": 4.769769170770958e-05, "loss": 0.0419, "step": 36160 }, { "epoch": 0.17585, "grad_norm": 0.09498035907745361, "learning_rate": 4.7695958684213215e-05, "loss": 0.0407, "step": 36170 }, { "epoch": 0.1759, "grad_norm": 0.1188809871673584, "learning_rate": 4.7694225040217866e-05, "loss": 0.0424, "step": 36180 }, { "epoch": 0.17595, "grad_norm": 0.11033222079277039, "learning_rate": 4.7692490775770924e-05, "loss": 0.0411, "step": 36190 }, { "epoch": 0.176, "grad_norm": 0.1235862448811531, "learning_rate": 4.769075589091982e-05, "loss": 0.0421, "step": 36200 }, { "epoch": 0.17605, "grad_norm": 0.1003691628575325, "learning_rate": 4.768902038571197e-05, "loss": 0.0411, "step": 36210 }, { "epoch": 0.1761, "grad_norm": 0.1102064847946167, "learning_rate": 4.768728426019482e-05, "loss": 0.0426, "step": 36220 }, { "epoch": 0.17615, "grad_norm": 0.11367040872573853, "learning_rate": 4.768554751441585e-05, "loss": 0.0404, "step": 36230 }, { "epoch": 0.1762, "grad_norm": 0.12277144193649292, "learning_rate": 4.7683810148422534e-05, "loss": 0.0415, "step": 36240 }, { "epoch": 0.17625, "grad_norm": 0.11358033120632172, "learning_rate": 4.768207216226237e-05, "loss": 0.0414, "step": 36250 }, { "epoch": 0.1763, "grad_norm": 0.10719634592533112, "learning_rate": 4.768033355598287e-05, "loss": 0.0439, "step": 36260 }, { "epoch": 0.17635, "grad_norm": 0.1190221831202507, "learning_rate": 4.767859432963157e-05, "loss": 0.0414, "step": 36270 }, { "epoch": 0.1764, "grad_norm": 0.11484932154417038, "learning_rate": 4.7676854483256025e-05, "loss": 0.0411, "step": 36280 }, { "epoch": 0.17645, "grad_norm": 0.13288499414920807, "learning_rate": 4.7675114016903795e-05, "loss": 0.0416, "step": 36290 }, { "epoch": 0.1765, "grad_norm": 0.12918755412101746, "learning_rate": 4.767337293062247e-05, "loss": 0.0422, "step": 36300 }, { "epoch": 0.17655, "grad_norm": 0.12075911462306976, "learning_rate": 4.767163122445964e-05, "loss": 0.0426, "step": 36310 }, { "epoch": 0.1766, "grad_norm": 0.12961497902870178, "learning_rate": 4.766988889846294e-05, "loss": 0.0429, "step": 36320 }, { "epoch": 0.17665, "grad_norm": 0.1006336435675621, "learning_rate": 4.766814595267999e-05, "loss": 0.0407, "step": 36330 }, { "epoch": 0.1767, "grad_norm": 0.12820640206336975, "learning_rate": 4.766640238715844e-05, "loss": 0.0408, "step": 36340 }, { "epoch": 0.17675, "grad_norm": 0.12266498059034348, "learning_rate": 4.7664658201945966e-05, "loss": 0.042, "step": 36350 }, { "epoch": 0.1768, "grad_norm": 0.1417977511882782, "learning_rate": 4.766291339709026e-05, "loss": 0.0421, "step": 36360 }, { "epoch": 0.17685, "grad_norm": 0.14648132026195526, "learning_rate": 4.766116797263901e-05, "loss": 0.0439, "step": 36370 }, { "epoch": 0.1769, "grad_norm": 0.13662028312683105, "learning_rate": 4.7659421928639934e-05, "loss": 0.0426, "step": 36380 }, { "epoch": 0.17695, "grad_norm": 0.12682056427001953, "learning_rate": 4.765767526514079e-05, "loss": 0.0422, "step": 36390 }, { "epoch": 0.177, "grad_norm": 0.11788281798362732, "learning_rate": 4.7655927982189305e-05, "loss": 0.0437, "step": 36400 }, { "epoch": 0.17705, "grad_norm": 0.09522189944982529, "learning_rate": 4.765418007983327e-05, "loss": 0.0411, "step": 36410 }, { "epoch": 0.1771, "grad_norm": 0.11930128931999207, "learning_rate": 4.765243155812045e-05, "loss": 0.0417, "step": 36420 }, { "epoch": 0.17715, "grad_norm": 0.11503031104803085, "learning_rate": 4.7650682417098666e-05, "loss": 0.0429, "step": 36430 }, { "epoch": 0.1772, "grad_norm": 0.1319715827703476, "learning_rate": 4.764893265681574e-05, "loss": 0.0417, "step": 36440 }, { "epoch": 0.17725, "grad_norm": 0.13776862621307373, "learning_rate": 4.76471822773195e-05, "loss": 0.0428, "step": 36450 }, { "epoch": 0.1773, "grad_norm": 0.13260617852210999, "learning_rate": 4.76454312786578e-05, "loss": 0.0427, "step": 36460 }, { "epoch": 0.17735, "grad_norm": 0.11885460466146469, "learning_rate": 4.764367966087852e-05, "loss": 0.042, "step": 36470 }, { "epoch": 0.1774, "grad_norm": 0.1272895336151123, "learning_rate": 4.764192742402955e-05, "loss": 0.043, "step": 36480 }, { "epoch": 0.17745, "grad_norm": 0.11527583003044128, "learning_rate": 4.764017456815878e-05, "loss": 0.0429, "step": 36490 }, { "epoch": 0.1775, "grad_norm": 0.14333097636699677, "learning_rate": 4.7638421093314156e-05, "loss": 0.0426, "step": 36500 }, { "epoch": 0.17755, "grad_norm": 0.11847139149904251, "learning_rate": 4.763666699954359e-05, "loss": 0.0417, "step": 36510 }, { "epoch": 0.1776, "grad_norm": 0.1231340542435646, "learning_rate": 4.763491228689506e-05, "loss": 0.0414, "step": 36520 }, { "epoch": 0.17765, "grad_norm": 0.09365687519311905, "learning_rate": 4.7633156955416535e-05, "loss": 0.0397, "step": 36530 }, { "epoch": 0.1777, "grad_norm": 0.13405677676200867, "learning_rate": 4.763140100515599e-05, "loss": 0.0412, "step": 36540 }, { "epoch": 0.17775, "grad_norm": 0.12576311826705933, "learning_rate": 4.762964443616146e-05, "loss": 0.0429, "step": 36550 }, { "epoch": 0.1778, "grad_norm": 0.11587604135274887, "learning_rate": 4.762788724848094e-05, "loss": 0.0414, "step": 36560 }, { "epoch": 0.17785, "grad_norm": 0.11648576706647873, "learning_rate": 4.7626129442162495e-05, "loss": 0.0401, "step": 36570 }, { "epoch": 0.1779, "grad_norm": 0.12591294944286346, "learning_rate": 4.762437101725416e-05, "loss": 0.0421, "step": 36580 }, { "epoch": 0.17795, "grad_norm": 0.21443773806095123, "learning_rate": 4.762261197380402e-05, "loss": 0.0501, "step": 36590 }, { "epoch": 0.178, "grad_norm": 0.1882997751235962, "learning_rate": 4.7620852311860176e-05, "loss": 0.0456, "step": 36600 }, { "epoch": 0.17805, "grad_norm": 0.12708589434623718, "learning_rate": 4.761909203147073e-05, "loss": 0.0427, "step": 36610 }, { "epoch": 0.1781, "grad_norm": 0.12208003550767899, "learning_rate": 4.7617331132683795e-05, "loss": 0.0424, "step": 36620 }, { "epoch": 0.17815, "grad_norm": 0.1320149004459381, "learning_rate": 4.761556961554753e-05, "loss": 0.0435, "step": 36630 }, { "epoch": 0.1782, "grad_norm": 0.13649402558803558, "learning_rate": 4.7613807480110086e-05, "loss": 0.0448, "step": 36640 }, { "epoch": 0.17825, "grad_norm": 0.134579598903656, "learning_rate": 4.761204472641965e-05, "loss": 0.0432, "step": 36650 }, { "epoch": 0.1783, "grad_norm": 0.17320720851421356, "learning_rate": 4.761028135452439e-05, "loss": 0.0449, "step": 36660 }, { "epoch": 0.17835, "grad_norm": 0.11641421914100647, "learning_rate": 4.760851736447254e-05, "loss": 0.0445, "step": 36670 }, { "epoch": 0.1784, "grad_norm": 0.12470538914203644, "learning_rate": 4.7606752756312325e-05, "loss": 0.0414, "step": 36680 }, { "epoch": 0.17845, "grad_norm": 0.13089129328727722, "learning_rate": 4.760498753009197e-05, "loss": 0.0444, "step": 36690 }, { "epoch": 0.1785, "grad_norm": 0.11784205585718155, "learning_rate": 4.760322168585976e-05, "loss": 0.0417, "step": 36700 }, { "epoch": 0.17855, "grad_norm": 0.09654921293258667, "learning_rate": 4.760145522366395e-05, "loss": 0.0411, "step": 36710 }, { "epoch": 0.1786, "grad_norm": 0.11832010000944138, "learning_rate": 4.759968814355286e-05, "loss": 0.0419, "step": 36720 }, { "epoch": 0.17865, "grad_norm": 0.09841049462556839, "learning_rate": 4.759792044557477e-05, "loss": 0.0403, "step": 36730 }, { "epoch": 0.1787, "grad_norm": 0.12049797922372818, "learning_rate": 4.759615212977803e-05, "loss": 0.0417, "step": 36740 }, { "epoch": 0.17875, "grad_norm": 0.11145062744617462, "learning_rate": 4.759438319621099e-05, "loss": 0.0434, "step": 36750 }, { "epoch": 0.1788, "grad_norm": 0.11987379938364029, "learning_rate": 4.7592613644921993e-05, "loss": 0.0409, "step": 36760 }, { "epoch": 0.17885, "grad_norm": 0.11805453151464462, "learning_rate": 4.7590843475959424e-05, "loss": 0.0407, "step": 36770 }, { "epoch": 0.1789, "grad_norm": 0.1306656002998352, "learning_rate": 4.758907268937168e-05, "loss": 0.0413, "step": 36780 }, { "epoch": 0.17895, "grad_norm": 0.11330852657556534, "learning_rate": 4.758730128520718e-05, "loss": 0.042, "step": 36790 }, { "epoch": 0.179, "grad_norm": 0.13489657640457153, "learning_rate": 4.758552926351435e-05, "loss": 0.0414, "step": 36800 }, { "epoch": 0.17905, "grad_norm": 0.10893921554088593, "learning_rate": 4.758375662434163e-05, "loss": 0.0428, "step": 36810 }, { "epoch": 0.1791, "grad_norm": 0.11990348249673843, "learning_rate": 4.758198336773749e-05, "loss": 0.041, "step": 36820 }, { "epoch": 0.17915, "grad_norm": 0.09690472483634949, "learning_rate": 4.758020949375041e-05, "loss": 0.0405, "step": 36830 }, { "epoch": 0.1792, "grad_norm": 0.12950287759304047, "learning_rate": 4.757843500242889e-05, "loss": 0.0406, "step": 36840 }, { "epoch": 0.17925, "grad_norm": 0.13361723721027374, "learning_rate": 4.757665989382143e-05, "loss": 0.0416, "step": 36850 }, { "epoch": 0.1793, "grad_norm": 0.11338499933481216, "learning_rate": 4.7574884167976575e-05, "loss": 0.0416, "step": 36860 }, { "epoch": 0.17935, "grad_norm": 0.12144909054040909, "learning_rate": 4.757310782494286e-05, "loss": 0.0409, "step": 36870 }, { "epoch": 0.1794, "grad_norm": 0.1445496678352356, "learning_rate": 4.7571330864768874e-05, "loss": 0.0424, "step": 36880 }, { "epoch": 0.17945, "grad_norm": 0.1517098993062973, "learning_rate": 4.756955328750317e-05, "loss": 0.0439, "step": 36890 }, { "epoch": 0.1795, "grad_norm": 0.11401533335447311, "learning_rate": 4.756777509319436e-05, "loss": 0.0395, "step": 36900 }, { "epoch": 0.17955, "grad_norm": 0.1113637238740921, "learning_rate": 4.7565996281891054e-05, "loss": 0.0411, "step": 36910 }, { "epoch": 0.1796, "grad_norm": 0.1286325603723526, "learning_rate": 4.7564216853641896e-05, "loss": 0.0425, "step": 36920 }, { "epoch": 0.17965, "grad_norm": 0.11840667575597763, "learning_rate": 4.756243680849552e-05, "loss": 0.0445, "step": 36930 }, { "epoch": 0.1797, "grad_norm": 0.13859489560127258, "learning_rate": 4.75606561465006e-05, "loss": 0.0427, "step": 36940 }, { "epoch": 0.17975, "grad_norm": 0.13032551109790802, "learning_rate": 4.7558874867705815e-05, "loss": 0.0404, "step": 36950 }, { "epoch": 0.1798, "grad_norm": 0.15403352677822113, "learning_rate": 4.755709297215987e-05, "loss": 0.0411, "step": 36960 }, { "epoch": 0.17985, "grad_norm": 0.12021184712648392, "learning_rate": 4.7555310459911476e-05, "loss": 0.0417, "step": 36970 }, { "epoch": 0.1799, "grad_norm": 0.09335090965032578, "learning_rate": 4.755352733100936e-05, "loss": 0.04, "step": 36980 }, { "epoch": 0.17995, "grad_norm": 0.10187531262636185, "learning_rate": 4.755174358550229e-05, "loss": 0.0395, "step": 36990 }, { "epoch": 0.18, "grad_norm": 0.11029377579689026, "learning_rate": 4.7549959223439016e-05, "loss": 0.0408, "step": 37000 }, { "epoch": 0.18005, "grad_norm": 0.10429729521274567, "learning_rate": 4.754817424486833e-05, "loss": 0.04, "step": 37010 }, { "epoch": 0.1801, "grad_norm": 0.10928675532341003, "learning_rate": 4.754638864983904e-05, "loss": 0.0411, "step": 37020 }, { "epoch": 0.18015, "grad_norm": 0.11169719696044922, "learning_rate": 4.7544602438399945e-05, "loss": 0.041, "step": 37030 }, { "epoch": 0.1802, "grad_norm": 0.1231222003698349, "learning_rate": 4.754281561059989e-05, "loss": 0.042, "step": 37040 }, { "epoch": 0.18025, "grad_norm": 0.11430267244577408, "learning_rate": 4.754102816648772e-05, "loss": 0.041, "step": 37050 }, { "epoch": 0.1803, "grad_norm": 0.11378253251314163, "learning_rate": 4.753924010611231e-05, "loss": 0.041, "step": 37060 }, { "epoch": 0.18035, "grad_norm": 0.11739642173051834, "learning_rate": 4.753745142952255e-05, "loss": 0.0395, "step": 37070 }, { "epoch": 0.1804, "grad_norm": 0.10547371953725815, "learning_rate": 4.7535662136767336e-05, "loss": 0.0421, "step": 37080 }, { "epoch": 0.18045, "grad_norm": 0.11511596292257309, "learning_rate": 4.753387222789558e-05, "loss": 0.0406, "step": 37090 }, { "epoch": 0.1805, "grad_norm": 0.11022399365901947, "learning_rate": 4.753208170295622e-05, "loss": 0.0427, "step": 37100 }, { "epoch": 0.18055, "grad_norm": 0.10935037583112717, "learning_rate": 4.7530290561998216e-05, "loss": 0.0443, "step": 37110 }, { "epoch": 0.1806, "grad_norm": 0.11182756721973419, "learning_rate": 4.7528498805070534e-05, "loss": 0.043, "step": 37120 }, { "epoch": 0.18065, "grad_norm": 0.10958198457956314, "learning_rate": 4.752670643222216e-05, "loss": 0.0403, "step": 37130 }, { "epoch": 0.1807, "grad_norm": 0.12529942393302917, "learning_rate": 4.7524913443502086e-05, "loss": 0.0441, "step": 37140 }, { "epoch": 0.18075, "grad_norm": 0.17277610301971436, "learning_rate": 4.7523119838959345e-05, "loss": 0.0424, "step": 37150 }, { "epoch": 0.1808, "grad_norm": 0.12355010211467743, "learning_rate": 4.752132561864297e-05, "loss": 0.0414, "step": 37160 }, { "epoch": 0.18085, "grad_norm": 0.12070120126008987, "learning_rate": 4.751953078260202e-05, "loss": 0.0427, "step": 37170 }, { "epoch": 0.1809, "grad_norm": 0.12554572522640228, "learning_rate": 4.751773533088554e-05, "loss": 0.0446, "step": 37180 }, { "epoch": 0.18095, "grad_norm": 0.12956035137176514, "learning_rate": 4.751593926354265e-05, "loss": 0.0413, "step": 37190 }, { "epoch": 0.181, "grad_norm": 0.10663142800331116, "learning_rate": 4.751414258062244e-05, "loss": 0.0421, "step": 37200 }, { "epoch": 0.18105, "grad_norm": 0.1439746767282486, "learning_rate": 4.751234528217402e-05, "loss": 0.0426, "step": 37210 }, { "epoch": 0.1811, "grad_norm": 0.1384759396314621, "learning_rate": 4.751054736824655e-05, "loss": 0.0429, "step": 37220 }, { "epoch": 0.18115, "grad_norm": 0.14315307140350342, "learning_rate": 4.750874883888916e-05, "loss": 0.0426, "step": 37230 }, { "epoch": 0.1812, "grad_norm": 0.11895201355218887, "learning_rate": 4.750694969415105e-05, "loss": 0.0423, "step": 37240 }, { "epoch": 0.18125, "grad_norm": 0.14067918062210083, "learning_rate": 4.750514993408137e-05, "loss": 0.0422, "step": 37250 }, { "epoch": 0.1813, "grad_norm": 0.13616418838500977, "learning_rate": 4.7503349558729356e-05, "loss": 0.0425, "step": 37260 }, { "epoch": 0.18135, "grad_norm": 0.1241074725985527, "learning_rate": 4.750154856814422e-05, "loss": 0.041, "step": 37270 }, { "epoch": 0.1814, "grad_norm": 0.11493717133998871, "learning_rate": 4.74997469623752e-05, "loss": 0.0413, "step": 37280 }, { "epoch": 0.18145, "grad_norm": 0.11242803186178207, "learning_rate": 4.7497944741471546e-05, "loss": 0.0402, "step": 37290 }, { "epoch": 0.1815, "grad_norm": 0.10811872780323029, "learning_rate": 4.749614190548254e-05, "loss": 0.0425, "step": 37300 }, { "epoch": 0.18155, "grad_norm": 0.10886134952306747, "learning_rate": 4.749433845445746e-05, "loss": 0.04, "step": 37310 }, { "epoch": 0.1816, "grad_norm": 0.12404333055019379, "learning_rate": 4.749253438844562e-05, "loss": 0.0424, "step": 37320 }, { "epoch": 0.18165, "grad_norm": 0.11045855283737183, "learning_rate": 4.7490729707496346e-05, "loss": 0.0427, "step": 37330 }, { "epoch": 0.1817, "grad_norm": 0.12336307018995285, "learning_rate": 4.7488924411658965e-05, "loss": 0.0427, "step": 37340 }, { "epoch": 0.18175, "grad_norm": 0.1207285076379776, "learning_rate": 4.748711850098284e-05, "loss": 0.0414, "step": 37350 }, { "epoch": 0.1818, "grad_norm": 0.10973604023456573, "learning_rate": 4.748531197551734e-05, "loss": 0.0417, "step": 37360 }, { "epoch": 0.18185, "grad_norm": 0.11707280576229095, "learning_rate": 4.7483504835311866e-05, "loss": 0.0425, "step": 37370 }, { "epoch": 0.1819, "grad_norm": 0.12151395529508591, "learning_rate": 4.748169708041581e-05, "loss": 0.0432, "step": 37380 }, { "epoch": 0.18195, "grad_norm": 0.13998086750507355, "learning_rate": 4.747988871087861e-05, "loss": 0.0435, "step": 37390 }, { "epoch": 0.182, "grad_norm": 0.10092715173959732, "learning_rate": 4.7478079726749686e-05, "loss": 0.0409, "step": 37400 }, { "epoch": 0.18205, "grad_norm": 0.1240464299917221, "learning_rate": 4.7476270128078506e-05, "loss": 0.0411, "step": 37410 }, { "epoch": 0.1821, "grad_norm": 0.11496293544769287, "learning_rate": 4.7474459914914556e-05, "loss": 0.0406, "step": 37420 }, { "epoch": 0.18215, "grad_norm": 0.14588946104049683, "learning_rate": 4.747264908730731e-05, "loss": 0.0423, "step": 37430 }, { "epoch": 0.1822, "grad_norm": 0.10504837334156036, "learning_rate": 4.747083764530628e-05, "loss": 0.0411, "step": 37440 }, { "epoch": 0.18225, "grad_norm": 0.13117945194244385, "learning_rate": 4.746902558896099e-05, "loss": 0.0424, "step": 37450 }, { "epoch": 0.1823, "grad_norm": 0.13221648335456848, "learning_rate": 4.746721291832098e-05, "loss": 0.0436, "step": 37460 }, { "epoch": 0.18235, "grad_norm": 0.1333584040403366, "learning_rate": 4.7465399633435814e-05, "loss": 0.0418, "step": 37470 }, { "epoch": 0.1824, "grad_norm": 0.12405399233102798, "learning_rate": 4.7463585734355064e-05, "loss": 0.045, "step": 37480 }, { "epoch": 0.18245, "grad_norm": 0.13527528941631317, "learning_rate": 4.746177122112831e-05, "loss": 0.0419, "step": 37490 }, { "epoch": 0.1825, "grad_norm": 0.14091968536376953, "learning_rate": 4.745995609380518e-05, "loss": 0.0444, "step": 37500 }, { "epoch": 0.18255, "grad_norm": 0.13996820151805878, "learning_rate": 4.745814035243528e-05, "loss": 0.0405, "step": 37510 }, { "epoch": 0.1826, "grad_norm": 0.11088625341653824, "learning_rate": 4.7456323997068264e-05, "loss": 0.0424, "step": 37520 }, { "epoch": 0.18265, "grad_norm": 0.12366887181997299, "learning_rate": 4.7454507027753784e-05, "loss": 0.0434, "step": 37530 }, { "epoch": 0.1827, "grad_norm": 0.11137080937623978, "learning_rate": 4.745268944454152e-05, "loss": 0.0411, "step": 37540 }, { "epoch": 0.18275, "grad_norm": 0.1298379898071289, "learning_rate": 4.745087124748116e-05, "loss": 0.0406, "step": 37550 }, { "epoch": 0.1828, "grad_norm": 0.11690547317266464, "learning_rate": 4.744905243662241e-05, "loss": 0.0402, "step": 37560 }, { "epoch": 0.18285, "grad_norm": 0.11526573449373245, "learning_rate": 4.744723301201501e-05, "loss": 0.0421, "step": 37570 }, { "epoch": 0.1829, "grad_norm": 0.12407028675079346, "learning_rate": 4.7445412973708694e-05, "loss": 0.0392, "step": 37580 }, { "epoch": 0.18295, "grad_norm": 0.13423435389995575, "learning_rate": 4.74435923217532e-05, "loss": 0.0395, "step": 37590 }, { "epoch": 0.183, "grad_norm": 0.12274365872144699, "learning_rate": 4.744177105619835e-05, "loss": 0.0405, "step": 37600 }, { "epoch": 0.18305, "grad_norm": 0.10433799028396606, "learning_rate": 4.743994917709389e-05, "loss": 0.039, "step": 37610 }, { "epoch": 0.1831, "grad_norm": 0.1150219738483429, "learning_rate": 4.7438126684489656e-05, "loss": 0.0414, "step": 37620 }, { "epoch": 0.18315, "grad_norm": 0.13015680015087128, "learning_rate": 4.743630357843547e-05, "loss": 0.04, "step": 37630 }, { "epoch": 0.1832, "grad_norm": 0.12152936309576035, "learning_rate": 4.743447985898117e-05, "loss": 0.0415, "step": 37640 }, { "epoch": 0.18325, "grad_norm": 0.11936400085687637, "learning_rate": 4.743265552617663e-05, "loss": 0.04, "step": 37650 }, { "epoch": 0.1833, "grad_norm": 0.151456817984581, "learning_rate": 4.74308305800717e-05, "loss": 0.0408, "step": 37660 }, { "epoch": 0.18335, "grad_norm": 0.13457858562469482, "learning_rate": 4.74290050207163e-05, "loss": 0.042, "step": 37670 }, { "epoch": 0.1834, "grad_norm": 0.12324851751327515, "learning_rate": 4.742717884816032e-05, "loss": 0.0411, "step": 37680 }, { "epoch": 0.18345, "grad_norm": 0.12512250244617462, "learning_rate": 4.74253520624537e-05, "loss": 0.0397, "step": 37690 }, { "epoch": 0.1835, "grad_norm": 0.12226025015115738, "learning_rate": 4.742352466364638e-05, "loss": 0.0387, "step": 37700 }, { "epoch": 0.18355, "grad_norm": 0.12683923542499542, "learning_rate": 4.742169665178832e-05, "loss": 0.0397, "step": 37710 }, { "epoch": 0.1836, "grad_norm": 0.1118398904800415, "learning_rate": 4.741986802692949e-05, "loss": 0.0389, "step": 37720 }, { "epoch": 0.18365, "grad_norm": 0.11483946442604065, "learning_rate": 4.74180387891199e-05, "loss": 0.0433, "step": 37730 }, { "epoch": 0.1837, "grad_norm": 0.11242949962615967, "learning_rate": 4.741620893840955e-05, "loss": 0.0392, "step": 37740 }, { "epoch": 0.18375, "grad_norm": 0.1014494001865387, "learning_rate": 4.7414378474848464e-05, "loss": 0.0398, "step": 37750 }, { "epoch": 0.1838, "grad_norm": 0.10241231322288513, "learning_rate": 4.741254739848669e-05, "loss": 0.0395, "step": 37760 }, { "epoch": 0.18385, "grad_norm": 0.10721082240343094, "learning_rate": 4.74107157093743e-05, "loss": 0.0425, "step": 37770 }, { "epoch": 0.1839, "grad_norm": 0.13023872673511505, "learning_rate": 4.740888340756136e-05, "loss": 0.0391, "step": 37780 }, { "epoch": 0.18395, "grad_norm": 0.11415564268827438, "learning_rate": 4.740705049309796e-05, "loss": 0.0416, "step": 37790 }, { "epoch": 0.184, "grad_norm": 0.1057114526629448, "learning_rate": 4.740521696603423e-05, "loss": 0.0426, "step": 37800 }, { "epoch": 0.18405, "grad_norm": 0.12084627896547318, "learning_rate": 4.740338282642027e-05, "loss": 0.0413, "step": 37810 }, { "epoch": 0.1841, "grad_norm": 0.10022873431444168, "learning_rate": 4.7401548074306245e-05, "loss": 0.0388, "step": 37820 }, { "epoch": 0.18415, "grad_norm": 0.09546560049057007, "learning_rate": 4.7399712709742316e-05, "loss": 0.0412, "step": 37830 }, { "epoch": 0.1842, "grad_norm": 0.12398182600736618, "learning_rate": 4.739787673277865e-05, "loss": 0.04, "step": 37840 }, { "epoch": 0.18425, "grad_norm": 0.11884655058383942, "learning_rate": 4.739604014346545e-05, "loss": 0.0412, "step": 37850 }, { "epoch": 0.1843, "grad_norm": 0.1322961449623108, "learning_rate": 4.7394202941852925e-05, "loss": 0.0416, "step": 37860 }, { "epoch": 0.18435, "grad_norm": 0.11267701536417007, "learning_rate": 4.7392365127991315e-05, "loss": 0.0417, "step": 37870 }, { "epoch": 0.1844, "grad_norm": 0.10018815845251083, "learning_rate": 4.739052670193085e-05, "loss": 0.0405, "step": 37880 }, { "epoch": 0.18445, "grad_norm": 0.11316210776567459, "learning_rate": 4.7388687663721784e-05, "loss": 0.0398, "step": 37890 }, { "epoch": 0.1845, "grad_norm": 0.11430251598358154, "learning_rate": 4.738684801341442e-05, "loss": 0.0425, "step": 37900 }, { "epoch": 0.18455, "grad_norm": 0.12140493839979172, "learning_rate": 4.738500775105904e-05, "loss": 0.0406, "step": 37910 }, { "epoch": 0.1846, "grad_norm": 0.14046849310398102, "learning_rate": 4.7383166876705966e-05, "loss": 0.0402, "step": 37920 }, { "epoch": 0.18465, "grad_norm": 0.17947669327259064, "learning_rate": 4.73813253904055e-05, "loss": 0.0446, "step": 37930 }, { "epoch": 0.1847, "grad_norm": 0.13138681650161743, "learning_rate": 4.7379483292208026e-05, "loss": 0.0399, "step": 37940 }, { "epoch": 0.18475, "grad_norm": 0.1199428141117096, "learning_rate": 4.7377640582163876e-05, "loss": 0.0399, "step": 37950 }, { "epoch": 0.1848, "grad_norm": 0.1424848437309265, "learning_rate": 4.737579726032344e-05, "loss": 0.0425, "step": 37960 }, { "epoch": 0.18485, "grad_norm": 0.1481008380651474, "learning_rate": 4.7373953326737114e-05, "loss": 0.0423, "step": 37970 }, { "epoch": 0.1849, "grad_norm": 0.15787029266357422, "learning_rate": 4.7372108781455306e-05, "loss": 0.0428, "step": 37980 }, { "epoch": 0.18495, "grad_norm": 0.16060331463813782, "learning_rate": 4.737026362452845e-05, "loss": 0.0415, "step": 37990 }, { "epoch": 0.185, "grad_norm": 0.1347510814666748, "learning_rate": 4.7368417856006996e-05, "loss": 0.0408, "step": 38000 }, { "epoch": 0.18505, "grad_norm": 0.13687683641910553, "learning_rate": 4.73665714759414e-05, "loss": 0.0395, "step": 38010 }, { "epoch": 0.1851, "grad_norm": 0.13434498012065887, "learning_rate": 4.7364724484382137e-05, "loss": 0.0399, "step": 38020 }, { "epoch": 0.18515, "grad_norm": 0.12056301534175873, "learning_rate": 4.7362876881379714e-05, "loss": 0.0393, "step": 38030 }, { "epoch": 0.1852, "grad_norm": 0.10731586813926697, "learning_rate": 4.736102866698463e-05, "loss": 0.0402, "step": 38040 }, { "epoch": 0.18525, "grad_norm": 0.14883634448051453, "learning_rate": 4.7359179841247436e-05, "loss": 0.0407, "step": 38050 }, { "epoch": 0.1853, "grad_norm": 0.13408304750919342, "learning_rate": 4.735733040421866e-05, "loss": 0.0414, "step": 38060 }, { "epoch": 0.18535, "grad_norm": 0.13457036018371582, "learning_rate": 4.735548035594887e-05, "loss": 0.0408, "step": 38070 }, { "epoch": 0.1854, "grad_norm": 0.12219943106174469, "learning_rate": 4.7353629696488636e-05, "loss": 0.0409, "step": 38080 }, { "epoch": 0.18545, "grad_norm": 0.10472157597541809, "learning_rate": 4.735177842588857e-05, "loss": 0.041, "step": 38090 }, { "epoch": 0.1855, "grad_norm": 0.11975611001253128, "learning_rate": 4.7349926544199285e-05, "loss": 0.0392, "step": 38100 }, { "epoch": 0.18555, "grad_norm": 0.10920672118663788, "learning_rate": 4.7348074051471404e-05, "loss": 0.0403, "step": 38110 }, { "epoch": 0.1856, "grad_norm": 0.1137668713927269, "learning_rate": 4.734622094775557e-05, "loss": 0.0389, "step": 38120 }, { "epoch": 0.18565, "grad_norm": 0.11372815817594528, "learning_rate": 4.734436723310245e-05, "loss": 0.0413, "step": 38130 }, { "epoch": 0.1857, "grad_norm": 0.11171303689479828, "learning_rate": 4.734251290756272e-05, "loss": 0.0415, "step": 38140 }, { "epoch": 0.18575, "grad_norm": 0.1370186060667038, "learning_rate": 4.7340657971187094e-05, "loss": 0.0426, "step": 38150 }, { "epoch": 0.1858, "grad_norm": 0.13567283749580383, "learning_rate": 4.7338802424026266e-05, "loss": 0.0413, "step": 38160 }, { "epoch": 0.18585, "grad_norm": 0.12056778371334076, "learning_rate": 4.7336946266130965e-05, "loss": 0.0421, "step": 38170 }, { "epoch": 0.1859, "grad_norm": 0.11835743486881256, "learning_rate": 4.733508949755195e-05, "loss": 0.0409, "step": 38180 }, { "epoch": 0.18595, "grad_norm": 0.1459125429391861, "learning_rate": 4.733323211833998e-05, "loss": 0.0417, "step": 38190 }, { "epoch": 0.186, "grad_norm": 0.13521605730056763, "learning_rate": 4.733137412854583e-05, "loss": 0.0417, "step": 38200 }, { "epoch": 0.18605, "grad_norm": 0.11533375829458237, "learning_rate": 4.7329515528220306e-05, "loss": 0.0415, "step": 38210 }, { "epoch": 0.1861, "grad_norm": 0.11277813464403152, "learning_rate": 4.732765631741422e-05, "loss": 0.0423, "step": 38220 }, { "epoch": 0.18615, "grad_norm": 0.09624869376420975, "learning_rate": 4.7325796496178384e-05, "loss": 0.0419, "step": 38230 }, { "epoch": 0.1862, "grad_norm": 0.1310359239578247, "learning_rate": 4.7323936064563665e-05, "loss": 0.0435, "step": 38240 }, { "epoch": 0.18625, "grad_norm": 0.1489342749118805, "learning_rate": 4.732207502262093e-05, "loss": 0.0444, "step": 38250 }, { "epoch": 0.1863, "grad_norm": 0.11564663052558899, "learning_rate": 4.732021337040105e-05, "loss": 0.0415, "step": 38260 }, { "epoch": 0.18635, "grad_norm": 0.11250898241996765, "learning_rate": 4.731835110795491e-05, "loss": 0.0413, "step": 38270 }, { "epoch": 0.1864, "grad_norm": 0.11310267448425293, "learning_rate": 4.7316488235333434e-05, "loss": 0.0428, "step": 38280 }, { "epoch": 0.18645, "grad_norm": 0.12901782989501953, "learning_rate": 4.731462475258757e-05, "loss": 0.0433, "step": 38290 }, { "epoch": 0.1865, "grad_norm": 0.13150332868099213, "learning_rate": 4.731276065976823e-05, "loss": 0.0411, "step": 38300 }, { "epoch": 0.18655, "grad_norm": 0.13157768547534943, "learning_rate": 4.7310895956926406e-05, "loss": 0.0405, "step": 38310 }, { "epoch": 0.1866, "grad_norm": 0.12932339310646057, "learning_rate": 4.730903064411307e-05, "loss": 0.04, "step": 38320 }, { "epoch": 0.18665, "grad_norm": 0.13414748013019562, "learning_rate": 4.7307164721379216e-05, "loss": 0.0438, "step": 38330 }, { "epoch": 0.1867, "grad_norm": 0.13133932650089264, "learning_rate": 4.730529818877585e-05, "loss": 0.0418, "step": 38340 }, { "epoch": 0.18675, "grad_norm": 0.12572602927684784, "learning_rate": 4.730343104635402e-05, "loss": 0.0417, "step": 38350 }, { "epoch": 0.1868, "grad_norm": 0.10463862866163254, "learning_rate": 4.7301563294164764e-05, "loss": 0.0409, "step": 38360 }, { "epoch": 0.18685, "grad_norm": 0.14042209088802338, "learning_rate": 4.729969493225914e-05, "loss": 0.0423, "step": 38370 }, { "epoch": 0.1869, "grad_norm": 0.1559218168258667, "learning_rate": 4.729782596068825e-05, "loss": 0.0406, "step": 38380 }, { "epoch": 0.18695, "grad_norm": 0.12093447893857956, "learning_rate": 4.729595637950316e-05, "loss": 0.0442, "step": 38390 }, { "epoch": 0.187, "grad_norm": 0.13172774016857147, "learning_rate": 4.7294086188755e-05, "loss": 0.0403, "step": 38400 }, { "epoch": 0.18705, "grad_norm": 0.1175733283162117, "learning_rate": 4.7292215388494896e-05, "loss": 0.0397, "step": 38410 }, { "epoch": 0.1871, "grad_norm": 0.12560902535915375, "learning_rate": 4.729034397877401e-05, "loss": 0.0412, "step": 38420 }, { "epoch": 0.18715, "grad_norm": 0.12575411796569824, "learning_rate": 4.728847195964349e-05, "loss": 0.0395, "step": 38430 }, { "epoch": 0.1872, "grad_norm": 0.10520323365926743, "learning_rate": 4.728659933115451e-05, "loss": 0.0386, "step": 38440 }, { "epoch": 0.18725, "grad_norm": 0.1346939504146576, "learning_rate": 4.728472609335829e-05, "loss": 0.041, "step": 38450 }, { "epoch": 0.1873, "grad_norm": 0.10870691388845444, "learning_rate": 4.728285224630602e-05, "loss": 0.0402, "step": 38460 }, { "epoch": 0.18735, "grad_norm": 0.13659881055355072, "learning_rate": 4.7280977790048955e-05, "loss": 0.041, "step": 38470 }, { "epoch": 0.1874, "grad_norm": 0.11378481239080429, "learning_rate": 4.727910272463831e-05, "loss": 0.0392, "step": 38480 }, { "epoch": 0.18745, "grad_norm": 0.13256292045116425, "learning_rate": 4.727722705012538e-05, "loss": 0.0401, "step": 38490 }, { "epoch": 0.1875, "grad_norm": 0.13196402788162231, "learning_rate": 4.7275350766561424e-05, "loss": 0.0393, "step": 38500 }, { "epoch": 0.18755, "grad_norm": 0.14104105532169342, "learning_rate": 4.727347387399775e-05, "loss": 0.0395, "step": 38510 }, { "epoch": 0.1876, "grad_norm": 0.1576061099767685, "learning_rate": 4.727159637248567e-05, "loss": 0.0401, "step": 38520 }, { "epoch": 0.18765, "grad_norm": 0.12883985042572021, "learning_rate": 4.726971826207651e-05, "loss": 0.0413, "step": 38530 }, { "epoch": 0.1877, "grad_norm": 0.14144699275493622, "learning_rate": 4.7267839542821615e-05, "loss": 0.0418, "step": 38540 }, { "epoch": 0.18775, "grad_norm": 0.13098569214344025, "learning_rate": 4.7265960214772354e-05, "loss": 0.0421, "step": 38550 }, { "epoch": 0.1878, "grad_norm": 0.1399405300617218, "learning_rate": 4.726408027798011e-05, "loss": 0.0401, "step": 38560 }, { "epoch": 0.18785, "grad_norm": 0.12670686841011047, "learning_rate": 4.726219973249627e-05, "loss": 0.0404, "step": 38570 }, { "epoch": 0.1879, "grad_norm": 0.154850035905838, "learning_rate": 4.7260318578372265e-05, "loss": 0.0411, "step": 38580 }, { "epoch": 0.18795, "grad_norm": 0.12611621618270874, "learning_rate": 4.7258436815659504e-05, "loss": 0.0417, "step": 38590 }, { "epoch": 0.188, "grad_norm": 0.17699086666107178, "learning_rate": 4.725655444440944e-05, "loss": 0.0444, "step": 38600 }, { "epoch": 0.18805, "grad_norm": 0.12984317541122437, "learning_rate": 4.725467146467354e-05, "loss": 0.0403, "step": 38610 }, { "epoch": 0.1881, "grad_norm": 0.13115186989307404, "learning_rate": 4.725278787650328e-05, "loss": 0.0431, "step": 38620 }, { "epoch": 0.18815, "grad_norm": 0.1118667721748352, "learning_rate": 4.725090367995016e-05, "loss": 0.0425, "step": 38630 }, { "epoch": 0.1882, "grad_norm": 0.1281067132949829, "learning_rate": 4.72490188750657e-05, "loss": 0.0427, "step": 38640 }, { "epoch": 0.18825, "grad_norm": 0.13673530519008636, "learning_rate": 4.724713346190142e-05, "loss": 0.0412, "step": 38650 }, { "epoch": 0.1883, "grad_norm": 0.14275339245796204, "learning_rate": 4.7245247440508864e-05, "loss": 0.0405, "step": 38660 }, { "epoch": 0.18835, "grad_norm": 0.1330052763223648, "learning_rate": 4.7243360810939606e-05, "loss": 0.0429, "step": 38670 }, { "epoch": 0.1884, "grad_norm": 0.17282722890377045, "learning_rate": 4.724147357324522e-05, "loss": 0.0424, "step": 38680 }, { "epoch": 0.18845, "grad_norm": 0.13815313577651978, "learning_rate": 4.7239585727477296e-05, "loss": 0.0415, "step": 38690 }, { "epoch": 0.1885, "grad_norm": 0.14790645241737366, "learning_rate": 4.723769727368747e-05, "loss": 0.0406, "step": 38700 }, { "epoch": 0.18855, "grad_norm": 0.12194222956895828, "learning_rate": 4.723580821192733e-05, "loss": 0.0395, "step": 38710 }, { "epoch": 0.1886, "grad_norm": 0.16369490325450897, "learning_rate": 4.723391854224857e-05, "loss": 0.0417, "step": 38720 }, { "epoch": 0.18865, "grad_norm": 0.1218780055642128, "learning_rate": 4.723202826470281e-05, "loss": 0.041, "step": 38730 }, { "epoch": 0.1887, "grad_norm": 0.12561967968940735, "learning_rate": 4.723013737934176e-05, "loss": 0.0419, "step": 38740 }, { "epoch": 0.18875, "grad_norm": 0.11251445859670639, "learning_rate": 4.7228245886217104e-05, "loss": 0.0402, "step": 38750 }, { "epoch": 0.1888, "grad_norm": 0.1460985243320465, "learning_rate": 4.722635378538056e-05, "loss": 0.0462, "step": 38760 }, { "epoch": 0.18885, "grad_norm": 0.15045461058616638, "learning_rate": 4.722446107688385e-05, "loss": 0.0412, "step": 38770 }, { "epoch": 0.1889, "grad_norm": 0.12663142383098602, "learning_rate": 4.722256776077872e-05, "loss": 0.0416, "step": 38780 }, { "epoch": 0.18895, "grad_norm": 0.14913487434387207, "learning_rate": 4.722067383711694e-05, "loss": 0.043, "step": 38790 }, { "epoch": 0.189, "grad_norm": 0.10253901779651642, "learning_rate": 4.721877930595029e-05, "loss": 0.0413, "step": 38800 }, { "epoch": 0.18905, "grad_norm": 0.13283059000968933, "learning_rate": 4.721688416733055e-05, "loss": 0.0429, "step": 38810 }, { "epoch": 0.1891, "grad_norm": 0.12368650734424591, "learning_rate": 4.721498842130955e-05, "loss": 0.042, "step": 38820 }, { "epoch": 0.18915, "grad_norm": 0.13323071599006653, "learning_rate": 4.721309206793911e-05, "loss": 0.0411, "step": 38830 }, { "epoch": 0.1892, "grad_norm": 0.09104353189468384, "learning_rate": 4.721119510727108e-05, "loss": 0.0425, "step": 38840 }, { "epoch": 0.18925, "grad_norm": 0.10489680618047714, "learning_rate": 4.7209297539357324e-05, "loss": 0.0423, "step": 38850 }, { "epoch": 0.1893, "grad_norm": 0.11301996558904648, "learning_rate": 4.72073993642497e-05, "loss": 0.0422, "step": 38860 }, { "epoch": 0.18935, "grad_norm": 0.14078927040100098, "learning_rate": 4.720550058200014e-05, "loss": 0.0424, "step": 38870 }, { "epoch": 0.1894, "grad_norm": 0.15084753930568695, "learning_rate": 4.720360119266053e-05, "loss": 0.0415, "step": 38880 }, { "epoch": 0.18945, "grad_norm": 0.11446460336446762, "learning_rate": 4.7201701196282804e-05, "loss": 0.0409, "step": 38890 }, { "epoch": 0.1895, "grad_norm": 0.11791659146547318, "learning_rate": 4.719980059291891e-05, "loss": 0.0435, "step": 38900 }, { "epoch": 0.18955, "grad_norm": 0.11702030152082443, "learning_rate": 4.71978993826208e-05, "loss": 0.0411, "step": 38910 }, { "epoch": 0.1896, "grad_norm": 0.10994726419448853, "learning_rate": 4.719599756544047e-05, "loss": 0.0433, "step": 38920 }, { "epoch": 0.18965, "grad_norm": 0.11107087880373001, "learning_rate": 4.71940951414299e-05, "loss": 0.0424, "step": 38930 }, { "epoch": 0.1897, "grad_norm": 0.11057529598474503, "learning_rate": 4.719219211064111e-05, "loss": 0.0429, "step": 38940 }, { "epoch": 0.18975, "grad_norm": 0.11657113581895828, "learning_rate": 4.719028847312612e-05, "loss": 0.0407, "step": 38950 }, { "epoch": 0.1898, "grad_norm": 0.12191347032785416, "learning_rate": 4.7188384228936986e-05, "loss": 0.0419, "step": 38960 }, { "epoch": 0.18985, "grad_norm": 0.11876232177019119, "learning_rate": 4.7186479378125756e-05, "loss": 0.0416, "step": 38970 }, { "epoch": 0.1899, "grad_norm": 0.11813126504421234, "learning_rate": 4.718457392074452e-05, "loss": 0.0421, "step": 38980 }, { "epoch": 0.18995, "grad_norm": 0.11932237446308136, "learning_rate": 4.7182667856845364e-05, "loss": 0.042, "step": 38990 }, { "epoch": 0.19, "grad_norm": 0.12584024667739868, "learning_rate": 4.71807611864804e-05, "loss": 0.0449, "step": 39000 }, { "epoch": 0.19005, "grad_norm": 0.11920773237943649, "learning_rate": 4.717885390970177e-05, "loss": 0.0468, "step": 39010 }, { "epoch": 0.1901, "grad_norm": 0.11415545642375946, "learning_rate": 4.7176946026561596e-05, "loss": 0.0428, "step": 39020 }, { "epoch": 0.19015, "grad_norm": 0.10740591585636139, "learning_rate": 4.717503753711205e-05, "loss": 0.0431, "step": 39030 }, { "epoch": 0.1902, "grad_norm": 0.1185695007443428, "learning_rate": 4.7173128441405315e-05, "loss": 0.0448, "step": 39040 }, { "epoch": 0.19025, "grad_norm": 0.1198916956782341, "learning_rate": 4.717121873949357e-05, "loss": 0.0415, "step": 39050 }, { "epoch": 0.1903, "grad_norm": 0.14157956838607788, "learning_rate": 4.716930843142904e-05, "loss": 0.0443, "step": 39060 }, { "epoch": 0.19035, "grad_norm": 0.11701211333274841, "learning_rate": 4.716739751726394e-05, "loss": 0.0427, "step": 39070 }, { "epoch": 0.1904, "grad_norm": 0.13657964766025543, "learning_rate": 4.716548599705053e-05, "loss": 0.0403, "step": 39080 }, { "epoch": 0.19045, "grad_norm": 0.13691385090351105, "learning_rate": 4.716357387084105e-05, "loss": 0.0441, "step": 39090 }, { "epoch": 0.1905, "grad_norm": 0.149797260761261, "learning_rate": 4.7161661138687794e-05, "loss": 0.0428, "step": 39100 }, { "epoch": 0.19055, "grad_norm": 0.12537500262260437, "learning_rate": 4.715974780064304e-05, "loss": 0.0427, "step": 39110 }, { "epoch": 0.1906, "grad_norm": 0.11275333166122437, "learning_rate": 4.7157833856759116e-05, "loss": 0.0408, "step": 39120 }, { "epoch": 0.19065, "grad_norm": 0.12390165030956268, "learning_rate": 4.715591930708833e-05, "loss": 0.0425, "step": 39130 }, { "epoch": 0.1907, "grad_norm": 0.11583295464515686, "learning_rate": 4.715400415168304e-05, "loss": 0.0398, "step": 39140 }, { "epoch": 0.19075, "grad_norm": 0.10922762751579285, "learning_rate": 4.7152088390595595e-05, "loss": 0.0398, "step": 39150 }, { "epoch": 0.1908, "grad_norm": 0.11143206804990768, "learning_rate": 4.715017202387838e-05, "loss": 0.0402, "step": 39160 }, { "epoch": 0.19085, "grad_norm": 0.11785729974508286, "learning_rate": 4.714825505158378e-05, "loss": 0.0423, "step": 39170 }, { "epoch": 0.1909, "grad_norm": 0.12262077629566193, "learning_rate": 4.714633747376421e-05, "loss": 0.0403, "step": 39180 }, { "epoch": 0.19095, "grad_norm": 0.11699897050857544, "learning_rate": 4.714441929047209e-05, "loss": 0.0414, "step": 39190 }, { "epoch": 0.191, "grad_norm": 0.1262628734111786, "learning_rate": 4.7142500501759866e-05, "loss": 0.0421, "step": 39200 }, { "epoch": 0.19105, "grad_norm": 0.13855388760566711, "learning_rate": 4.714058110768e-05, "loss": 0.0413, "step": 39210 }, { "epoch": 0.1911, "grad_norm": 0.1330314725637436, "learning_rate": 4.713866110828496e-05, "loss": 0.0405, "step": 39220 }, { "epoch": 0.19115, "grad_norm": 0.12841005623340607, "learning_rate": 4.713674050362724e-05, "loss": 0.0406, "step": 39230 }, { "epoch": 0.1912, "grad_norm": 0.15829530358314514, "learning_rate": 4.713481929375936e-05, "loss": 0.0396, "step": 39240 }, { "epoch": 0.19125, "grad_norm": 0.11351760476827621, "learning_rate": 4.7132897478733836e-05, "loss": 0.0429, "step": 39250 }, { "epoch": 0.1913, "grad_norm": 0.12715424597263336, "learning_rate": 4.71309750586032e-05, "loss": 0.0424, "step": 39260 }, { "epoch": 0.19135, "grad_norm": 0.12090346217155457, "learning_rate": 4.712905203342003e-05, "loss": 0.0404, "step": 39270 }, { "epoch": 0.1914, "grad_norm": 0.10686006397008896, "learning_rate": 4.712712840323689e-05, "loss": 0.0402, "step": 39280 }, { "epoch": 0.19145, "grad_norm": 0.1376791000366211, "learning_rate": 4.7125204168106365e-05, "loss": 0.043, "step": 39290 }, { "epoch": 0.1915, "grad_norm": 0.1357308030128479, "learning_rate": 4.7123279328081074e-05, "loss": 0.0411, "step": 39300 }, { "epoch": 0.19155, "grad_norm": 0.13313201069831848, "learning_rate": 4.712135388321364e-05, "loss": 0.0401, "step": 39310 }, { "epoch": 0.1916, "grad_norm": 0.11176084727048874, "learning_rate": 4.7119427833556696e-05, "loss": 0.0433, "step": 39320 }, { "epoch": 0.19165, "grad_norm": 0.13899466395378113, "learning_rate": 4.711750117916292e-05, "loss": 0.0426, "step": 39330 }, { "epoch": 0.1917, "grad_norm": 0.11376943439245224, "learning_rate": 4.711557392008495e-05, "loss": 0.0447, "step": 39340 }, { "epoch": 0.19175, "grad_norm": 0.13883130252361298, "learning_rate": 4.7113646056375506e-05, "loss": 0.042, "step": 39350 }, { "epoch": 0.1918, "grad_norm": 0.11968471109867096, "learning_rate": 4.711171758808729e-05, "loss": 0.043, "step": 39360 }, { "epoch": 0.19185, "grad_norm": 0.1307145357131958, "learning_rate": 4.710978851527302e-05, "loss": 0.04, "step": 39370 }, { "epoch": 0.1919, "grad_norm": 0.11859049648046494, "learning_rate": 4.710785883798543e-05, "loss": 0.0402, "step": 39380 }, { "epoch": 0.19195, "grad_norm": 0.12955895066261292, "learning_rate": 4.71059285562773e-05, "loss": 0.0412, "step": 39390 }, { "epoch": 0.192, "grad_norm": 0.11519957333803177, "learning_rate": 4.7103997670201376e-05, "loss": 0.0443, "step": 39400 }, { "epoch": 0.19205, "grad_norm": 0.13244330883026123, "learning_rate": 4.710206617981047e-05, "loss": 0.0402, "step": 39410 }, { "epoch": 0.1921, "grad_norm": 0.09134592860937119, "learning_rate": 4.7100134085157365e-05, "loss": 0.0393, "step": 39420 }, { "epoch": 0.19215, "grad_norm": 0.13230308890342712, "learning_rate": 4.7098201386294904e-05, "loss": 0.0402, "step": 39430 }, { "epoch": 0.1922, "grad_norm": 0.12215183675289154, "learning_rate": 4.7096268083275926e-05, "loss": 0.0409, "step": 39440 }, { "epoch": 0.19225, "grad_norm": 0.10658028721809387, "learning_rate": 4.709433417615327e-05, "loss": 0.0415, "step": 39450 }, { "epoch": 0.1923, "grad_norm": 0.1050388514995575, "learning_rate": 4.7092399664979824e-05, "loss": 0.0434, "step": 39460 }, { "epoch": 0.19235, "grad_norm": 0.10058680176734924, "learning_rate": 4.709046454980846e-05, "loss": 0.0387, "step": 39470 }, { "epoch": 0.1924, "grad_norm": 0.10979174822568893, "learning_rate": 4.708852883069211e-05, "loss": 0.0392, "step": 39480 }, { "epoch": 0.19245, "grad_norm": 0.1021413505077362, "learning_rate": 4.7086592507683667e-05, "loss": 0.04, "step": 39490 }, { "epoch": 0.1925, "grad_norm": 0.10319457948207855, "learning_rate": 4.708465558083609e-05, "loss": 0.0387, "step": 39500 }, { "epoch": 0.19255, "grad_norm": 0.10485776513814926, "learning_rate": 4.7082718050202326e-05, "loss": 0.0397, "step": 39510 }, { "epoch": 0.1926, "grad_norm": 0.10097415745258331, "learning_rate": 4.708077991583534e-05, "loss": 0.0395, "step": 39520 }, { "epoch": 0.19265, "grad_norm": 0.10246019810438156, "learning_rate": 4.7078841177788136e-05, "loss": 0.0409, "step": 39530 }, { "epoch": 0.1927, "grad_norm": 0.10946165025234222, "learning_rate": 4.7076901836113696e-05, "loss": 0.0433, "step": 39540 }, { "epoch": 0.19275, "grad_norm": 0.14613662660121918, "learning_rate": 4.7074961890865065e-05, "loss": 0.0423, "step": 39550 }, { "epoch": 0.1928, "grad_norm": 0.12220507115125656, "learning_rate": 4.707302134209527e-05, "loss": 0.0395, "step": 39560 }, { "epoch": 0.19285, "grad_norm": 0.13756348192691803, "learning_rate": 4.7071080189857356e-05, "loss": 0.0413, "step": 39570 }, { "epoch": 0.1929, "grad_norm": 0.1259576976299286, "learning_rate": 4.706913843420441e-05, "loss": 0.0417, "step": 39580 }, { "epoch": 0.19295, "grad_norm": 0.1224607303738594, "learning_rate": 4.70671960751895e-05, "loss": 0.0416, "step": 39590 }, { "epoch": 0.193, "grad_norm": 0.1667010635137558, "learning_rate": 4.706525311286574e-05, "loss": 0.0456, "step": 39600 }, { "epoch": 0.19305, "grad_norm": 0.13042321801185608, "learning_rate": 4.706330954728626e-05, "loss": 0.0442, "step": 39610 }, { "epoch": 0.1931, "grad_norm": 0.12576870620250702, "learning_rate": 4.7061365378504174e-05, "loss": 0.0432, "step": 39620 }, { "epoch": 0.19315, "grad_norm": 0.11800982058048248, "learning_rate": 4.705942060657266e-05, "loss": 0.0411, "step": 39630 }, { "epoch": 0.1932, "grad_norm": 0.10502568632364273, "learning_rate": 4.7057475231544865e-05, "loss": 0.0403, "step": 39640 }, { "epoch": 0.19325, "grad_norm": 0.09104301780462265, "learning_rate": 4.705552925347398e-05, "loss": 0.0424, "step": 39650 }, { "epoch": 0.1933, "grad_norm": 0.11457667499780655, "learning_rate": 4.705358267241322e-05, "loss": 0.0426, "step": 39660 }, { "epoch": 0.19335, "grad_norm": 0.12124619632959366, "learning_rate": 4.705163548841579e-05, "loss": 0.0401, "step": 39670 }, { "epoch": 0.1934, "grad_norm": 0.11806228756904602, "learning_rate": 4.704968770153493e-05, "loss": 0.0406, "step": 39680 }, { "epoch": 0.19345, "grad_norm": 0.11900746077299118, "learning_rate": 4.704773931182389e-05, "loss": 0.0408, "step": 39690 }, { "epoch": 0.1935, "grad_norm": 0.09312348812818527, "learning_rate": 4.704579031933595e-05, "loss": 0.042, "step": 39700 }, { "epoch": 0.19355, "grad_norm": 0.10966067761182785, "learning_rate": 4.7043840724124375e-05, "loss": 0.0411, "step": 39710 }, { "epoch": 0.1936, "grad_norm": 0.11456384509801865, "learning_rate": 4.704189052624248e-05, "loss": 0.041, "step": 39720 }, { "epoch": 0.19365, "grad_norm": 0.1079399585723877, "learning_rate": 4.703993972574358e-05, "loss": 0.0402, "step": 39730 }, { "epoch": 0.1937, "grad_norm": 0.10426142066717148, "learning_rate": 4.7037988322681e-05, "loss": 0.0403, "step": 39740 }, { "epoch": 0.19375, "grad_norm": 0.13527005910873413, "learning_rate": 4.703603631710811e-05, "loss": 0.0404, "step": 39750 }, { "epoch": 0.1938, "grad_norm": 0.11864572763442993, "learning_rate": 4.703408370907826e-05, "loss": 0.0422, "step": 39760 }, { "epoch": 0.19385, "grad_norm": 0.1404629349708557, "learning_rate": 4.7032130498644835e-05, "loss": 0.0413, "step": 39770 }, { "epoch": 0.1939, "grad_norm": 0.10311874002218246, "learning_rate": 4.703017668586125e-05, "loss": 0.0425, "step": 39780 }, { "epoch": 0.19395, "grad_norm": 0.1279229074716568, "learning_rate": 4.70282222707809e-05, "loss": 0.0402, "step": 39790 }, { "epoch": 0.194, "grad_norm": 0.12124011665582657, "learning_rate": 4.702626725345723e-05, "loss": 0.0448, "step": 39800 }, { "epoch": 0.19405, "grad_norm": 0.11050168424844742, "learning_rate": 4.7024311633943696e-05, "loss": 0.0402, "step": 39810 }, { "epoch": 0.1941, "grad_norm": 0.13088427484035492, "learning_rate": 4.702235541229375e-05, "loss": 0.0409, "step": 39820 }, { "epoch": 0.19415, "grad_norm": 0.1190885379910469, "learning_rate": 4.702039858856088e-05, "loss": 0.0429, "step": 39830 }, { "epoch": 0.1942, "grad_norm": 0.10510970652103424, "learning_rate": 4.701844116279859e-05, "loss": 0.0402, "step": 39840 }, { "epoch": 0.19425, "grad_norm": 0.10281083732843399, "learning_rate": 4.7016483135060386e-05, "loss": 0.0401, "step": 39850 }, { "epoch": 0.1943, "grad_norm": 0.12347941845655441, "learning_rate": 4.701452450539981e-05, "loss": 0.0402, "step": 39860 }, { "epoch": 0.19435, "grad_norm": 0.10865604877471924, "learning_rate": 4.70125652738704e-05, "loss": 0.0403, "step": 39870 }, { "epoch": 0.1944, "grad_norm": 0.14394986629486084, "learning_rate": 4.701060544052572e-05, "loss": 0.0422, "step": 39880 }, { "epoch": 0.19445, "grad_norm": 0.11788714677095413, "learning_rate": 4.700864500541936e-05, "loss": 0.0434, "step": 39890 }, { "epoch": 0.1945, "grad_norm": 0.11781013011932373, "learning_rate": 4.7006683968604915e-05, "loss": 0.0434, "step": 39900 }, { "epoch": 0.19455, "grad_norm": 0.10928132385015488, "learning_rate": 4.7004722330136005e-05, "loss": 0.0397, "step": 39910 }, { "epoch": 0.1946, "grad_norm": 0.10922566801309586, "learning_rate": 4.700276009006625e-05, "loss": 0.0412, "step": 39920 }, { "epoch": 0.19465, "grad_norm": 0.10895717144012451, "learning_rate": 4.700079724844929e-05, "loss": 0.0403, "step": 39930 }, { "epoch": 0.1947, "grad_norm": 0.11466559022665024, "learning_rate": 4.6998833805338806e-05, "loss": 0.041, "step": 39940 }, { "epoch": 0.19475, "grad_norm": 0.11927556246519089, "learning_rate": 4.699686976078847e-05, "loss": 0.0401, "step": 39950 }, { "epoch": 0.1948, "grad_norm": 0.10541284084320068, "learning_rate": 4.6994905114851976e-05, "loss": 0.0409, "step": 39960 }, { "epoch": 0.19485, "grad_norm": 0.11831863224506378, "learning_rate": 4.699293986758304e-05, "loss": 0.0418, "step": 39970 }, { "epoch": 0.1949, "grad_norm": 0.12878260016441345, "learning_rate": 4.699097401903539e-05, "loss": 0.0407, "step": 39980 }, { "epoch": 0.19495, "grad_norm": 0.10302649438381195, "learning_rate": 4.6989007569262776e-05, "loss": 0.042, "step": 39990 }, { "epoch": 0.195, "grad_norm": 0.13494367897510529, "learning_rate": 4.698704051831896e-05, "loss": 0.0412, "step": 40000 }, { "epoch": 0.19505, "grad_norm": 0.12048853188753128, "learning_rate": 4.6985072866257704e-05, "loss": 0.0418, "step": 40010 }, { "epoch": 0.1951, "grad_norm": 0.13809038698673248, "learning_rate": 4.698310461313282e-05, "loss": 0.0412, "step": 40020 }, { "epoch": 0.19515, "grad_norm": 0.13032236695289612, "learning_rate": 4.6981135758998115e-05, "loss": 0.0404, "step": 40030 }, { "epoch": 0.1952, "grad_norm": 0.14365342259407043, "learning_rate": 4.6979166303907425e-05, "loss": 0.0404, "step": 40040 }, { "epoch": 0.19525, "grad_norm": 0.15081055462360382, "learning_rate": 4.697719624791458e-05, "loss": 0.0398, "step": 40050 }, { "epoch": 0.1953, "grad_norm": 0.13427375257015228, "learning_rate": 4.697522559107344e-05, "loss": 0.0404, "step": 40060 }, { "epoch": 0.19535, "grad_norm": 0.14125995337963104, "learning_rate": 4.697325433343789e-05, "loss": 0.0422, "step": 40070 }, { "epoch": 0.1954, "grad_norm": 0.14351476728916168, "learning_rate": 4.697128247506183e-05, "loss": 0.0415, "step": 40080 }, { "epoch": 0.19545, "grad_norm": 0.12644805014133453, "learning_rate": 4.696931001599914e-05, "loss": 0.0403, "step": 40090 }, { "epoch": 0.1955, "grad_norm": 0.1217808797955513, "learning_rate": 4.6967336956303794e-05, "loss": 0.0423, "step": 40100 }, { "epoch": 0.19555, "grad_norm": 0.11470723897218704, "learning_rate": 4.6965363296029695e-05, "loss": 0.0417, "step": 40110 }, { "epoch": 0.1956, "grad_norm": 0.10986042767763138, "learning_rate": 4.696338903523082e-05, "loss": 0.0425, "step": 40120 }, { "epoch": 0.19565, "grad_norm": 0.122439906001091, "learning_rate": 4.696141417396114e-05, "loss": 0.0433, "step": 40130 }, { "epoch": 0.1957, "grad_norm": 0.11072283983230591, "learning_rate": 4.695943871227464e-05, "loss": 0.0422, "step": 40140 }, { "epoch": 0.19575, "grad_norm": 0.14070159196853638, "learning_rate": 4.695746265022534e-05, "loss": 0.0445, "step": 40150 }, { "epoch": 0.1958, "grad_norm": 0.11827629059553146, "learning_rate": 4.695548598786726e-05, "loss": 0.0435, "step": 40160 }, { "epoch": 0.19585, "grad_norm": 0.123737633228302, "learning_rate": 4.695350872525444e-05, "loss": 0.045, "step": 40170 }, { "epoch": 0.1959, "grad_norm": 0.0988842099905014, "learning_rate": 4.695153086244094e-05, "loss": 0.0434, "step": 40180 }, { "epoch": 0.19595, "grad_norm": 0.12570056319236755, "learning_rate": 4.6949552399480834e-05, "loss": 0.0425, "step": 40190 }, { "epoch": 0.196, "grad_norm": 0.1427522897720337, "learning_rate": 4.694757333642821e-05, "loss": 0.0402, "step": 40200 }, { "epoch": 0.19605, "grad_norm": 0.11194559931755066, "learning_rate": 4.6945593673337173e-05, "loss": 0.0441, "step": 40210 }, { "epoch": 0.1961, "grad_norm": 0.1071145310997963, "learning_rate": 4.6943613410261856e-05, "loss": 0.0432, "step": 40220 }, { "epoch": 0.19615, "grad_norm": 0.1292775571346283, "learning_rate": 4.694163254725639e-05, "loss": 0.0437, "step": 40230 }, { "epoch": 0.1962, "grad_norm": 0.11506784707307816, "learning_rate": 4.693965108437494e-05, "loss": 0.0403, "step": 40240 }, { "epoch": 0.19625, "grad_norm": 0.12157977372407913, "learning_rate": 4.693766902167166e-05, "loss": 0.0411, "step": 40250 }, { "epoch": 0.1963, "grad_norm": 0.14803488552570343, "learning_rate": 4.6935686359200754e-05, "loss": 0.0407, "step": 40260 }, { "epoch": 0.19635, "grad_norm": 0.12866628170013428, "learning_rate": 4.6933703097016425e-05, "loss": 0.0418, "step": 40270 }, { "epoch": 0.1964, "grad_norm": 0.15898150205612183, "learning_rate": 4.693171923517289e-05, "loss": 0.041, "step": 40280 }, { "epoch": 0.19645, "grad_norm": 0.11652620136737823, "learning_rate": 4.69297347737244e-05, "loss": 0.0414, "step": 40290 }, { "epoch": 0.1965, "grad_norm": 0.11326843500137329, "learning_rate": 4.692774971272519e-05, "loss": 0.0403, "step": 40300 }, { "epoch": 0.19655, "grad_norm": 0.11849953234195709, "learning_rate": 4.692576405222955e-05, "loss": 0.0389, "step": 40310 }, { "epoch": 0.1966, "grad_norm": 0.13824835419654846, "learning_rate": 4.6923777792291746e-05, "loss": 0.0454, "step": 40320 }, { "epoch": 0.19665, "grad_norm": 0.12199034541845322, "learning_rate": 4.69217909329661e-05, "loss": 0.0406, "step": 40330 }, { "epoch": 0.1967, "grad_norm": 0.15483516454696655, "learning_rate": 4.6919803474306926e-05, "loss": 0.0408, "step": 40340 }, { "epoch": 0.19675, "grad_norm": 0.11486873030662537, "learning_rate": 4.691781541636856e-05, "loss": 0.0422, "step": 40350 }, { "epoch": 0.1968, "grad_norm": 0.12523794174194336, "learning_rate": 4.6915826759205355e-05, "loss": 0.0414, "step": 40360 }, { "epoch": 0.19685, "grad_norm": 0.11457782983779907, "learning_rate": 4.691383750287168e-05, "loss": 0.0414, "step": 40370 }, { "epoch": 0.1969, "grad_norm": 0.09785265475511551, "learning_rate": 4.691184764742192e-05, "loss": 0.0416, "step": 40380 }, { "epoch": 0.19695, "grad_norm": 0.10499918460845947, "learning_rate": 4.690985719291048e-05, "loss": 0.0407, "step": 40390 }, { "epoch": 0.197, "grad_norm": 0.09804340451955795, "learning_rate": 4.6907866139391766e-05, "loss": 0.0418, "step": 40400 }, { "epoch": 0.19705, "grad_norm": 0.09513143450021744, "learning_rate": 4.6905874486920234e-05, "loss": 0.0396, "step": 40410 }, { "epoch": 0.1971, "grad_norm": 0.11613192409276962, "learning_rate": 4.690388223555031e-05, "loss": 0.0398, "step": 40420 }, { "epoch": 0.19715, "grad_norm": 0.11552929133176804, "learning_rate": 4.6901889385336486e-05, "loss": 0.0397, "step": 40430 }, { "epoch": 0.1972, "grad_norm": 0.10436779260635376, "learning_rate": 4.6899895936333226e-05, "loss": 0.0401, "step": 40440 }, { "epoch": 0.19725, "grad_norm": 0.1088162288069725, "learning_rate": 4.6897901888595044e-05, "loss": 0.04, "step": 40450 }, { "epoch": 0.1973, "grad_norm": 0.0982193872332573, "learning_rate": 4.689590724217645e-05, "loss": 0.0403, "step": 40460 }, { "epoch": 0.19735, "grad_norm": 0.11795471608638763, "learning_rate": 4.689391199713198e-05, "loss": 0.0401, "step": 40470 }, { "epoch": 0.1974, "grad_norm": 0.10341308265924454, "learning_rate": 4.689191615351618e-05, "loss": 0.0397, "step": 40480 }, { "epoch": 0.19745, "grad_norm": 0.10679616034030914, "learning_rate": 4.6889919711383614e-05, "loss": 0.0395, "step": 40490 }, { "epoch": 0.1975, "grad_norm": 0.11206220835447311, "learning_rate": 4.6887922670788866e-05, "loss": 0.0418, "step": 40500 }, { "epoch": 0.19755, "grad_norm": 0.13069789111614227, "learning_rate": 4.688592503178654e-05, "loss": 0.0401, "step": 40510 }, { "epoch": 0.1976, "grad_norm": 0.10349959135055542, "learning_rate": 4.6883926794431244e-05, "loss": 0.0394, "step": 40520 }, { "epoch": 0.19765, "grad_norm": 0.10999346524477005, "learning_rate": 4.68819279587776e-05, "loss": 0.0395, "step": 40530 }, { "epoch": 0.1977, "grad_norm": 0.11055387556552887, "learning_rate": 4.6879928524880284e-05, "loss": 0.0403, "step": 40540 }, { "epoch": 0.19775, "grad_norm": 0.12687133252620697, "learning_rate": 4.6877928492793933e-05, "loss": 0.0438, "step": 40550 }, { "epoch": 0.1978, "grad_norm": 0.11826013028621674, "learning_rate": 4.687592786257324e-05, "loss": 0.0394, "step": 40560 }, { "epoch": 0.19785, "grad_norm": 0.11717572808265686, "learning_rate": 4.687392663427289e-05, "loss": 0.0408, "step": 40570 }, { "epoch": 0.1979, "grad_norm": 0.11580274999141693, "learning_rate": 4.6871924807947615e-05, "loss": 0.0412, "step": 40580 }, { "epoch": 0.19795, "grad_norm": 0.11562202870845795, "learning_rate": 4.686992238365212e-05, "loss": 0.0397, "step": 40590 }, { "epoch": 0.198, "grad_norm": 0.09922197461128235, "learning_rate": 4.6867919361441174e-05, "loss": 0.0404, "step": 40600 }, { "epoch": 0.19805, "grad_norm": 0.10405375063419342, "learning_rate": 4.6865915741369526e-05, "loss": 0.0428, "step": 40610 }, { "epoch": 0.1981, "grad_norm": 0.12160701304674149, "learning_rate": 4.6863911523491956e-05, "loss": 0.0401, "step": 40620 }, { "epoch": 0.19815, "grad_norm": 0.12346811592578888, "learning_rate": 4.6861906707863255e-05, "loss": 0.0394, "step": 40630 }, { "epoch": 0.1982, "grad_norm": 0.12269331514835358, "learning_rate": 4.6859901294538236e-05, "loss": 0.0396, "step": 40640 }, { "epoch": 0.19825, "grad_norm": 0.11659187078475952, "learning_rate": 4.685789528357173e-05, "loss": 0.0417, "step": 40650 }, { "epoch": 0.1983, "grad_norm": 0.11113385856151581, "learning_rate": 4.685588867501858e-05, "loss": 0.0414, "step": 40660 }, { "epoch": 0.19835, "grad_norm": 0.11463132500648499, "learning_rate": 4.6853881468933645e-05, "loss": 0.0414, "step": 40670 }, { "epoch": 0.1984, "grad_norm": 0.1146690770983696, "learning_rate": 4.68518736653718e-05, "loss": 0.0412, "step": 40680 }, { "epoch": 0.19845, "grad_norm": 0.11185281723737717, "learning_rate": 4.6849865264387936e-05, "loss": 0.0423, "step": 40690 }, { "epoch": 0.1985, "grad_norm": 0.11425045132637024, "learning_rate": 4.684785626603697e-05, "loss": 0.0438, "step": 40700 }, { "epoch": 0.19855, "grad_norm": 0.10859055817127228, "learning_rate": 4.6845846670373815e-05, "loss": 0.042, "step": 40710 }, { "epoch": 0.1986, "grad_norm": 0.14859603345394135, "learning_rate": 4.684383647745343e-05, "loss": 0.0404, "step": 40720 }, { "epoch": 0.19865, "grad_norm": 0.12444942444562912, "learning_rate": 4.684182568733075e-05, "loss": 0.0455, "step": 40730 }, { "epoch": 0.1987, "grad_norm": 0.11537047475576401, "learning_rate": 4.683981430006077e-05, "loss": 0.041, "step": 40740 }, { "epoch": 0.19875, "grad_norm": 0.11382672190666199, "learning_rate": 4.683780231569846e-05, "loss": 0.0402, "step": 40750 }, { "epoch": 0.1988, "grad_norm": 0.11329783499240875, "learning_rate": 4.683578973429885e-05, "loss": 0.042, "step": 40760 }, { "epoch": 0.19885, "grad_norm": 0.10937277972698212, "learning_rate": 4.683377655591695e-05, "loss": 0.0404, "step": 40770 }, { "epoch": 0.1989, "grad_norm": 0.11984826624393463, "learning_rate": 4.68317627806078e-05, "loss": 0.0421, "step": 40780 }, { "epoch": 0.19895, "grad_norm": 0.12853102385997772, "learning_rate": 4.6829748408426454e-05, "loss": 0.041, "step": 40790 }, { "epoch": 0.199, "grad_norm": 0.16592375934123993, "learning_rate": 4.6827733439428e-05, "loss": 0.044, "step": 40800 }, { "epoch": 0.19905, "grad_norm": 0.12035780400037766, "learning_rate": 4.682571787366749e-05, "loss": 0.0422, "step": 40810 }, { "epoch": 0.1991, "grad_norm": 0.11223269999027252, "learning_rate": 4.682370171120008e-05, "loss": 0.0424, "step": 40820 }, { "epoch": 0.19915, "grad_norm": 0.10703121870756149, "learning_rate": 4.682168495208085e-05, "loss": 0.0411, "step": 40830 }, { "epoch": 0.1992, "grad_norm": 0.12128929793834686, "learning_rate": 4.681966759636495e-05, "loss": 0.0417, "step": 40840 }, { "epoch": 0.19925, "grad_norm": 0.12266344577074051, "learning_rate": 4.681764964410754e-05, "loss": 0.0415, "step": 40850 }, { "epoch": 0.1993, "grad_norm": 0.11447182297706604, "learning_rate": 4.6815631095363785e-05, "loss": 0.0432, "step": 40860 }, { "epoch": 0.19935, "grad_norm": 0.1356915533542633, "learning_rate": 4.6813611950188874e-05, "loss": 0.0409, "step": 40870 }, { "epoch": 0.1994, "grad_norm": 0.13189062476158142, "learning_rate": 4.6811592208638e-05, "loss": 0.0422, "step": 40880 }, { "epoch": 0.19945, "grad_norm": 0.1379818320274353, "learning_rate": 4.68095718707664e-05, "loss": 0.0432, "step": 40890 }, { "epoch": 0.1995, "grad_norm": 0.13345512747764587, "learning_rate": 4.6807550936629286e-05, "loss": 0.0405, "step": 40900 }, { "epoch": 0.19955, "grad_norm": 0.12089315801858902, "learning_rate": 4.6805529406281925e-05, "loss": 0.0435, "step": 40910 }, { "epoch": 0.1996, "grad_norm": 0.15447144210338593, "learning_rate": 4.680350727977959e-05, "loss": 0.0458, "step": 40920 }, { "epoch": 0.19965, "grad_norm": 0.1232617050409317, "learning_rate": 4.6801484557177546e-05, "loss": 0.0398, "step": 40930 }, { "epoch": 0.1997, "grad_norm": 0.1233721524477005, "learning_rate": 4.679946123853111e-05, "loss": 0.0423, "step": 40940 }, { "epoch": 0.19975, "grad_norm": 0.121514692902565, "learning_rate": 4.67974373238956e-05, "loss": 0.0428, "step": 40950 }, { "epoch": 0.1998, "grad_norm": 0.1508697271347046, "learning_rate": 4.679541281332633e-05, "loss": 0.0473, "step": 40960 }, { "epoch": 0.19985, "grad_norm": 0.12545537948608398, "learning_rate": 4.679338770687867e-05, "loss": 0.0423, "step": 40970 }, { "epoch": 0.1999, "grad_norm": 0.15301913022994995, "learning_rate": 4.679136200460798e-05, "loss": 0.0438, "step": 40980 }, { "epoch": 0.19995, "grad_norm": 0.11956794559955597, "learning_rate": 4.6789335706569635e-05, "loss": 0.0415, "step": 40990 }, { "epoch": 0.2, "grad_norm": 0.11663742363452911, "learning_rate": 4.678730881281904e-05, "loss": 0.0434, "step": 41000 }, { "epoch": 0.20005, "grad_norm": 0.10982909053564072, "learning_rate": 4.67852813234116e-05, "loss": 0.0412, "step": 41010 }, { "epoch": 0.2001, "grad_norm": 0.11424484103918076, "learning_rate": 4.678325323840276e-05, "loss": 0.0406, "step": 41020 }, { "epoch": 0.20015, "grad_norm": 0.125224769115448, "learning_rate": 4.6781224557847955e-05, "loss": 0.0409, "step": 41030 }, { "epoch": 0.2002, "grad_norm": 0.10726512968540192, "learning_rate": 4.677919528180266e-05, "loss": 0.0396, "step": 41040 }, { "epoch": 0.20025, "grad_norm": 0.12481468170881271, "learning_rate": 4.6777165410322344e-05, "loss": 0.0422, "step": 41050 }, { "epoch": 0.2003, "grad_norm": 0.11721587926149368, "learning_rate": 4.6775134943462504e-05, "loss": 0.042, "step": 41060 }, { "epoch": 0.20035, "grad_norm": 0.1256370097398758, "learning_rate": 4.6773103881278655e-05, "loss": 0.043, "step": 41070 }, { "epoch": 0.2004, "grad_norm": 0.11127348989248276, "learning_rate": 4.6771072223826336e-05, "loss": 0.0403, "step": 41080 }, { "epoch": 0.20045, "grad_norm": 0.10558488219976425, "learning_rate": 4.676903997116107e-05, "loss": 0.0416, "step": 41090 }, { "epoch": 0.2005, "grad_norm": 0.10310178250074387, "learning_rate": 4.676700712333843e-05, "loss": 0.0402, "step": 41100 }, { "epoch": 0.20055, "grad_norm": 0.1143566370010376, "learning_rate": 4.6764973680414e-05, "loss": 0.0415, "step": 41110 }, { "epoch": 0.2006, "grad_norm": 0.1191648617386818, "learning_rate": 4.6762939642443366e-05, "loss": 0.0416, "step": 41120 }, { "epoch": 0.20065, "grad_norm": 0.10538813471794128, "learning_rate": 4.6760905009482136e-05, "loss": 0.0409, "step": 41130 }, { "epoch": 0.2007, "grad_norm": 0.11145827174186707, "learning_rate": 4.6758869781585936e-05, "loss": 0.0424, "step": 41140 }, { "epoch": 0.20075, "grad_norm": 0.10465462505817413, "learning_rate": 4.6756833958810406e-05, "loss": 0.043, "step": 41150 }, { "epoch": 0.2008, "grad_norm": 0.10324224084615707, "learning_rate": 4.675479754121122e-05, "loss": 0.0418, "step": 41160 }, { "epoch": 0.20085, "grad_norm": 0.1077202707529068, "learning_rate": 4.675276052884404e-05, "loss": 0.0418, "step": 41170 }, { "epoch": 0.2009, "grad_norm": 0.11636579036712646, "learning_rate": 4.6750722921764556e-05, "loss": 0.0408, "step": 41180 }, { "epoch": 0.20095, "grad_norm": 0.08367370069026947, "learning_rate": 4.674868472002848e-05, "loss": 0.0395, "step": 41190 }, { "epoch": 0.201, "grad_norm": 0.10918355733156204, "learning_rate": 4.674664592369154e-05, "loss": 0.0419, "step": 41200 }, { "epoch": 0.20105, "grad_norm": 0.11053916066884995, "learning_rate": 4.6744606532809456e-05, "loss": 0.0446, "step": 41210 }, { "epoch": 0.2011, "grad_norm": 0.0952436625957489, "learning_rate": 4.6742566547438006e-05, "loss": 0.0414, "step": 41220 }, { "epoch": 0.20115, "grad_norm": 0.11707165092229843, "learning_rate": 4.6740525967632955e-05, "loss": 0.0427, "step": 41230 }, { "epoch": 0.2012, "grad_norm": 0.11166837066411972, "learning_rate": 4.673848479345009e-05, "loss": 0.0416, "step": 41240 }, { "epoch": 0.20125, "grad_norm": 0.13893011212348938, "learning_rate": 4.673644302494522e-05, "loss": 0.0429, "step": 41250 }, { "epoch": 0.2013, "grad_norm": 0.11983472853899002, "learning_rate": 4.6734400662174164e-05, "loss": 0.0405, "step": 41260 }, { "epoch": 0.20135, "grad_norm": 0.12756171822547913, "learning_rate": 4.673235770519276e-05, "loss": 0.0413, "step": 41270 }, { "epoch": 0.2014, "grad_norm": 0.10711822658777237, "learning_rate": 4.673031415405686e-05, "loss": 0.0393, "step": 41280 }, { "epoch": 0.20145, "grad_norm": 0.10304751247167587, "learning_rate": 4.672827000882233e-05, "loss": 0.0412, "step": 41290 }, { "epoch": 0.2015, "grad_norm": 0.1470487266778946, "learning_rate": 4.672622526954506e-05, "loss": 0.0415, "step": 41300 }, { "epoch": 0.20155, "grad_norm": 0.12710584700107574, "learning_rate": 4.6724179936280965e-05, "loss": 0.0395, "step": 41310 }, { "epoch": 0.2016, "grad_norm": 0.12694060802459717, "learning_rate": 4.672213400908595e-05, "loss": 0.0395, "step": 41320 }, { "epoch": 0.20165, "grad_norm": 0.10214198380708694, "learning_rate": 4.672008748801594e-05, "loss": 0.0408, "step": 41330 }, { "epoch": 0.2017, "grad_norm": 0.10517226159572601, "learning_rate": 4.67180403731269e-05, "loss": 0.0407, "step": 41340 }, { "epoch": 0.20175, "grad_norm": 0.12303302437067032, "learning_rate": 4.6715992664474805e-05, "loss": 0.0406, "step": 41350 }, { "epoch": 0.2018, "grad_norm": 0.10291822999715805, "learning_rate": 4.6713944362115625e-05, "loss": 0.0399, "step": 41360 }, { "epoch": 0.20185, "grad_norm": 0.11955802887678146, "learning_rate": 4.671189546610536e-05, "loss": 0.0422, "step": 41370 }, { "epoch": 0.2019, "grad_norm": 0.1398523449897766, "learning_rate": 4.670984597650003e-05, "loss": 0.0403, "step": 41380 }, { "epoch": 0.20195, "grad_norm": 0.11278299987316132, "learning_rate": 4.6707795893355675e-05, "loss": 0.0403, "step": 41390 }, { "epoch": 0.202, "grad_norm": 0.13503143191337585, "learning_rate": 4.6705745216728334e-05, "loss": 0.0399, "step": 41400 }, { "epoch": 0.20205, "grad_norm": 0.0978846326470375, "learning_rate": 4.670369394667407e-05, "loss": 0.0412, "step": 41410 }, { "epoch": 0.2021, "grad_norm": 0.11684785783290863, "learning_rate": 4.670164208324896e-05, "loss": 0.0427, "step": 41420 }, { "epoch": 0.20215, "grad_norm": 0.12648822367191315, "learning_rate": 4.669958962650912e-05, "loss": 0.0409, "step": 41430 }, { "epoch": 0.2022, "grad_norm": 0.13909755647182465, "learning_rate": 4.6697536576510644e-05, "loss": 0.0412, "step": 41440 }, { "epoch": 0.20225, "grad_norm": 0.12606674432754517, "learning_rate": 4.669548293330967e-05, "loss": 0.0399, "step": 41450 }, { "epoch": 0.2023, "grad_norm": 0.10662326961755753, "learning_rate": 4.6693428696962344e-05, "loss": 0.0416, "step": 41460 }, { "epoch": 0.20235, "grad_norm": 0.09710852801799774, "learning_rate": 4.669137386752483e-05, "loss": 0.0406, "step": 41470 }, { "epoch": 0.2024, "grad_norm": 0.1193772554397583, "learning_rate": 4.66893184450533e-05, "loss": 0.0417, "step": 41480 }, { "epoch": 0.20245, "grad_norm": 0.12737101316452026, "learning_rate": 4.668726242960395e-05, "loss": 0.0407, "step": 41490 }, { "epoch": 0.2025, "grad_norm": 0.1282685548067093, "learning_rate": 4.6685205821233e-05, "loss": 0.0422, "step": 41500 }, { "epoch": 0.20255, "grad_norm": 0.11876413971185684, "learning_rate": 4.668314861999667e-05, "loss": 0.0416, "step": 41510 }, { "epoch": 0.2026, "grad_norm": 0.10370395332574844, "learning_rate": 4.6681090825951194e-05, "loss": 0.0424, "step": 41520 }, { "epoch": 0.20265, "grad_norm": 0.12113114446401596, "learning_rate": 4.667903243915285e-05, "loss": 0.0421, "step": 41530 }, { "epoch": 0.2027, "grad_norm": 0.11187760531902313, "learning_rate": 4.66769734596579e-05, "loss": 0.0426, "step": 41540 }, { "epoch": 0.20275, "grad_norm": 0.11668987572193146, "learning_rate": 4.667491388752263e-05, "loss": 0.0423, "step": 41550 }, { "epoch": 0.2028, "grad_norm": 0.11089004576206207, "learning_rate": 4.6672853722803365e-05, "loss": 0.0426, "step": 41560 }, { "epoch": 0.20285, "grad_norm": 0.12049264460802078, "learning_rate": 4.667079296555642e-05, "loss": 0.0439, "step": 41570 }, { "epoch": 0.2029, "grad_norm": 0.10137578099966049, "learning_rate": 4.6668731615838144e-05, "loss": 0.0405, "step": 41580 }, { "epoch": 0.20295, "grad_norm": 0.11302396655082703, "learning_rate": 4.666666967370488e-05, "loss": 0.0417, "step": 41590 }, { "epoch": 0.203, "grad_norm": 0.11328114569187164, "learning_rate": 4.6664607139213e-05, "loss": 0.0422, "step": 41600 }, { "epoch": 0.20305, "grad_norm": 0.12339196354150772, "learning_rate": 4.666254401241891e-05, "loss": 0.0406, "step": 41610 }, { "epoch": 0.2031, "grad_norm": 0.09714601933956146, "learning_rate": 4.6660480293379006e-05, "loss": 0.0407, "step": 41620 }, { "epoch": 0.20315, "grad_norm": 0.13312238454818726, "learning_rate": 4.66584159821497e-05, "loss": 0.0432, "step": 41630 }, { "epoch": 0.2032, "grad_norm": 0.11992169916629791, "learning_rate": 4.665635107878744e-05, "loss": 0.0408, "step": 41640 }, { "epoch": 0.20325, "grad_norm": 0.10778101533651352, "learning_rate": 4.665428558334868e-05, "loss": 0.0411, "step": 41650 }, { "epoch": 0.2033, "grad_norm": 0.12014755606651306, "learning_rate": 4.665221949588989e-05, "loss": 0.0446, "step": 41660 }, { "epoch": 0.20335, "grad_norm": 0.090846486389637, "learning_rate": 4.6650152816467545e-05, "loss": 0.0382, "step": 41670 }, { "epoch": 0.2034, "grad_norm": 0.11623945832252502, "learning_rate": 4.6648085545138164e-05, "loss": 0.0391, "step": 41680 }, { "epoch": 0.20345, "grad_norm": 0.13945305347442627, "learning_rate": 4.6646017681958254e-05, "loss": 0.0414, "step": 41690 }, { "epoch": 0.2035, "grad_norm": 0.13401776552200317, "learning_rate": 4.664394922698435e-05, "loss": 0.041, "step": 41700 }, { "epoch": 0.20355, "grad_norm": 0.11550464481115341, "learning_rate": 4.664188018027301e-05, "loss": 0.0423, "step": 41710 }, { "epoch": 0.2036, "grad_norm": 0.11903069168329239, "learning_rate": 4.663981054188079e-05, "loss": 0.04, "step": 41720 }, { "epoch": 0.20365, "grad_norm": 0.1341111809015274, "learning_rate": 4.663774031186429e-05, "loss": 0.0407, "step": 41730 }, { "epoch": 0.2037, "grad_norm": 0.13095654547214508, "learning_rate": 4.6635669490280085e-05, "loss": 0.0405, "step": 41740 }, { "epoch": 0.20375, "grad_norm": 0.09725949913263321, "learning_rate": 4.6633598077184815e-05, "loss": 0.0421, "step": 41750 }, { "epoch": 0.2038, "grad_norm": 0.10696309059858322, "learning_rate": 4.6631526072635095e-05, "loss": 0.0387, "step": 41760 }, { "epoch": 0.20385, "grad_norm": 0.106041319668293, "learning_rate": 4.662945347668758e-05, "loss": 0.041, "step": 41770 }, { "epoch": 0.2039, "grad_norm": 0.10835712403059006, "learning_rate": 4.6627380289398936e-05, "loss": 0.0415, "step": 41780 }, { "epoch": 0.20395, "grad_norm": 0.09863156825304031, "learning_rate": 4.662530651082584e-05, "loss": 0.0415, "step": 41790 }, { "epoch": 0.204, "grad_norm": 0.12385343760251999, "learning_rate": 4.662323214102499e-05, "loss": 0.0401, "step": 41800 }, { "epoch": 0.20405, "grad_norm": 0.1045091450214386, "learning_rate": 4.6621157180053085e-05, "loss": 0.0418, "step": 41810 }, { "epoch": 0.2041, "grad_norm": 0.10694091022014618, "learning_rate": 4.661908162796687e-05, "loss": 0.0408, "step": 41820 }, { "epoch": 0.20415, "grad_norm": 0.11447501182556152, "learning_rate": 4.661700548482309e-05, "loss": 0.0402, "step": 41830 }, { "epoch": 0.2042, "grad_norm": 0.0982501357793808, "learning_rate": 4.66149287506785e-05, "loss": 0.0416, "step": 41840 }, { "epoch": 0.20425, "grad_norm": 0.16061526536941528, "learning_rate": 4.6612851425589876e-05, "loss": 0.0441, "step": 41850 }, { "epoch": 0.2043, "grad_norm": 0.11797483265399933, "learning_rate": 4.6610773509614016e-05, "loss": 0.0397, "step": 41860 }, { "epoch": 0.20435, "grad_norm": 0.11120536178350449, "learning_rate": 4.6608695002807724e-05, "loss": 0.0418, "step": 41870 }, { "epoch": 0.2044, "grad_norm": 0.10933781415224075, "learning_rate": 4.6606615905227834e-05, "loss": 0.042, "step": 41880 }, { "epoch": 0.20445, "grad_norm": 0.10738882422447205, "learning_rate": 4.6604536216931185e-05, "loss": 0.0412, "step": 41890 }, { "epoch": 0.2045, "grad_norm": 0.1318594068288803, "learning_rate": 4.660245593797462e-05, "loss": 0.0395, "step": 41900 }, { "epoch": 0.20455, "grad_norm": 0.13262218236923218, "learning_rate": 4.6600375068415034e-05, "loss": 0.0407, "step": 41910 }, { "epoch": 0.2046, "grad_norm": 0.10908342152833939, "learning_rate": 4.6598293608309306e-05, "loss": 0.0394, "step": 41920 }, { "epoch": 0.20465, "grad_norm": 0.12204093486070633, "learning_rate": 4.659621155771434e-05, "loss": 0.0412, "step": 41930 }, { "epoch": 0.2047, "grad_norm": 0.1055048406124115, "learning_rate": 4.6594128916687074e-05, "loss": 0.041, "step": 41940 }, { "epoch": 0.20475, "grad_norm": 0.12463721632957458, "learning_rate": 4.659204568528443e-05, "loss": 0.0419, "step": 41950 }, { "epoch": 0.2048, "grad_norm": 0.12951551377773285, "learning_rate": 4.658996186356337e-05, "loss": 0.0409, "step": 41960 }, { "epoch": 0.20485, "grad_norm": 0.11702567338943481, "learning_rate": 4.658787745158086e-05, "loss": 0.0428, "step": 41970 }, { "epoch": 0.2049, "grad_norm": 0.09821664541959763, "learning_rate": 4.6585792449393894e-05, "loss": 0.0421, "step": 41980 }, { "epoch": 0.20495, "grad_norm": 0.1046162024140358, "learning_rate": 4.6583706857059475e-05, "loss": 0.0408, "step": 41990 }, { "epoch": 0.205, "grad_norm": 0.13040538132190704, "learning_rate": 4.658162067463461e-05, "loss": 0.0397, "step": 42000 }, { "epoch": 0.20505, "grad_norm": 0.1357438713312149, "learning_rate": 4.657953390217635e-05, "loss": 0.0398, "step": 42010 }, { "epoch": 0.2051, "grad_norm": 0.13346247375011444, "learning_rate": 4.6577446539741745e-05, "loss": 0.0406, "step": 42020 }, { "epoch": 0.20515, "grad_norm": 0.12445792555809021, "learning_rate": 4.657535858738785e-05, "loss": 0.0404, "step": 42030 }, { "epoch": 0.2052, "grad_norm": 0.1608707159757614, "learning_rate": 4.657327004517176e-05, "loss": 0.041, "step": 42040 }, { "epoch": 0.20525, "grad_norm": 0.12387620657682419, "learning_rate": 4.657118091315057e-05, "loss": 0.0402, "step": 42050 }, { "epoch": 0.2053, "grad_norm": 0.12562602758407593, "learning_rate": 4.65690911913814e-05, "loss": 0.0407, "step": 42060 }, { "epoch": 0.20535, "grad_norm": 0.12161588668823242, "learning_rate": 4.6567000879921376e-05, "loss": 0.0411, "step": 42070 }, { "epoch": 0.2054, "grad_norm": 0.10995645076036453, "learning_rate": 4.656490997882765e-05, "loss": 0.0415, "step": 42080 }, { "epoch": 0.20545, "grad_norm": 0.09514041990041733, "learning_rate": 4.656281848815739e-05, "loss": 0.0406, "step": 42090 }, { "epoch": 0.2055, "grad_norm": 0.12755118310451508, "learning_rate": 4.656072640796777e-05, "loss": 0.0428, "step": 42100 }, { "epoch": 0.20555, "grad_norm": 0.14956414699554443, "learning_rate": 4.655863373831599e-05, "loss": 0.0404, "step": 42110 }, { "epoch": 0.2056, "grad_norm": 0.11889217048883438, "learning_rate": 4.655654047925927e-05, "loss": 0.0423, "step": 42120 }, { "epoch": 0.20565, "grad_norm": 0.10934683680534363, "learning_rate": 4.6554446630854833e-05, "loss": 0.0406, "step": 42130 }, { "epoch": 0.2057, "grad_norm": 0.14071911573410034, "learning_rate": 4.655235219315991e-05, "loss": 0.0445, "step": 42140 }, { "epoch": 0.20575, "grad_norm": 0.1343953013420105, "learning_rate": 4.6550257166231784e-05, "loss": 0.0414, "step": 42150 }, { "epoch": 0.2058, "grad_norm": 0.1337887942790985, "learning_rate": 4.654816155012772e-05, "loss": 0.0411, "step": 42160 }, { "epoch": 0.20585, "grad_norm": 0.10310254991054535, "learning_rate": 4.6546065344905015e-05, "loss": 0.0416, "step": 42170 }, { "epoch": 0.2059, "grad_norm": 0.12980087101459503, "learning_rate": 4.654396855062098e-05, "loss": 0.0419, "step": 42180 }, { "epoch": 0.20595, "grad_norm": 0.1000036969780922, "learning_rate": 4.6541871167332934e-05, "loss": 0.0403, "step": 42190 }, { "epoch": 0.206, "grad_norm": 0.11595522612333298, "learning_rate": 4.653977319509822e-05, "loss": 0.0432, "step": 42200 }, { "epoch": 0.20605, "grad_norm": 0.12469027191400528, "learning_rate": 4.653767463397421e-05, "loss": 0.0438, "step": 42210 }, { "epoch": 0.2061, "grad_norm": 0.13064132630825043, "learning_rate": 4.653557548401827e-05, "loss": 0.0413, "step": 42220 }, { "epoch": 0.20615, "grad_norm": 0.13942815363407135, "learning_rate": 4.653347574528777e-05, "loss": 0.0429, "step": 42230 }, { "epoch": 0.2062, "grad_norm": 0.11371278017759323, "learning_rate": 4.6531375417840145e-05, "loss": 0.0411, "step": 42240 }, { "epoch": 0.20625, "grad_norm": 0.12325238436460495, "learning_rate": 4.65292745017328e-05, "loss": 0.04, "step": 42250 }, { "epoch": 0.2063, "grad_norm": 0.13593898713588715, "learning_rate": 4.6527172997023184e-05, "loss": 0.042, "step": 42260 }, { "epoch": 0.20635, "grad_norm": 0.14073550701141357, "learning_rate": 4.652507090376874e-05, "loss": 0.0419, "step": 42270 }, { "epoch": 0.2064, "grad_norm": 0.13174958527088165, "learning_rate": 4.652296822202694e-05, "loss": 0.0413, "step": 42280 }, { "epoch": 0.20645, "grad_norm": 0.13833463191986084, "learning_rate": 4.652086495185528e-05, "loss": 0.0427, "step": 42290 }, { "epoch": 0.2065, "grad_norm": 0.10557810217142105, "learning_rate": 4.6518761093311256e-05, "loss": 0.0453, "step": 42300 }, { "epoch": 0.20655, "grad_norm": 0.11190221458673477, "learning_rate": 4.6516656646452395e-05, "loss": 0.0405, "step": 42310 }, { "epoch": 0.2066, "grad_norm": 0.1107366532087326, "learning_rate": 4.651455161133622e-05, "loss": 0.0416, "step": 42320 }, { "epoch": 0.20665, "grad_norm": 0.10672711580991745, "learning_rate": 4.651244598802028e-05, "loss": 0.0435, "step": 42330 }, { "epoch": 0.2067, "grad_norm": 0.11309855431318283, "learning_rate": 4.651033977656216e-05, "loss": 0.0419, "step": 42340 }, { "epoch": 0.20675, "grad_norm": 0.11873457580804825, "learning_rate": 4.650823297701942e-05, "loss": 0.0413, "step": 42350 }, { "epoch": 0.2068, "grad_norm": 0.12466181814670563, "learning_rate": 4.650612558944968e-05, "loss": 0.0423, "step": 42360 }, { "epoch": 0.20685, "grad_norm": 0.1014702096581459, "learning_rate": 4.650401761391054e-05, "loss": 0.0427, "step": 42370 }, { "epoch": 0.2069, "grad_norm": 0.12159010767936707, "learning_rate": 4.6501909050459644e-05, "loss": 0.0421, "step": 42380 }, { "epoch": 0.20695, "grad_norm": 0.1161002367734909, "learning_rate": 4.649979989915463e-05, "loss": 0.0453, "step": 42390 }, { "epoch": 0.207, "grad_norm": 0.16336022317409515, "learning_rate": 4.649769016005316e-05, "loss": 0.0422, "step": 42400 }, { "epoch": 0.20705, "grad_norm": 0.11819625645875931, "learning_rate": 4.649557983321292e-05, "loss": 0.0401, "step": 42410 }, { "epoch": 0.2071, "grad_norm": 0.15638066828250885, "learning_rate": 4.649346891869159e-05, "loss": 0.0399, "step": 42420 }, { "epoch": 0.20715, "grad_norm": 0.1084650382399559, "learning_rate": 4.649135741654691e-05, "loss": 0.0399, "step": 42430 }, { "epoch": 0.2072, "grad_norm": 0.12430199235677719, "learning_rate": 4.648924532683659e-05, "loss": 0.0405, "step": 42440 }, { "epoch": 0.20725, "grad_norm": 0.11965826898813248, "learning_rate": 4.648713264961838e-05, "loss": 0.042, "step": 42450 }, { "epoch": 0.2073, "grad_norm": 0.11596711724996567, "learning_rate": 4.648501938495003e-05, "loss": 0.0405, "step": 42460 }, { "epoch": 0.20735, "grad_norm": 0.1251993179321289, "learning_rate": 4.648290553288932e-05, "loss": 0.0405, "step": 42470 }, { "epoch": 0.2074, "grad_norm": 0.10778885334730148, "learning_rate": 4.6480791093494046e-05, "loss": 0.0422, "step": 42480 }, { "epoch": 0.20745, "grad_norm": 0.1158812865614891, "learning_rate": 4.6478676066822016e-05, "loss": 0.041, "step": 42490 }, { "epoch": 0.2075, "grad_norm": 0.107663094997406, "learning_rate": 4.647656045293104e-05, "loss": 0.0403, "step": 42500 }, { "epoch": 0.20755, "grad_norm": 0.09674108773469925, "learning_rate": 4.647444425187898e-05, "loss": 0.0409, "step": 42510 }, { "epoch": 0.2076, "grad_norm": 0.10285669565200806, "learning_rate": 4.6472327463723684e-05, "loss": 0.0389, "step": 42520 }, { "epoch": 0.20765, "grad_norm": 0.09745500981807709, "learning_rate": 4.6470210088523015e-05, "loss": 0.0398, "step": 42530 }, { "epoch": 0.2077, "grad_norm": 0.11266753077507019, "learning_rate": 4.646809212633487e-05, "loss": 0.0416, "step": 42540 }, { "epoch": 0.20775, "grad_norm": 0.13525390625, "learning_rate": 4.6465973577217146e-05, "loss": 0.0421, "step": 42550 }, { "epoch": 0.2078, "grad_norm": 0.14094169437885284, "learning_rate": 4.6463854441227785e-05, "loss": 0.0413, "step": 42560 }, { "epoch": 0.20785, "grad_norm": 0.10353077203035355, "learning_rate": 4.6461734718424685e-05, "loss": 0.0402, "step": 42570 }, { "epoch": 0.2079, "grad_norm": 0.11642540246248245, "learning_rate": 4.6459614408865836e-05, "loss": 0.0399, "step": 42580 }, { "epoch": 0.20795, "grad_norm": 0.13795001804828644, "learning_rate": 4.645749351260919e-05, "loss": 0.0413, "step": 42590 }, { "epoch": 0.208, "grad_norm": 0.1193564310669899, "learning_rate": 4.645537202971273e-05, "loss": 0.0394, "step": 42600 }, { "epoch": 0.20805, "grad_norm": 0.09946589171886444, "learning_rate": 4.645324996023446e-05, "loss": 0.0408, "step": 42610 }, { "epoch": 0.2081, "grad_norm": 0.12816078960895538, "learning_rate": 4.64511273042324e-05, "loss": 0.0407, "step": 42620 }, { "epoch": 0.20815, "grad_norm": 0.11315611749887466, "learning_rate": 4.6449004061764565e-05, "loss": 0.0398, "step": 42630 }, { "epoch": 0.2082, "grad_norm": 0.10693423449993134, "learning_rate": 4.644688023288903e-05, "loss": 0.0434, "step": 42640 }, { "epoch": 0.20825, "grad_norm": 0.09122775495052338, "learning_rate": 4.6444755817663845e-05, "loss": 0.0405, "step": 42650 }, { "epoch": 0.2083, "grad_norm": 0.1261245608329773, "learning_rate": 4.644263081614708e-05, "loss": 0.04, "step": 42660 }, { "epoch": 0.20835, "grad_norm": 0.10321059823036194, "learning_rate": 4.6440505228396855e-05, "loss": 0.0402, "step": 42670 }, { "epoch": 0.2084, "grad_norm": 0.10114654153585434, "learning_rate": 4.6438379054471274e-05, "loss": 0.0422, "step": 42680 }, { "epoch": 0.20845, "grad_norm": 0.10504954308271408, "learning_rate": 4.643625229442846e-05, "loss": 0.0415, "step": 42690 }, { "epoch": 0.2085, "grad_norm": 0.11723367124795914, "learning_rate": 4.6434124948326564e-05, "loss": 0.0415, "step": 42700 }, { "epoch": 0.20855, "grad_norm": 0.11996826529502869, "learning_rate": 4.643199701622374e-05, "loss": 0.0427, "step": 42710 }, { "epoch": 0.2086, "grad_norm": 0.11167429387569427, "learning_rate": 4.642986849817817e-05, "loss": 0.0403, "step": 42720 }, { "epoch": 0.20865, "grad_norm": 0.13511034846305847, "learning_rate": 4.6427739394248046e-05, "loss": 0.0419, "step": 42730 }, { "epoch": 0.2087, "grad_norm": 0.12214132398366928, "learning_rate": 4.642560970449158e-05, "loss": 0.0421, "step": 42740 }, { "epoch": 0.20875, "grad_norm": 0.1669897884130478, "learning_rate": 4.642347942896699e-05, "loss": 0.0427, "step": 42750 }, { "epoch": 0.2088, "grad_norm": 0.12708796560764313, "learning_rate": 4.642134856773253e-05, "loss": 0.0395, "step": 42760 }, { "epoch": 0.20885, "grad_norm": 0.13688746094703674, "learning_rate": 4.641921712084644e-05, "loss": 0.044, "step": 42770 }, { "epoch": 0.2089, "grad_norm": 0.13669873774051666, "learning_rate": 4.6417085088366996e-05, "loss": 0.0404, "step": 42780 }, { "epoch": 0.20895, "grad_norm": 0.13903357088565826, "learning_rate": 4.6414952470352494e-05, "loss": 0.0403, "step": 42790 }, { "epoch": 0.209, "grad_norm": 0.13542260229587555, "learning_rate": 4.641281926686124e-05, "loss": 0.0429, "step": 42800 }, { "epoch": 0.20905, "grad_norm": 0.14414159953594208, "learning_rate": 4.641068547795155e-05, "loss": 0.0428, "step": 42810 }, { "epoch": 0.2091, "grad_norm": 0.09341292828321457, "learning_rate": 4.640855110368177e-05, "loss": 0.0393, "step": 42820 }, { "epoch": 0.20915, "grad_norm": 0.13524645566940308, "learning_rate": 4.6406416144110236e-05, "loss": 0.0424, "step": 42830 }, { "epoch": 0.2092, "grad_norm": 0.11175638437271118, "learning_rate": 4.640428059929534e-05, "loss": 0.0441, "step": 42840 }, { "epoch": 0.20925, "grad_norm": 0.10336706787347794, "learning_rate": 4.640214446929544e-05, "loss": 0.0397, "step": 42850 }, { "epoch": 0.2093, "grad_norm": 0.09097188711166382, "learning_rate": 4.640000775416895e-05, "loss": 0.0405, "step": 42860 }, { "epoch": 0.20935, "grad_norm": 0.10464566200971603, "learning_rate": 4.639787045397429e-05, "loss": 0.04, "step": 42870 }, { "epoch": 0.2094, "grad_norm": 0.10859711468219757, "learning_rate": 4.639573256876989e-05, "loss": 0.0425, "step": 42880 }, { "epoch": 0.20945, "grad_norm": 0.11505578458309174, "learning_rate": 4.6393594098614204e-05, "loss": 0.0403, "step": 42890 }, { "epoch": 0.2095, "grad_norm": 0.11063778400421143, "learning_rate": 4.63914550435657e-05, "loss": 0.0438, "step": 42900 }, { "epoch": 0.20955, "grad_norm": 0.11395706981420517, "learning_rate": 4.6389315403682846e-05, "loss": 0.0404, "step": 42910 }, { "epoch": 0.2096, "grad_norm": 0.11956844478845596, "learning_rate": 4.6387175179024134e-05, "loss": 0.0417, "step": 42920 }, { "epoch": 0.20965, "grad_norm": 0.11688018590211868, "learning_rate": 4.6385034369648096e-05, "loss": 0.0401, "step": 42930 }, { "epoch": 0.2097, "grad_norm": 0.10507351905107498, "learning_rate": 4.6382892975613244e-05, "loss": 0.0404, "step": 42940 }, { "epoch": 0.20975, "grad_norm": 0.10709551721811295, "learning_rate": 4.638075099697814e-05, "loss": 0.042, "step": 42950 }, { "epoch": 0.2098, "grad_norm": 0.1598290354013443, "learning_rate": 4.6378608433801336e-05, "loss": 0.0445, "step": 42960 }, { "epoch": 0.20985, "grad_norm": 0.12490461766719818, "learning_rate": 4.637646528614141e-05, "loss": 0.0432, "step": 42970 }, { "epoch": 0.2099, "grad_norm": 0.13051536679267883, "learning_rate": 4.637432155405694e-05, "loss": 0.0441, "step": 42980 }, { "epoch": 0.20995, "grad_norm": 0.11199908703565598, "learning_rate": 4.6372177237606565e-05, "loss": 0.0405, "step": 42990 }, { "epoch": 0.21, "grad_norm": 0.13808445632457733, "learning_rate": 4.637003233684889e-05, "loss": 0.0419, "step": 43000 }, { "epoch": 0.21005, "grad_norm": 0.163025364279747, "learning_rate": 4.636788685184256e-05, "loss": 0.0421, "step": 43010 }, { "epoch": 0.2101, "grad_norm": 0.11270739883184433, "learning_rate": 4.636574078264623e-05, "loss": 0.0432, "step": 43020 }, { "epoch": 0.21015, "grad_norm": 0.15009522438049316, "learning_rate": 4.636359412931857e-05, "loss": 0.0429, "step": 43030 }, { "epoch": 0.2102, "grad_norm": 0.13547831773757935, "learning_rate": 4.636144689191827e-05, "loss": 0.0415, "step": 43040 }, { "epoch": 0.21025, "grad_norm": 0.1354401558637619, "learning_rate": 4.635929907050404e-05, "loss": 0.0419, "step": 43050 }, { "epoch": 0.2103, "grad_norm": 0.11277691274881363, "learning_rate": 4.63571506651346e-05, "loss": 0.0428, "step": 43060 }, { "epoch": 0.21035, "grad_norm": 0.134175643324852, "learning_rate": 4.635500167586868e-05, "loss": 0.0418, "step": 43070 }, { "epoch": 0.2104, "grad_norm": 0.11299371719360352, "learning_rate": 4.635285210276504e-05, "loss": 0.04, "step": 43080 }, { "epoch": 0.21045, "grad_norm": 0.11659647524356842, "learning_rate": 4.635070194588245e-05, "loss": 0.0437, "step": 43090 }, { "epoch": 0.2105, "grad_norm": 0.11916449666023254, "learning_rate": 4.6348551205279686e-05, "loss": 0.0405, "step": 43100 }, { "epoch": 0.21055, "grad_norm": 0.12605997920036316, "learning_rate": 4.634639988101555e-05, "loss": 0.0406, "step": 43110 }, { "epoch": 0.2106, "grad_norm": 0.1151721179485321, "learning_rate": 4.6344247973148866e-05, "loss": 0.0438, "step": 43120 }, { "epoch": 0.21065, "grad_norm": 0.1175384446978569, "learning_rate": 4.634209548173846e-05, "loss": 0.0433, "step": 43130 }, { "epoch": 0.2107, "grad_norm": 0.10693112015724182, "learning_rate": 4.6339942406843174e-05, "loss": 0.0393, "step": 43140 }, { "epoch": 0.21075, "grad_norm": 0.09275225549936295, "learning_rate": 4.6337788748521886e-05, "loss": 0.0397, "step": 43150 }, { "epoch": 0.2108, "grad_norm": 0.104120172560215, "learning_rate": 4.633563450683347e-05, "loss": 0.0395, "step": 43160 }, { "epoch": 0.21085, "grad_norm": 0.11659583449363708, "learning_rate": 4.6333479681836825e-05, "loss": 0.0399, "step": 43170 }, { "epoch": 0.2109, "grad_norm": 0.11879605799913406, "learning_rate": 4.633132427359085e-05, "loss": 0.0418, "step": 43180 }, { "epoch": 0.21095, "grad_norm": 0.1126229539513588, "learning_rate": 4.632916828215449e-05, "loss": 0.0412, "step": 43190 }, { "epoch": 0.211, "grad_norm": 0.1250193566083908, "learning_rate": 4.632701170758668e-05, "loss": 0.0417, "step": 43200 }, { "epoch": 0.21105, "grad_norm": 0.10943544656038284, "learning_rate": 4.632485454994638e-05, "loss": 0.0392, "step": 43210 }, { "epoch": 0.2111, "grad_norm": 0.10515572130680084, "learning_rate": 4.632269680929257e-05, "loss": 0.0413, "step": 43220 }, { "epoch": 0.21115, "grad_norm": 0.1227058544754982, "learning_rate": 4.632053848568425e-05, "loss": 0.0443, "step": 43230 }, { "epoch": 0.2112, "grad_norm": 0.12968966364860535, "learning_rate": 4.6318379579180404e-05, "loss": 0.04, "step": 43240 }, { "epoch": 0.21125, "grad_norm": 0.13288845121860504, "learning_rate": 4.631622008984007e-05, "loss": 0.0412, "step": 43250 }, { "epoch": 0.2113, "grad_norm": 0.12562234699726105, "learning_rate": 4.6314060017722296e-05, "loss": 0.0422, "step": 43260 }, { "epoch": 0.21135, "grad_norm": 0.11604010313749313, "learning_rate": 4.631189936288612e-05, "loss": 0.0404, "step": 43270 }, { "epoch": 0.2114, "grad_norm": 0.10224141925573349, "learning_rate": 4.630973812539063e-05, "loss": 0.0398, "step": 43280 }, { "epoch": 0.21145, "grad_norm": 0.11876539885997772, "learning_rate": 4.63075763052949e-05, "loss": 0.0416, "step": 43290 }, { "epoch": 0.2115, "grad_norm": 0.105568528175354, "learning_rate": 4.6305413902658036e-05, "loss": 0.0395, "step": 43300 }, { "epoch": 0.21155, "grad_norm": 0.11702471971511841, "learning_rate": 4.630325091753917e-05, "loss": 0.0407, "step": 43310 }, { "epoch": 0.2116, "grad_norm": 0.10368052870035172, "learning_rate": 4.6301087349997416e-05, "loss": 0.0417, "step": 43320 }, { "epoch": 0.21165, "grad_norm": 0.11167798191308975, "learning_rate": 4.629892320009194e-05, "loss": 0.0426, "step": 43330 }, { "epoch": 0.2117, "grad_norm": 0.12406478822231293, "learning_rate": 4.62967584678819e-05, "loss": 0.0407, "step": 43340 }, { "epoch": 0.21175, "grad_norm": 0.11944945901632309, "learning_rate": 4.6294593153426496e-05, "loss": 0.0407, "step": 43350 }, { "epoch": 0.2118, "grad_norm": 0.1270545870065689, "learning_rate": 4.629242725678491e-05, "loss": 0.0405, "step": 43360 }, { "epoch": 0.21185, "grad_norm": 0.12933006882667542, "learning_rate": 4.629026077801636e-05, "loss": 0.0397, "step": 43370 }, { "epoch": 0.2119, "grad_norm": 0.11296647787094116, "learning_rate": 4.628809371718008e-05, "loss": 0.0425, "step": 43380 }, { "epoch": 0.21195, "grad_norm": 0.11964956670999527, "learning_rate": 4.6285926074335315e-05, "loss": 0.0409, "step": 43390 }, { "epoch": 0.212, "grad_norm": 0.1273273080587387, "learning_rate": 4.628375784954133e-05, "loss": 0.0403, "step": 43400 }, { "epoch": 0.21205, "grad_norm": 0.09842070192098618, "learning_rate": 4.62815890428574e-05, "loss": 0.0395, "step": 43410 }, { "epoch": 0.2121, "grad_norm": 0.10548171401023865, "learning_rate": 4.627941965434281e-05, "loss": 0.043, "step": 43420 }, { "epoch": 0.21215, "grad_norm": 0.1085827574133873, "learning_rate": 4.62772496840569e-05, "loss": 0.0413, "step": 43430 }, { "epoch": 0.2122, "grad_norm": 0.11527638882398605, "learning_rate": 4.627507913205897e-05, "loss": 0.0391, "step": 43440 }, { "epoch": 0.21225, "grad_norm": 0.10231008380651474, "learning_rate": 4.627290799840837e-05, "loss": 0.0396, "step": 43450 }, { "epoch": 0.2123, "grad_norm": 0.1240062266588211, "learning_rate": 4.627073628316445e-05, "loss": 0.0395, "step": 43460 }, { "epoch": 0.21235, "grad_norm": 0.11377868801355362, "learning_rate": 4.6268563986386596e-05, "loss": 0.0436, "step": 43470 }, { "epoch": 0.2124, "grad_norm": 0.1008082777261734, "learning_rate": 4.6266391108134195e-05, "loss": 0.0411, "step": 43480 }, { "epoch": 0.21245, "grad_norm": 0.1036459431052208, "learning_rate": 4.626421764846665e-05, "loss": 0.0405, "step": 43490 }, { "epoch": 0.2125, "grad_norm": 0.11455801874399185, "learning_rate": 4.626204360744338e-05, "loss": 0.0409, "step": 43500 }, { "epoch": 0.21255, "grad_norm": 0.12558279931545258, "learning_rate": 4.625986898512382e-05, "loss": 0.0428, "step": 43510 }, { "epoch": 0.2126, "grad_norm": 0.11081899702548981, "learning_rate": 4.625769378156744e-05, "loss": 0.0427, "step": 43520 }, { "epoch": 0.21265, "grad_norm": 0.11352989077568054, "learning_rate": 4.6255517996833696e-05, "loss": 0.0404, "step": 43530 }, { "epoch": 0.2127, "grad_norm": 0.11873759329319, "learning_rate": 4.6253341630982075e-05, "loss": 0.0397, "step": 43540 }, { "epoch": 0.21275, "grad_norm": 0.1015138179063797, "learning_rate": 4.6251164684072065e-05, "loss": 0.0426, "step": 43550 }, { "epoch": 0.2128, "grad_norm": 0.12051185220479965, "learning_rate": 4.624898715616322e-05, "loss": 0.0411, "step": 43560 }, { "epoch": 0.21285, "grad_norm": 0.11388301849365234, "learning_rate": 4.6246809047315034e-05, "loss": 0.0407, "step": 43570 }, { "epoch": 0.2129, "grad_norm": 0.09138743579387665, "learning_rate": 4.624463035758707e-05, "loss": 0.04, "step": 43580 }, { "epoch": 0.21295, "grad_norm": 0.14637517929077148, "learning_rate": 4.62424510870389e-05, "loss": 0.0454, "step": 43590 }, { "epoch": 0.213, "grad_norm": 0.11419618129730225, "learning_rate": 4.6240271235730095e-05, "loss": 0.0408, "step": 43600 }, { "epoch": 0.21305, "grad_norm": 0.1109025701880455, "learning_rate": 4.623809080372025e-05, "loss": 0.0409, "step": 43610 }, { "epoch": 0.2131, "grad_norm": 0.10209771245718002, "learning_rate": 4.6235909791068986e-05, "loss": 0.0412, "step": 43620 }, { "epoch": 0.21315, "grad_norm": 0.09903377294540405, "learning_rate": 4.623372819783592e-05, "loss": 0.0411, "step": 43630 }, { "epoch": 0.2132, "grad_norm": 0.10271365940570831, "learning_rate": 4.623154602408071e-05, "loss": 0.0405, "step": 43640 }, { "epoch": 0.21325, "grad_norm": 0.09763599932193756, "learning_rate": 4.622936326986301e-05, "loss": 0.0442, "step": 43650 }, { "epoch": 0.2133, "grad_norm": 0.10397458076477051, "learning_rate": 4.622717993524249e-05, "loss": 0.0424, "step": 43660 }, { "epoch": 0.21335, "grad_norm": 0.10201514512300491, "learning_rate": 4.6224996020278844e-05, "loss": 0.04, "step": 43670 }, { "epoch": 0.2134, "grad_norm": 0.10763736069202423, "learning_rate": 4.622281152503177e-05, "loss": 0.0406, "step": 43680 }, { "epoch": 0.21345, "grad_norm": 0.1229335218667984, "learning_rate": 4.622062644956102e-05, "loss": 0.0441, "step": 43690 }, { "epoch": 0.2135, "grad_norm": 0.10760986804962158, "learning_rate": 4.621844079392631e-05, "loss": 0.0419, "step": 43700 }, { "epoch": 0.21355, "grad_norm": 0.10992319136857986, "learning_rate": 4.6216254558187395e-05, "loss": 0.0389, "step": 43710 }, { "epoch": 0.2136, "grad_norm": 0.10541414469480515, "learning_rate": 4.6214067742404055e-05, "loss": 0.0406, "step": 43720 }, { "epoch": 0.21365, "grad_norm": 0.111421599984169, "learning_rate": 4.621188034663607e-05, "loss": 0.0406, "step": 43730 }, { "epoch": 0.2137, "grad_norm": 0.10435806214809418, "learning_rate": 4.620969237094325e-05, "loss": 0.0391, "step": 43740 }, { "epoch": 0.21375, "grad_norm": 0.12230052053928375, "learning_rate": 4.62075038153854e-05, "loss": 0.0418, "step": 43750 }, { "epoch": 0.2138, "grad_norm": 0.11830747127532959, "learning_rate": 4.620531468002237e-05, "loss": 0.0402, "step": 43760 }, { "epoch": 0.21385, "grad_norm": 0.1271902620792389, "learning_rate": 4.6203124964914005e-05, "loss": 0.04, "step": 43770 }, { "epoch": 0.2139, "grad_norm": 0.11162140220403671, "learning_rate": 4.620093467012017e-05, "loss": 0.0406, "step": 43780 }, { "epoch": 0.21395, "grad_norm": 0.11397633701562881, "learning_rate": 4.619874379570074e-05, "loss": 0.0404, "step": 43790 }, { "epoch": 0.214, "grad_norm": 0.11327353864908218, "learning_rate": 4.6196552341715615e-05, "loss": 0.0412, "step": 43800 }, { "epoch": 0.21405, "grad_norm": 0.10754092037677765, "learning_rate": 4.6194360308224715e-05, "loss": 0.0393, "step": 43810 }, { "epoch": 0.2141, "grad_norm": 0.10313699394464493, "learning_rate": 4.619216769528797e-05, "loss": 0.043, "step": 43820 }, { "epoch": 0.21415, "grad_norm": 0.11732255667448044, "learning_rate": 4.6189974502965324e-05, "loss": 0.0441, "step": 43830 }, { "epoch": 0.2142, "grad_norm": 0.1100316047668457, "learning_rate": 4.618778073131673e-05, "loss": 0.0419, "step": 43840 }, { "epoch": 0.21425, "grad_norm": 0.11513995379209518, "learning_rate": 4.6185586380402174e-05, "loss": 0.0392, "step": 43850 }, { "epoch": 0.2143, "grad_norm": 0.11842764168977737, "learning_rate": 4.618339145028164e-05, "loss": 0.0384, "step": 43860 }, { "epoch": 0.21435, "grad_norm": 0.10758869349956512, "learning_rate": 4.618119594101515e-05, "loss": 0.0383, "step": 43870 }, { "epoch": 0.2144, "grad_norm": 0.12508603930473328, "learning_rate": 4.617899985266272e-05, "loss": 0.0407, "step": 43880 }, { "epoch": 0.21445, "grad_norm": 0.12971076369285583, "learning_rate": 4.617680318528439e-05, "loss": 0.0412, "step": 43890 }, { "epoch": 0.2145, "grad_norm": 0.12210968881845474, "learning_rate": 4.617460593894021e-05, "loss": 0.0386, "step": 43900 }, { "epoch": 0.21455, "grad_norm": 0.14959664642810822, "learning_rate": 4.617240811369026e-05, "loss": 0.0387, "step": 43910 }, { "epoch": 0.2146, "grad_norm": 0.11446508765220642, "learning_rate": 4.617020970959463e-05, "loss": 0.0388, "step": 43920 }, { "epoch": 0.21465, "grad_norm": 0.11315374821424484, "learning_rate": 4.616801072671342e-05, "loss": 0.0407, "step": 43930 }, { "epoch": 0.2147, "grad_norm": 0.12179291993379593, "learning_rate": 4.6165811165106746e-05, "loss": 0.0385, "step": 43940 }, { "epoch": 0.21475, "grad_norm": 0.11779157817363739, "learning_rate": 4.616361102483475e-05, "loss": 0.0389, "step": 43950 }, { "epoch": 0.2148, "grad_norm": 0.11438784748315811, "learning_rate": 4.616141030595757e-05, "loss": 0.0399, "step": 43960 }, { "epoch": 0.21485, "grad_norm": 0.10303477197885513, "learning_rate": 4.6159209008535397e-05, "loss": 0.0388, "step": 43970 }, { "epoch": 0.2149, "grad_norm": 0.1258867383003235, "learning_rate": 4.6157007132628396e-05, "loss": 0.044, "step": 43980 }, { "epoch": 0.21495, "grad_norm": 0.11557912081480026, "learning_rate": 4.615480467829676e-05, "loss": 0.0401, "step": 43990 }, { "epoch": 0.215, "grad_norm": 0.13071365654468536, "learning_rate": 4.615260164560071e-05, "loss": 0.0407, "step": 44000 }, { "epoch": 0.21505, "grad_norm": 0.10629302263259888, "learning_rate": 4.615039803460049e-05, "loss": 0.0384, "step": 44010 }, { "epoch": 0.2151, "grad_norm": 0.11779369413852692, "learning_rate": 4.6148193845356324e-05, "loss": 0.0415, "step": 44020 }, { "epoch": 0.21515, "grad_norm": 0.11141815036535263, "learning_rate": 4.6145989077928486e-05, "loss": 0.0392, "step": 44030 }, { "epoch": 0.2152, "grad_norm": 0.14413152635097504, "learning_rate": 4.614378373237726e-05, "loss": 0.0399, "step": 44040 }, { "epoch": 0.21525, "grad_norm": 0.1350352168083191, "learning_rate": 4.614157780876292e-05, "loss": 0.0401, "step": 44050 }, { "epoch": 0.2153, "grad_norm": 0.12539725005626678, "learning_rate": 4.613937130714578e-05, "loss": 0.0402, "step": 44060 }, { "epoch": 0.21535, "grad_norm": 0.13736669719219208, "learning_rate": 4.6137164227586177e-05, "loss": 0.0402, "step": 44070 }, { "epoch": 0.2154, "grad_norm": 0.11826146394014359, "learning_rate": 4.613495657014445e-05, "loss": 0.0438, "step": 44080 }, { "epoch": 0.21545, "grad_norm": 0.11898969113826752, "learning_rate": 4.613274833488094e-05, "loss": 0.039, "step": 44090 }, { "epoch": 0.2155, "grad_norm": 0.12201030552387238, "learning_rate": 4.613053952185604e-05, "loss": 0.0408, "step": 44100 }, { "epoch": 0.21555, "grad_norm": 0.13165467977523804, "learning_rate": 4.612833013113012e-05, "loss": 0.0391, "step": 44110 }, { "epoch": 0.2156, "grad_norm": 0.12074373662471771, "learning_rate": 4.6126120162763595e-05, "loss": 0.0391, "step": 44120 }, { "epoch": 0.21565, "grad_norm": 0.11341175436973572, "learning_rate": 4.612390961681687e-05, "loss": 0.0385, "step": 44130 }, { "epoch": 0.2157, "grad_norm": 0.11415523290634155, "learning_rate": 4.612169849335041e-05, "loss": 0.041, "step": 44140 }, { "epoch": 0.21575, "grad_norm": 0.11367049813270569, "learning_rate": 4.6119486792424645e-05, "loss": 0.0389, "step": 44150 }, { "epoch": 0.2158, "grad_norm": 0.1104145422577858, "learning_rate": 4.611727451410004e-05, "loss": 0.0393, "step": 44160 }, { "epoch": 0.21585, "grad_norm": 0.125356525182724, "learning_rate": 4.611506165843708e-05, "loss": 0.0386, "step": 44170 }, { "epoch": 0.2159, "grad_norm": 0.1293518990278244, "learning_rate": 4.611284822549627e-05, "loss": 0.0399, "step": 44180 }, { "epoch": 0.21595, "grad_norm": 0.1320677399635315, "learning_rate": 4.611063421533812e-05, "loss": 0.0422, "step": 44190 }, { "epoch": 0.216, "grad_norm": 0.12373978644609451, "learning_rate": 4.610841962802317e-05, "loss": 0.0404, "step": 44200 }, { "epoch": 0.21605, "grad_norm": 0.12895497679710388, "learning_rate": 4.6106204463611944e-05, "loss": 0.0408, "step": 44210 }, { "epoch": 0.2161, "grad_norm": 0.11972857266664505, "learning_rate": 4.610398872216503e-05, "loss": 0.0396, "step": 44220 }, { "epoch": 0.21615, "grad_norm": 0.11606613546609879, "learning_rate": 4.610177240374299e-05, "loss": 0.0405, "step": 44230 }, { "epoch": 0.2162, "grad_norm": 0.1022796630859375, "learning_rate": 4.609955550840641e-05, "loss": 0.0405, "step": 44240 }, { "epoch": 0.21625, "grad_norm": 0.11300525069236755, "learning_rate": 4.609733803621592e-05, "loss": 0.0432, "step": 44250 }, { "epoch": 0.2163, "grad_norm": 0.12350910902023315, "learning_rate": 4.609511998723213e-05, "loss": 0.0431, "step": 44260 }, { "epoch": 0.21635, "grad_norm": 0.13697876036167145, "learning_rate": 4.6092901361515684e-05, "loss": 0.0411, "step": 44270 }, { "epoch": 0.2164, "grad_norm": 0.11545902490615845, "learning_rate": 4.609068215912724e-05, "loss": 0.0409, "step": 44280 }, { "epoch": 0.21645, "grad_norm": 0.13642287254333496, "learning_rate": 4.6088462380127476e-05, "loss": 0.0408, "step": 44290 }, { "epoch": 0.2165, "grad_norm": 0.11719198524951935, "learning_rate": 4.608624202457706e-05, "loss": 0.0423, "step": 44300 }, { "epoch": 0.21655, "grad_norm": 0.1083524227142334, "learning_rate": 4.6084021092536715e-05, "loss": 0.0405, "step": 44310 }, { "epoch": 0.2166, "grad_norm": 0.11146090924739838, "learning_rate": 4.608179958406715e-05, "loss": 0.0438, "step": 44320 }, { "epoch": 0.21665, "grad_norm": 0.10394291579723358, "learning_rate": 4.607957749922911e-05, "loss": 0.0416, "step": 44330 }, { "epoch": 0.2167, "grad_norm": 0.11491485685110092, "learning_rate": 4.607735483808334e-05, "loss": 0.0396, "step": 44340 }, { "epoch": 0.21675, "grad_norm": 0.10888620465993881, "learning_rate": 4.607513160069061e-05, "loss": 0.0422, "step": 44350 }, { "epoch": 0.2168, "grad_norm": 0.12154927104711533, "learning_rate": 4.60729077871117e-05, "loss": 0.041, "step": 44360 }, { "epoch": 0.21685, "grad_norm": 0.11862784624099731, "learning_rate": 4.60706833974074e-05, "loss": 0.0413, "step": 44370 }, { "epoch": 0.2169, "grad_norm": 0.08911058306694031, "learning_rate": 4.6068458431638537e-05, "loss": 0.0398, "step": 44380 }, { "epoch": 0.21695, "grad_norm": 0.11093780398368835, "learning_rate": 4.606623288986594e-05, "loss": 0.0402, "step": 44390 }, { "epoch": 0.217, "grad_norm": 0.11029799282550812, "learning_rate": 4.606400677215044e-05, "loss": 0.041, "step": 44400 }, { "epoch": 0.21705, "grad_norm": 0.12298743426799774, "learning_rate": 4.6061780078552906e-05, "loss": 0.0399, "step": 44410 }, { "epoch": 0.2171, "grad_norm": 0.11163527518510818, "learning_rate": 4.6059552809134224e-05, "loss": 0.0389, "step": 44420 }, { "epoch": 0.21715, "grad_norm": 0.09122147411108017, "learning_rate": 4.6057324963955284e-05, "loss": 0.0395, "step": 44430 }, { "epoch": 0.2172, "grad_norm": 0.10241471230983734, "learning_rate": 4.605509654307698e-05, "loss": 0.0403, "step": 44440 }, { "epoch": 0.21725, "grad_norm": 0.0955214723944664, "learning_rate": 4.605286754656025e-05, "loss": 0.0396, "step": 44450 }, { "epoch": 0.2173, "grad_norm": 0.09183744341135025, "learning_rate": 4.6050637974466036e-05, "loss": 0.0395, "step": 44460 }, { "epoch": 0.21735, "grad_norm": 0.09811728447675705, "learning_rate": 4.604840782685529e-05, "loss": 0.0396, "step": 44470 }, { "epoch": 0.2174, "grad_norm": 0.11486396938562393, "learning_rate": 4.604617710378897e-05, "loss": 0.0387, "step": 44480 }, { "epoch": 0.21745, "grad_norm": 0.10211937129497528, "learning_rate": 4.604394580532808e-05, "loss": 0.0392, "step": 44490 }, { "epoch": 0.2175, "grad_norm": 0.13213109970092773, "learning_rate": 4.6041713931533624e-05, "loss": 0.0405, "step": 44500 }, { "epoch": 0.21755, "grad_norm": 0.10534148663282394, "learning_rate": 4.6039481482466606e-05, "loss": 0.0432, "step": 44510 }, { "epoch": 0.2176, "grad_norm": 0.10675963759422302, "learning_rate": 4.603724845818808e-05, "loss": 0.0396, "step": 44520 }, { "epoch": 0.21765, "grad_norm": 0.13914652168750763, "learning_rate": 4.603501485875907e-05, "loss": 0.0417, "step": 44530 }, { "epoch": 0.2177, "grad_norm": 0.14863325655460358, "learning_rate": 4.6032780684240665e-05, "loss": 0.0415, "step": 44540 }, { "epoch": 0.21775, "grad_norm": 0.12211088836193085, "learning_rate": 4.603054593469393e-05, "loss": 0.0412, "step": 44550 }, { "epoch": 0.2178, "grad_norm": 0.14143238961696625, "learning_rate": 4.602831061017997e-05, "loss": 0.0401, "step": 44560 }, { "epoch": 0.21785, "grad_norm": 0.13877104222774506, "learning_rate": 4.60260747107599e-05, "loss": 0.0396, "step": 44570 }, { "epoch": 0.2179, "grad_norm": 0.11739282310009003, "learning_rate": 4.6023838236494854e-05, "loss": 0.0403, "step": 44580 }, { "epoch": 0.21795, "grad_norm": 0.12237027287483215, "learning_rate": 4.602160118744596e-05, "loss": 0.0408, "step": 44590 }, { "epoch": 0.218, "grad_norm": 0.10916484892368317, "learning_rate": 4.601936356367439e-05, "loss": 0.0383, "step": 44600 }, { "epoch": 0.21805, "grad_norm": 0.11514069139957428, "learning_rate": 4.601712536524132e-05, "loss": 0.0407, "step": 44610 }, { "epoch": 0.2181, "grad_norm": 0.10619265586137772, "learning_rate": 4.601488659220794e-05, "loss": 0.0386, "step": 44620 }, { "epoch": 0.21815, "grad_norm": 0.09500869363546371, "learning_rate": 4.601264724463546e-05, "loss": 0.039, "step": 44630 }, { "epoch": 0.2182, "grad_norm": 0.09380833804607391, "learning_rate": 4.601040732258508e-05, "loss": 0.0395, "step": 44640 }, { "epoch": 0.21825, "grad_norm": 0.12103443592786789, "learning_rate": 4.600816682611807e-05, "loss": 0.0395, "step": 44650 }, { "epoch": 0.2183, "grad_norm": 0.12043504416942596, "learning_rate": 4.600592575529566e-05, "loss": 0.0392, "step": 44660 }, { "epoch": 0.21835, "grad_norm": 0.12798574566841125, "learning_rate": 4.600368411017914e-05, "loss": 0.0393, "step": 44670 }, { "epoch": 0.2184, "grad_norm": 0.1347103863954544, "learning_rate": 4.600144189082979e-05, "loss": 0.0385, "step": 44680 }, { "epoch": 0.21845, "grad_norm": 0.12477003782987595, "learning_rate": 4.599919909730891e-05, "loss": 0.0375, "step": 44690 }, { "epoch": 0.2185, "grad_norm": 0.11453071981668472, "learning_rate": 4.5996955729677803e-05, "loss": 0.0394, "step": 44700 }, { "epoch": 0.21855, "grad_norm": 0.09489106386899948, "learning_rate": 4.5994711787997826e-05, "loss": 0.0381, "step": 44710 }, { "epoch": 0.2186, "grad_norm": 0.12129834294319153, "learning_rate": 4.5992467272330315e-05, "loss": 0.0389, "step": 44720 }, { "epoch": 0.21865, "grad_norm": 0.1389053910970688, "learning_rate": 4.599022218273663e-05, "loss": 0.0385, "step": 44730 }, { "epoch": 0.2187, "grad_norm": 0.10801096260547638, "learning_rate": 4.5987976519278165e-05, "loss": 0.0402, "step": 44740 }, { "epoch": 0.21875, "grad_norm": 0.11225487291812897, "learning_rate": 4.59857302820163e-05, "loss": 0.0394, "step": 44750 }, { "epoch": 0.2188, "grad_norm": 0.11907331645488739, "learning_rate": 4.598348347101245e-05, "loss": 0.0384, "step": 44760 }, { "epoch": 0.21885, "grad_norm": 0.10216663032770157, "learning_rate": 4.598123608632805e-05, "loss": 0.0393, "step": 44770 }, { "epoch": 0.2189, "grad_norm": 0.10929743945598602, "learning_rate": 4.597898812802454e-05, "loss": 0.0387, "step": 44780 }, { "epoch": 0.21895, "grad_norm": 0.09658437222242355, "learning_rate": 4.597673959616337e-05, "loss": 0.0381, "step": 44790 }, { "epoch": 0.219, "grad_norm": 0.12752942740917206, "learning_rate": 4.597449049080602e-05, "loss": 0.0386, "step": 44800 }, { "epoch": 0.21905, "grad_norm": 0.10357045382261276, "learning_rate": 4.5972240812013986e-05, "loss": 0.0383, "step": 44810 }, { "epoch": 0.2191, "grad_norm": 0.11409095674753189, "learning_rate": 4.5969990559848766e-05, "loss": 0.0387, "step": 44820 }, { "epoch": 0.21915, "grad_norm": 0.10450869798660278, "learning_rate": 4.596773973437187e-05, "loss": 0.0386, "step": 44830 }, { "epoch": 0.2192, "grad_norm": 0.12107925117015839, "learning_rate": 4.596548833564486e-05, "loss": 0.0372, "step": 44840 }, { "epoch": 0.21925, "grad_norm": 0.10662046819925308, "learning_rate": 4.5963236363729276e-05, "loss": 0.0379, "step": 44850 }, { "epoch": 0.2193, "grad_norm": 0.11387676745653152, "learning_rate": 4.5960983818686674e-05, "loss": 0.0388, "step": 44860 }, { "epoch": 0.21935, "grad_norm": 0.14249145984649658, "learning_rate": 4.595873070057866e-05, "loss": 0.0391, "step": 44870 }, { "epoch": 0.2194, "grad_norm": 0.13589072227478027, "learning_rate": 4.595647700946682e-05, "loss": 0.0382, "step": 44880 }, { "epoch": 0.21945, "grad_norm": 0.14612525701522827, "learning_rate": 4.5954222745412766e-05, "loss": 0.0401, "step": 44890 }, { "epoch": 0.2195, "grad_norm": 0.11659722030162811, "learning_rate": 4.5951967908478147e-05, "loss": 0.0377, "step": 44900 }, { "epoch": 0.21955, "grad_norm": 0.10423924773931503, "learning_rate": 4.594971249872458e-05, "loss": 0.0378, "step": 44910 }, { "epoch": 0.2196, "grad_norm": 0.12860740721225739, "learning_rate": 4.594745651621376e-05, "loss": 0.0389, "step": 44920 }, { "epoch": 0.21965, "grad_norm": 0.09772031009197235, "learning_rate": 4.5945199961007335e-05, "loss": 0.0385, "step": 44930 }, { "epoch": 0.2197, "grad_norm": 0.15080465376377106, "learning_rate": 4.5942942833167016e-05, "loss": 0.039, "step": 44940 }, { "epoch": 0.21975, "grad_norm": 0.13005079329013824, "learning_rate": 4.5940685132754516e-05, "loss": 0.0376, "step": 44950 }, { "epoch": 0.2198, "grad_norm": 0.11674488335847855, "learning_rate": 4.593842685983154e-05, "loss": 0.0389, "step": 44960 }, { "epoch": 0.21985, "grad_norm": 0.10848286002874374, "learning_rate": 4.593616801445984e-05, "loss": 0.0391, "step": 44970 }, { "epoch": 0.2199, "grad_norm": 0.13445886969566345, "learning_rate": 4.593390859670118e-05, "loss": 0.0392, "step": 44980 }, { "epoch": 0.21995, "grad_norm": 0.11834866553544998, "learning_rate": 4.593164860661732e-05, "loss": 0.0392, "step": 44990 }, { "epoch": 0.22, "grad_norm": 0.13557834923267365, "learning_rate": 4.592938804427005e-05, "loss": 0.0396, "step": 45000 }, { "epoch": 0.22005, "grad_norm": 0.1014893501996994, "learning_rate": 4.592712690972117e-05, "loss": 0.0397, "step": 45010 }, { "epoch": 0.2201, "grad_norm": 0.11393041908740997, "learning_rate": 4.592486520303251e-05, "loss": 0.0407, "step": 45020 }, { "epoch": 0.22015, "grad_norm": 0.16742663085460663, "learning_rate": 4.59226029242659e-05, "loss": 0.0449, "step": 45030 }, { "epoch": 0.2202, "grad_norm": 0.12571029365062714, "learning_rate": 4.5920340073483175e-05, "loss": 0.0388, "step": 45040 }, { "epoch": 0.22025, "grad_norm": 0.13934175670146942, "learning_rate": 4.591807665074621e-05, "loss": 0.0386, "step": 45050 }, { "epoch": 0.2203, "grad_norm": 0.11282069236040115, "learning_rate": 4.5915812656116896e-05, "loss": 0.0392, "step": 45060 }, { "epoch": 0.22035, "grad_norm": 0.14101460576057434, "learning_rate": 4.591354808965712e-05, "loss": 0.038, "step": 45070 }, { "epoch": 0.2204, "grad_norm": 0.1505534052848816, "learning_rate": 4.59112829514288e-05, "loss": 0.0406, "step": 45080 }, { "epoch": 0.22045, "grad_norm": 0.1381063014268875, "learning_rate": 4.5909017241493854e-05, "loss": 0.0385, "step": 45090 }, { "epoch": 0.2205, "grad_norm": 0.14494432508945465, "learning_rate": 4.590675095991424e-05, "loss": 0.0378, "step": 45100 }, { "epoch": 0.22055, "grad_norm": 0.11994045972824097, "learning_rate": 4.59044841067519e-05, "loss": 0.0377, "step": 45110 }, { "epoch": 0.2206, "grad_norm": 0.1361813098192215, "learning_rate": 4.590221668206882e-05, "loss": 0.037, "step": 45120 }, { "epoch": 0.22065, "grad_norm": 0.12299936264753342, "learning_rate": 4.5899948685926985e-05, "loss": 0.0392, "step": 45130 }, { "epoch": 0.2207, "grad_norm": 0.12199182063341141, "learning_rate": 4.589768011838841e-05, "loss": 0.0394, "step": 45140 }, { "epoch": 0.22075, "grad_norm": 0.11998183280229568, "learning_rate": 4.589541097951511e-05, "loss": 0.0406, "step": 45150 }, { "epoch": 0.2208, "grad_norm": 0.10331569612026215, "learning_rate": 4.589314126936912e-05, "loss": 0.0411, "step": 45160 }, { "epoch": 0.22085, "grad_norm": 0.09803680330514908, "learning_rate": 4.5890870988012504e-05, "loss": 0.0413, "step": 45170 }, { "epoch": 0.2209, "grad_norm": 0.1218324825167656, "learning_rate": 4.588860013550732e-05, "loss": 0.0398, "step": 45180 }, { "epoch": 0.22095, "grad_norm": 0.12272092700004578, "learning_rate": 4.588632871191566e-05, "loss": 0.0406, "step": 45190 }, { "epoch": 0.221, "grad_norm": 0.1250077337026596, "learning_rate": 4.5884056717299615e-05, "loss": 0.0399, "step": 45200 }, { "epoch": 0.22105, "grad_norm": 0.10727230459451675, "learning_rate": 4.588178415172131e-05, "loss": 0.0399, "step": 45210 }, { "epoch": 0.2211, "grad_norm": 0.11648436635732651, "learning_rate": 4.587951101524286e-05, "loss": 0.044, "step": 45220 }, { "epoch": 0.22115, "grad_norm": 0.12675625085830688, "learning_rate": 4.587723730792644e-05, "loss": 0.0438, "step": 45230 }, { "epoch": 0.2212, "grad_norm": 0.11761613935232162, "learning_rate": 4.587496302983418e-05, "loss": 0.0393, "step": 45240 }, { "epoch": 0.22125, "grad_norm": 0.11859478801488876, "learning_rate": 4.587268818102828e-05, "loss": 0.04, "step": 45250 }, { "epoch": 0.2213, "grad_norm": 0.09970647096633911, "learning_rate": 4.587041276157093e-05, "loss": 0.0403, "step": 45260 }, { "epoch": 0.22135, "grad_norm": 0.1070898249745369, "learning_rate": 4.5868136771524325e-05, "loss": 0.0394, "step": 45270 }, { "epoch": 0.2214, "grad_norm": 0.12319520860910416, "learning_rate": 4.5865860210950704e-05, "loss": 0.0392, "step": 45280 }, { "epoch": 0.22145, "grad_norm": 0.10981305688619614, "learning_rate": 4.5863583079912306e-05, "loss": 0.0399, "step": 45290 }, { "epoch": 0.2215, "grad_norm": 0.12001689523458481, "learning_rate": 4.5861305378471385e-05, "loss": 0.0402, "step": 45300 }, { "epoch": 0.22155, "grad_norm": 0.11061044037342072, "learning_rate": 4.585902710669021e-05, "loss": 0.0401, "step": 45310 }, { "epoch": 0.2216, "grad_norm": 0.10832685977220535, "learning_rate": 4.585674826463108e-05, "loss": 0.0402, "step": 45320 }, { "epoch": 0.22165, "grad_norm": 0.11753688752651215, "learning_rate": 4.585446885235628e-05, "loss": 0.0388, "step": 45330 }, { "epoch": 0.2217, "grad_norm": 0.11813338100910187, "learning_rate": 4.5852188869928134e-05, "loss": 0.0422, "step": 45340 }, { "epoch": 0.22175, "grad_norm": 0.1180034875869751, "learning_rate": 4.584990831740897e-05, "loss": 0.0397, "step": 45350 }, { "epoch": 0.2218, "grad_norm": 0.11829423159360886, "learning_rate": 4.584762719486117e-05, "loss": 0.042, "step": 45360 }, { "epoch": 0.22185, "grad_norm": 0.09697888791561127, "learning_rate": 4.5845345502347055e-05, "loss": 0.0415, "step": 45370 }, { "epoch": 0.2219, "grad_norm": 0.10831569880247116, "learning_rate": 4.584306323992903e-05, "loss": 0.0402, "step": 45380 }, { "epoch": 0.22195, "grad_norm": 0.11756439507007599, "learning_rate": 4.584078040766949e-05, "loss": 0.0411, "step": 45390 }, { "epoch": 0.222, "grad_norm": 0.11222250014543533, "learning_rate": 4.5838497005630835e-05, "loss": 0.0408, "step": 45400 }, { "epoch": 0.22205, "grad_norm": 0.12091228365898132, "learning_rate": 4.5836213033875506e-05, "loss": 0.0452, "step": 45410 }, { "epoch": 0.2221, "grad_norm": 0.10155369341373444, "learning_rate": 4.583392849246594e-05, "loss": 0.0407, "step": 45420 }, { "epoch": 0.22215, "grad_norm": 0.1056760624051094, "learning_rate": 4.5831643381464596e-05, "loss": 0.0418, "step": 45430 }, { "epoch": 0.2222, "grad_norm": 0.10789384692907333, "learning_rate": 4.582935770093395e-05, "loss": 0.0395, "step": 45440 }, { "epoch": 0.22225, "grad_norm": 0.10316034406423569, "learning_rate": 4.582707145093649e-05, "loss": 0.04, "step": 45450 }, { "epoch": 0.2223, "grad_norm": 0.11070965230464935, "learning_rate": 4.582478463153472e-05, "loss": 0.0398, "step": 45460 }, { "epoch": 0.22235, "grad_norm": 0.1169808954000473, "learning_rate": 4.582249724279116e-05, "loss": 0.042, "step": 45470 }, { "epoch": 0.2224, "grad_norm": 0.11864583194255829, "learning_rate": 4.582020928476835e-05, "loss": 0.0397, "step": 45480 }, { "epoch": 0.22245, "grad_norm": 0.09163357317447662, "learning_rate": 4.5817920757528834e-05, "loss": 0.0405, "step": 45490 }, { "epoch": 0.2225, "grad_norm": 0.11840710043907166, "learning_rate": 4.5815631661135196e-05, "loss": 0.0402, "step": 45500 }, { "epoch": 0.22255, "grad_norm": 0.1186244860291481, "learning_rate": 4.581334199565e-05, "loss": 0.0404, "step": 45510 }, { "epoch": 0.2226, "grad_norm": 0.10652244091033936, "learning_rate": 4.5811051761135856e-05, "loss": 0.0397, "step": 45520 }, { "epoch": 0.22265, "grad_norm": 0.11250728368759155, "learning_rate": 4.5808760957655374e-05, "loss": 0.04, "step": 45530 }, { "epoch": 0.2227, "grad_norm": 0.1473996937274933, "learning_rate": 4.580646958527118e-05, "loss": 0.0397, "step": 45540 }, { "epoch": 0.22275, "grad_norm": 0.09371239691972733, "learning_rate": 4.5804177644045935e-05, "loss": 0.0391, "step": 45550 }, { "epoch": 0.2228, "grad_norm": 0.11303984373807907, "learning_rate": 4.5801885134042285e-05, "loss": 0.0388, "step": 45560 }, { "epoch": 0.22285, "grad_norm": 0.09464023262262344, "learning_rate": 4.579959205532291e-05, "loss": 0.0435, "step": 45570 }, { "epoch": 0.2229, "grad_norm": 0.1317097246646881, "learning_rate": 4.57972984079505e-05, "loss": 0.039, "step": 45580 }, { "epoch": 0.22295, "grad_norm": 0.12932200729846954, "learning_rate": 4.5795004191987765e-05, "loss": 0.041, "step": 45590 }, { "epoch": 0.223, "grad_norm": 0.10654744505882263, "learning_rate": 4.579270940749743e-05, "loss": 0.0408, "step": 45600 }, { "epoch": 0.22305, "grad_norm": 0.11881094425916672, "learning_rate": 4.579041405454223e-05, "loss": 0.0388, "step": 45610 }, { "epoch": 0.2231, "grad_norm": 0.12884128093719482, "learning_rate": 4.578811813318492e-05, "loss": 0.0408, "step": 45620 }, { "epoch": 0.22315, "grad_norm": 0.12064798176288605, "learning_rate": 4.578582164348827e-05, "loss": 0.0416, "step": 45630 }, { "epoch": 0.2232, "grad_norm": 0.13775815069675446, "learning_rate": 4.578352458551507e-05, "loss": 0.0392, "step": 45640 }, { "epoch": 0.22325, "grad_norm": 0.11223536729812622, "learning_rate": 4.57812269593281e-05, "loss": 0.0391, "step": 45650 }, { "epoch": 0.2233, "grad_norm": 0.13564331829547882, "learning_rate": 4.5778928764990217e-05, "loss": 0.0402, "step": 45660 }, { "epoch": 0.22335, "grad_norm": 0.12138502299785614, "learning_rate": 4.5776630002564206e-05, "loss": 0.0398, "step": 45670 }, { "epoch": 0.2234, "grad_norm": 0.1268160045146942, "learning_rate": 4.577433067211295e-05, "loss": 0.0413, "step": 45680 }, { "epoch": 0.22345, "grad_norm": 0.11599452048540115, "learning_rate": 4.577203077369929e-05, "loss": 0.0387, "step": 45690 }, { "epoch": 0.2235, "grad_norm": 0.10990285873413086, "learning_rate": 4.5769730307386114e-05, "loss": 0.0389, "step": 45700 }, { "epoch": 0.22355, "grad_norm": 0.10611005127429962, "learning_rate": 4.576742927323632e-05, "loss": 0.0381, "step": 45710 }, { "epoch": 0.2236, "grad_norm": 0.12060553580522537, "learning_rate": 4.5765127671312805e-05, "loss": 0.039, "step": 45720 }, { "epoch": 0.22365, "grad_norm": 0.12426841259002686, "learning_rate": 4.5762825501678495e-05, "loss": 0.0411, "step": 45730 }, { "epoch": 0.2237, "grad_norm": 0.1251668632030487, "learning_rate": 4.576052276439635e-05, "loss": 0.0404, "step": 45740 }, { "epoch": 0.22375, "grad_norm": 0.16352106630802155, "learning_rate": 4.57582194595293e-05, "loss": 0.0415, "step": 45750 }, { "epoch": 0.2238, "grad_norm": 0.12801645696163177, "learning_rate": 4.5755915587140336e-05, "loss": 0.0397, "step": 45760 }, { "epoch": 0.22385, "grad_norm": 0.10659578442573547, "learning_rate": 4.5753611147292435e-05, "loss": 0.0388, "step": 45770 }, { "epoch": 0.2239, "grad_norm": 0.14153005182743073, "learning_rate": 4.57513061400486e-05, "loss": 0.0404, "step": 45780 }, { "epoch": 0.22395, "grad_norm": 0.110100157558918, "learning_rate": 4.5749000565471855e-05, "loss": 0.0392, "step": 45790 }, { "epoch": 0.224, "grad_norm": 0.10864286869764328, "learning_rate": 4.574669442362522e-05, "loss": 0.0388, "step": 45800 }, { "epoch": 0.22405, "grad_norm": 0.1269918531179428, "learning_rate": 4.5744387714571766e-05, "loss": 0.0406, "step": 45810 }, { "epoch": 0.2241, "grad_norm": 0.11123709380626678, "learning_rate": 4.5742080438374545e-05, "loss": 0.0399, "step": 45820 }, { "epoch": 0.22415, "grad_norm": 0.10304092615842819, "learning_rate": 4.573977259509663e-05, "loss": 0.0399, "step": 45830 }, { "epoch": 0.2242, "grad_norm": 0.1020602434873581, "learning_rate": 4.5737464184801124e-05, "loss": 0.0403, "step": 45840 }, { "epoch": 0.22425, "grad_norm": 0.12396201491355896, "learning_rate": 4.5735155207551145e-05, "loss": 0.0409, "step": 45850 }, { "epoch": 0.2243, "grad_norm": 0.119930699467659, "learning_rate": 4.5732845663409804e-05, "loss": 0.0394, "step": 45860 }, { "epoch": 0.22435, "grad_norm": 0.11855699121952057, "learning_rate": 4.5730535552440256e-05, "loss": 0.0402, "step": 45870 }, { "epoch": 0.2244, "grad_norm": 0.11706940829753876, "learning_rate": 4.572822487470566e-05, "loss": 0.0395, "step": 45880 }, { "epoch": 0.22445, "grad_norm": 0.09675632417201996, "learning_rate": 4.572591363026918e-05, "loss": 0.039, "step": 45890 }, { "epoch": 0.2245, "grad_norm": 0.11238475143909454, "learning_rate": 4.5723601819193996e-05, "loss": 0.0378, "step": 45900 }, { "epoch": 0.22455, "grad_norm": 0.11811317503452301, "learning_rate": 4.5721289441543336e-05, "loss": 0.04, "step": 45910 }, { "epoch": 0.2246, "grad_norm": 0.10928291827440262, "learning_rate": 4.5718976497380404e-05, "loss": 0.0388, "step": 45920 }, { "epoch": 0.22465, "grad_norm": 0.11499692499637604, "learning_rate": 4.571666298676843e-05, "loss": 0.0382, "step": 45930 }, { "epoch": 0.2247, "grad_norm": 0.10858325660228729, "learning_rate": 4.571434890977069e-05, "loss": 0.0385, "step": 45940 }, { "epoch": 0.22475, "grad_norm": 0.11937263607978821, "learning_rate": 4.571203426645042e-05, "loss": 0.038, "step": 45950 }, { "epoch": 0.2248, "grad_norm": 0.10898961871862411, "learning_rate": 4.5709719056870916e-05, "loss": 0.0419, "step": 45960 }, { "epoch": 0.22485, "grad_norm": 0.11576051265001297, "learning_rate": 4.570740328109547e-05, "loss": 0.039, "step": 45970 }, { "epoch": 0.2249, "grad_norm": 0.11729492247104645, "learning_rate": 4.5705086939187414e-05, "loss": 0.0391, "step": 45980 }, { "epoch": 0.22495, "grad_norm": 0.13462895154953003, "learning_rate": 4.5702770031210044e-05, "loss": 0.041, "step": 45990 }, { "epoch": 0.225, "grad_norm": 0.10053347796201706, "learning_rate": 4.5700452557226726e-05, "loss": 0.0389, "step": 46000 }, { "epoch": 0.22505, "grad_norm": 0.1229424923658371, "learning_rate": 4.5698134517300804e-05, "loss": 0.0397, "step": 46010 }, { "epoch": 0.2251, "grad_norm": 0.14538830518722534, "learning_rate": 4.569581591149566e-05, "loss": 0.0394, "step": 46020 }, { "epoch": 0.22515, "grad_norm": 0.12600792944431305, "learning_rate": 4.5693496739874695e-05, "loss": 0.0392, "step": 46030 }, { "epoch": 0.2252, "grad_norm": 0.12603269517421722, "learning_rate": 4.56911770025013e-05, "loss": 0.0388, "step": 46040 }, { "epoch": 0.22525, "grad_norm": 0.11895067989826202, "learning_rate": 4.5688856699438895e-05, "loss": 0.0402, "step": 46050 }, { "epoch": 0.2253, "grad_norm": 0.1792805939912796, "learning_rate": 4.568653583075093e-05, "loss": 0.0418, "step": 46060 }, { "epoch": 0.22535, "grad_norm": 0.16201379895210266, "learning_rate": 4.5684214396500836e-05, "loss": 0.039, "step": 46070 }, { "epoch": 0.2254, "grad_norm": 0.13260048627853394, "learning_rate": 4.568189239675209e-05, "loss": 0.0392, "step": 46080 }, { "epoch": 0.22545, "grad_norm": 0.13739709556102753, "learning_rate": 4.567956983156818e-05, "loss": 0.039, "step": 46090 }, { "epoch": 0.2255, "grad_norm": 0.11852910369634628, "learning_rate": 4.56772467010126e-05, "loss": 0.0391, "step": 46100 }, { "epoch": 0.22555, "grad_norm": 0.14033553004264832, "learning_rate": 4.5674923005148864e-05, "loss": 0.0379, "step": 46110 }, { "epoch": 0.2256, "grad_norm": 0.16816692054271698, "learning_rate": 4.56725987440405e-05, "loss": 0.0397, "step": 46120 }, { "epoch": 0.22565, "grad_norm": 0.15413282811641693, "learning_rate": 4.567027391775105e-05, "loss": 0.0406, "step": 46130 }, { "epoch": 0.2257, "grad_norm": 0.10553387552499771, "learning_rate": 4.5667948526344086e-05, "loss": 0.0391, "step": 46140 }, { "epoch": 0.22575, "grad_norm": 0.10783561319112778, "learning_rate": 4.566562256988316e-05, "loss": 0.0411, "step": 46150 }, { "epoch": 0.2258, "grad_norm": 0.07838821411132812, "learning_rate": 4.566329604843188e-05, "loss": 0.0378, "step": 46160 }, { "epoch": 0.22585, "grad_norm": 0.10225919634103775, "learning_rate": 4.5660968962053856e-05, "loss": 0.038, "step": 46170 }, { "epoch": 0.2259, "grad_norm": 0.1086994856595993, "learning_rate": 4.56586413108127e-05, "loss": 0.0385, "step": 46180 }, { "epoch": 0.22595, "grad_norm": 0.09652048349380493, "learning_rate": 4.565631309477205e-05, "loss": 0.0392, "step": 46190 }, { "epoch": 0.226, "grad_norm": 0.11381851881742477, "learning_rate": 4.565398431399556e-05, "loss": 0.0414, "step": 46200 }, { "epoch": 0.22605, "grad_norm": 0.12784039974212646, "learning_rate": 4.56516549685469e-05, "loss": 0.0398, "step": 46210 }, { "epoch": 0.2261, "grad_norm": 0.10668402910232544, "learning_rate": 4.564932505848975e-05, "loss": 0.0389, "step": 46220 }, { "epoch": 0.22615, "grad_norm": 0.10620923340320587, "learning_rate": 4.5646994583887805e-05, "loss": 0.0397, "step": 46230 }, { "epoch": 0.2262, "grad_norm": 0.11140339076519012, "learning_rate": 4.5644663544804794e-05, "loss": 0.0394, "step": 46240 }, { "epoch": 0.22625, "grad_norm": 0.11114098131656647, "learning_rate": 4.564233194130444e-05, "loss": 0.0423, "step": 46250 }, { "epoch": 0.2263, "grad_norm": 0.09415170550346375, "learning_rate": 4.563999977345047e-05, "loss": 0.041, "step": 46260 }, { "epoch": 0.22635, "grad_norm": 0.12156187742948532, "learning_rate": 4.5637667041306675e-05, "loss": 0.0391, "step": 46270 }, { "epoch": 0.2264, "grad_norm": 0.1148257926106453, "learning_rate": 4.563533374493682e-05, "loss": 0.0404, "step": 46280 }, { "epoch": 0.22645, "grad_norm": 0.10641665756702423, "learning_rate": 4.563299988440467e-05, "loss": 0.0394, "step": 46290 }, { "epoch": 0.2265, "grad_norm": 0.12104397267103195, "learning_rate": 4.563066545977407e-05, "loss": 0.0418, "step": 46300 }, { "epoch": 0.22655, "grad_norm": 0.11271051317453384, "learning_rate": 4.562833047110883e-05, "loss": 0.0425, "step": 46310 }, { "epoch": 0.2266, "grad_norm": 0.10609057545661926, "learning_rate": 4.562599491847278e-05, "loss": 0.0393, "step": 46320 }, { "epoch": 0.22665, "grad_norm": 0.10337311774492264, "learning_rate": 4.562365880192978e-05, "loss": 0.0406, "step": 46330 }, { "epoch": 0.2267, "grad_norm": 0.1276106983423233, "learning_rate": 4.562132212154369e-05, "loss": 0.0404, "step": 46340 }, { "epoch": 0.22675, "grad_norm": 0.12957589328289032, "learning_rate": 4.56189848773784e-05, "loss": 0.0416, "step": 46350 }, { "epoch": 0.2268, "grad_norm": 0.11019352823495865, "learning_rate": 4.561664706949782e-05, "loss": 0.0405, "step": 46360 }, { "epoch": 0.22685, "grad_norm": 0.12697316706180573, "learning_rate": 4.5614308697965845e-05, "loss": 0.0391, "step": 46370 }, { "epoch": 0.2269, "grad_norm": 0.13217809796333313, "learning_rate": 4.5611969762846415e-05, "loss": 0.04, "step": 46380 }, { "epoch": 0.22695, "grad_norm": 0.12451531738042831, "learning_rate": 4.560963026420349e-05, "loss": 0.0408, "step": 46390 }, { "epoch": 0.227, "grad_norm": 0.10132154077291489, "learning_rate": 4.5607290202100996e-05, "loss": 0.0394, "step": 46400 }, { "epoch": 0.22705, "grad_norm": 0.10651516169309616, "learning_rate": 4.560494957660294e-05, "loss": 0.0396, "step": 46410 }, { "epoch": 0.2271, "grad_norm": 0.18300026655197144, "learning_rate": 4.560260838777331e-05, "loss": 0.0406, "step": 46420 }, { "epoch": 0.22715, "grad_norm": 0.13345034420490265, "learning_rate": 4.5600266635676094e-05, "loss": 0.0403, "step": 46430 }, { "epoch": 0.2272, "grad_norm": 0.11327176541090012, "learning_rate": 4.559792432037533e-05, "loss": 0.0388, "step": 46440 }, { "epoch": 0.22725, "grad_norm": 0.09436774998903275, "learning_rate": 4.559558144193505e-05, "loss": 0.0407, "step": 46450 }, { "epoch": 0.2273, "grad_norm": 0.09983410686254501, "learning_rate": 4.559323800041932e-05, "loss": 0.0403, "step": 46460 }, { "epoch": 0.22735, "grad_norm": 0.09111980348825455, "learning_rate": 4.5590893995892196e-05, "loss": 0.0401, "step": 46470 }, { "epoch": 0.2274, "grad_norm": 0.12816768884658813, "learning_rate": 4.5588549428417765e-05, "loss": 0.0406, "step": 46480 }, { "epoch": 0.22745, "grad_norm": 0.1014213114976883, "learning_rate": 4.558620429806013e-05, "loss": 0.0384, "step": 46490 }, { "epoch": 0.2275, "grad_norm": 0.10942688584327698, "learning_rate": 4.55838586048834e-05, "loss": 0.0414, "step": 46500 }, { "epoch": 0.22755, "grad_norm": 0.09891363978385925, "learning_rate": 4.5581512348951706e-05, "loss": 0.0395, "step": 46510 }, { "epoch": 0.2276, "grad_norm": 0.12483636289834976, "learning_rate": 4.55791655303292e-05, "loss": 0.0429, "step": 46520 }, { "epoch": 0.22765, "grad_norm": 0.10598792880773544, "learning_rate": 4.5576818149080045e-05, "loss": 0.0389, "step": 46530 }, { "epoch": 0.2277, "grad_norm": 0.12170213460922241, "learning_rate": 4.5574470205268406e-05, "loss": 0.0388, "step": 46540 }, { "epoch": 0.22775, "grad_norm": 0.11315925419330597, "learning_rate": 4.5572121698958484e-05, "loss": 0.0401, "step": 46550 }, { "epoch": 0.2278, "grad_norm": 0.11891046166419983, "learning_rate": 4.556977263021448e-05, "loss": 0.041, "step": 46560 }, { "epoch": 0.22785, "grad_norm": 0.0966760441660881, "learning_rate": 4.5567422999100624e-05, "loss": 0.0424, "step": 46570 }, { "epoch": 0.2279, "grad_norm": 0.11074547469615936, "learning_rate": 4.556507280568114e-05, "loss": 0.0405, "step": 46580 }, { "epoch": 0.22795, "grad_norm": 0.10060471296310425, "learning_rate": 4.55627220500203e-05, "loss": 0.0394, "step": 46590 }, { "epoch": 0.228, "grad_norm": 0.10925568640232086, "learning_rate": 4.5560370732182364e-05, "loss": 0.0397, "step": 46600 }, { "epoch": 0.22805, "grad_norm": 0.12439475953578949, "learning_rate": 4.555801885223162e-05, "loss": 0.0398, "step": 46610 }, { "epoch": 0.2281, "grad_norm": 0.12491118907928467, "learning_rate": 4.5555666410232356e-05, "loss": 0.0414, "step": 46620 }, { "epoch": 0.22815, "grad_norm": 0.10894277691841125, "learning_rate": 4.55533134062489e-05, "loss": 0.04, "step": 46630 }, { "epoch": 0.2282, "grad_norm": 0.14506717026233673, "learning_rate": 4.5550959840345574e-05, "loss": 0.0398, "step": 46640 }, { "epoch": 0.22825, "grad_norm": 0.10716503113508224, "learning_rate": 4.554860571258673e-05, "loss": 0.0398, "step": 46650 }, { "epoch": 0.2283, "grad_norm": 0.11917222291231155, "learning_rate": 4.554625102303672e-05, "loss": 0.0404, "step": 46660 }, { "epoch": 0.22835, "grad_norm": 0.1065252348780632, "learning_rate": 4.554389577175993e-05, "loss": 0.0397, "step": 46670 }, { "epoch": 0.2284, "grad_norm": 0.14224746823310852, "learning_rate": 4.554153995882074e-05, "loss": 0.0398, "step": 46680 }, { "epoch": 0.22845, "grad_norm": 0.11975722014904022, "learning_rate": 4.553918358428358e-05, "loss": 0.0379, "step": 46690 }, { "epoch": 0.2285, "grad_norm": 0.12840095162391663, "learning_rate": 4.5536826648212846e-05, "loss": 0.04, "step": 46700 }, { "epoch": 0.22855, "grad_norm": 0.1120993047952652, "learning_rate": 4.553446915067299e-05, "loss": 0.0406, "step": 46710 }, { "epoch": 0.2286, "grad_norm": 0.16436734795570374, "learning_rate": 4.5532111091728465e-05, "loss": 0.0417, "step": 46720 }, { "epoch": 0.22865, "grad_norm": 0.12969239056110382, "learning_rate": 4.552975247144373e-05, "loss": 0.0416, "step": 46730 }, { "epoch": 0.2287, "grad_norm": 0.12759140133857727, "learning_rate": 4.552739328988328e-05, "loss": 0.0413, "step": 46740 }, { "epoch": 0.22875, "grad_norm": 0.12827183306217194, "learning_rate": 4.5525033547111604e-05, "loss": 0.0407, "step": 46750 }, { "epoch": 0.2288, "grad_norm": 0.12281138449907303, "learning_rate": 4.5522673243193225e-05, "loss": 0.0403, "step": 46760 }, { "epoch": 0.22885, "grad_norm": 0.12596429884433746, "learning_rate": 4.552031237819267e-05, "loss": 0.0407, "step": 46770 }, { "epoch": 0.2289, "grad_norm": 0.11390097439289093, "learning_rate": 4.551795095217448e-05, "loss": 0.0411, "step": 46780 }, { "epoch": 0.22895, "grad_norm": 0.1110418513417244, "learning_rate": 4.551558896520323e-05, "loss": 0.0405, "step": 46790 }, { "epoch": 0.229, "grad_norm": 0.10880877077579498, "learning_rate": 4.551322641734347e-05, "loss": 0.0384, "step": 46800 }, { "epoch": 0.22905, "grad_norm": 0.09871169924736023, "learning_rate": 4.551086330865981e-05, "loss": 0.04, "step": 46810 }, { "epoch": 0.2291, "grad_norm": 0.1078561469912529, "learning_rate": 4.550849963921686e-05, "loss": 0.0405, "step": 46820 }, { "epoch": 0.22915, "grad_norm": 0.09892131388187408, "learning_rate": 4.5506135409079234e-05, "loss": 0.0415, "step": 46830 }, { "epoch": 0.2292, "grad_norm": 0.11442753672599792, "learning_rate": 4.550377061831156e-05, "loss": 0.0411, "step": 46840 }, { "epoch": 0.22925, "grad_norm": 0.10793804377317429, "learning_rate": 4.550140526697851e-05, "loss": 0.042, "step": 46850 }, { "epoch": 0.2293, "grad_norm": 0.10234250128269196, "learning_rate": 4.549903935514473e-05, "loss": 0.0391, "step": 46860 }, { "epoch": 0.22935, "grad_norm": 0.09293326735496521, "learning_rate": 4.549667288287493e-05, "loss": 0.0402, "step": 46870 }, { "epoch": 0.2294, "grad_norm": 0.12015996873378754, "learning_rate": 4.5494305850233786e-05, "loss": 0.0392, "step": 46880 }, { "epoch": 0.22945, "grad_norm": 0.1293002963066101, "learning_rate": 4.549193825728602e-05, "loss": 0.0404, "step": 46890 }, { "epoch": 0.2295, "grad_norm": 0.09664689749479294, "learning_rate": 4.548957010409636e-05, "loss": 0.0383, "step": 46900 }, { "epoch": 0.22955, "grad_norm": 0.10771577805280685, "learning_rate": 4.548720139072955e-05, "loss": 0.0404, "step": 46910 }, { "epoch": 0.2296, "grad_norm": 0.10461598634719849, "learning_rate": 4.5484832117250356e-05, "loss": 0.0406, "step": 46920 }, { "epoch": 0.22965, "grad_norm": 0.1101800948381424, "learning_rate": 4.5482462283723545e-05, "loss": 0.0384, "step": 46930 }, { "epoch": 0.2297, "grad_norm": 0.1299954056739807, "learning_rate": 4.548009189021391e-05, "loss": 0.0413, "step": 46940 }, { "epoch": 0.22975, "grad_norm": 0.1355685144662857, "learning_rate": 4.547772093678626e-05, "loss": 0.0403, "step": 46950 }, { "epoch": 0.2298, "grad_norm": 0.12462537735700607, "learning_rate": 4.547534942350541e-05, "loss": 0.0384, "step": 46960 }, { "epoch": 0.22985, "grad_norm": 0.11404204368591309, "learning_rate": 4.54729773504362e-05, "loss": 0.0393, "step": 46970 }, { "epoch": 0.2299, "grad_norm": 0.11085730791091919, "learning_rate": 4.547060471764347e-05, "loss": 0.0412, "step": 46980 }, { "epoch": 0.22995, "grad_norm": 0.09980040788650513, "learning_rate": 4.5468231525192104e-05, "loss": 0.0379, "step": 46990 }, { "epoch": 0.23, "grad_norm": 0.12433990836143494, "learning_rate": 4.546585777314698e-05, "loss": 0.0391, "step": 47000 }, { "epoch": 0.23005, "grad_norm": 0.11919044703245163, "learning_rate": 4.5463483461572996e-05, "loss": 0.0385, "step": 47010 }, { "epoch": 0.2301, "grad_norm": 0.10812638700008392, "learning_rate": 4.546110859053506e-05, "loss": 0.0386, "step": 47020 }, { "epoch": 0.23015, "grad_norm": 0.08290821313858032, "learning_rate": 4.54587331600981e-05, "loss": 0.0374, "step": 47030 }, { "epoch": 0.2302, "grad_norm": 0.12326608598232269, "learning_rate": 4.545635717032706e-05, "loss": 0.0383, "step": 47040 }, { "epoch": 0.23025, "grad_norm": 0.0903635025024414, "learning_rate": 4.5453980621286904e-05, "loss": 0.0382, "step": 47050 }, { "epoch": 0.2303, "grad_norm": 0.11884330213069916, "learning_rate": 4.54516035130426e-05, "loss": 0.0393, "step": 47060 }, { "epoch": 0.23035, "grad_norm": 0.10627713054418564, "learning_rate": 4.544922584565914e-05, "loss": 0.0386, "step": 47070 }, { "epoch": 0.2304, "grad_norm": 0.12588046491146088, "learning_rate": 4.5446847619201524e-05, "loss": 0.0404, "step": 47080 }, { "epoch": 0.23045, "grad_norm": 0.10804393887519836, "learning_rate": 4.544446883373478e-05, "loss": 0.0387, "step": 47090 }, { "epoch": 0.2305, "grad_norm": 0.13833652436733246, "learning_rate": 4.5442089489323933e-05, "loss": 0.0396, "step": 47100 }, { "epoch": 0.23055, "grad_norm": 0.11136481165885925, "learning_rate": 4.543970958603405e-05, "loss": 0.0397, "step": 47110 }, { "epoch": 0.2306, "grad_norm": 0.10702042281627655, "learning_rate": 4.5437329123930175e-05, "loss": 0.0399, "step": 47120 }, { "epoch": 0.23065, "grad_norm": 0.12892375886440277, "learning_rate": 4.543494810307741e-05, "loss": 0.0411, "step": 47130 }, { "epoch": 0.2307, "grad_norm": 0.1187572181224823, "learning_rate": 4.543256652354083e-05, "loss": 0.0402, "step": 47140 }, { "epoch": 0.23075, "grad_norm": 0.1277954876422882, "learning_rate": 4.5430184385385565e-05, "loss": 0.04, "step": 47150 }, { "epoch": 0.2308, "grad_norm": 0.10876869410276413, "learning_rate": 4.542780168867673e-05, "loss": 0.0404, "step": 47160 }, { "epoch": 0.23085, "grad_norm": 0.10255390405654907, "learning_rate": 4.5425418433479475e-05, "loss": 0.0381, "step": 47170 }, { "epoch": 0.2309, "grad_norm": 0.10682419687509537, "learning_rate": 4.542303461985895e-05, "loss": 0.0385, "step": 47180 }, { "epoch": 0.23095, "grad_norm": 0.0859571322798729, "learning_rate": 4.5420650247880337e-05, "loss": 0.0388, "step": 47190 }, { "epoch": 0.231, "grad_norm": 0.09608227014541626, "learning_rate": 4.541826531760881e-05, "loss": 0.0391, "step": 47200 }, { "epoch": 0.23105, "grad_norm": 0.1249164342880249, "learning_rate": 4.5415879829109584e-05, "loss": 0.0382, "step": 47210 }, { "epoch": 0.2311, "grad_norm": 0.10630123317241669, "learning_rate": 4.5413493782447866e-05, "loss": 0.0397, "step": 47220 }, { "epoch": 0.23115, "grad_norm": 0.11662591993808746, "learning_rate": 4.5411107177688914e-05, "loss": 0.0381, "step": 47230 }, { "epoch": 0.2312, "grad_norm": 0.12181032449007034, "learning_rate": 4.540872001489794e-05, "loss": 0.0396, "step": 47240 }, { "epoch": 0.23125, "grad_norm": 0.10808727890253067, "learning_rate": 4.540633229414024e-05, "loss": 0.0402, "step": 47250 }, { "epoch": 0.2313, "grad_norm": 0.10237918049097061, "learning_rate": 4.540394401548108e-05, "loss": 0.0418, "step": 47260 }, { "epoch": 0.23135, "grad_norm": 0.10050293803215027, "learning_rate": 4.540155517898575e-05, "loss": 0.0394, "step": 47270 }, { "epoch": 0.2314, "grad_norm": 0.10733969509601593, "learning_rate": 4.5399165784719574e-05, "loss": 0.038, "step": 47280 }, { "epoch": 0.23145, "grad_norm": 0.11772868782281876, "learning_rate": 4.5396775832747876e-05, "loss": 0.0408, "step": 47290 }, { "epoch": 0.2315, "grad_norm": 0.09678518772125244, "learning_rate": 4.5394385323135974e-05, "loss": 0.0399, "step": 47300 }, { "epoch": 0.23155, "grad_norm": 0.11706313490867615, "learning_rate": 4.5391994255949245e-05, "loss": 0.0388, "step": 47310 }, { "epoch": 0.2316, "grad_norm": 0.09137055277824402, "learning_rate": 4.5389602631253054e-05, "loss": 0.0386, "step": 47320 }, { "epoch": 0.23165, "grad_norm": 0.10179854184389114, "learning_rate": 4.5387210449112785e-05, "loss": 0.0392, "step": 47330 }, { "epoch": 0.2317, "grad_norm": 0.12545840442180634, "learning_rate": 4.538481770959384e-05, "loss": 0.039, "step": 47340 }, { "epoch": 0.23175, "grad_norm": 0.11552725732326508, "learning_rate": 4.5382424412761635e-05, "loss": 0.0387, "step": 47350 }, { "epoch": 0.2318, "grad_norm": 0.09177439659833908, "learning_rate": 4.538003055868162e-05, "loss": 0.0393, "step": 47360 }, { "epoch": 0.23185, "grad_norm": 0.07980625331401825, "learning_rate": 4.537763614741921e-05, "loss": 0.0403, "step": 47370 }, { "epoch": 0.2319, "grad_norm": 0.10224027931690216, "learning_rate": 4.5375241179039886e-05, "loss": 0.0395, "step": 47380 }, { "epoch": 0.23195, "grad_norm": 0.10371386259794235, "learning_rate": 4.537284565360913e-05, "loss": 0.0395, "step": 47390 }, { "epoch": 0.232, "grad_norm": 0.1289975494146347, "learning_rate": 4.537044957119242e-05, "loss": 0.0408, "step": 47400 }, { "epoch": 0.23205, "grad_norm": 0.11533493548631668, "learning_rate": 4.536805293185527e-05, "loss": 0.0391, "step": 47410 }, { "epoch": 0.2321, "grad_norm": 0.12109930068254471, "learning_rate": 4.5365655735663214e-05, "loss": 0.0383, "step": 47420 }, { "epoch": 0.23215, "grad_norm": 0.1250036507844925, "learning_rate": 4.536325798268177e-05, "loss": 0.0394, "step": 47430 }, { "epoch": 0.2322, "grad_norm": 0.12080734968185425, "learning_rate": 4.536085967297651e-05, "loss": 0.0394, "step": 47440 }, { "epoch": 0.23225, "grad_norm": 0.12277339398860931, "learning_rate": 4.5358460806612996e-05, "loss": 0.0425, "step": 47450 }, { "epoch": 0.2323, "grad_norm": 0.11439141631126404, "learning_rate": 4.535606138365681e-05, "loss": 0.042, "step": 47460 }, { "epoch": 0.23235, "grad_norm": 0.12071476131677628, "learning_rate": 4.5353661404173554e-05, "loss": 0.0405, "step": 47470 }, { "epoch": 0.2324, "grad_norm": 0.12388350069522858, "learning_rate": 4.535126086822884e-05, "loss": 0.0404, "step": 47480 }, { "epoch": 0.23245, "grad_norm": 0.15248411893844604, "learning_rate": 4.53488597758883e-05, "loss": 0.0405, "step": 47490 }, { "epoch": 0.2325, "grad_norm": 0.12671367824077606, "learning_rate": 4.534645812721758e-05, "loss": 0.0388, "step": 47500 }, { "epoch": 0.23255, "grad_norm": 0.14130564033985138, "learning_rate": 4.534405592228233e-05, "loss": 0.0393, "step": 47510 }, { "epoch": 0.2326, "grad_norm": 0.10151517391204834, "learning_rate": 4.534165316114825e-05, "loss": 0.04, "step": 47520 }, { "epoch": 0.23265, "grad_norm": 0.1347714066505432, "learning_rate": 4.5339249843881004e-05, "loss": 0.0425, "step": 47530 }, { "epoch": 0.2327, "grad_norm": 0.11208470910787582, "learning_rate": 4.5336845970546315e-05, "loss": 0.0385, "step": 47540 }, { "epoch": 0.23275, "grad_norm": 0.12243901938199997, "learning_rate": 4.5334441541209895e-05, "loss": 0.0394, "step": 47550 }, { "epoch": 0.2328, "grad_norm": 0.10456179082393646, "learning_rate": 4.5332036555937475e-05, "loss": 0.0401, "step": 47560 }, { "epoch": 0.23285, "grad_norm": 0.1470469832420349, "learning_rate": 4.532963101479482e-05, "loss": 0.0392, "step": 47570 }, { "epoch": 0.2329, "grad_norm": 0.11980633437633514, "learning_rate": 4.532722491784769e-05, "loss": 0.0398, "step": 47580 }, { "epoch": 0.23295, "grad_norm": 0.11149538308382034, "learning_rate": 4.5324818265161875e-05, "loss": 0.0395, "step": 47590 }, { "epoch": 0.233, "grad_norm": 0.09943703562021255, "learning_rate": 4.532241105680315e-05, "loss": 0.0399, "step": 47600 }, { "epoch": 0.23305, "grad_norm": 0.10191832482814789, "learning_rate": 4.532000329283735e-05, "loss": 0.0393, "step": 47610 }, { "epoch": 0.2331, "grad_norm": 0.116791270673275, "learning_rate": 4.531759497333029e-05, "loss": 0.0408, "step": 47620 }, { "epoch": 0.23315, "grad_norm": 0.09550857543945312, "learning_rate": 4.5315186098347814e-05, "loss": 0.0394, "step": 47630 }, { "epoch": 0.2332, "grad_norm": 0.09740550816059113, "learning_rate": 4.5312776667955795e-05, "loss": 0.039, "step": 47640 }, { "epoch": 0.23325, "grad_norm": 0.11140163987874985, "learning_rate": 4.531036668222008e-05, "loss": 0.0386, "step": 47650 }, { "epoch": 0.2333, "grad_norm": 0.10831119120121002, "learning_rate": 4.530795614120657e-05, "loss": 0.0389, "step": 47660 }, { "epoch": 0.23335, "grad_norm": 0.10964003950357437, "learning_rate": 4.530554504498118e-05, "loss": 0.0415, "step": 47670 }, { "epoch": 0.2334, "grad_norm": 0.1138722375035286, "learning_rate": 4.530313339360981e-05, "loss": 0.0414, "step": 47680 }, { "epoch": 0.23345, "grad_norm": 0.10521946847438812, "learning_rate": 4.53007211871584e-05, "loss": 0.0385, "step": 47690 }, { "epoch": 0.2335, "grad_norm": 0.12359435856342316, "learning_rate": 4.52983084256929e-05, "loss": 0.0413, "step": 47700 }, { "epoch": 0.23355, "grad_norm": 0.13060010969638824, "learning_rate": 4.529589510927927e-05, "loss": 0.0409, "step": 47710 }, { "epoch": 0.2336, "grad_norm": 0.15324173867702484, "learning_rate": 4.5293481237983506e-05, "loss": 0.0411, "step": 47720 }, { "epoch": 0.23365, "grad_norm": 0.11896300315856934, "learning_rate": 4.529106681187158e-05, "loss": 0.0407, "step": 47730 }, { "epoch": 0.2337, "grad_norm": 0.10039672255516052, "learning_rate": 4.52886518310095e-05, "loss": 0.0407, "step": 47740 }, { "epoch": 0.23375, "grad_norm": 0.11257708072662354, "learning_rate": 4.528623629546331e-05, "loss": 0.0395, "step": 47750 }, { "epoch": 0.2338, "grad_norm": 0.09427443146705627, "learning_rate": 4.5283820205299044e-05, "loss": 0.0396, "step": 47760 }, { "epoch": 0.23385, "grad_norm": 0.11360807716846466, "learning_rate": 4.5281403560582754e-05, "loss": 0.0382, "step": 47770 }, { "epoch": 0.2339, "grad_norm": 0.11459067463874817, "learning_rate": 4.52789863613805e-05, "loss": 0.038, "step": 47780 }, { "epoch": 0.23395, "grad_norm": 0.11873367428779602, "learning_rate": 4.527656860775838e-05, "loss": 0.0388, "step": 47790 }, { "epoch": 0.234, "grad_norm": 0.1183047667145729, "learning_rate": 4.52741502997825e-05, "loss": 0.0386, "step": 47800 }, { "epoch": 0.23405, "grad_norm": 0.10633184015750885, "learning_rate": 4.527173143751897e-05, "loss": 0.0396, "step": 47810 }, { "epoch": 0.2341, "grad_norm": 0.10609755665063858, "learning_rate": 4.526931202103391e-05, "loss": 0.0392, "step": 47820 }, { "epoch": 0.23415, "grad_norm": 0.12338245660066605, "learning_rate": 4.526689205039347e-05, "loss": 0.0384, "step": 47830 }, { "epoch": 0.2342, "grad_norm": 0.13311438262462616, "learning_rate": 4.526447152566382e-05, "loss": 0.0386, "step": 47840 }, { "epoch": 0.23425, "grad_norm": 0.10245085507631302, "learning_rate": 4.526205044691114e-05, "loss": 0.0383, "step": 47850 }, { "epoch": 0.2343, "grad_norm": 0.13168932497501373, "learning_rate": 4.5259628814201604e-05, "loss": 0.0412, "step": 47860 }, { "epoch": 0.23435, "grad_norm": 0.10075122863054276, "learning_rate": 4.525720662760143e-05, "loss": 0.0392, "step": 47870 }, { "epoch": 0.2344, "grad_norm": 0.11399999260902405, "learning_rate": 4.525478388717683e-05, "loss": 0.0382, "step": 47880 }, { "epoch": 0.23445, "grad_norm": 0.09355992823839188, "learning_rate": 4.5252360592994056e-05, "loss": 0.0408, "step": 47890 }, { "epoch": 0.2345, "grad_norm": 0.10210133343935013, "learning_rate": 4.524993674511935e-05, "loss": 0.041, "step": 47900 }, { "epoch": 0.23455, "grad_norm": 0.10055769979953766, "learning_rate": 4.524751234361898e-05, "loss": 0.0393, "step": 47910 }, { "epoch": 0.2346, "grad_norm": 0.10071728378534317, "learning_rate": 4.524508738855924e-05, "loss": 0.0386, "step": 47920 }, { "epoch": 0.23465, "grad_norm": 0.08796606957912445, "learning_rate": 4.52426618800064e-05, "loss": 0.0389, "step": 47930 }, { "epoch": 0.2347, "grad_norm": 0.09659391641616821, "learning_rate": 4.52402358180268e-05, "loss": 0.0391, "step": 47940 }, { "epoch": 0.23475, "grad_norm": 0.08772718906402588, "learning_rate": 4.523780920268675e-05, "loss": 0.0411, "step": 47950 }, { "epoch": 0.2348, "grad_norm": 0.10829611867666245, "learning_rate": 4.5235382034052596e-05, "loss": 0.0414, "step": 47960 }, { "epoch": 0.23485, "grad_norm": 0.08450762927532196, "learning_rate": 4.523295431219071e-05, "loss": 0.0409, "step": 47970 }, { "epoch": 0.2349, "grad_norm": 0.10977165400981903, "learning_rate": 4.5230526037167444e-05, "loss": 0.0401, "step": 47980 }, { "epoch": 0.23495, "grad_norm": 0.11166959255933762, "learning_rate": 4.52280972090492e-05, "loss": 0.0385, "step": 47990 }, { "epoch": 0.235, "grad_norm": 0.10049823671579361, "learning_rate": 4.522566782790238e-05, "loss": 0.0403, "step": 48000 }, { "epoch": 0.23505, "grad_norm": 0.10753172636032104, "learning_rate": 4.52232378937934e-05, "loss": 0.0402, "step": 48010 }, { "epoch": 0.2351, "grad_norm": 0.11045978963375092, "learning_rate": 4.52208074067887e-05, "loss": 0.0383, "step": 48020 }, { "epoch": 0.23515, "grad_norm": 0.10796971619129181, "learning_rate": 4.521837636695471e-05, "loss": 0.0434, "step": 48030 }, { "epoch": 0.2352, "grad_norm": 0.18447628617286682, "learning_rate": 4.521594477435791e-05, "loss": 0.0419, "step": 48040 }, { "epoch": 0.23525, "grad_norm": 0.1373187154531479, "learning_rate": 4.521351262906478e-05, "loss": 0.0409, "step": 48050 }, { "epoch": 0.2353, "grad_norm": 0.11791710555553436, "learning_rate": 4.5211079931141795e-05, "loss": 0.0399, "step": 48060 }, { "epoch": 0.23535, "grad_norm": 0.09290598332881927, "learning_rate": 4.5208646680655495e-05, "loss": 0.0393, "step": 48070 }, { "epoch": 0.2354, "grad_norm": 0.09873346239328384, "learning_rate": 4.520621287767237e-05, "loss": 0.0394, "step": 48080 }, { "epoch": 0.23545, "grad_norm": 0.09659668058156967, "learning_rate": 4.520377852225899e-05, "loss": 0.0392, "step": 48090 }, { "epoch": 0.2355, "grad_norm": 0.11689428240060806, "learning_rate": 4.520134361448189e-05, "loss": 0.041, "step": 48100 }, { "epoch": 0.23555, "grad_norm": 0.09561052173376083, "learning_rate": 4.519890815440764e-05, "loss": 0.0405, "step": 48110 }, { "epoch": 0.2356, "grad_norm": 0.0902252271771431, "learning_rate": 4.519647214210284e-05, "loss": 0.0413, "step": 48120 }, { "epoch": 0.23565, "grad_norm": 0.10735762864351273, "learning_rate": 4.5194035577634075e-05, "loss": 0.0392, "step": 48130 }, { "epoch": 0.2357, "grad_norm": 0.11423599720001221, "learning_rate": 4.5191598461067955e-05, "loss": 0.0391, "step": 48140 }, { "epoch": 0.23575, "grad_norm": 0.10585816204547882, "learning_rate": 4.518916079247113e-05, "loss": 0.039, "step": 48150 }, { "epoch": 0.2358, "grad_norm": 0.11375124752521515, "learning_rate": 4.518672257191023e-05, "loss": 0.0396, "step": 48160 }, { "epoch": 0.23585, "grad_norm": 0.1231110617518425, "learning_rate": 4.5184283799451916e-05, "loss": 0.0399, "step": 48170 }, { "epoch": 0.2359, "grad_norm": 0.11836814880371094, "learning_rate": 4.518184447516287e-05, "loss": 0.041, "step": 48180 }, { "epoch": 0.23595, "grad_norm": 0.11588375270366669, "learning_rate": 4.517940459910978e-05, "loss": 0.0402, "step": 48190 }, { "epoch": 0.236, "grad_norm": 0.12206704169511795, "learning_rate": 4.517696417135934e-05, "loss": 0.0434, "step": 48200 }, { "epoch": 0.23605, "grad_norm": 0.1477038711309433, "learning_rate": 4.517452319197828e-05, "loss": 0.041, "step": 48210 }, { "epoch": 0.2361, "grad_norm": 0.13147269189357758, "learning_rate": 4.5172081661033344e-05, "loss": 0.0393, "step": 48220 }, { "epoch": 0.23615, "grad_norm": 0.09250767529010773, "learning_rate": 4.5169639578591274e-05, "loss": 0.039, "step": 48230 }, { "epoch": 0.2362, "grad_norm": 0.13637393712997437, "learning_rate": 4.5167196944718824e-05, "loss": 0.0393, "step": 48240 }, { "epoch": 0.23625, "grad_norm": 0.1100025624036789, "learning_rate": 4.516475375948279e-05, "loss": 0.0401, "step": 48250 }, { "epoch": 0.2363, "grad_norm": 0.11402478814125061, "learning_rate": 4.516231002294997e-05, "loss": 0.0385, "step": 48260 }, { "epoch": 0.23635, "grad_norm": 0.0982499048113823, "learning_rate": 4.5159865735187165e-05, "loss": 0.0392, "step": 48270 }, { "epoch": 0.2364, "grad_norm": 0.11894634366035461, "learning_rate": 4.5157420896261205e-05, "loss": 0.0392, "step": 48280 }, { "epoch": 0.23645, "grad_norm": 0.08542770147323608, "learning_rate": 4.5154975506238926e-05, "loss": 0.0383, "step": 48290 }, { "epoch": 0.2365, "grad_norm": 0.13653557002544403, "learning_rate": 4.51525295651872e-05, "loss": 0.0401, "step": 48300 }, { "epoch": 0.23655, "grad_norm": 0.11575108021497726, "learning_rate": 4.515008307317288e-05, "loss": 0.04, "step": 48310 }, { "epoch": 0.2366, "grad_norm": 0.08517330139875412, "learning_rate": 4.5147636030262854e-05, "loss": 0.0398, "step": 48320 }, { "epoch": 0.23665, "grad_norm": 0.10020171105861664, "learning_rate": 4.514518843652403e-05, "loss": 0.0402, "step": 48330 }, { "epoch": 0.2367, "grad_norm": 0.09228149801492691, "learning_rate": 4.514274029202333e-05, "loss": 0.039, "step": 48340 }, { "epoch": 0.23675, "grad_norm": 0.09565956890583038, "learning_rate": 4.514029159682767e-05, "loss": 0.0373, "step": 48350 }, { "epoch": 0.2368, "grad_norm": 0.08942577987909317, "learning_rate": 4.5137842351004004e-05, "loss": 0.0384, "step": 48360 }, { "epoch": 0.23685, "grad_norm": 0.09256299585103989, "learning_rate": 4.51353925546193e-05, "loss": 0.0379, "step": 48370 }, { "epoch": 0.2369, "grad_norm": 0.1037282645702362, "learning_rate": 4.513294220774053e-05, "loss": 0.0396, "step": 48380 }, { "epoch": 0.23695, "grad_norm": 0.09529906511306763, "learning_rate": 4.513049131043467e-05, "loss": 0.0388, "step": 48390 }, { "epoch": 0.237, "grad_norm": 0.09999002516269684, "learning_rate": 4.5128039862768745e-05, "loss": 0.0424, "step": 48400 }, { "epoch": 0.23705, "grad_norm": 0.10632119327783585, "learning_rate": 4.512558786480978e-05, "loss": 0.0417, "step": 48410 }, { "epoch": 0.2371, "grad_norm": 0.10591237992048264, "learning_rate": 4.5123135316624796e-05, "loss": 0.0381, "step": 48420 }, { "epoch": 0.23715, "grad_norm": 0.09921521693468094, "learning_rate": 4.512068221828086e-05, "loss": 0.0399, "step": 48430 }, { "epoch": 0.2372, "grad_norm": 0.0894329622387886, "learning_rate": 4.511822856984502e-05, "loss": 0.0418, "step": 48440 }, { "epoch": 0.23725, "grad_norm": 0.09910974651575089, "learning_rate": 4.5115774371384375e-05, "loss": 0.039, "step": 48450 }, { "epoch": 0.2373, "grad_norm": 0.09602376818656921, "learning_rate": 4.511331962296602e-05, "loss": 0.0397, "step": 48460 }, { "epoch": 0.23735, "grad_norm": 0.09119140356779099, "learning_rate": 4.511086432465705e-05, "loss": 0.0425, "step": 48470 }, { "epoch": 0.2374, "grad_norm": 0.11407288908958435, "learning_rate": 4.510840847652462e-05, "loss": 0.0405, "step": 48480 }, { "epoch": 0.23745, "grad_norm": 0.11424467712640762, "learning_rate": 4.510595207863585e-05, "loss": 0.0391, "step": 48490 }, { "epoch": 0.2375, "grad_norm": 0.10439980030059814, "learning_rate": 4.5103495131057904e-05, "loss": 0.0394, "step": 48500 }, { "epoch": 0.23755, "grad_norm": 0.11426830291748047, "learning_rate": 4.510103763385795e-05, "loss": 0.0385, "step": 48510 }, { "epoch": 0.2376, "grad_norm": 0.1235620304942131, "learning_rate": 4.5098579587103186e-05, "loss": 0.0376, "step": 48520 }, { "epoch": 0.23765, "grad_norm": 0.12331556528806686, "learning_rate": 4.50961209908608e-05, "loss": 0.0401, "step": 48530 }, { "epoch": 0.2377, "grad_norm": 0.08665104955434799, "learning_rate": 4.509366184519802e-05, "loss": 0.0386, "step": 48540 }, { "epoch": 0.23775, "grad_norm": 0.09209824353456497, "learning_rate": 4.5091202150182064e-05, "loss": 0.0411, "step": 48550 }, { "epoch": 0.2378, "grad_norm": 0.09111570566892624, "learning_rate": 4.508874190588021e-05, "loss": 0.0389, "step": 48560 }, { "epoch": 0.23785, "grad_norm": 0.09899560362100601, "learning_rate": 4.508628111235968e-05, "loss": 0.0399, "step": 48570 }, { "epoch": 0.2379, "grad_norm": 0.10667652636766434, "learning_rate": 4.5083819769687776e-05, "loss": 0.0385, "step": 48580 }, { "epoch": 0.23795, "grad_norm": 0.10386928170919418, "learning_rate": 4.508135787793178e-05, "loss": 0.038, "step": 48590 }, { "epoch": 0.238, "grad_norm": 0.10738810151815414, "learning_rate": 4.5078895437159016e-05, "loss": 0.0374, "step": 48600 }, { "epoch": 0.23805, "grad_norm": 0.11245346814393997, "learning_rate": 4.507643244743679e-05, "loss": 0.0384, "step": 48610 }, { "epoch": 0.2381, "grad_norm": 0.09302639216184616, "learning_rate": 4.5073968908832446e-05, "loss": 0.038, "step": 48620 }, { "epoch": 0.23815, "grad_norm": 0.10083436220884323, "learning_rate": 4.5071504821413326e-05, "loss": 0.0394, "step": 48630 }, { "epoch": 0.2382, "grad_norm": 0.10127535462379456, "learning_rate": 4.5069040185246805e-05, "loss": 0.0387, "step": 48640 }, { "epoch": 0.23825, "grad_norm": 0.12256845831871033, "learning_rate": 4.5066575000400265e-05, "loss": 0.0395, "step": 48650 }, { "epoch": 0.2383, "grad_norm": 0.10586626827716827, "learning_rate": 4.5064109266941104e-05, "loss": 0.0368, "step": 48660 }, { "epoch": 0.23835, "grad_norm": 0.10445740073919296, "learning_rate": 4.506164298493674e-05, "loss": 0.0376, "step": 48670 }, { "epoch": 0.2384, "grad_norm": 0.12181146442890167, "learning_rate": 4.5059176154454586e-05, "loss": 0.0379, "step": 48680 }, { "epoch": 0.23845, "grad_norm": 0.11048153042793274, "learning_rate": 4.5056708775562096e-05, "loss": 0.0399, "step": 48690 }, { "epoch": 0.2385, "grad_norm": 0.1106812134385109, "learning_rate": 4.505424084832672e-05, "loss": 0.0387, "step": 48700 }, { "epoch": 0.23855, "grad_norm": 0.09739340096712112, "learning_rate": 4.505177237281594e-05, "loss": 0.0424, "step": 48710 }, { "epoch": 0.2386, "grad_norm": 0.1155017539858818, "learning_rate": 4.504930334909723e-05, "loss": 0.0397, "step": 48720 }, { "epoch": 0.23865, "grad_norm": 0.13820934295654297, "learning_rate": 4.50468337772381e-05, "loss": 0.0391, "step": 48730 }, { "epoch": 0.2387, "grad_norm": 0.12262959033250809, "learning_rate": 4.5044363657306055e-05, "loss": 0.0398, "step": 48740 }, { "epoch": 0.23875, "grad_norm": 0.11455817520618439, "learning_rate": 4.504189298936865e-05, "loss": 0.0398, "step": 48750 }, { "epoch": 0.2388, "grad_norm": 0.12329459190368652, "learning_rate": 4.5039421773493417e-05, "loss": 0.0401, "step": 48760 }, { "epoch": 0.23885, "grad_norm": 0.10852232575416565, "learning_rate": 4.5036950009747925e-05, "loss": 0.0397, "step": 48770 }, { "epoch": 0.2389, "grad_norm": 0.11602942645549774, "learning_rate": 4.503447769819974e-05, "loss": 0.0392, "step": 48780 }, { "epoch": 0.23895, "grad_norm": 0.11058322340250015, "learning_rate": 4.503200483891647e-05, "loss": 0.0391, "step": 48790 }, { "epoch": 0.239, "grad_norm": 0.1164764016866684, "learning_rate": 4.502953143196571e-05, "loss": 0.0398, "step": 48800 }, { "epoch": 0.23905, "grad_norm": 0.10696570575237274, "learning_rate": 4.502705747741508e-05, "loss": 0.0387, "step": 48810 }, { "epoch": 0.2391, "grad_norm": 0.10800722241401672, "learning_rate": 4.502458297533223e-05, "loss": 0.0417, "step": 48820 }, { "epoch": 0.23915, "grad_norm": 0.09370379149913788, "learning_rate": 4.502210792578481e-05, "loss": 0.039, "step": 48830 }, { "epoch": 0.2392, "grad_norm": 0.11608373373746872, "learning_rate": 4.501963232884047e-05, "loss": 0.0409, "step": 48840 }, { "epoch": 0.23925, "grad_norm": 0.09210273623466492, "learning_rate": 4.50171561845669e-05, "loss": 0.0395, "step": 48850 }, { "epoch": 0.2393, "grad_norm": 0.12715387344360352, "learning_rate": 4.501467949303181e-05, "loss": 0.0441, "step": 48860 }, { "epoch": 0.23935, "grad_norm": 0.11636749655008316, "learning_rate": 4.5012202254302894e-05, "loss": 0.0417, "step": 48870 }, { "epoch": 0.2394, "grad_norm": 0.12367291003465652, "learning_rate": 4.500972446844789e-05, "loss": 0.04, "step": 48880 }, { "epoch": 0.23945, "grad_norm": 0.11022377759218216, "learning_rate": 4.500724613553454e-05, "loss": 0.0403, "step": 48890 }, { "epoch": 0.2395, "grad_norm": 0.115224689245224, "learning_rate": 4.500476725563059e-05, "loss": 0.0406, "step": 48900 }, { "epoch": 0.23955, "grad_norm": 0.12038897722959518, "learning_rate": 4.500228782880382e-05, "loss": 0.0408, "step": 48910 }, { "epoch": 0.2396, "grad_norm": 0.17151667177677155, "learning_rate": 4.4999807855122025e-05, "loss": 0.0406, "step": 48920 }, { "epoch": 0.23965, "grad_norm": 0.12789474427700043, "learning_rate": 4.4997327334652984e-05, "loss": 0.039, "step": 48930 }, { "epoch": 0.2397, "grad_norm": 0.10562655329704285, "learning_rate": 4.499484626746453e-05, "loss": 0.0376, "step": 48940 }, { "epoch": 0.23975, "grad_norm": 0.11011016368865967, "learning_rate": 4.4992364653624495e-05, "loss": 0.0392, "step": 48950 }, { "epoch": 0.2398, "grad_norm": 0.09995349496603012, "learning_rate": 4.498988249320072e-05, "loss": 0.0413, "step": 48960 }, { "epoch": 0.23985, "grad_norm": 0.10026644915342331, "learning_rate": 4.4987399786261064e-05, "loss": 0.0396, "step": 48970 }, { "epoch": 0.2399, "grad_norm": 0.10839217156171799, "learning_rate": 4.498491653287341e-05, "loss": 0.039, "step": 48980 }, { "epoch": 0.23995, "grad_norm": 0.1049165427684784, "learning_rate": 4.4982432733105646e-05, "loss": 0.0399, "step": 48990 }, { "epoch": 0.24, "grad_norm": 0.08821311593055725, "learning_rate": 4.4979948387025675e-05, "loss": 0.0386, "step": 49000 }, { "epoch": 0.24005, "grad_norm": 0.09977595508098602, "learning_rate": 4.497746349470142e-05, "loss": 0.0391, "step": 49010 }, { "epoch": 0.2401, "grad_norm": 0.10528219491243362, "learning_rate": 4.497497805620082e-05, "loss": 0.0375, "step": 49020 }, { "epoch": 0.24015, "grad_norm": 0.11685045063495636, "learning_rate": 4.497249207159183e-05, "loss": 0.0407, "step": 49030 }, { "epoch": 0.2402, "grad_norm": 0.10669083148241043, "learning_rate": 4.4970005540942405e-05, "loss": 0.04, "step": 49040 }, { "epoch": 0.24025, "grad_norm": 0.09289010614156723, "learning_rate": 4.496751846432053e-05, "loss": 0.0385, "step": 49050 }, { "epoch": 0.2403, "grad_norm": 0.13816522061824799, "learning_rate": 4.496503084179421e-05, "loss": 0.0393, "step": 49060 }, { "epoch": 0.24035, "grad_norm": 0.14262689650058746, "learning_rate": 4.4962542673431434e-05, "loss": 0.0404, "step": 49070 }, { "epoch": 0.2404, "grad_norm": 0.10469383001327515, "learning_rate": 4.4960053959300254e-05, "loss": 0.039, "step": 49080 }, { "epoch": 0.24045, "grad_norm": 0.09244794398546219, "learning_rate": 4.495756469946869e-05, "loss": 0.0384, "step": 49090 }, { "epoch": 0.2405, "grad_norm": 0.10283170640468597, "learning_rate": 4.49550748940048e-05, "loss": 0.0388, "step": 49100 }, { "epoch": 0.24055, "grad_norm": 0.12514524161815643, "learning_rate": 4.4952584542976664e-05, "loss": 0.041, "step": 49110 }, { "epoch": 0.2406, "grad_norm": 0.09727492928504944, "learning_rate": 4.495009364645236e-05, "loss": 0.0389, "step": 49120 }, { "epoch": 0.24065, "grad_norm": 0.11048632115125656, "learning_rate": 4.49476022045e-05, "loss": 0.0411, "step": 49130 }, { "epoch": 0.2407, "grad_norm": 0.09122204035520554, "learning_rate": 4.494511021718768e-05, "loss": 0.0398, "step": 49140 }, { "epoch": 0.24075, "grad_norm": 0.1161789745092392, "learning_rate": 4.4942617684583546e-05, "loss": 0.0418, "step": 49150 }, { "epoch": 0.2408, "grad_norm": 0.11543729901313782, "learning_rate": 4.4940124606755734e-05, "loss": 0.0419, "step": 49160 }, { "epoch": 0.24085, "grad_norm": 0.12771295011043549, "learning_rate": 4.493763098377241e-05, "loss": 0.0404, "step": 49170 }, { "epoch": 0.2409, "grad_norm": 0.0894574448466301, "learning_rate": 4.493513681570174e-05, "loss": 0.0391, "step": 49180 }, { "epoch": 0.24095, "grad_norm": 0.08314885199069977, "learning_rate": 4.493264210261192e-05, "loss": 0.0402, "step": 49190 }, { "epoch": 0.241, "grad_norm": 0.09199110418558121, "learning_rate": 4.4930146844571156e-05, "loss": 0.0448, "step": 49200 }, { "epoch": 0.24105, "grad_norm": 0.08660294115543365, "learning_rate": 4.4927651041647654e-05, "loss": 0.0389, "step": 49210 }, { "epoch": 0.2411, "grad_norm": 0.09824992716312408, "learning_rate": 4.4925154693909674e-05, "loss": 0.0403, "step": 49220 }, { "epoch": 0.24115, "grad_norm": 0.10209884494543076, "learning_rate": 4.492265780142544e-05, "loss": 0.0422, "step": 49230 }, { "epoch": 0.2412, "grad_norm": 0.09823673963546753, "learning_rate": 4.4920160364263234e-05, "loss": 0.0403, "step": 49240 }, { "epoch": 0.24125, "grad_norm": 0.11731412261724472, "learning_rate": 4.491766238249132e-05, "loss": 0.0405, "step": 49250 }, { "epoch": 0.2413, "grad_norm": 0.11141234636306763, "learning_rate": 4.4915163856178e-05, "loss": 0.0391, "step": 49260 }, { "epoch": 0.24135, "grad_norm": 0.1311003714799881, "learning_rate": 4.4912664785391584e-05, "loss": 0.0393, "step": 49270 }, { "epoch": 0.2414, "grad_norm": 0.09752027690410614, "learning_rate": 4.491016517020039e-05, "loss": 0.0387, "step": 49280 }, { "epoch": 0.24145, "grad_norm": 0.09724754840135574, "learning_rate": 4.4907665010672765e-05, "loss": 0.0384, "step": 49290 }, { "epoch": 0.2415, "grad_norm": 0.11260055750608444, "learning_rate": 4.4905164306877055e-05, "loss": 0.0381, "step": 49300 }, { "epoch": 0.24155, "grad_norm": 0.11821656674146652, "learning_rate": 4.4902663058881636e-05, "loss": 0.0383, "step": 49310 }, { "epoch": 0.2416, "grad_norm": 0.13511382043361664, "learning_rate": 4.490016126675488e-05, "loss": 0.0401, "step": 49320 }, { "epoch": 0.24165, "grad_norm": 0.11218443512916565, "learning_rate": 4.4897658930565196e-05, "loss": 0.0405, "step": 49330 }, { "epoch": 0.2417, "grad_norm": 0.11669328063726425, "learning_rate": 4.4895156050380994e-05, "loss": 0.0387, "step": 49340 }, { "epoch": 0.24175, "grad_norm": 0.09982103109359741, "learning_rate": 4.489265262627069e-05, "loss": 0.0397, "step": 49350 }, { "epoch": 0.2418, "grad_norm": 0.09687146544456482, "learning_rate": 4.489014865830274e-05, "loss": 0.039, "step": 49360 }, { "epoch": 0.24185, "grad_norm": 0.10066474229097366, "learning_rate": 4.4887644146545605e-05, "loss": 0.0383, "step": 49370 }, { "epoch": 0.2419, "grad_norm": 0.09016059339046478, "learning_rate": 4.488513909106774e-05, "loss": 0.0382, "step": 49380 }, { "epoch": 0.24195, "grad_norm": 0.09374802559614182, "learning_rate": 4.4882633491937654e-05, "loss": 0.0399, "step": 49390 }, { "epoch": 0.242, "grad_norm": 0.09328625351190567, "learning_rate": 4.488012734922383e-05, "loss": 0.038, "step": 49400 }, { "epoch": 0.24205, "grad_norm": 0.0893532782793045, "learning_rate": 4.487762066299479e-05, "loss": 0.0384, "step": 49410 }, { "epoch": 0.2421, "grad_norm": 0.09541701525449753, "learning_rate": 4.487511343331908e-05, "loss": 0.0383, "step": 49420 }, { "epoch": 0.24215, "grad_norm": 0.08737003803253174, "learning_rate": 4.4872605660265227e-05, "loss": 0.0388, "step": 49430 }, { "epoch": 0.2422, "grad_norm": 0.10412393510341644, "learning_rate": 4.48700973439018e-05, "loss": 0.0385, "step": 49440 }, { "epoch": 0.24225, "grad_norm": 0.09979381412267685, "learning_rate": 4.486758848429738e-05, "loss": 0.0388, "step": 49450 }, { "epoch": 0.2423, "grad_norm": 0.10434600710868835, "learning_rate": 4.486507908152055e-05, "loss": 0.0387, "step": 49460 }, { "epoch": 0.24235, "grad_norm": 0.0980309322476387, "learning_rate": 4.4862569135639934e-05, "loss": 0.0386, "step": 49470 }, { "epoch": 0.2424, "grad_norm": 0.10557007789611816, "learning_rate": 4.486005864672412e-05, "loss": 0.0379, "step": 49480 }, { "epoch": 0.24245, "grad_norm": 0.09704895317554474, "learning_rate": 4.485754761484178e-05, "loss": 0.039, "step": 49490 }, { "epoch": 0.2425, "grad_norm": 0.08014574646949768, "learning_rate": 4.485503604006154e-05, "loss": 0.0383, "step": 49500 }, { "epoch": 0.24255, "grad_norm": 0.08936386555433273, "learning_rate": 4.4852523922452084e-05, "loss": 0.0371, "step": 49510 }, { "epoch": 0.2426, "grad_norm": 0.11614465713500977, "learning_rate": 4.485001126208207e-05, "loss": 0.0388, "step": 49520 }, { "epoch": 0.24265, "grad_norm": 0.08956684917211533, "learning_rate": 4.484749805902021e-05, "loss": 0.0387, "step": 49530 }, { "epoch": 0.2427, "grad_norm": 0.08727693557739258, "learning_rate": 4.484498431333521e-05, "loss": 0.0381, "step": 49540 }, { "epoch": 0.24275, "grad_norm": 0.0980883464217186, "learning_rate": 4.48424700250958e-05, "loss": 0.038, "step": 49550 }, { "epoch": 0.2428, "grad_norm": 0.11391928046941757, "learning_rate": 4.483995519437071e-05, "loss": 0.0379, "step": 49560 }, { "epoch": 0.24285, "grad_norm": 0.11534087359905243, "learning_rate": 4.483743982122869e-05, "loss": 0.0417, "step": 49570 }, { "epoch": 0.2429, "grad_norm": 0.11695587635040283, "learning_rate": 4.483492390573853e-05, "loss": 0.0398, "step": 49580 }, { "epoch": 0.24295, "grad_norm": 0.12372042238712311, "learning_rate": 4.4832407447968994e-05, "loss": 0.0411, "step": 49590 }, { "epoch": 0.243, "grad_norm": 0.10684667527675629, "learning_rate": 4.482989044798889e-05, "loss": 0.0388, "step": 49600 }, { "epoch": 0.24305, "grad_norm": 0.10489478707313538, "learning_rate": 4.482737290586703e-05, "loss": 0.0414, "step": 49610 }, { "epoch": 0.2431, "grad_norm": 0.11063244938850403, "learning_rate": 4.4824854821672245e-05, "loss": 0.0388, "step": 49620 }, { "epoch": 0.24315, "grad_norm": 0.1027778759598732, "learning_rate": 4.482233619547338e-05, "loss": 0.0416, "step": 49630 }, { "epoch": 0.2432, "grad_norm": 0.11320636421442032, "learning_rate": 4.481981702733929e-05, "loss": 0.0402, "step": 49640 }, { "epoch": 0.24325, "grad_norm": 0.09724336862564087, "learning_rate": 4.481729731733885e-05, "loss": 0.0403, "step": 49650 }, { "epoch": 0.2433, "grad_norm": 0.11856746673583984, "learning_rate": 4.4814777065540936e-05, "loss": 0.0393, "step": 49660 }, { "epoch": 0.24335, "grad_norm": 0.10303764790296555, "learning_rate": 4.481225627201448e-05, "loss": 0.0402, "step": 49670 }, { "epoch": 0.2434, "grad_norm": 0.0987589880824089, "learning_rate": 4.4809734936828365e-05, "loss": 0.0414, "step": 49680 }, { "epoch": 0.24345, "grad_norm": 0.12305308878421783, "learning_rate": 4.480721306005154e-05, "loss": 0.0424, "step": 49690 }, { "epoch": 0.2435, "grad_norm": 0.11457843333482742, "learning_rate": 4.4804690641752955e-05, "loss": 0.0386, "step": 49700 }, { "epoch": 0.24355, "grad_norm": 0.15410658717155457, "learning_rate": 4.480216768200157e-05, "loss": 0.0408, "step": 49710 }, { "epoch": 0.2436, "grad_norm": 0.13668401539325714, "learning_rate": 4.479964418086635e-05, "loss": 0.04, "step": 49720 }, { "epoch": 0.24365, "grad_norm": 0.1328498274087906, "learning_rate": 4.47971201384163e-05, "loss": 0.039, "step": 49730 }, { "epoch": 0.2437, "grad_norm": 0.11546991765499115, "learning_rate": 4.479459555472043e-05, "loss": 0.0389, "step": 49740 }, { "epoch": 0.24375, "grad_norm": 0.09257245808839798, "learning_rate": 4.479207042984775e-05, "loss": 0.0372, "step": 49750 }, { "epoch": 0.2438, "grad_norm": 0.11340250819921494, "learning_rate": 4.4789544763867304e-05, "loss": 0.039, "step": 49760 }, { "epoch": 0.24385, "grad_norm": 0.11558615416288376, "learning_rate": 4.478701855684814e-05, "loss": 0.0394, "step": 49770 }, { "epoch": 0.2439, "grad_norm": 0.10287030786275864, "learning_rate": 4.4784491808859314e-05, "loss": 0.0395, "step": 49780 }, { "epoch": 0.24395, "grad_norm": 0.1381189078092575, "learning_rate": 4.478196451996992e-05, "loss": 0.0404, "step": 49790 }, { "epoch": 0.244, "grad_norm": 0.136036217212677, "learning_rate": 4.4779436690249045e-05, "loss": 0.0393, "step": 49800 }, { "epoch": 0.24405, "grad_norm": 0.10182151943445206, "learning_rate": 4.4776908319765797e-05, "loss": 0.0383, "step": 49810 }, { "epoch": 0.2441, "grad_norm": 0.1141929030418396, "learning_rate": 4.477437940858932e-05, "loss": 0.0394, "step": 49820 }, { "epoch": 0.24415, "grad_norm": 0.10495369881391525, "learning_rate": 4.477184995678872e-05, "loss": 0.0383, "step": 49830 }, { "epoch": 0.2442, "grad_norm": 0.0963873639702797, "learning_rate": 4.476931996443319e-05, "loss": 0.0392, "step": 49840 }, { "epoch": 0.24425, "grad_norm": 0.0951836109161377, "learning_rate": 4.476678943159186e-05, "loss": 0.0408, "step": 49850 }, { "epoch": 0.2443, "grad_norm": 0.09826401621103287, "learning_rate": 4.476425835833394e-05, "loss": 0.039, "step": 49860 }, { "epoch": 0.24435, "grad_norm": 0.09092506021261215, "learning_rate": 4.4761726744728626e-05, "loss": 0.0383, "step": 49870 }, { "epoch": 0.2444, "grad_norm": 0.08619531244039536, "learning_rate": 4.4759194590845136e-05, "loss": 0.0388, "step": 49880 }, { "epoch": 0.24445, "grad_norm": 0.09693659096956253, "learning_rate": 4.4756661896752675e-05, "loss": 0.0392, "step": 49890 }, { "epoch": 0.2445, "grad_norm": 0.09849540889263153, "learning_rate": 4.47541286625205e-05, "loss": 0.0384, "step": 49900 }, { "epoch": 0.24455, "grad_norm": 0.09277253597974777, "learning_rate": 4.475159488821787e-05, "loss": 0.0395, "step": 49910 }, { "epoch": 0.2446, "grad_norm": 0.09384094178676605, "learning_rate": 4.474906057391406e-05, "loss": 0.0391, "step": 49920 }, { "epoch": 0.24465, "grad_norm": 0.11941733211278915, "learning_rate": 4.474652571967834e-05, "loss": 0.0387, "step": 49930 }, { "epoch": 0.2447, "grad_norm": 0.14373308420181274, "learning_rate": 4.474399032558004e-05, "loss": 0.043, "step": 49940 }, { "epoch": 0.24475, "grad_norm": 0.1225065290927887, "learning_rate": 4.474145439168846e-05, "loss": 0.0395, "step": 49950 }, { "epoch": 0.2448, "grad_norm": 0.10563571751117706, "learning_rate": 4.473891791807293e-05, "loss": 0.0391, "step": 49960 }, { "epoch": 0.24485, "grad_norm": 0.11376042664051056, "learning_rate": 4.4736380904802796e-05, "loss": 0.0396, "step": 49970 }, { "epoch": 0.2449, "grad_norm": 0.12583886086940765, "learning_rate": 4.4733843351947434e-05, "loss": 0.0397, "step": 49980 }, { "epoch": 0.24495, "grad_norm": 0.10607270151376724, "learning_rate": 4.47313052595762e-05, "loss": 0.0426, "step": 49990 }, { "epoch": 0.245, "grad_norm": 0.10916159301996231, "learning_rate": 4.47287666277585e-05, "loss": 0.0398, "step": 50000 }, { "epoch": 0.24505, "grad_norm": 0.13387051224708557, "learning_rate": 4.472622745656372e-05, "loss": 0.039, "step": 50010 }, { "epoch": 0.2451, "grad_norm": 0.10772855579853058, "learning_rate": 4.4723687746061305e-05, "loss": 0.0384, "step": 50020 }, { "epoch": 0.24515, "grad_norm": 0.11364582926034927, "learning_rate": 4.472114749632067e-05, "loss": 0.0382, "step": 50030 }, { "epoch": 0.2452, "grad_norm": 0.121206134557724, "learning_rate": 4.471860670741127e-05, "loss": 0.0397, "step": 50040 }, { "epoch": 0.24525, "grad_norm": 0.13348430395126343, "learning_rate": 4.471606537940257e-05, "loss": 0.0404, "step": 50050 }, { "epoch": 0.2453, "grad_norm": 0.11775519698858261, "learning_rate": 4.471352351236406e-05, "loss": 0.0425, "step": 50060 }, { "epoch": 0.24535, "grad_norm": 0.13693532347679138, "learning_rate": 4.4710981106365214e-05, "loss": 0.0414, "step": 50070 }, { "epoch": 0.2454, "grad_norm": 0.10618642717599869, "learning_rate": 4.470843816147555e-05, "loss": 0.0378, "step": 50080 }, { "epoch": 0.24545, "grad_norm": 0.10738607496023178, "learning_rate": 4.470589467776459e-05, "loss": 0.0412, "step": 50090 }, { "epoch": 0.2455, "grad_norm": 0.12106368690729141, "learning_rate": 4.4703350655301876e-05, "loss": 0.0415, "step": 50100 }, { "epoch": 0.24555, "grad_norm": 0.1166282445192337, "learning_rate": 4.4700806094156955e-05, "loss": 0.0393, "step": 50110 }, { "epoch": 0.2456, "grad_norm": 0.1051270067691803, "learning_rate": 4.4698260994399396e-05, "loss": 0.0388, "step": 50120 }, { "epoch": 0.24565, "grad_norm": 0.10988081246614456, "learning_rate": 4.469571535609879e-05, "loss": 0.0384, "step": 50130 }, { "epoch": 0.2457, "grad_norm": 0.11927267909049988, "learning_rate": 4.469316917932472e-05, "loss": 0.0395, "step": 50140 }, { "epoch": 0.24575, "grad_norm": 0.10540378093719482, "learning_rate": 4.46906224641468e-05, "loss": 0.0396, "step": 50150 }, { "epoch": 0.2458, "grad_norm": 0.11323534697294235, "learning_rate": 4.468807521063466e-05, "loss": 0.0405, "step": 50160 }, { "epoch": 0.24585, "grad_norm": 0.12552185356616974, "learning_rate": 4.468552741885794e-05, "loss": 0.0397, "step": 50170 }, { "epoch": 0.2459, "grad_norm": 0.1230066567659378, "learning_rate": 4.4682979088886304e-05, "loss": 0.0418, "step": 50180 }, { "epoch": 0.24595, "grad_norm": 0.14640747010707855, "learning_rate": 4.4680430220789406e-05, "loss": 0.0395, "step": 50190 }, { "epoch": 0.246, "grad_norm": 0.11146890372037888, "learning_rate": 4.467788081463694e-05, "loss": 0.0386, "step": 50200 }, { "epoch": 0.24605, "grad_norm": 0.12735052406787872, "learning_rate": 4.4675330870498604e-05, "loss": 0.0388, "step": 50210 }, { "epoch": 0.2461, "grad_norm": 0.11881628632545471, "learning_rate": 4.4672780388444114e-05, "loss": 0.0389, "step": 50220 }, { "epoch": 0.24615, "grad_norm": 0.11332988739013672, "learning_rate": 4.4670229368543206e-05, "loss": 0.0395, "step": 50230 }, { "epoch": 0.2462, "grad_norm": 0.09810035675764084, "learning_rate": 4.4667677810865606e-05, "loss": 0.0387, "step": 50240 }, { "epoch": 0.24625, "grad_norm": 0.12134762108325958, "learning_rate": 4.4665125715481096e-05, "loss": 0.0394, "step": 50250 }, { "epoch": 0.2463, "grad_norm": 0.22856345772743225, "learning_rate": 4.4662573082459424e-05, "loss": 0.0436, "step": 50260 }, { "epoch": 0.24635, "grad_norm": 0.10471013188362122, "learning_rate": 4.46600199118704e-05, "loss": 0.0395, "step": 50270 }, { "epoch": 0.2464, "grad_norm": 0.10634054243564606, "learning_rate": 4.465746620378381e-05, "loss": 0.0391, "step": 50280 }, { "epoch": 0.24645, "grad_norm": 0.102584607899189, "learning_rate": 4.465491195826948e-05, "loss": 0.0394, "step": 50290 }, { "epoch": 0.2465, "grad_norm": 0.10269295424222946, "learning_rate": 4.465235717539725e-05, "loss": 0.0392, "step": 50300 }, { "epoch": 0.24655, "grad_norm": 0.09852663427591324, "learning_rate": 4.464980185523695e-05, "loss": 0.039, "step": 50310 }, { "epoch": 0.2466, "grad_norm": 0.1208033636212349, "learning_rate": 4.464724599785846e-05, "loss": 0.0392, "step": 50320 }, { "epoch": 0.24665, "grad_norm": 0.12018878012895584, "learning_rate": 4.464468960333163e-05, "loss": 0.0423, "step": 50330 }, { "epoch": 0.2467, "grad_norm": 0.1266999989748001, "learning_rate": 4.464213267172637e-05, "loss": 0.0396, "step": 50340 }, { "epoch": 0.24675, "grad_norm": 0.12401743978261948, "learning_rate": 4.463957520311259e-05, "loss": 0.0402, "step": 50350 }, { "epoch": 0.2468, "grad_norm": 0.1187533289194107, "learning_rate": 4.4637017197560196e-05, "loss": 0.04, "step": 50360 }, { "epoch": 0.24685, "grad_norm": 0.10464418679475784, "learning_rate": 4.463445865513913e-05, "loss": 0.0393, "step": 50370 }, { "epoch": 0.2469, "grad_norm": 0.1359594464302063, "learning_rate": 4.4631899575919344e-05, "loss": 0.0391, "step": 50380 }, { "epoch": 0.24695, "grad_norm": 0.11157345026731491, "learning_rate": 4.4629339959970794e-05, "loss": 0.0401, "step": 50390 }, { "epoch": 0.247, "grad_norm": 0.12209325283765793, "learning_rate": 4.462677980736346e-05, "loss": 0.0401, "step": 50400 }, { "epoch": 0.24705, "grad_norm": 0.1256990283727646, "learning_rate": 4.4624219118167355e-05, "loss": 0.0385, "step": 50410 }, { "epoch": 0.2471, "grad_norm": 0.12570686638355255, "learning_rate": 4.462165789245246e-05, "loss": 0.0403, "step": 50420 }, { "epoch": 0.24715, "grad_norm": 0.12645868957042694, "learning_rate": 4.461909613028881e-05, "loss": 0.0398, "step": 50430 }, { "epoch": 0.2472, "grad_norm": 0.12722982466220856, "learning_rate": 4.461653383174644e-05, "loss": 0.0384, "step": 50440 }, { "epoch": 0.24725, "grad_norm": 0.11190466582775116, "learning_rate": 4.461397099689542e-05, "loss": 0.0412, "step": 50450 }, { "epoch": 0.2473, "grad_norm": 0.12560081481933594, "learning_rate": 4.461140762580579e-05, "loss": 0.0393, "step": 50460 }, { "epoch": 0.24735, "grad_norm": 0.1073743924498558, "learning_rate": 4.460884371854764e-05, "loss": 0.039, "step": 50470 }, { "epoch": 0.2474, "grad_norm": 0.12059140205383301, "learning_rate": 4.460627927519107e-05, "loss": 0.0393, "step": 50480 }, { "epoch": 0.24745, "grad_norm": 0.1306591033935547, "learning_rate": 4.46037142958062e-05, "loss": 0.0404, "step": 50490 }, { "epoch": 0.2475, "grad_norm": 0.10691185295581818, "learning_rate": 4.460114878046313e-05, "loss": 0.0378, "step": 50500 }, { "epoch": 0.24755, "grad_norm": 0.11255087703466415, "learning_rate": 4.459858272923203e-05, "loss": 0.0385, "step": 50510 }, { "epoch": 0.2476, "grad_norm": 0.10249876976013184, "learning_rate": 4.459601614218304e-05, "loss": 0.0365, "step": 50520 }, { "epoch": 0.24765, "grad_norm": 0.10994745790958405, "learning_rate": 4.459344901938633e-05, "loss": 0.039, "step": 50530 }, { "epoch": 0.2477, "grad_norm": 0.11549528688192368, "learning_rate": 4.4590881360912074e-05, "loss": 0.038, "step": 50540 }, { "epoch": 0.24775, "grad_norm": 0.10953694581985474, "learning_rate": 4.4588313166830495e-05, "loss": 0.0384, "step": 50550 }, { "epoch": 0.2478, "grad_norm": 0.09753890335559845, "learning_rate": 4.4585744437211786e-05, "loss": 0.0398, "step": 50560 }, { "epoch": 0.24785, "grad_norm": 0.11490300297737122, "learning_rate": 4.458317517212618e-05, "loss": 0.0382, "step": 50570 }, { "epoch": 0.2479, "grad_norm": 0.11047282069921494, "learning_rate": 4.458060537164393e-05, "loss": 0.0386, "step": 50580 }, { "epoch": 0.24795, "grad_norm": 0.13541057705879211, "learning_rate": 4.4578035035835275e-05, "loss": 0.0401, "step": 50590 }, { "epoch": 0.248, "grad_norm": 0.09646723419427872, "learning_rate": 4.457546416477051e-05, "loss": 0.037, "step": 50600 }, { "epoch": 0.24805, "grad_norm": 0.10674086958169937, "learning_rate": 4.45728927585199e-05, "loss": 0.0378, "step": 50610 }, { "epoch": 0.2481, "grad_norm": 0.10835059732198715, "learning_rate": 4.4570320817153756e-05, "loss": 0.0381, "step": 50620 }, { "epoch": 0.24815, "grad_norm": 0.09281093627214432, "learning_rate": 4.4567748340742396e-05, "loss": 0.0386, "step": 50630 }, { "epoch": 0.2482, "grad_norm": 0.09371732175350189, "learning_rate": 4.456517532935615e-05, "loss": 0.0425, "step": 50640 }, { "epoch": 0.24825, "grad_norm": 0.10443238168954849, "learning_rate": 4.456260178306535e-05, "loss": 0.0409, "step": 50650 }, { "epoch": 0.2483, "grad_norm": 0.10375183820724487, "learning_rate": 4.456002770194038e-05, "loss": 0.0387, "step": 50660 }, { "epoch": 0.24835, "grad_norm": 0.11241745203733444, "learning_rate": 4.4557453086051595e-05, "loss": 0.0424, "step": 50670 }, { "epoch": 0.2484, "grad_norm": 0.08906576782464981, "learning_rate": 4.455487793546939e-05, "loss": 0.0386, "step": 50680 }, { "epoch": 0.24845, "grad_norm": 0.11888798326253891, "learning_rate": 4.455230225026416e-05, "loss": 0.0394, "step": 50690 }, { "epoch": 0.2485, "grad_norm": 0.11634727567434311, "learning_rate": 4.454972603050634e-05, "loss": 0.0385, "step": 50700 }, { "epoch": 0.24855, "grad_norm": 0.11918650567531586, "learning_rate": 4.4547149276266355e-05, "loss": 0.0398, "step": 50710 }, { "epoch": 0.2486, "grad_norm": 0.11601907014846802, "learning_rate": 4.454457198761465e-05, "loss": 0.0411, "step": 50720 }, { "epoch": 0.24865, "grad_norm": 0.10228978842496872, "learning_rate": 4.454199416462169e-05, "loss": 0.0402, "step": 50730 }, { "epoch": 0.2487, "grad_norm": 0.10430190712213516, "learning_rate": 4.4539415807357955e-05, "loss": 0.0396, "step": 50740 }, { "epoch": 0.24875, "grad_norm": 0.095687136054039, "learning_rate": 4.453683691589393e-05, "loss": 0.0381, "step": 50750 }, { "epoch": 0.2488, "grad_norm": 0.10482900589704514, "learning_rate": 4.453425749030012e-05, "loss": 0.0395, "step": 50760 }, { "epoch": 0.24885, "grad_norm": 0.0979951024055481, "learning_rate": 4.4531677530647056e-05, "loss": 0.0393, "step": 50770 }, { "epoch": 0.2489, "grad_norm": 0.10559823364019394, "learning_rate": 4.452909703700526e-05, "loss": 0.0382, "step": 50780 }, { "epoch": 0.24895, "grad_norm": 0.1315240114927292, "learning_rate": 4.452651600944529e-05, "loss": 0.0386, "step": 50790 }, { "epoch": 0.249, "grad_norm": 0.10066678375005722, "learning_rate": 4.452393444803771e-05, "loss": 0.0388, "step": 50800 }, { "epoch": 0.24905, "grad_norm": 0.09541074186563492, "learning_rate": 4.4521352352853095e-05, "loss": 0.0389, "step": 50810 }, { "epoch": 0.2491, "grad_norm": 0.11731182783842087, "learning_rate": 4.451876972396204e-05, "loss": 0.039, "step": 50820 }, { "epoch": 0.24915, "grad_norm": 0.09840092062950134, "learning_rate": 4.4516186561435156e-05, "loss": 0.0384, "step": 50830 }, { "epoch": 0.2492, "grad_norm": 0.10836604237556458, "learning_rate": 4.451360286534306e-05, "loss": 0.038, "step": 50840 }, { "epoch": 0.24925, "grad_norm": 0.13531796634197235, "learning_rate": 4.45110186357564e-05, "loss": 0.0392, "step": 50850 }, { "epoch": 0.2493, "grad_norm": 0.08943230658769608, "learning_rate": 4.450843387274581e-05, "loss": 0.0384, "step": 50860 }, { "epoch": 0.24935, "grad_norm": 0.1034252792596817, "learning_rate": 4.450584857638197e-05, "loss": 0.0385, "step": 50870 }, { "epoch": 0.2494, "grad_norm": 0.09238631278276443, "learning_rate": 4.4503262746735567e-05, "loss": 0.0381, "step": 50880 }, { "epoch": 0.24945, "grad_norm": 0.0975264385342598, "learning_rate": 4.450067638387727e-05, "loss": 0.0374, "step": 50890 }, { "epoch": 0.2495, "grad_norm": 0.11952003091573715, "learning_rate": 4.449808948787782e-05, "loss": 0.0373, "step": 50900 }, { "epoch": 0.24955, "grad_norm": 0.09383943676948547, "learning_rate": 4.4495502058807925e-05, "loss": 0.0385, "step": 50910 }, { "epoch": 0.2496, "grad_norm": 0.11109983921051025, "learning_rate": 4.4492914096738326e-05, "loss": 0.0389, "step": 50920 }, { "epoch": 0.24965, "grad_norm": 0.09983723610639572, "learning_rate": 4.449032560173978e-05, "loss": 0.0397, "step": 50930 }, { "epoch": 0.2497, "grad_norm": 0.1179436445236206, "learning_rate": 4.448773657388305e-05, "loss": 0.0375, "step": 50940 }, { "epoch": 0.24975, "grad_norm": 0.131612166762352, "learning_rate": 4.4485147013238936e-05, "loss": 0.0401, "step": 50950 }, { "epoch": 0.2498, "grad_norm": 0.12161412090063095, "learning_rate": 4.4482556919878214e-05, "loss": 0.0407, "step": 50960 }, { "epoch": 0.24985, "grad_norm": 0.10872988402843475, "learning_rate": 4.44799662938717e-05, "loss": 0.0392, "step": 50970 }, { "epoch": 0.2499, "grad_norm": 0.1138838529586792, "learning_rate": 4.447737513529023e-05, "loss": 0.0403, "step": 50980 }, { "epoch": 0.24995, "grad_norm": 0.12414175271987915, "learning_rate": 4.447478344420465e-05, "loss": 0.0392, "step": 50990 }, { "epoch": 0.25, "grad_norm": 0.13560877740383148, "learning_rate": 4.44721912206858e-05, "loss": 0.0402, "step": 51000 }, { "epoch": 0.25005, "grad_norm": 0.13603296875953674, "learning_rate": 4.446959846480456e-05, "loss": 0.0394, "step": 51010 }, { "epoch": 0.2501, "grad_norm": 0.1167522445321083, "learning_rate": 4.44670051766318e-05, "loss": 0.0386, "step": 51020 }, { "epoch": 0.25015, "grad_norm": 0.1144317165017128, "learning_rate": 4.4464411356238447e-05, "loss": 0.0379, "step": 51030 }, { "epoch": 0.2502, "grad_norm": 0.13284580409526825, "learning_rate": 4.4461817003695396e-05, "loss": 0.0383, "step": 51040 }, { "epoch": 0.25025, "grad_norm": 0.1393263041973114, "learning_rate": 4.445922211907358e-05, "loss": 0.0382, "step": 51050 }, { "epoch": 0.2503, "grad_norm": 0.11129328608512878, "learning_rate": 4.445662670244394e-05, "loss": 0.0387, "step": 51060 }, { "epoch": 0.25035, "grad_norm": 0.12428943812847137, "learning_rate": 4.445403075387743e-05, "loss": 0.0393, "step": 51070 }, { "epoch": 0.2504, "grad_norm": 0.10676748305559158, "learning_rate": 4.4451434273445036e-05, "loss": 0.0374, "step": 51080 }, { "epoch": 0.25045, "grad_norm": 0.10181623697280884, "learning_rate": 4.444883726121773e-05, "loss": 0.039, "step": 51090 }, { "epoch": 0.2505, "grad_norm": 0.10132446140050888, "learning_rate": 4.4446239717266525e-05, "loss": 0.0396, "step": 51100 }, { "epoch": 0.25055, "grad_norm": 0.12805144488811493, "learning_rate": 4.444364164166244e-05, "loss": 0.0388, "step": 51110 }, { "epoch": 0.2506, "grad_norm": 0.1167738139629364, "learning_rate": 4.444104303447648e-05, "loss": 0.0385, "step": 51120 }, { "epoch": 0.25065, "grad_norm": 0.11137361824512482, "learning_rate": 4.4438443895779716e-05, "loss": 0.0386, "step": 51130 }, { "epoch": 0.2507, "grad_norm": 0.09908577054738998, "learning_rate": 4.44358442256432e-05, "loss": 0.0377, "step": 51140 }, { "epoch": 0.25075, "grad_norm": 0.11198722571134567, "learning_rate": 4.4433244024138e-05, "loss": 0.038, "step": 51150 }, { "epoch": 0.2508, "grad_norm": 0.10560618340969086, "learning_rate": 4.4430643291335206e-05, "loss": 0.0381, "step": 51160 }, { "epoch": 0.25085, "grad_norm": 0.11533159762620926, "learning_rate": 4.4428042027305934e-05, "loss": 0.0411, "step": 51170 }, { "epoch": 0.2509, "grad_norm": 0.10088592767715454, "learning_rate": 4.442544023212129e-05, "loss": 0.0406, "step": 51180 }, { "epoch": 0.25095, "grad_norm": 0.11384643614292145, "learning_rate": 4.44228379058524e-05, "loss": 0.0396, "step": 51190 }, { "epoch": 0.251, "grad_norm": 0.10306670516729355, "learning_rate": 4.442023504857042e-05, "loss": 0.0401, "step": 51200 }, { "epoch": 0.25105, "grad_norm": 0.10993543267250061, "learning_rate": 4.441763166034652e-05, "loss": 0.0386, "step": 51210 }, { "epoch": 0.2511, "grad_norm": 0.0968732088804245, "learning_rate": 4.441502774125185e-05, "loss": 0.0386, "step": 51220 }, { "epoch": 0.25115, "grad_norm": 0.10479246079921722, "learning_rate": 4.441242329135763e-05, "loss": 0.0392, "step": 51230 }, { "epoch": 0.2512, "grad_norm": 0.1229248195886612, "learning_rate": 4.440981831073504e-05, "loss": 0.0408, "step": 51240 }, { "epoch": 0.25125, "grad_norm": 0.0964876189827919, "learning_rate": 4.4407212799455313e-05, "loss": 0.0389, "step": 51250 }, { "epoch": 0.2513, "grad_norm": 0.13513514399528503, "learning_rate": 4.440460675758967e-05, "loss": 0.0406, "step": 51260 }, { "epoch": 0.25135, "grad_norm": 0.10261445492506027, "learning_rate": 4.440200018520938e-05, "loss": 0.0444, "step": 51270 }, { "epoch": 0.2514, "grad_norm": 0.11525224894285202, "learning_rate": 4.43993930823857e-05, "loss": 0.0382, "step": 51280 }, { "epoch": 0.25145, "grad_norm": 0.12469878047704697, "learning_rate": 4.439678544918989e-05, "loss": 0.0389, "step": 51290 }, { "epoch": 0.2515, "grad_norm": 0.09355195611715317, "learning_rate": 4.439417728569325e-05, "loss": 0.0414, "step": 51300 }, { "epoch": 0.25155, "grad_norm": 0.12074048072099686, "learning_rate": 4.43915685919671e-05, "loss": 0.0419, "step": 51310 }, { "epoch": 0.2516, "grad_norm": 0.1394028216600418, "learning_rate": 4.438895936808274e-05, "loss": 0.0411, "step": 51320 }, { "epoch": 0.25165, "grad_norm": 0.1064663678407669, "learning_rate": 4.4386349614111524e-05, "loss": 0.0394, "step": 51330 }, { "epoch": 0.2517, "grad_norm": 0.13321200013160706, "learning_rate": 4.438373933012478e-05, "loss": 0.0393, "step": 51340 }, { "epoch": 0.25175, "grad_norm": 0.12222933024168015, "learning_rate": 4.438112851619389e-05, "loss": 0.0413, "step": 51350 }, { "epoch": 0.2518, "grad_norm": 0.11078042536973953, "learning_rate": 4.4378517172390234e-05, "loss": 0.0402, "step": 51360 }, { "epoch": 0.25185, "grad_norm": 0.10573874413967133, "learning_rate": 4.437590529878519e-05, "loss": 0.0402, "step": 51370 }, { "epoch": 0.2519, "grad_norm": 0.14229434728622437, "learning_rate": 4.437329289545018e-05, "loss": 0.0396, "step": 51380 }, { "epoch": 0.25195, "grad_norm": 0.13220266997814178, "learning_rate": 4.437067996245662e-05, "loss": 0.0395, "step": 51390 }, { "epoch": 0.252, "grad_norm": 0.12287094444036484, "learning_rate": 4.436806649987595e-05, "loss": 0.0384, "step": 51400 }, { "epoch": 0.25205, "grad_norm": 0.11596930772066116, "learning_rate": 4.436545250777961e-05, "loss": 0.0399, "step": 51410 }, { "epoch": 0.2521, "grad_norm": 0.12470424175262451, "learning_rate": 4.436283798623908e-05, "loss": 0.0406, "step": 51420 }, { "epoch": 0.25215, "grad_norm": 0.12757036089897156, "learning_rate": 4.4360222935325835e-05, "loss": 0.0411, "step": 51430 }, { "epoch": 0.2522, "grad_norm": 0.13404591381549835, "learning_rate": 4.435760735511136e-05, "loss": 0.0402, "step": 51440 }, { "epoch": 0.25225, "grad_norm": 0.12462472170591354, "learning_rate": 4.4354991245667175e-05, "loss": 0.0399, "step": 51450 }, { "epoch": 0.2523, "grad_norm": 0.14413760602474213, "learning_rate": 4.435237460706481e-05, "loss": 0.0402, "step": 51460 }, { "epoch": 0.25235, "grad_norm": 0.10462265461683273, "learning_rate": 4.4349757439375786e-05, "loss": 0.0398, "step": 51470 }, { "epoch": 0.2524, "grad_norm": 0.10423137247562408, "learning_rate": 4.434713974267166e-05, "loss": 0.0391, "step": 51480 }, { "epoch": 0.25245, "grad_norm": 0.10511630773544312, "learning_rate": 4.4344521517024004e-05, "loss": 0.0405, "step": 51490 }, { "epoch": 0.2525, "grad_norm": 0.10550902038812637, "learning_rate": 4.43419027625044e-05, "loss": 0.0408, "step": 51500 }, { "epoch": 0.25255, "grad_norm": 0.11555942893028259, "learning_rate": 4.433928347918444e-05, "loss": 0.0433, "step": 51510 }, { "epoch": 0.2526, "grad_norm": 0.12114205956459045, "learning_rate": 4.433666366713574e-05, "loss": 0.0413, "step": 51520 }, { "epoch": 0.25265, "grad_norm": 0.10090594738721848, "learning_rate": 4.4334043326429907e-05, "loss": 0.0402, "step": 51530 }, { "epoch": 0.2527, "grad_norm": 0.09123598784208298, "learning_rate": 4.433142245713861e-05, "loss": 0.0395, "step": 51540 }, { "epoch": 0.25275, "grad_norm": 0.10139287263154984, "learning_rate": 4.432880105933347e-05, "loss": 0.0408, "step": 51550 }, { "epoch": 0.2528, "grad_norm": 0.08323674649000168, "learning_rate": 4.4326179133086174e-05, "loss": 0.0396, "step": 51560 }, { "epoch": 0.25285, "grad_norm": 0.11446565389633179, "learning_rate": 4.43235566784684e-05, "loss": 0.0416, "step": 51570 }, { "epoch": 0.2529, "grad_norm": 0.11079252511262894, "learning_rate": 4.432093369555185e-05, "loss": 0.0386, "step": 51580 }, { "epoch": 0.25295, "grad_norm": 0.12396615743637085, "learning_rate": 4.4318310184408234e-05, "loss": 0.0397, "step": 51590 }, { "epoch": 0.253, "grad_norm": 0.12712040543556213, "learning_rate": 4.431568614510927e-05, "loss": 0.0399, "step": 51600 }, { "epoch": 0.25305, "grad_norm": 0.09145376086235046, "learning_rate": 4.4313061577726703e-05, "loss": 0.0381, "step": 51610 }, { "epoch": 0.2531, "grad_norm": 0.10613539814949036, "learning_rate": 4.4310436482332294e-05, "loss": 0.0394, "step": 51620 }, { "epoch": 0.25315, "grad_norm": 0.0854751318693161, "learning_rate": 4.43078108589978e-05, "loss": 0.039, "step": 51630 }, { "epoch": 0.2532, "grad_norm": 0.09941904991865158, "learning_rate": 4.430518470779501e-05, "loss": 0.0386, "step": 51640 }, { "epoch": 0.25325, "grad_norm": 0.0964796170592308, "learning_rate": 4.430255802879573e-05, "loss": 0.0388, "step": 51650 }, { "epoch": 0.2533, "grad_norm": 0.10009407252073288, "learning_rate": 4.4299930822071755e-05, "loss": 0.0397, "step": 51660 }, { "epoch": 0.25335, "grad_norm": 0.12308847159147263, "learning_rate": 4.429730308769493e-05, "loss": 0.0386, "step": 51670 }, { "epoch": 0.2534, "grad_norm": 0.10295268893241882, "learning_rate": 4.4294674825737086e-05, "loss": 0.0391, "step": 51680 }, { "epoch": 0.25345, "grad_norm": 0.10725370049476624, "learning_rate": 4.429204603627009e-05, "loss": 0.0399, "step": 51690 }, { "epoch": 0.2535, "grad_norm": 0.09342306107282639, "learning_rate": 4.4289416719365784e-05, "loss": 0.0382, "step": 51700 }, { "epoch": 0.25355, "grad_norm": 0.10227091610431671, "learning_rate": 4.428678687509609e-05, "loss": 0.0392, "step": 51710 }, { "epoch": 0.2536, "grad_norm": 0.10371004045009613, "learning_rate": 4.4284156503532876e-05, "loss": 0.0377, "step": 51720 }, { "epoch": 0.25365, "grad_norm": 0.10542786866426468, "learning_rate": 4.428152560474807e-05, "loss": 0.0391, "step": 51730 }, { "epoch": 0.2537, "grad_norm": 0.0887867733836174, "learning_rate": 4.42788941788136e-05, "loss": 0.0392, "step": 51740 }, { "epoch": 0.25375, "grad_norm": 0.0874442532658577, "learning_rate": 4.4276262225801404e-05, "loss": 0.0388, "step": 51750 }, { "epoch": 0.2538, "grad_norm": 0.1287989616394043, "learning_rate": 4.427362974578344e-05, "loss": 0.042, "step": 51760 }, { "epoch": 0.25385, "grad_norm": 0.11925298720598221, "learning_rate": 4.4270996738831684e-05, "loss": 0.0381, "step": 51770 }, { "epoch": 0.2539, "grad_norm": 0.11712602525949478, "learning_rate": 4.4268363205018114e-05, "loss": 0.0383, "step": 51780 }, { "epoch": 0.25395, "grad_norm": 0.13583825528621674, "learning_rate": 4.426572914441474e-05, "loss": 0.0392, "step": 51790 }, { "epoch": 0.254, "grad_norm": 0.14465183019638062, "learning_rate": 4.426309455709355e-05, "loss": 0.0392, "step": 51800 }, { "epoch": 0.25405, "grad_norm": 0.12359750270843506, "learning_rate": 4.426045944312661e-05, "loss": 0.0379, "step": 51810 }, { "epoch": 0.2541, "grad_norm": 0.11113641411066055, "learning_rate": 4.425782380258594e-05, "loss": 0.0382, "step": 51820 }, { "epoch": 0.25415, "grad_norm": 0.11407049745321274, "learning_rate": 4.42551876355436e-05, "loss": 0.0392, "step": 51830 }, { "epoch": 0.2542, "grad_norm": 0.11310215294361115, "learning_rate": 4.425255094207167e-05, "loss": 0.0419, "step": 51840 }, { "epoch": 0.25425, "grad_norm": 0.11849265545606613, "learning_rate": 4.424991372224222e-05, "loss": 0.039, "step": 51850 }, { "epoch": 0.2543, "grad_norm": 0.09995820373296738, "learning_rate": 4.4247275976127366e-05, "loss": 0.0405, "step": 51860 }, { "epoch": 0.25435, "grad_norm": 0.1018250435590744, "learning_rate": 4.4244637703799216e-05, "loss": 0.0405, "step": 51870 }, { "epoch": 0.2544, "grad_norm": 0.10998055338859558, "learning_rate": 4.4241998905329904e-05, "loss": 0.0416, "step": 51880 }, { "epoch": 0.25445, "grad_norm": 0.11770867556333542, "learning_rate": 4.423935958079156e-05, "loss": 0.0394, "step": 51890 }, { "epoch": 0.2545, "grad_norm": 0.1142832338809967, "learning_rate": 4.4236719730256365e-05, "loss": 0.0407, "step": 51900 }, { "epoch": 0.25455, "grad_norm": 0.13255222141742706, "learning_rate": 4.423407935379647e-05, "loss": 0.0395, "step": 51910 }, { "epoch": 0.2546, "grad_norm": 0.11216466128826141, "learning_rate": 4.423143845148409e-05, "loss": 0.0396, "step": 51920 }, { "epoch": 0.25465, "grad_norm": 0.12529611587524414, "learning_rate": 4.422879702339139e-05, "loss": 0.04, "step": 51930 }, { "epoch": 0.2547, "grad_norm": 0.11650749295949936, "learning_rate": 4.422615506959061e-05, "loss": 0.0399, "step": 51940 }, { "epoch": 0.25475, "grad_norm": 0.11138208210468292, "learning_rate": 4.422351259015397e-05, "loss": 0.0395, "step": 51950 }, { "epoch": 0.2548, "grad_norm": 0.10220567137002945, "learning_rate": 4.422086958515372e-05, "loss": 0.0387, "step": 51960 }, { "epoch": 0.25485, "grad_norm": 0.09616690129041672, "learning_rate": 4.421822605466211e-05, "loss": 0.0377, "step": 51970 }, { "epoch": 0.2549, "grad_norm": 0.09676245599985123, "learning_rate": 4.4215581998751434e-05, "loss": 0.0387, "step": 51980 }, { "epoch": 0.25495, "grad_norm": 0.11996602267026901, "learning_rate": 4.4212937417493954e-05, "loss": 0.0394, "step": 51990 }, { "epoch": 0.255, "grad_norm": 0.10802966356277466, "learning_rate": 4.421029231096199e-05, "loss": 0.0392, "step": 52000 }, { "epoch": 0.25505, "grad_norm": 0.13428770005702972, "learning_rate": 4.4207646679227846e-05, "loss": 0.0401, "step": 52010 }, { "epoch": 0.2551, "grad_norm": 0.12374002486467361, "learning_rate": 4.420500052236386e-05, "loss": 0.0399, "step": 52020 }, { "epoch": 0.25515, "grad_norm": 0.14582951366901398, "learning_rate": 4.420235384044237e-05, "loss": 0.04, "step": 52030 }, { "epoch": 0.2552, "grad_norm": 0.10715020447969437, "learning_rate": 4.4199706633535744e-05, "loss": 0.0401, "step": 52040 }, { "epoch": 0.25525, "grad_norm": 0.11347389221191406, "learning_rate": 4.4197058901716347e-05, "loss": 0.0388, "step": 52050 }, { "epoch": 0.2553, "grad_norm": 0.09797408431768417, "learning_rate": 4.4194410645056585e-05, "loss": 0.0395, "step": 52060 }, { "epoch": 0.25535, "grad_norm": 0.13154490292072296, "learning_rate": 4.4191761863628836e-05, "loss": 0.0393, "step": 52070 }, { "epoch": 0.2554, "grad_norm": 0.1017269417643547, "learning_rate": 4.4189112557505525e-05, "loss": 0.0387, "step": 52080 }, { "epoch": 0.25545, "grad_norm": 0.12850724160671234, "learning_rate": 4.418646272675909e-05, "loss": 0.0399, "step": 52090 }, { "epoch": 0.2555, "grad_norm": 0.10764136165380478, "learning_rate": 4.418381237146198e-05, "loss": 0.0403, "step": 52100 }, { "epoch": 0.25555, "grad_norm": 0.11821214854717255, "learning_rate": 4.418116149168663e-05, "loss": 0.041, "step": 52110 }, { "epoch": 0.2556, "grad_norm": 0.10730551928281784, "learning_rate": 4.417851008750554e-05, "loss": 0.0394, "step": 52120 }, { "epoch": 0.25565, "grad_norm": 0.09921026974916458, "learning_rate": 4.417585815899119e-05, "loss": 0.0395, "step": 52130 }, { "epoch": 0.2557, "grad_norm": 0.13657888770103455, "learning_rate": 4.4173205706216084e-05, "loss": 0.0451, "step": 52140 }, { "epoch": 0.25575, "grad_norm": 0.10962523519992828, "learning_rate": 4.417055272925273e-05, "loss": 0.0402, "step": 52150 }, { "epoch": 0.2558, "grad_norm": 0.09717714041471481, "learning_rate": 4.416789922817367e-05, "loss": 0.0386, "step": 52160 }, { "epoch": 0.25585, "grad_norm": 0.11585210263729095, "learning_rate": 4.416524520305145e-05, "loss": 0.0396, "step": 52170 }, { "epoch": 0.2559, "grad_norm": 0.12299904227256775, "learning_rate": 4.416259065395862e-05, "loss": 0.0395, "step": 52180 }, { "epoch": 0.25595, "grad_norm": 0.10254476219415665, "learning_rate": 4.415993558096776e-05, "loss": 0.0381, "step": 52190 }, { "epoch": 0.256, "grad_norm": 0.08398805558681488, "learning_rate": 4.415727998415147e-05, "loss": 0.0394, "step": 52200 }, { "epoch": 0.25605, "grad_norm": 0.10847415030002594, "learning_rate": 4.415462386358233e-05, "loss": 0.0402, "step": 52210 }, { "epoch": 0.2561, "grad_norm": 0.09870469570159912, "learning_rate": 4.415196721933298e-05, "loss": 0.037, "step": 52220 }, { "epoch": 0.25615, "grad_norm": 0.11038938909769058, "learning_rate": 4.414931005147604e-05, "loss": 0.0386, "step": 52230 }, { "epoch": 0.2562, "grad_norm": 0.08587460964918137, "learning_rate": 4.414665236008414e-05, "loss": 0.0374, "step": 52240 }, { "epoch": 0.25625, "grad_norm": 0.1056613028049469, "learning_rate": 4.414399414522997e-05, "loss": 0.0402, "step": 52250 }, { "epoch": 0.2563, "grad_norm": 0.10318482667207718, "learning_rate": 4.41413354069862e-05, "loss": 0.0385, "step": 52260 }, { "epoch": 0.25635, "grad_norm": 0.11092269420623779, "learning_rate": 4.413867614542551e-05, "loss": 0.0383, "step": 52270 }, { "epoch": 0.2564, "grad_norm": 0.09598180651664734, "learning_rate": 4.4136016360620594e-05, "loss": 0.037, "step": 52280 }, { "epoch": 0.25645, "grad_norm": 0.10150059312582016, "learning_rate": 4.413335605264418e-05, "loss": 0.039, "step": 52290 }, { "epoch": 0.2565, "grad_norm": 0.10892947018146515, "learning_rate": 4.4130695221569007e-05, "loss": 0.0387, "step": 52300 }, { "epoch": 0.25655, "grad_norm": 0.09892286360263824, "learning_rate": 4.4128033867467805e-05, "loss": 0.0392, "step": 52310 }, { "epoch": 0.2566, "grad_norm": 0.10339682549238205, "learning_rate": 4.412537199041335e-05, "loss": 0.0392, "step": 52320 }, { "epoch": 0.25665, "grad_norm": 0.10403071343898773, "learning_rate": 4.4122709590478406e-05, "loss": 0.0374, "step": 52330 }, { "epoch": 0.2567, "grad_norm": 0.10536352545022964, "learning_rate": 4.412004666773577e-05, "loss": 0.0425, "step": 52340 }, { "epoch": 0.25675, "grad_norm": 0.0953894630074501, "learning_rate": 4.411738322225823e-05, "loss": 0.0421, "step": 52350 }, { "epoch": 0.2568, "grad_norm": 0.08324065059423447, "learning_rate": 4.411471925411863e-05, "loss": 0.039, "step": 52360 }, { "epoch": 0.25685, "grad_norm": 0.10176534205675125, "learning_rate": 4.411205476338978e-05, "loss": 0.0379, "step": 52370 }, { "epoch": 0.2569, "grad_norm": 0.10972446948289871, "learning_rate": 4.410938975014454e-05, "loss": 0.0381, "step": 52380 }, { "epoch": 0.25695, "grad_norm": 0.11949066072702408, "learning_rate": 4.4106724214455754e-05, "loss": 0.0389, "step": 52390 }, { "epoch": 0.257, "grad_norm": 0.1242421492934227, "learning_rate": 4.410405815639631e-05, "loss": 0.0393, "step": 52400 }, { "epoch": 0.25705, "grad_norm": 0.11890441179275513, "learning_rate": 4.410139157603909e-05, "loss": 0.0384, "step": 52410 }, { "epoch": 0.2571, "grad_norm": 0.124184750020504, "learning_rate": 4.4098724473457e-05, "loss": 0.0381, "step": 52420 }, { "epoch": 0.25715, "grad_norm": 0.1061735451221466, "learning_rate": 4.409605684872295e-05, "loss": 0.0377, "step": 52430 }, { "epoch": 0.2572, "grad_norm": 0.11340545862913132, "learning_rate": 4.40933887019099e-05, "loss": 0.0376, "step": 52440 }, { "epoch": 0.25725, "grad_norm": 0.11279773712158203, "learning_rate": 4.409072003309077e-05, "loss": 0.0373, "step": 52450 }, { "epoch": 0.2573, "grad_norm": 0.10331476479768753, "learning_rate": 4.408805084233852e-05, "loss": 0.0385, "step": 52460 }, { "epoch": 0.25735, "grad_norm": 0.11186760663986206, "learning_rate": 4.4085381129726136e-05, "loss": 0.0386, "step": 52470 }, { "epoch": 0.2574, "grad_norm": 0.119344562292099, "learning_rate": 4.4082710895326596e-05, "loss": 0.0402, "step": 52480 }, { "epoch": 0.25745, "grad_norm": 0.09493733942508698, "learning_rate": 4.408004013921291e-05, "loss": 0.0373, "step": 52490 }, { "epoch": 0.2575, "grad_norm": 0.09359245002269745, "learning_rate": 4.407736886145809e-05, "loss": 0.0383, "step": 52500 }, { "epoch": 0.25755, "grad_norm": 0.1006658598780632, "learning_rate": 4.4074697062135185e-05, "loss": 0.0393, "step": 52510 }, { "epoch": 0.2576, "grad_norm": 0.1178915798664093, "learning_rate": 4.4072024741317225e-05, "loss": 0.0389, "step": 52520 }, { "epoch": 0.25765, "grad_norm": 0.09350401163101196, "learning_rate": 4.406935189907727e-05, "loss": 0.0385, "step": 52530 }, { "epoch": 0.2577, "grad_norm": 0.11241558194160461, "learning_rate": 4.4066678535488404e-05, "loss": 0.0416, "step": 52540 }, { "epoch": 0.25775, "grad_norm": 0.1266544610261917, "learning_rate": 4.40640046506237e-05, "loss": 0.0387, "step": 52550 }, { "epoch": 0.2578, "grad_norm": 0.11901448667049408, "learning_rate": 4.4061330244556274e-05, "loss": 0.0413, "step": 52560 }, { "epoch": 0.25785, "grad_norm": 0.13161858916282654, "learning_rate": 4.4058655317359246e-05, "loss": 0.0376, "step": 52570 }, { "epoch": 0.2579, "grad_norm": 0.10740600526332855, "learning_rate": 4.4055979869105734e-05, "loss": 0.0383, "step": 52580 }, { "epoch": 0.25795, "grad_norm": 0.0965193584561348, "learning_rate": 4.40533038998689e-05, "loss": 0.0392, "step": 52590 }, { "epoch": 0.258, "grad_norm": 0.09509056806564331, "learning_rate": 4.405062740972189e-05, "loss": 0.0374, "step": 52600 }, { "epoch": 0.25805, "grad_norm": 0.09715612977743149, "learning_rate": 4.404795039873788e-05, "loss": 0.038, "step": 52610 }, { "epoch": 0.2581, "grad_norm": 0.11560391634702682, "learning_rate": 4.4045272866990073e-05, "loss": 0.0379, "step": 52620 }, { "epoch": 0.25815, "grad_norm": 0.1066320464015007, "learning_rate": 4.4042594814551654e-05, "loss": 0.0403, "step": 52630 }, { "epoch": 0.2582, "grad_norm": 0.11784431338310242, "learning_rate": 4.4039916241495845e-05, "loss": 0.0395, "step": 52640 }, { "epoch": 0.25825, "grad_norm": 0.1124262809753418, "learning_rate": 4.403723714789588e-05, "loss": 0.0396, "step": 52650 }, { "epoch": 0.2583, "grad_norm": 0.15125466883182526, "learning_rate": 4.403455753382501e-05, "loss": 0.0422, "step": 52660 }, { "epoch": 0.25835, "grad_norm": 0.11785928905010223, "learning_rate": 4.4031877399356476e-05, "loss": 0.0388, "step": 52670 }, { "epoch": 0.2584, "grad_norm": 0.10574966669082642, "learning_rate": 4.4029196744563574e-05, "loss": 0.0396, "step": 52680 }, { "epoch": 0.25845, "grad_norm": 0.10138683766126633, "learning_rate": 4.4026515569519574e-05, "loss": 0.0397, "step": 52690 }, { "epoch": 0.2585, "grad_norm": 0.09625864028930664, "learning_rate": 4.4023833874297786e-05, "loss": 0.0393, "step": 52700 }, { "epoch": 0.25855, "grad_norm": 0.09360393136739731, "learning_rate": 4.402115165897153e-05, "loss": 0.0405, "step": 52710 }, { "epoch": 0.2586, "grad_norm": 0.11491934210062027, "learning_rate": 4.4018468923614136e-05, "loss": 0.0417, "step": 52720 }, { "epoch": 0.25865, "grad_norm": 0.09822811186313629, "learning_rate": 4.4015785668298945e-05, "loss": 0.0411, "step": 52730 }, { "epoch": 0.2587, "grad_norm": 0.12619754672050476, "learning_rate": 4.401310189309932e-05, "loss": 0.0388, "step": 52740 }, { "epoch": 0.25875, "grad_norm": 0.09798824787139893, "learning_rate": 4.4010417598088624e-05, "loss": 0.0393, "step": 52750 }, { "epoch": 0.2588, "grad_norm": 0.08708862215280533, "learning_rate": 4.400773278334026e-05, "loss": 0.0386, "step": 52760 }, { "epoch": 0.25885, "grad_norm": 0.10267054289579391, "learning_rate": 4.400504744892763e-05, "loss": 0.0387, "step": 52770 }, { "epoch": 0.2589, "grad_norm": 0.09680944681167603, "learning_rate": 4.4002361594924125e-05, "loss": 0.0411, "step": 52780 }, { "epoch": 0.25895, "grad_norm": 0.0983702540397644, "learning_rate": 4.39996752214032e-05, "loss": 0.0407, "step": 52790 }, { "epoch": 0.259, "grad_norm": 0.09816974401473999, "learning_rate": 4.39969883284383e-05, "loss": 0.0374, "step": 52800 }, { "epoch": 0.25905, "grad_norm": 0.11069867759943008, "learning_rate": 4.399430091610287e-05, "loss": 0.0389, "step": 52810 }, { "epoch": 0.2591, "grad_norm": 0.10115727037191391, "learning_rate": 4.3991612984470386e-05, "loss": 0.0392, "step": 52820 }, { "epoch": 0.25915, "grad_norm": 0.11012036353349686, "learning_rate": 4.398892453361434e-05, "loss": 0.0391, "step": 52830 }, { "epoch": 0.2592, "grad_norm": 0.10272771865129471, "learning_rate": 4.3986235563608233e-05, "loss": 0.0409, "step": 52840 }, { "epoch": 0.25925, "grad_norm": 0.10734359920024872, "learning_rate": 4.398354607452558e-05, "loss": 0.0372, "step": 52850 }, { "epoch": 0.2593, "grad_norm": 0.09947605431079865, "learning_rate": 4.39808560664399e-05, "loss": 0.0395, "step": 52860 }, { "epoch": 0.25935, "grad_norm": 0.11368408799171448, "learning_rate": 4.3978165539424756e-05, "loss": 0.0421, "step": 52870 }, { "epoch": 0.2594, "grad_norm": 0.11971694231033325, "learning_rate": 4.397547449355369e-05, "loss": 0.0398, "step": 52880 }, { "epoch": 0.25945, "grad_norm": 0.10816139727830887, "learning_rate": 4.397278292890028e-05, "loss": 0.0407, "step": 52890 }, { "epoch": 0.2595, "grad_norm": 0.12127848714590073, "learning_rate": 4.397009084553812e-05, "loss": 0.0397, "step": 52900 }, { "epoch": 0.25955, "grad_norm": 0.10986391454935074, "learning_rate": 4.396739824354079e-05, "loss": 0.0394, "step": 52910 }, { "epoch": 0.2596, "grad_norm": 0.10677799582481384, "learning_rate": 4.396470512298193e-05, "loss": 0.0378, "step": 52920 }, { "epoch": 0.25965, "grad_norm": 0.10446598380804062, "learning_rate": 4.396201148393515e-05, "loss": 0.0392, "step": 52930 }, { "epoch": 0.2597, "grad_norm": 0.12632323801517487, "learning_rate": 4.39593173264741e-05, "loss": 0.0382, "step": 52940 }, { "epoch": 0.25975, "grad_norm": 0.09477602690458298, "learning_rate": 4.395662265067244e-05, "loss": 0.0377, "step": 52950 }, { "epoch": 0.2598, "grad_norm": 0.12109529972076416, "learning_rate": 4.395392745660384e-05, "loss": 0.0382, "step": 52960 }, { "epoch": 0.25985, "grad_norm": 0.1306433379650116, "learning_rate": 4.395123174434198e-05, "loss": 0.0391, "step": 52970 }, { "epoch": 0.2599, "grad_norm": 0.11219587177038193, "learning_rate": 4.394853551396056e-05, "loss": 0.039, "step": 52980 }, { "epoch": 0.25995, "grad_norm": 0.10092771053314209, "learning_rate": 4.3945838765533307e-05, "loss": 0.038, "step": 52990 }, { "epoch": 0.26, "grad_norm": 0.09246230125427246, "learning_rate": 4.394314149913393e-05, "loss": 0.0383, "step": 53000 }, { "epoch": 0.26005, "grad_norm": 0.097933329641819, "learning_rate": 4.394044371483619e-05, "loss": 0.038, "step": 53010 }, { "epoch": 0.2601, "grad_norm": 0.10322330892086029, "learning_rate": 4.393774541271383e-05, "loss": 0.0374, "step": 53020 }, { "epoch": 0.26015, "grad_norm": 0.10426066815853119, "learning_rate": 4.393504659284063e-05, "loss": 0.0381, "step": 53030 }, { "epoch": 0.2602, "grad_norm": 0.09258675575256348, "learning_rate": 4.3932347255290365e-05, "loss": 0.0375, "step": 53040 }, { "epoch": 0.26025, "grad_norm": 0.10530385375022888, "learning_rate": 4.3929647400136835e-05, "loss": 0.0394, "step": 53050 }, { "epoch": 0.2603, "grad_norm": 0.10726174712181091, "learning_rate": 4.3926947027453866e-05, "loss": 0.0373, "step": 53060 }, { "epoch": 0.26035, "grad_norm": 0.10103695839643478, "learning_rate": 4.392424613731527e-05, "loss": 0.039, "step": 53070 }, { "epoch": 0.2604, "grad_norm": 0.11701173335313797, "learning_rate": 4.39215447297949e-05, "loss": 0.0385, "step": 53080 }, { "epoch": 0.26045, "grad_norm": 0.10399830341339111, "learning_rate": 4.3918842804966586e-05, "loss": 0.0387, "step": 53090 }, { "epoch": 0.2605, "grad_norm": 0.10775898396968842, "learning_rate": 4.391614036290423e-05, "loss": 0.038, "step": 53100 }, { "epoch": 0.26055, "grad_norm": 0.10917066037654877, "learning_rate": 4.391343740368171e-05, "loss": 0.0392, "step": 53110 }, { "epoch": 0.2606, "grad_norm": 0.11006568372249603, "learning_rate": 4.391073392737291e-05, "loss": 0.0376, "step": 53120 }, { "epoch": 0.26065, "grad_norm": 0.09199454635381699, "learning_rate": 4.390802993405175e-05, "loss": 0.0377, "step": 53130 }, { "epoch": 0.2607, "grad_norm": 0.1270432323217392, "learning_rate": 4.3905325423792155e-05, "loss": 0.039, "step": 53140 }, { "epoch": 0.26075, "grad_norm": 0.08557243645191193, "learning_rate": 4.390262039666807e-05, "loss": 0.0371, "step": 53150 }, { "epoch": 0.2608, "grad_norm": 0.09341472387313843, "learning_rate": 4.3899914852753436e-05, "loss": 0.0399, "step": 53160 }, { "epoch": 0.26085, "grad_norm": 0.10942547768354416, "learning_rate": 4.389720879212223e-05, "loss": 0.0381, "step": 53170 }, { "epoch": 0.2609, "grad_norm": 0.09050387144088745, "learning_rate": 4.389450221484844e-05, "loss": 0.0366, "step": 53180 }, { "epoch": 0.26095, "grad_norm": 0.10948771238327026, "learning_rate": 4.389179512100606e-05, "loss": 0.0378, "step": 53190 }, { "epoch": 0.261, "grad_norm": 0.09486088156700134, "learning_rate": 4.3889087510669094e-05, "loss": 0.0368, "step": 53200 }, { "epoch": 0.26105, "grad_norm": 0.10282183438539505, "learning_rate": 4.3886379383911574e-05, "loss": 0.0393, "step": 53210 }, { "epoch": 0.2611, "grad_norm": 0.1014777421951294, "learning_rate": 4.3883670740807534e-05, "loss": 0.0387, "step": 53220 }, { "epoch": 0.26115, "grad_norm": 0.13208040595054626, "learning_rate": 4.388096158143104e-05, "loss": 0.0401, "step": 53230 }, { "epoch": 0.2612, "grad_norm": 0.12502078711986542, "learning_rate": 4.3878251905856135e-05, "loss": 0.0377, "step": 53240 }, { "epoch": 0.26125, "grad_norm": 0.09379098564386368, "learning_rate": 4.3875541714156926e-05, "loss": 0.039, "step": 53250 }, { "epoch": 0.2613, "grad_norm": 0.10392840951681137, "learning_rate": 4.3872831006407495e-05, "loss": 0.0384, "step": 53260 }, { "epoch": 0.26135, "grad_norm": 0.09636663645505905, "learning_rate": 4.3870119782681954e-05, "loss": 0.0383, "step": 53270 }, { "epoch": 0.2614, "grad_norm": 0.09062938392162323, "learning_rate": 4.386740804305443e-05, "loss": 0.0385, "step": 53280 }, { "epoch": 0.26145, "grad_norm": 0.09627866744995117, "learning_rate": 4.386469578759905e-05, "loss": 0.0402, "step": 53290 }, { "epoch": 0.2615, "grad_norm": 0.11795052140951157, "learning_rate": 4.386198301638999e-05, "loss": 0.0404, "step": 53300 }, { "epoch": 0.26155, "grad_norm": 0.1151648685336113, "learning_rate": 4.3859269729501383e-05, "loss": 0.0392, "step": 53310 }, { "epoch": 0.2616, "grad_norm": 0.12234436720609665, "learning_rate": 4.385655592700743e-05, "loss": 0.0394, "step": 53320 }, { "epoch": 0.26165, "grad_norm": 0.11547648161649704, "learning_rate": 4.385384160898233e-05, "loss": 0.0396, "step": 53330 }, { "epoch": 0.2617, "grad_norm": 0.09618788212537766, "learning_rate": 4.385112677550027e-05, "loss": 0.0383, "step": 53340 }, { "epoch": 0.26175, "grad_norm": 0.09515061974525452, "learning_rate": 4.38484114266355e-05, "loss": 0.0407, "step": 53350 }, { "epoch": 0.2618, "grad_norm": 0.10868088155984879, "learning_rate": 4.384569556246223e-05, "loss": 0.0415, "step": 53360 }, { "epoch": 0.26185, "grad_norm": 0.09142906963825226, "learning_rate": 4.384297918305474e-05, "loss": 0.04, "step": 53370 }, { "epoch": 0.2619, "grad_norm": 0.09424825012683868, "learning_rate": 4.384026228848727e-05, "loss": 0.0399, "step": 53380 }, { "epoch": 0.26195, "grad_norm": 0.10501080006361008, "learning_rate": 4.38375448788341e-05, "loss": 0.0382, "step": 53390 }, { "epoch": 0.262, "grad_norm": 0.10195177793502808, "learning_rate": 4.383482695416954e-05, "loss": 0.0383, "step": 53400 }, { "epoch": 0.26205, "grad_norm": 0.09079153090715408, "learning_rate": 4.383210851456788e-05, "loss": 0.0395, "step": 53410 }, { "epoch": 0.2621, "grad_norm": 0.11704879254102707, "learning_rate": 4.382938956010345e-05, "loss": 0.0392, "step": 53420 }, { "epoch": 0.26215, "grad_norm": 0.10230205208063126, "learning_rate": 4.382667009085059e-05, "loss": 0.0396, "step": 53430 }, { "epoch": 0.2622, "grad_norm": 0.13031253218650818, "learning_rate": 4.382395010688364e-05, "loss": 0.0391, "step": 53440 }, { "epoch": 0.26225, "grad_norm": 0.10187526047229767, "learning_rate": 4.382122960827696e-05, "loss": 0.0417, "step": 53450 }, { "epoch": 0.2623, "grad_norm": 0.09725139290094376, "learning_rate": 4.3818508595104934e-05, "loss": 0.0383, "step": 53460 }, { "epoch": 0.26235, "grad_norm": 0.11895791441202164, "learning_rate": 4.381578706744196e-05, "loss": 0.0387, "step": 53470 }, { "epoch": 0.2624, "grad_norm": 0.11287042498588562, "learning_rate": 4.381306502536243e-05, "loss": 0.038, "step": 53480 }, { "epoch": 0.26245, "grad_norm": 0.11742764711380005, "learning_rate": 4.381034246894077e-05, "loss": 0.0409, "step": 53490 }, { "epoch": 0.2625, "grad_norm": 0.08674655854701996, "learning_rate": 4.3807619398251415e-05, "loss": 0.0397, "step": 53500 }, { "epoch": 0.26255, "grad_norm": 0.12602120637893677, "learning_rate": 4.380489581336881e-05, "loss": 0.0386, "step": 53510 }, { "epoch": 0.2626, "grad_norm": 0.13071084022521973, "learning_rate": 4.380217171436742e-05, "loss": 0.039, "step": 53520 }, { "epoch": 0.26265, "grad_norm": 0.15403853356838226, "learning_rate": 4.3799447101321723e-05, "loss": 0.0402, "step": 53530 }, { "epoch": 0.2627, "grad_norm": 0.12097310274839401, "learning_rate": 4.37967219743062e-05, "loss": 0.0398, "step": 53540 }, { "epoch": 0.26275, "grad_norm": 0.15257878601551056, "learning_rate": 4.3793996333395356e-05, "loss": 0.0372, "step": 53550 }, { "epoch": 0.2628, "grad_norm": 0.13050629198551178, "learning_rate": 4.379127017866372e-05, "loss": 0.038, "step": 53560 }, { "epoch": 0.26285, "grad_norm": 0.10135944187641144, "learning_rate": 4.3788543510185807e-05, "loss": 0.0366, "step": 53570 }, { "epoch": 0.2629, "grad_norm": 0.09226872771978378, "learning_rate": 4.378581632803618e-05, "loss": 0.039, "step": 53580 }, { "epoch": 0.26295, "grad_norm": 0.11682125180959702, "learning_rate": 4.378308863228939e-05, "loss": 0.0382, "step": 53590 }, { "epoch": 0.263, "grad_norm": 0.10629759728908539, "learning_rate": 4.378036042302002e-05, "loss": 0.0405, "step": 53600 }, { "epoch": 0.26305, "grad_norm": 0.1006932407617569, "learning_rate": 4.377763170030265e-05, "loss": 0.0395, "step": 53610 }, { "epoch": 0.2631, "grad_norm": 0.10320789366960526, "learning_rate": 4.377490246421187e-05, "loss": 0.039, "step": 53620 }, { "epoch": 0.26315, "grad_norm": 0.14634302258491516, "learning_rate": 4.377217271482232e-05, "loss": 0.0388, "step": 53630 }, { "epoch": 0.2632, "grad_norm": 0.09399808943271637, "learning_rate": 4.376944245220863e-05, "loss": 0.0383, "step": 53640 }, { "epoch": 0.26325, "grad_norm": 0.09940533339977264, "learning_rate": 4.3766711676445423e-05, "loss": 0.0378, "step": 53650 }, { "epoch": 0.2633, "grad_norm": 0.10146481543779373, "learning_rate": 4.3763980387607374e-05, "loss": 0.0384, "step": 53660 }, { "epoch": 0.26335, "grad_norm": 0.09821083396673203, "learning_rate": 4.3761248585769147e-05, "loss": 0.0396, "step": 53670 }, { "epoch": 0.2634, "grad_norm": 0.1172725036740303, "learning_rate": 4.3758516271005435e-05, "loss": 0.0394, "step": 53680 }, { "epoch": 0.26345, "grad_norm": 0.1124570220708847, "learning_rate": 4.375578344339093e-05, "loss": 0.0401, "step": 53690 }, { "epoch": 0.2635, "grad_norm": 0.11581005156040192, "learning_rate": 4.375305010300036e-05, "loss": 0.0397, "step": 53700 }, { "epoch": 0.26355, "grad_norm": 0.09203293919563293, "learning_rate": 4.3750316249908435e-05, "loss": 0.04, "step": 53710 }, { "epoch": 0.2636, "grad_norm": 0.11718445271253586, "learning_rate": 4.3747581884189913e-05, "loss": 0.039, "step": 53720 }, { "epoch": 0.26365, "grad_norm": 0.10248976945877075, "learning_rate": 4.374484700591955e-05, "loss": 0.041, "step": 53730 }, { "epoch": 0.2637, "grad_norm": 0.09773720800876617, "learning_rate": 4.3742111615172104e-05, "loss": 0.0404, "step": 53740 }, { "epoch": 0.26375, "grad_norm": 0.12032605707645416, "learning_rate": 4.3739375712022375e-05, "loss": 0.0411, "step": 53750 }, { "epoch": 0.2638, "grad_norm": 0.09681548923254013, "learning_rate": 4.373663929654515e-05, "loss": 0.0379, "step": 53760 }, { "epoch": 0.26385, "grad_norm": 0.13609491288661957, "learning_rate": 4.3733902368815245e-05, "loss": 0.0387, "step": 53770 }, { "epoch": 0.2639, "grad_norm": 0.10241185128688812, "learning_rate": 4.3731164928907485e-05, "loss": 0.0379, "step": 53780 }, { "epoch": 0.26395, "grad_norm": 0.1285761594772339, "learning_rate": 4.372842697689672e-05, "loss": 0.0407, "step": 53790 }, { "epoch": 0.264, "grad_norm": 0.10861656069755554, "learning_rate": 4.372568851285779e-05, "loss": 0.04, "step": 53800 }, { "epoch": 0.26405, "grad_norm": 0.11549534648656845, "learning_rate": 4.372294953686558e-05, "loss": 0.0384, "step": 53810 }, { "epoch": 0.2641, "grad_norm": 0.10912875831127167, "learning_rate": 4.3720210048994957e-05, "loss": 0.04, "step": 53820 }, { "epoch": 0.26415, "grad_norm": 0.10230275243520737, "learning_rate": 4.3717470049320825e-05, "loss": 0.0372, "step": 53830 }, { "epoch": 0.2642, "grad_norm": 0.10299071669578552, "learning_rate": 4.3714729537918095e-05, "loss": 0.0407, "step": 53840 }, { "epoch": 0.26425, "grad_norm": 0.10616176575422287, "learning_rate": 4.371198851486169e-05, "loss": 0.0376, "step": 53850 }, { "epoch": 0.2643, "grad_norm": 0.12114676833152771, "learning_rate": 4.370924698022655e-05, "loss": 0.0402, "step": 53860 }, { "epoch": 0.26435, "grad_norm": 0.0981198400259018, "learning_rate": 4.370650493408762e-05, "loss": 0.0373, "step": 53870 }, { "epoch": 0.2644, "grad_norm": 0.1140938326716423, "learning_rate": 4.3703762376519876e-05, "loss": 0.0392, "step": 53880 }, { "epoch": 0.26445, "grad_norm": 0.12672537565231323, "learning_rate": 4.37010193075983e-05, "loss": 0.0385, "step": 53890 }, { "epoch": 0.2645, "grad_norm": 0.14617519080638885, "learning_rate": 4.369827572739788e-05, "loss": 0.0394, "step": 53900 }, { "epoch": 0.26455, "grad_norm": 0.1404951810836792, "learning_rate": 4.369553163599362e-05, "loss": 0.0388, "step": 53910 }, { "epoch": 0.2646, "grad_norm": 0.12953104078769684, "learning_rate": 4.369278703346055e-05, "loss": 0.0417, "step": 53920 }, { "epoch": 0.26465, "grad_norm": 0.10081244260072708, "learning_rate": 4.369004191987371e-05, "loss": 0.0383, "step": 53930 }, { "epoch": 0.2647, "grad_norm": 0.10385037958621979, "learning_rate": 4.3687296295308144e-05, "loss": 0.0398, "step": 53940 }, { "epoch": 0.26475, "grad_norm": 0.11250067502260208, "learning_rate": 4.368455015983892e-05, "loss": 0.0404, "step": 53950 }, { "epoch": 0.2648, "grad_norm": 0.13306987285614014, "learning_rate": 4.368180351354111e-05, "loss": 0.0397, "step": 53960 }, { "epoch": 0.26485, "grad_norm": 0.11967480927705765, "learning_rate": 4.3679056356489814e-05, "loss": 0.038, "step": 53970 }, { "epoch": 0.2649, "grad_norm": 0.11501402407884598, "learning_rate": 4.367630868876013e-05, "loss": 0.0384, "step": 53980 }, { "epoch": 0.26495, "grad_norm": 0.130889892578125, "learning_rate": 4.367356051042718e-05, "loss": 0.0434, "step": 53990 }, { "epoch": 0.265, "grad_norm": 0.16248153150081635, "learning_rate": 4.367081182156611e-05, "loss": 0.0408, "step": 54000 }, { "epoch": 0.26505, "grad_norm": 0.13356177508831024, "learning_rate": 4.366806262225206e-05, "loss": 0.0381, "step": 54010 }, { "epoch": 0.2651, "grad_norm": 0.1282336264848709, "learning_rate": 4.3665312912560185e-05, "loss": 0.0388, "step": 54020 }, { "epoch": 0.26515, "grad_norm": 0.1705833077430725, "learning_rate": 4.366256269256567e-05, "loss": 0.0402, "step": 54030 }, { "epoch": 0.2652, "grad_norm": 0.13103726506233215, "learning_rate": 4.36598119623437e-05, "loss": 0.0395, "step": 54040 }, { "epoch": 0.26525, "grad_norm": 0.10220030695199966, "learning_rate": 4.365706072196948e-05, "loss": 0.0385, "step": 54050 }, { "epoch": 0.2653, "grad_norm": 0.08819513767957687, "learning_rate": 4.365430897151823e-05, "loss": 0.0365, "step": 54060 }, { "epoch": 0.26535, "grad_norm": 0.10846425592899323, "learning_rate": 4.365155671106518e-05, "loss": 0.0391, "step": 54070 }, { "epoch": 0.2654, "grad_norm": 0.11298328638076782, "learning_rate": 4.364880394068558e-05, "loss": 0.0371, "step": 54080 }, { "epoch": 0.26545, "grad_norm": 0.152954563498497, "learning_rate": 4.364605066045469e-05, "loss": 0.0382, "step": 54090 }, { "epoch": 0.2655, "grad_norm": 0.11766555905342102, "learning_rate": 4.364329687044777e-05, "loss": 0.0367, "step": 54100 }, { "epoch": 0.26555, "grad_norm": 0.10459113866090775, "learning_rate": 4.3640542570740115e-05, "loss": 0.0367, "step": 54110 }, { "epoch": 0.2656, "grad_norm": 0.10225178301334381, "learning_rate": 4.363778776140704e-05, "loss": 0.0391, "step": 54120 }, { "epoch": 0.26565, "grad_norm": 0.11881320923566818, "learning_rate": 4.363503244252385e-05, "loss": 0.037, "step": 54130 }, { "epoch": 0.2657, "grad_norm": 0.09808455407619476, "learning_rate": 4.363227661416587e-05, "loss": 0.039, "step": 54140 }, { "epoch": 0.26575, "grad_norm": 0.09890187531709671, "learning_rate": 4.362952027640844e-05, "loss": 0.0376, "step": 54150 }, { "epoch": 0.2658, "grad_norm": 0.08728187531232834, "learning_rate": 4.3626763429326936e-05, "loss": 0.0405, "step": 54160 }, { "epoch": 0.26585, "grad_norm": 0.09770821779966354, "learning_rate": 4.3624006072996714e-05, "loss": 0.0384, "step": 54170 }, { "epoch": 0.2659, "grad_norm": 0.0924566313624382, "learning_rate": 4.362124820749316e-05, "loss": 0.037, "step": 54180 }, { "epoch": 0.26595, "grad_norm": 0.10226110368967056, "learning_rate": 4.361848983289167e-05, "loss": 0.0385, "step": 54190 }, { "epoch": 0.266, "grad_norm": 0.08714452385902405, "learning_rate": 4.3615730949267674e-05, "loss": 0.0377, "step": 54200 }, { "epoch": 0.26605, "grad_norm": 0.11017415672540665, "learning_rate": 4.361297155669659e-05, "loss": 0.0396, "step": 54210 }, { "epoch": 0.2661, "grad_norm": 0.09896216541528702, "learning_rate": 4.361021165525384e-05, "loss": 0.0371, "step": 54220 }, { "epoch": 0.26615, "grad_norm": 0.11052402853965759, "learning_rate": 4.360745124501491e-05, "loss": 0.0378, "step": 54230 }, { "epoch": 0.2662, "grad_norm": 0.08323037624359131, "learning_rate": 4.360469032605525e-05, "loss": 0.0385, "step": 54240 }, { "epoch": 0.26625, "grad_norm": 0.09808001667261124, "learning_rate": 4.360192889845034e-05, "loss": 0.0374, "step": 54250 }, { "epoch": 0.2663, "grad_norm": 0.10530462861061096, "learning_rate": 4.3599166962275684e-05, "loss": 0.0384, "step": 54260 }, { "epoch": 0.26635, "grad_norm": 0.10739065706729889, "learning_rate": 4.359640451760679e-05, "loss": 0.0385, "step": 54270 }, { "epoch": 0.2664, "grad_norm": 0.09916096925735474, "learning_rate": 4.359364156451919e-05, "loss": 0.0399, "step": 54280 }, { "epoch": 0.26645, "grad_norm": 0.11747587472200394, "learning_rate": 4.3590878103088405e-05, "loss": 0.0373, "step": 54290 }, { "epoch": 0.2665, "grad_norm": 0.10611972212791443, "learning_rate": 4.3588114133390005e-05, "loss": 0.0387, "step": 54300 }, { "epoch": 0.26655, "grad_norm": 0.09637123346328735, "learning_rate": 4.358534965549954e-05, "loss": 0.0389, "step": 54310 }, { "epoch": 0.2666, "grad_norm": 0.10092896968126297, "learning_rate": 4.358258466949261e-05, "loss": 0.0379, "step": 54320 }, { "epoch": 0.26665, "grad_norm": 0.09020482748746872, "learning_rate": 4.3579819175444794e-05, "loss": 0.0381, "step": 54330 }, { "epoch": 0.2667, "grad_norm": 0.09622832387685776, "learning_rate": 4.3577053173431695e-05, "loss": 0.0387, "step": 54340 }, { "epoch": 0.26675, "grad_norm": 0.10070313513278961, "learning_rate": 4.357428666352894e-05, "loss": 0.0397, "step": 54350 }, { "epoch": 0.2668, "grad_norm": 0.1218617707490921, "learning_rate": 4.3571519645812166e-05, "loss": 0.0377, "step": 54360 }, { "epoch": 0.26685, "grad_norm": 0.11019979417324066, "learning_rate": 4.356875212035702e-05, "loss": 0.039, "step": 54370 }, { "epoch": 0.2669, "grad_norm": 0.10644866526126862, "learning_rate": 4.3565984087239175e-05, "loss": 0.0384, "step": 54380 }, { "epoch": 0.26695, "grad_norm": 0.0956096425652504, "learning_rate": 4.35632155465343e-05, "loss": 0.0389, "step": 54390 }, { "epoch": 0.267, "grad_norm": 0.12280172109603882, "learning_rate": 4.3560446498318085e-05, "loss": 0.0391, "step": 54400 }, { "epoch": 0.26705, "grad_norm": 0.09611979871988297, "learning_rate": 4.355767694266623e-05, "loss": 0.0378, "step": 54410 }, { "epoch": 0.2671, "grad_norm": 0.09847360104322433, "learning_rate": 4.3554906879654465e-05, "loss": 0.0377, "step": 54420 }, { "epoch": 0.26715, "grad_norm": 0.09786097705364227, "learning_rate": 4.3552136309358514e-05, "loss": 0.0376, "step": 54430 }, { "epoch": 0.2672, "grad_norm": 0.09535615146160126, "learning_rate": 4.3549365231854125e-05, "loss": 0.0382, "step": 54440 }, { "epoch": 0.26725, "grad_norm": 0.10359787940979004, "learning_rate": 4.354659364721706e-05, "loss": 0.0399, "step": 54450 }, { "epoch": 0.2673, "grad_norm": 0.11358862370252609, "learning_rate": 4.354382155552309e-05, "loss": 0.0415, "step": 54460 }, { "epoch": 0.26735, "grad_norm": 0.11824797838926315, "learning_rate": 4.3541048956848004e-05, "loss": 0.0382, "step": 54470 }, { "epoch": 0.2674, "grad_norm": 0.10883744060993195, "learning_rate": 4.353827585126762e-05, "loss": 0.0379, "step": 54480 }, { "epoch": 0.26745, "grad_norm": 0.09778952598571777, "learning_rate": 4.353550223885772e-05, "loss": 0.0382, "step": 54490 }, { "epoch": 0.2675, "grad_norm": 0.10703311860561371, "learning_rate": 4.353272811969416e-05, "loss": 0.0382, "step": 54500 }, { "epoch": 0.26755, "grad_norm": 0.10027310997247696, "learning_rate": 4.352995349385278e-05, "loss": 0.0386, "step": 54510 }, { "epoch": 0.2676, "grad_norm": 0.10753300040960312, "learning_rate": 4.352717836140943e-05, "loss": 0.0392, "step": 54520 }, { "epoch": 0.26765, "grad_norm": 0.10875657200813293, "learning_rate": 4.3524402722439976e-05, "loss": 0.0381, "step": 54530 }, { "epoch": 0.2677, "grad_norm": 0.08940254896879196, "learning_rate": 4.3521626577020316e-05, "loss": 0.039, "step": 54540 }, { "epoch": 0.26775, "grad_norm": 0.10491291433572769, "learning_rate": 4.351884992522635e-05, "loss": 0.0383, "step": 54550 }, { "epoch": 0.2678, "grad_norm": 0.11627072840929031, "learning_rate": 4.3516072767133974e-05, "loss": 0.0395, "step": 54560 }, { "epoch": 0.26785, "grad_norm": 0.16696451604366302, "learning_rate": 4.351329510281913e-05, "loss": 0.0408, "step": 54570 }, { "epoch": 0.2679, "grad_norm": 0.11519969999790192, "learning_rate": 4.3510516932357754e-05, "loss": 0.0381, "step": 54580 }, { "epoch": 0.26795, "grad_norm": 0.13323043286800385, "learning_rate": 4.35077382558258e-05, "loss": 0.0399, "step": 54590 }, { "epoch": 0.268, "grad_norm": 0.10563565790653229, "learning_rate": 4.3504959073299235e-05, "loss": 0.0395, "step": 54600 }, { "epoch": 0.26805, "grad_norm": 0.12867824733257294, "learning_rate": 4.3502179384854035e-05, "loss": 0.0406, "step": 54610 }, { "epoch": 0.2681, "grad_norm": 0.1132294312119484, "learning_rate": 4.34993991905662e-05, "loss": 0.0415, "step": 54620 }, { "epoch": 0.26815, "grad_norm": 0.1161569133400917, "learning_rate": 4.3496618490511754e-05, "loss": 0.038, "step": 54630 }, { "epoch": 0.2682, "grad_norm": 0.10462360829114914, "learning_rate": 4.34938372847667e-05, "loss": 0.0385, "step": 54640 }, { "epoch": 0.26825, "grad_norm": 0.1060120090842247, "learning_rate": 4.349105557340708e-05, "loss": 0.0407, "step": 54650 }, { "epoch": 0.2683, "grad_norm": 0.11183242499828339, "learning_rate": 4.3488273356508945e-05, "loss": 0.0385, "step": 54660 }, { "epoch": 0.26835, "grad_norm": 0.13913311064243317, "learning_rate": 4.3485490634148375e-05, "loss": 0.0386, "step": 54670 }, { "epoch": 0.2684, "grad_norm": 0.1398865282535553, "learning_rate": 4.348270740640142e-05, "loss": 0.0418, "step": 54680 }, { "epoch": 0.26845, "grad_norm": 0.11413382738828659, "learning_rate": 4.34799236733442e-05, "loss": 0.0383, "step": 54690 }, { "epoch": 0.2685, "grad_norm": 0.10674172639846802, "learning_rate": 4.34771394350528e-05, "loss": 0.0411, "step": 54700 }, { "epoch": 0.26855, "grad_norm": 0.08494861423969269, "learning_rate": 4.3474354691603356e-05, "loss": 0.0395, "step": 54710 }, { "epoch": 0.2686, "grad_norm": 0.10439600795507431, "learning_rate": 4.3471569443072e-05, "loss": 0.0388, "step": 54720 }, { "epoch": 0.26865, "grad_norm": 0.11908277869224548, "learning_rate": 4.346878368953486e-05, "loss": 0.0382, "step": 54730 }, { "epoch": 0.2687, "grad_norm": 0.11009802669286728, "learning_rate": 4.346599743106813e-05, "loss": 0.0407, "step": 54740 }, { "epoch": 0.26875, "grad_norm": 0.0999823585152626, "learning_rate": 4.3463210667747956e-05, "loss": 0.038, "step": 54750 }, { "epoch": 0.2688, "grad_norm": 0.09358491003513336, "learning_rate": 4.346042339965054e-05, "loss": 0.0375, "step": 54760 }, { "epoch": 0.26885, "grad_norm": 0.10833162814378738, "learning_rate": 4.3457635626852084e-05, "loss": 0.0387, "step": 54770 }, { "epoch": 0.2689, "grad_norm": 0.09545520693063736, "learning_rate": 4.3454847349428804e-05, "loss": 0.038, "step": 54780 }, { "epoch": 0.26895, "grad_norm": 0.09593231976032257, "learning_rate": 4.345205856745693e-05, "loss": 0.038, "step": 54790 }, { "epoch": 0.269, "grad_norm": 0.1229575127363205, "learning_rate": 4.344926928101271e-05, "loss": 0.0375, "step": 54800 }, { "epoch": 0.26905, "grad_norm": 0.1230328157544136, "learning_rate": 4.34464794901724e-05, "loss": 0.0394, "step": 54810 }, { "epoch": 0.2691, "grad_norm": 0.16736532747745514, "learning_rate": 4.344368919501226e-05, "loss": 0.0425, "step": 54820 }, { "epoch": 0.26915, "grad_norm": 0.13367053866386414, "learning_rate": 4.3440898395608595e-05, "loss": 0.0431, "step": 54830 }, { "epoch": 0.2692, "grad_norm": 0.14473852515220642, "learning_rate": 4.34381070920377e-05, "loss": 0.0413, "step": 54840 }, { "epoch": 0.26925, "grad_norm": 0.10313988476991653, "learning_rate": 4.343531528437588e-05, "loss": 0.0417, "step": 54850 }, { "epoch": 0.2693, "grad_norm": 0.10479586571455002, "learning_rate": 4.343252297269946e-05, "loss": 0.0397, "step": 54860 }, { "epoch": 0.26935, "grad_norm": 0.0953693762421608, "learning_rate": 4.34297301570848e-05, "loss": 0.0399, "step": 54870 }, { "epoch": 0.2694, "grad_norm": 0.10397922992706299, "learning_rate": 4.342693683760823e-05, "loss": 0.0389, "step": 54880 }, { "epoch": 0.26945, "grad_norm": 0.09787308424711227, "learning_rate": 4.342414301434613e-05, "loss": 0.0386, "step": 54890 }, { "epoch": 0.2695, "grad_norm": 0.10238681733608246, "learning_rate": 4.342134868737488e-05, "loss": 0.038, "step": 54900 }, { "epoch": 0.26955, "grad_norm": 0.09361415356397629, "learning_rate": 4.341855385677089e-05, "loss": 0.0402, "step": 54910 }, { "epoch": 0.2696, "grad_norm": 0.09431982785463333, "learning_rate": 4.3415758522610543e-05, "loss": 0.0415, "step": 54920 }, { "epoch": 0.26965, "grad_norm": 0.10237084329128265, "learning_rate": 4.3412962684970285e-05, "loss": 0.0373, "step": 54930 }, { "epoch": 0.2697, "grad_norm": 0.0966499000787735, "learning_rate": 4.341016634392654e-05, "loss": 0.0389, "step": 54940 }, { "epoch": 0.26975, "grad_norm": 0.07389848679304123, "learning_rate": 4.340736949955577e-05, "loss": 0.0378, "step": 54950 }, { "epoch": 0.2698, "grad_norm": 0.08758895099163055, "learning_rate": 4.3404572151934425e-05, "loss": 0.0387, "step": 54960 }, { "epoch": 0.26985, "grad_norm": 0.10281972587108612, "learning_rate": 4.3401774301139e-05, "loss": 0.0396, "step": 54970 }, { "epoch": 0.2699, "grad_norm": 0.07996245473623276, "learning_rate": 4.3398975947245965e-05, "loss": 0.0386, "step": 54980 }, { "epoch": 0.26995, "grad_norm": 0.09434854984283447, "learning_rate": 4.3396177090331856e-05, "loss": 0.0378, "step": 54990 }, { "epoch": 0.27, "grad_norm": 0.10240872949361801, "learning_rate": 4.3393377730473164e-05, "loss": 0.0383, "step": 55000 }, { "epoch": 0.27005, "grad_norm": 0.09521150588989258, "learning_rate": 4.339057786774644e-05, "loss": 0.038, "step": 55010 }, { "epoch": 0.2701, "grad_norm": 0.09409971535205841, "learning_rate": 4.3387777502228225e-05, "loss": 0.0393, "step": 55020 }, { "epoch": 0.27015, "grad_norm": 0.11078613996505737, "learning_rate": 4.338497663399509e-05, "loss": 0.0385, "step": 55030 }, { "epoch": 0.2702, "grad_norm": 0.09805911779403687, "learning_rate": 4.338217526312359e-05, "loss": 0.0401, "step": 55040 }, { "epoch": 0.27025, "grad_norm": 0.09694908559322357, "learning_rate": 4.337937338969033e-05, "loss": 0.0385, "step": 55050 }, { "epoch": 0.2703, "grad_norm": 0.09191986173391342, "learning_rate": 4.3376571013771897e-05, "loss": 0.0394, "step": 55060 }, { "epoch": 0.27035, "grad_norm": 0.09870189428329468, "learning_rate": 4.3373768135444926e-05, "loss": 0.041, "step": 55070 }, { "epoch": 0.2704, "grad_norm": 0.10547038167715073, "learning_rate": 4.3370964754786035e-05, "loss": 0.0384, "step": 55080 }, { "epoch": 0.27045, "grad_norm": 0.09930089116096497, "learning_rate": 4.336816087187186e-05, "loss": 0.0391, "step": 55090 }, { "epoch": 0.2705, "grad_norm": 0.11283731460571289, "learning_rate": 4.3365356486779084e-05, "loss": 0.0395, "step": 55100 }, { "epoch": 0.27055, "grad_norm": 0.1049695685505867, "learning_rate": 4.336255159958435e-05, "loss": 0.0416, "step": 55110 }, { "epoch": 0.2706, "grad_norm": 0.095323346555233, "learning_rate": 4.335974621036436e-05, "loss": 0.0385, "step": 55120 }, { "epoch": 0.27065, "grad_norm": 0.11626312881708145, "learning_rate": 4.33569403191958e-05, "loss": 0.0439, "step": 55130 }, { "epoch": 0.2707, "grad_norm": 0.10035998374223709, "learning_rate": 4.335413392615539e-05, "loss": 0.0383, "step": 55140 }, { "epoch": 0.27075, "grad_norm": 0.09624806046485901, "learning_rate": 4.3351327031319856e-05, "loss": 0.039, "step": 55150 }, { "epoch": 0.2708, "grad_norm": 0.08691024035215378, "learning_rate": 4.3348519634765934e-05, "loss": 0.0391, "step": 55160 }, { "epoch": 0.27085, "grad_norm": 0.09586155414581299, "learning_rate": 4.334571173657037e-05, "loss": 0.0372, "step": 55170 }, { "epoch": 0.2709, "grad_norm": 0.08697967976331711, "learning_rate": 4.3342903336809956e-05, "loss": 0.0389, "step": 55180 }, { "epoch": 0.27095, "grad_norm": 0.09988107532262802, "learning_rate": 4.334009443556144e-05, "loss": 0.0389, "step": 55190 }, { "epoch": 0.271, "grad_norm": 0.11418605595827103, "learning_rate": 4.333728503290164e-05, "loss": 0.0383, "step": 55200 }, { "epoch": 0.27105, "grad_norm": 0.0969906598329544, "learning_rate": 4.333447512890736e-05, "loss": 0.0374, "step": 55210 }, { "epoch": 0.2711, "grad_norm": 0.10156113654375076, "learning_rate": 4.3331664723655414e-05, "loss": 0.0389, "step": 55220 }, { "epoch": 0.27115, "grad_norm": 0.09116654843091965, "learning_rate": 4.3328853817222635e-05, "loss": 0.0372, "step": 55230 }, { "epoch": 0.2712, "grad_norm": 0.10540885478258133, "learning_rate": 4.332604240968588e-05, "loss": 0.0378, "step": 55240 }, { "epoch": 0.27125, "grad_norm": 0.09678299725055695, "learning_rate": 4.332323050112202e-05, "loss": 0.0379, "step": 55250 }, { "epoch": 0.2713, "grad_norm": 0.11525113135576248, "learning_rate": 4.3320418091607916e-05, "loss": 0.0388, "step": 55260 }, { "epoch": 0.27135, "grad_norm": 0.1061660423874855, "learning_rate": 4.331760518122046e-05, "loss": 0.038, "step": 55270 }, { "epoch": 0.2714, "grad_norm": 0.11649101972579956, "learning_rate": 4.3314791770036564e-05, "loss": 0.039, "step": 55280 }, { "epoch": 0.27145, "grad_norm": 0.1300489753484726, "learning_rate": 4.331197785813314e-05, "loss": 0.0383, "step": 55290 }, { "epoch": 0.2715, "grad_norm": 0.10463863611221313, "learning_rate": 4.330916344558713e-05, "loss": 0.0375, "step": 55300 }, { "epoch": 0.27155, "grad_norm": 0.1016702651977539, "learning_rate": 4.330634853247546e-05, "loss": 0.0381, "step": 55310 }, { "epoch": 0.2716, "grad_norm": 0.10234900563955307, "learning_rate": 4.3303533118875104e-05, "loss": 0.0387, "step": 55320 }, { "epoch": 0.27165, "grad_norm": 0.1014292985200882, "learning_rate": 4.330071720486302e-05, "loss": 0.037, "step": 55330 }, { "epoch": 0.2717, "grad_norm": 0.08624016493558884, "learning_rate": 4.329790079051621e-05, "loss": 0.0376, "step": 55340 }, { "epoch": 0.27175, "grad_norm": 0.097990982234478, "learning_rate": 4.3295083875911667e-05, "loss": 0.0369, "step": 55350 }, { "epoch": 0.2718, "grad_norm": 0.10517366230487823, "learning_rate": 4.329226646112641e-05, "loss": 0.0387, "step": 55360 }, { "epoch": 0.27185, "grad_norm": 0.1022857278585434, "learning_rate": 4.3289448546237443e-05, "loss": 0.041, "step": 55370 }, { "epoch": 0.2719, "grad_norm": 0.10659095644950867, "learning_rate": 4.3286630131321835e-05, "loss": 0.0398, "step": 55380 }, { "epoch": 0.27195, "grad_norm": 0.09299805760383606, "learning_rate": 4.3283811216456624e-05, "loss": 0.0384, "step": 55390 }, { "epoch": 0.272, "grad_norm": 0.12613967061042786, "learning_rate": 4.328099180171889e-05, "loss": 0.0387, "step": 55400 }, { "epoch": 0.27205, "grad_norm": 0.10692556947469711, "learning_rate": 4.3278171887185706e-05, "loss": 0.0391, "step": 55410 }, { "epoch": 0.2721, "grad_norm": 0.0877951979637146, "learning_rate": 4.3275351472934166e-05, "loss": 0.0377, "step": 55420 }, { "epoch": 0.27215, "grad_norm": 0.10409408807754517, "learning_rate": 4.3272530559041384e-05, "loss": 0.0368, "step": 55430 }, { "epoch": 0.2722, "grad_norm": 0.09759706258773804, "learning_rate": 4.326970914558448e-05, "loss": 0.0377, "step": 55440 }, { "epoch": 0.27225, "grad_norm": 0.10468898713588715, "learning_rate": 4.3266887232640596e-05, "loss": 0.0383, "step": 55450 }, { "epoch": 0.2723, "grad_norm": 0.10428676009178162, "learning_rate": 4.326406482028688e-05, "loss": 0.0411, "step": 55460 }, { "epoch": 0.27235, "grad_norm": 0.10064087808132172, "learning_rate": 4.326124190860048e-05, "loss": 0.0381, "step": 55470 }, { "epoch": 0.2724, "grad_norm": 0.08288514614105225, "learning_rate": 4.32584184976586e-05, "loss": 0.0393, "step": 55480 }, { "epoch": 0.27245, "grad_norm": 0.09690169990062714, "learning_rate": 4.3255594587538403e-05, "loss": 0.0401, "step": 55490 }, { "epoch": 0.2725, "grad_norm": 0.11273466795682907, "learning_rate": 4.3252770178317124e-05, "loss": 0.0394, "step": 55500 }, { "epoch": 0.27255, "grad_norm": 0.08136719465255737, "learning_rate": 4.324994527007196e-05, "loss": 0.0383, "step": 55510 }, { "epoch": 0.2726, "grad_norm": 0.09851282089948654, "learning_rate": 4.324711986288015e-05, "loss": 0.0412, "step": 55520 }, { "epoch": 0.27265, "grad_norm": 0.12646430730819702, "learning_rate": 4.324429395681893e-05, "loss": 0.0405, "step": 55530 }, { "epoch": 0.2727, "grad_norm": 0.11000816524028778, "learning_rate": 4.324146755196558e-05, "loss": 0.0397, "step": 55540 }, { "epoch": 0.27275, "grad_norm": 0.08530110120773315, "learning_rate": 4.3238640648397344e-05, "loss": 0.0389, "step": 55550 }, { "epoch": 0.2728, "grad_norm": 0.08713596314191818, "learning_rate": 4.3235813246191535e-05, "loss": 0.0398, "step": 55560 }, { "epoch": 0.27285, "grad_norm": 0.09457286447286606, "learning_rate": 4.323298534542545e-05, "loss": 0.0391, "step": 55570 }, { "epoch": 0.2729, "grad_norm": 0.11076316237449646, "learning_rate": 4.323015694617638e-05, "loss": 0.0383, "step": 55580 }, { "epoch": 0.27295, "grad_norm": 0.11398779600858688, "learning_rate": 4.3227328048521674e-05, "loss": 0.0417, "step": 55590 }, { "epoch": 0.273, "grad_norm": 0.09488118439912796, "learning_rate": 4.322449865253867e-05, "loss": 0.0395, "step": 55600 }, { "epoch": 0.27305, "grad_norm": 0.11086251586675644, "learning_rate": 4.322166875830472e-05, "loss": 0.0383, "step": 55610 }, { "epoch": 0.2731, "grad_norm": 0.11592381447553635, "learning_rate": 4.3218838365897184e-05, "loss": 0.0373, "step": 55620 }, { "epoch": 0.27315, "grad_norm": 0.09241390973329544, "learning_rate": 4.321600747539346e-05, "loss": 0.0411, "step": 55630 }, { "epoch": 0.2732, "grad_norm": 0.13217687606811523, "learning_rate": 4.321317608687093e-05, "loss": 0.042, "step": 55640 }, { "epoch": 0.27325, "grad_norm": 0.0901438444852829, "learning_rate": 4.3210344200407e-05, "loss": 0.0372, "step": 55650 }, { "epoch": 0.2733, "grad_norm": 0.10367929190397263, "learning_rate": 4.320751181607912e-05, "loss": 0.0378, "step": 55660 }, { "epoch": 0.27335, "grad_norm": 0.10702197253704071, "learning_rate": 4.32046789339647e-05, "loss": 0.0378, "step": 55670 }, { "epoch": 0.2734, "grad_norm": 0.132543683052063, "learning_rate": 4.320184555414119e-05, "loss": 0.0395, "step": 55680 }, { "epoch": 0.27345, "grad_norm": 0.11184263229370117, "learning_rate": 4.319901167668607e-05, "loss": 0.0396, "step": 55690 }, { "epoch": 0.2735, "grad_norm": 0.10397180914878845, "learning_rate": 4.31961773016768e-05, "loss": 0.0377, "step": 55700 }, { "epoch": 0.27355, "grad_norm": 0.12112978845834732, "learning_rate": 4.319334242919088e-05, "loss": 0.0409, "step": 55710 }, { "epoch": 0.2736, "grad_norm": 0.10749992728233337, "learning_rate": 4.3190507059305817e-05, "loss": 0.0387, "step": 55720 }, { "epoch": 0.27365, "grad_norm": 0.1014375239610672, "learning_rate": 4.3187671192099124e-05, "loss": 0.0399, "step": 55730 }, { "epoch": 0.2737, "grad_norm": 0.12156882882118225, "learning_rate": 4.318483482764833e-05, "loss": 0.0387, "step": 55740 }, { "epoch": 0.27375, "grad_norm": 0.12430386245250702, "learning_rate": 4.3181997966030986e-05, "loss": 0.0397, "step": 55750 }, { "epoch": 0.2738, "grad_norm": 0.10626768320798874, "learning_rate": 4.317916060732465e-05, "loss": 0.0404, "step": 55760 }, { "epoch": 0.27385, "grad_norm": 0.15057958662509918, "learning_rate": 4.317632275160689e-05, "loss": 0.0403, "step": 55770 }, { "epoch": 0.2739, "grad_norm": 0.10483145713806152, "learning_rate": 4.317348439895529e-05, "loss": 0.0383, "step": 55780 }, { "epoch": 0.27395, "grad_norm": 0.09630624204874039, "learning_rate": 4.3170645549447463e-05, "loss": 0.0383, "step": 55790 }, { "epoch": 0.274, "grad_norm": 0.09001373499631882, "learning_rate": 4.316780620316101e-05, "loss": 0.0378, "step": 55800 }, { "epoch": 0.27405, "grad_norm": 0.10086652636528015, "learning_rate": 4.316496636017355e-05, "loss": 0.0413, "step": 55810 }, { "epoch": 0.2741, "grad_norm": 0.11561109870672226, "learning_rate": 4.316212602056276e-05, "loss": 0.0378, "step": 55820 }, { "epoch": 0.27415, "grad_norm": 0.08696012198925018, "learning_rate": 4.315928518440624e-05, "loss": 0.038, "step": 55830 }, { "epoch": 0.2742, "grad_norm": 0.08141034841537476, "learning_rate": 4.3156443851781695e-05, "loss": 0.0383, "step": 55840 }, { "epoch": 0.27425, "grad_norm": 0.08150725811719894, "learning_rate": 4.31536020227668e-05, "loss": 0.0376, "step": 55850 }, { "epoch": 0.2743, "grad_norm": 0.07563025504350662, "learning_rate": 4.3150759697439246e-05, "loss": 0.0368, "step": 55860 }, { "epoch": 0.27435, "grad_norm": 0.0782807469367981, "learning_rate": 4.3147916875876734e-05, "loss": 0.036, "step": 55870 }, { "epoch": 0.2744, "grad_norm": 0.09279277920722961, "learning_rate": 4.3145073558157e-05, "loss": 0.039, "step": 55880 }, { "epoch": 0.27445, "grad_norm": 0.0960644781589508, "learning_rate": 4.314222974435776e-05, "loss": 0.0372, "step": 55890 }, { "epoch": 0.2745, "grad_norm": 0.10694056004285812, "learning_rate": 4.313938543455679e-05, "loss": 0.0402, "step": 55900 }, { "epoch": 0.27455, "grad_norm": 0.10038580745458603, "learning_rate": 4.313654062883183e-05, "loss": 0.0392, "step": 55910 }, { "epoch": 0.2746, "grad_norm": 0.13570621609687805, "learning_rate": 4.313369532726066e-05, "loss": 0.0386, "step": 55920 }, { "epoch": 0.27465, "grad_norm": 0.10145073384046555, "learning_rate": 4.313084952992108e-05, "loss": 0.0402, "step": 55930 }, { "epoch": 0.2747, "grad_norm": 0.10217112302780151, "learning_rate": 4.3128003236890876e-05, "loss": 0.0375, "step": 55940 }, { "epoch": 0.27475, "grad_norm": 0.09518636018037796, "learning_rate": 4.312515644824788e-05, "loss": 0.0394, "step": 55950 }, { "epoch": 0.2748, "grad_norm": 0.09258914738893509, "learning_rate": 4.312230916406991e-05, "loss": 0.038, "step": 55960 }, { "epoch": 0.27485, "grad_norm": 0.09878391027450562, "learning_rate": 4.311946138443482e-05, "loss": 0.0379, "step": 55970 }, { "epoch": 0.2749, "grad_norm": 0.09210311621427536, "learning_rate": 4.311661310942047e-05, "loss": 0.0366, "step": 55980 }, { "epoch": 0.27495, "grad_norm": 0.09525465220212936, "learning_rate": 4.311376433910471e-05, "loss": 0.0375, "step": 55990 }, { "epoch": 0.275, "grad_norm": 0.105075404047966, "learning_rate": 4.3110915073565444e-05, "loss": 0.0392, "step": 56000 }, { "epoch": 0.27505, "grad_norm": 0.09033656865358353, "learning_rate": 4.3108065312880566e-05, "loss": 0.0406, "step": 56010 }, { "epoch": 0.2751, "grad_norm": 0.09966182708740234, "learning_rate": 4.3105215057127984e-05, "loss": 0.0363, "step": 56020 }, { "epoch": 0.27515, "grad_norm": 0.09546273201704025, "learning_rate": 4.3102364306385624e-05, "loss": 0.0371, "step": 56030 }, { "epoch": 0.2752, "grad_norm": 0.09017182886600494, "learning_rate": 4.309951306073142e-05, "loss": 0.0371, "step": 56040 }, { "epoch": 0.27525, "grad_norm": 0.1062690019607544, "learning_rate": 4.3096661320243334e-05, "loss": 0.0365, "step": 56050 }, { "epoch": 0.2753, "grad_norm": 0.10952750593423843, "learning_rate": 4.3093809084999325e-05, "loss": 0.0377, "step": 56060 }, { "epoch": 0.27535, "grad_norm": 0.11846937239170074, "learning_rate": 4.3090956355077375e-05, "loss": 0.0364, "step": 56070 }, { "epoch": 0.2754, "grad_norm": 0.11436047405004501, "learning_rate": 4.308810313055547e-05, "loss": 0.0394, "step": 56080 }, { "epoch": 0.27545, "grad_norm": 0.09996142238378525, "learning_rate": 4.308524941151163e-05, "loss": 0.0381, "step": 56090 }, { "epoch": 0.2755, "grad_norm": 0.0903242826461792, "learning_rate": 4.3082395198023854e-05, "loss": 0.037, "step": 56100 }, { "epoch": 0.27555, "grad_norm": 0.08647549897432327, "learning_rate": 4.307954049017019e-05, "loss": 0.0411, "step": 56110 }, { "epoch": 0.2756, "grad_norm": 0.10381969064474106, "learning_rate": 4.307668528802868e-05, "loss": 0.0396, "step": 56120 }, { "epoch": 0.27565, "grad_norm": 0.09873152524232864, "learning_rate": 4.3073829591677396e-05, "loss": 0.0399, "step": 56130 }, { "epoch": 0.2757, "grad_norm": 0.09237905591726303, "learning_rate": 4.307097340119439e-05, "loss": 0.0366, "step": 56140 }, { "epoch": 0.27575, "grad_norm": 0.11224275082349777, "learning_rate": 4.3068116716657764e-05, "loss": 0.0367, "step": 56150 }, { "epoch": 0.2758, "grad_norm": 0.10116568952798843, "learning_rate": 4.3065259538145616e-05, "loss": 0.038, "step": 56160 }, { "epoch": 0.27585, "grad_norm": 0.11430336534976959, "learning_rate": 4.306240186573606e-05, "loss": 0.0369, "step": 56170 }, { "epoch": 0.2759, "grad_norm": 0.09505539387464523, "learning_rate": 4.305954369950722e-05, "loss": 0.0383, "step": 56180 }, { "epoch": 0.27595, "grad_norm": 0.09002593159675598, "learning_rate": 4.305668503953724e-05, "loss": 0.0401, "step": 56190 }, { "epoch": 0.276, "grad_norm": 0.10569896548986435, "learning_rate": 4.3053825885904264e-05, "loss": 0.0381, "step": 56200 }, { "epoch": 0.27605, "grad_norm": 0.10630550235509872, "learning_rate": 4.3050966238686483e-05, "loss": 0.0374, "step": 56210 }, { "epoch": 0.2761, "grad_norm": 0.09257291257381439, "learning_rate": 4.3048106097962066e-05, "loss": 0.0383, "step": 56220 }, { "epoch": 0.27615, "grad_norm": 0.1080874353647232, "learning_rate": 4.30452454638092e-05, "loss": 0.0379, "step": 56230 }, { "epoch": 0.2762, "grad_norm": 0.10577518492937088, "learning_rate": 4.304238433630612e-05, "loss": 0.0417, "step": 56240 }, { "epoch": 0.27625, "grad_norm": 0.10467962175607681, "learning_rate": 4.303952271553101e-05, "loss": 0.0383, "step": 56250 }, { "epoch": 0.2763, "grad_norm": 0.12002705782651901, "learning_rate": 4.303666060156214e-05, "loss": 0.0397, "step": 56260 }, { "epoch": 0.27635, "grad_norm": 0.10973189771175385, "learning_rate": 4.303379799447774e-05, "loss": 0.0372, "step": 56270 }, { "epoch": 0.2764, "grad_norm": 0.10119590163230896, "learning_rate": 4.3030934894356076e-05, "loss": 0.0382, "step": 56280 }, { "epoch": 0.27645, "grad_norm": 0.11402632296085358, "learning_rate": 4.302807130127543e-05, "loss": 0.0407, "step": 56290 }, { "epoch": 0.2765, "grad_norm": 0.10487694293260574, "learning_rate": 4.302520721531409e-05, "loss": 0.0393, "step": 56300 }, { "epoch": 0.27655, "grad_norm": 0.11392984539270401, "learning_rate": 4.302234263655035e-05, "loss": 0.0374, "step": 56310 }, { "epoch": 0.2766, "grad_norm": 0.11727987974882126, "learning_rate": 4.301947756506254e-05, "loss": 0.0392, "step": 56320 }, { "epoch": 0.27665, "grad_norm": 0.14777207374572754, "learning_rate": 4.301661200092898e-05, "loss": 0.0384, "step": 56330 }, { "epoch": 0.2767, "grad_norm": 0.10190843045711517, "learning_rate": 4.3013745944228014e-05, "loss": 0.0386, "step": 56340 }, { "epoch": 0.27675, "grad_norm": 0.10420718789100647, "learning_rate": 4.3010879395038e-05, "loss": 0.0407, "step": 56350 }, { "epoch": 0.2768, "grad_norm": 0.09990087896585464, "learning_rate": 4.300801235343731e-05, "loss": 0.0384, "step": 56360 }, { "epoch": 0.27685, "grad_norm": 0.11819495260715485, "learning_rate": 4.3005144819504335e-05, "loss": 0.0381, "step": 56370 }, { "epoch": 0.2769, "grad_norm": 0.08432481437921524, "learning_rate": 4.300227679331745e-05, "loss": 0.0369, "step": 56380 }, { "epoch": 0.27695, "grad_norm": 0.10276342183351517, "learning_rate": 4.29994082749551e-05, "loss": 0.0384, "step": 56390 }, { "epoch": 0.277, "grad_norm": 0.10119026154279709, "learning_rate": 4.2996539264495674e-05, "loss": 0.0371, "step": 56400 }, { "epoch": 0.27705, "grad_norm": 0.11653642356395721, "learning_rate": 4.2993669762017636e-05, "loss": 0.0382, "step": 56410 }, { "epoch": 0.2771, "grad_norm": 0.09874815493822098, "learning_rate": 4.299079976759942e-05, "loss": 0.0366, "step": 56420 }, { "epoch": 0.27715, "grad_norm": 0.09425827860832214, "learning_rate": 4.2987929281319505e-05, "loss": 0.0373, "step": 56430 }, { "epoch": 0.2772, "grad_norm": 0.08276829123497009, "learning_rate": 4.2985058303256357e-05, "loss": 0.0382, "step": 56440 }, { "epoch": 0.27725, "grad_norm": 0.10070496797561646, "learning_rate": 4.298218683348846e-05, "loss": 0.0371, "step": 56450 }, { "epoch": 0.2773, "grad_norm": 0.0893116220831871, "learning_rate": 4.2979314872094343e-05, "loss": 0.039, "step": 56460 }, { "epoch": 0.27735, "grad_norm": 0.13710635900497437, "learning_rate": 4.297644241915251e-05, "loss": 0.0398, "step": 56470 }, { "epoch": 0.2774, "grad_norm": 0.09827051311731339, "learning_rate": 4.2973569474741496e-05, "loss": 0.0376, "step": 56480 }, { "epoch": 0.27745, "grad_norm": 0.08752574771642685, "learning_rate": 4.297069603893984e-05, "loss": 0.0399, "step": 56490 }, { "epoch": 0.2775, "grad_norm": 0.11656755954027176, "learning_rate": 4.296782211182611e-05, "loss": 0.039, "step": 56500 }, { "epoch": 0.27755, "grad_norm": 0.08865627646446228, "learning_rate": 4.296494769347887e-05, "loss": 0.0384, "step": 56510 }, { "epoch": 0.2776, "grad_norm": 0.11651672422885895, "learning_rate": 4.2962072783976714e-05, "loss": 0.0384, "step": 56520 }, { "epoch": 0.27765, "grad_norm": 0.09058934450149536, "learning_rate": 4.2959197383398234e-05, "loss": 0.0389, "step": 56530 }, { "epoch": 0.2777, "grad_norm": 0.10706375539302826, "learning_rate": 4.295632149182205e-05, "loss": 0.0377, "step": 56540 }, { "epoch": 0.27775, "grad_norm": 0.09810793399810791, "learning_rate": 4.295344510932677e-05, "loss": 0.0379, "step": 56550 }, { "epoch": 0.2778, "grad_norm": 0.102114737033844, "learning_rate": 4.295056823599106e-05, "loss": 0.0383, "step": 56560 }, { "epoch": 0.27785, "grad_norm": 0.09272314608097076, "learning_rate": 4.294769087189354e-05, "loss": 0.0363, "step": 56570 }, { "epoch": 0.2779, "grad_norm": 0.10033122450113297, "learning_rate": 4.29448130171129e-05, "loss": 0.0379, "step": 56580 }, { "epoch": 0.27795, "grad_norm": 0.08752158284187317, "learning_rate": 4.2941934671727826e-05, "loss": 0.0382, "step": 56590 }, { "epoch": 0.278, "grad_norm": 0.0926806703209877, "learning_rate": 4.293905583581699e-05, "loss": 0.0366, "step": 56600 }, { "epoch": 0.27805, "grad_norm": 0.08347244560718536, "learning_rate": 4.293617650945911e-05, "loss": 0.0382, "step": 56610 }, { "epoch": 0.2781, "grad_norm": 0.09313614666461945, "learning_rate": 4.29332966927329e-05, "loss": 0.0384, "step": 56620 }, { "epoch": 0.27815, "grad_norm": 0.08686469495296478, "learning_rate": 4.2930416385717095e-05, "loss": 0.0381, "step": 56630 }, { "epoch": 0.2782, "grad_norm": 0.13193097710609436, "learning_rate": 4.292753558849044e-05, "loss": 0.04, "step": 56640 }, { "epoch": 0.27825, "grad_norm": 0.09673771262168884, "learning_rate": 4.2924654301131705e-05, "loss": 0.0388, "step": 56650 }, { "epoch": 0.2783, "grad_norm": 0.09554016590118408, "learning_rate": 4.292177252371965e-05, "loss": 0.0389, "step": 56660 }, { "epoch": 0.27835, "grad_norm": 0.09741620719432831, "learning_rate": 4.291889025633307e-05, "loss": 0.0389, "step": 56670 }, { "epoch": 0.2784, "grad_norm": 0.09482339769601822, "learning_rate": 4.291600749905076e-05, "loss": 0.039, "step": 56680 }, { "epoch": 0.27845, "grad_norm": 0.0791984349489212, "learning_rate": 4.291312425195153e-05, "loss": 0.0416, "step": 56690 }, { "epoch": 0.2785, "grad_norm": 0.09493456780910492, "learning_rate": 4.291024051511422e-05, "loss": 0.0377, "step": 56700 }, { "epoch": 0.27855, "grad_norm": 0.0838426798582077, "learning_rate": 4.290735628861766e-05, "loss": 0.0372, "step": 56710 }, { "epoch": 0.2786, "grad_norm": 0.10570637881755829, "learning_rate": 4.29044715725407e-05, "loss": 0.0377, "step": 56720 }, { "epoch": 0.27865, "grad_norm": 0.09676932543516159, "learning_rate": 4.290158636696223e-05, "loss": 0.0384, "step": 56730 }, { "epoch": 0.2787, "grad_norm": 0.0901820957660675, "learning_rate": 4.28987006719611e-05, "loss": 0.038, "step": 56740 }, { "epoch": 0.27875, "grad_norm": 0.07807715237140656, "learning_rate": 4.289581448761623e-05, "loss": 0.0371, "step": 56750 }, { "epoch": 0.2788, "grad_norm": 0.07762596756219864, "learning_rate": 4.28929278140065e-05, "loss": 0.0365, "step": 56760 }, { "epoch": 0.27885, "grad_norm": 0.10255575180053711, "learning_rate": 4.2890040651210856e-05, "loss": 0.0383, "step": 56770 }, { "epoch": 0.2789, "grad_norm": 0.10037130117416382, "learning_rate": 4.288715299930822e-05, "loss": 0.0397, "step": 56780 }, { "epoch": 0.27895, "grad_norm": 0.09931264072656631, "learning_rate": 4.2884264858377544e-05, "loss": 0.0363, "step": 56790 }, { "epoch": 0.279, "grad_norm": 0.09609793871641159, "learning_rate": 4.2881376228497776e-05, "loss": 0.0365, "step": 56800 }, { "epoch": 0.27905, "grad_norm": 0.11774688214063644, "learning_rate": 4.287848710974791e-05, "loss": 0.0378, "step": 56810 }, { "epoch": 0.2791, "grad_norm": 0.11118960380554199, "learning_rate": 4.287559750220692e-05, "loss": 0.0378, "step": 56820 }, { "epoch": 0.27915, "grad_norm": 0.09443902224302292, "learning_rate": 4.287270740595381e-05, "loss": 0.0388, "step": 56830 }, { "epoch": 0.2792, "grad_norm": 0.11444979161024094, "learning_rate": 4.286981682106759e-05, "loss": 0.0381, "step": 56840 }, { "epoch": 0.27925, "grad_norm": 0.10338012874126434, "learning_rate": 4.286692574762729e-05, "loss": 0.0369, "step": 56850 }, { "epoch": 0.2793, "grad_norm": 0.09579575806856155, "learning_rate": 4.2864034185711955e-05, "loss": 0.0377, "step": 56860 }, { "epoch": 0.27935, "grad_norm": 0.08403484523296356, "learning_rate": 4.286114213540063e-05, "loss": 0.0389, "step": 56870 }, { "epoch": 0.2794, "grad_norm": 0.09917088598012924, "learning_rate": 4.2858249596772404e-05, "loss": 0.0386, "step": 56880 }, { "epoch": 0.27945, "grad_norm": 0.09660632908344269, "learning_rate": 4.2855356569906335e-05, "loss": 0.0394, "step": 56890 }, { "epoch": 0.2795, "grad_norm": 0.10014674812555313, "learning_rate": 4.2852463054881523e-05, "loss": 0.04, "step": 56900 }, { "epoch": 0.27955, "grad_norm": 0.10737550258636475, "learning_rate": 4.2849569051777083e-05, "loss": 0.0402, "step": 56910 }, { "epoch": 0.2796, "grad_norm": 0.11162016540765762, "learning_rate": 4.284667456067213e-05, "loss": 0.0381, "step": 56920 }, { "epoch": 0.27965, "grad_norm": 0.1165522113442421, "learning_rate": 4.2843779581645796e-05, "loss": 0.0428, "step": 56930 }, { "epoch": 0.2797, "grad_norm": 0.10214854031801224, "learning_rate": 4.2840884114777235e-05, "loss": 0.039, "step": 56940 }, { "epoch": 0.27975, "grad_norm": 0.09295105934143066, "learning_rate": 4.2837988160145605e-05, "loss": 0.0372, "step": 56950 }, { "epoch": 0.2798, "grad_norm": 0.11330553889274597, "learning_rate": 4.283509171783008e-05, "loss": 0.038, "step": 56960 }, { "epoch": 0.27985, "grad_norm": 0.09909593313932419, "learning_rate": 4.283219478790984e-05, "loss": 0.0374, "step": 56970 }, { "epoch": 0.2799, "grad_norm": 0.11746770143508911, "learning_rate": 4.282929737046411e-05, "loss": 0.038, "step": 56980 }, { "epoch": 0.27995, "grad_norm": 0.17033761739730835, "learning_rate": 4.282639946557208e-05, "loss": 0.0372, "step": 56990 }, { "epoch": 0.28, "grad_norm": 0.09611111134290695, "learning_rate": 4.2823501073312975e-05, "loss": 0.0408, "step": 57000 }, { "epoch": 0.28005, "grad_norm": 0.09751928597688675, "learning_rate": 4.282060219376606e-05, "loss": 0.0407, "step": 57010 }, { "epoch": 0.2801, "grad_norm": 0.09460132569074631, "learning_rate": 4.281770282701057e-05, "loss": 0.0399, "step": 57020 }, { "epoch": 0.28015, "grad_norm": 0.08417051285505295, "learning_rate": 4.2814802973125776e-05, "loss": 0.0383, "step": 57030 }, { "epoch": 0.2802, "grad_norm": 0.112062469124794, "learning_rate": 4.281190263219097e-05, "loss": 0.0379, "step": 57040 }, { "epoch": 0.28025, "grad_norm": 0.09878487139940262, "learning_rate": 4.280900180428543e-05, "loss": 0.0377, "step": 57050 }, { "epoch": 0.2803, "grad_norm": 0.10927791148424149, "learning_rate": 4.280610048948848e-05, "loss": 0.0372, "step": 57060 }, { "epoch": 0.28035, "grad_norm": 0.10309398174285889, "learning_rate": 4.280319868787942e-05, "loss": 0.0376, "step": 57070 }, { "epoch": 0.2804, "grad_norm": 0.10313721746206284, "learning_rate": 4.28002963995376e-05, "loss": 0.0374, "step": 57080 }, { "epoch": 0.28045, "grad_norm": 0.09849844127893448, "learning_rate": 4.279739362454237e-05, "loss": 0.0381, "step": 57090 }, { "epoch": 0.2805, "grad_norm": 0.09395359456539154, "learning_rate": 4.2794490362973084e-05, "loss": 0.0369, "step": 57100 }, { "epoch": 0.28055, "grad_norm": 0.1996801346540451, "learning_rate": 4.2791586614909105e-05, "loss": 0.0389, "step": 57110 }, { "epoch": 0.2806, "grad_norm": 0.14284029603004456, "learning_rate": 4.278868238042984e-05, "loss": 0.0392, "step": 57120 }, { "epoch": 0.28065, "grad_norm": 0.10570540279150009, "learning_rate": 4.278577765961469e-05, "loss": 0.0392, "step": 57130 }, { "epoch": 0.2807, "grad_norm": 0.104091115295887, "learning_rate": 4.2782872452543056e-05, "loss": 0.0379, "step": 57140 }, { "epoch": 0.28075, "grad_norm": 0.10061943531036377, "learning_rate": 4.277996675929437e-05, "loss": 0.0384, "step": 57150 }, { "epoch": 0.2808, "grad_norm": 0.09896393865346909, "learning_rate": 4.277706057994806e-05, "loss": 0.0382, "step": 57160 }, { "epoch": 0.28085, "grad_norm": 0.08161722123622894, "learning_rate": 4.27741539145836e-05, "loss": 0.0377, "step": 57170 }, { "epoch": 0.2809, "grad_norm": 0.11624377965927124, "learning_rate": 4.277124676328045e-05, "loss": 0.0387, "step": 57180 }, { "epoch": 0.28095, "grad_norm": 0.095398910343647, "learning_rate": 4.27683391261181e-05, "loss": 0.0387, "step": 57190 }, { "epoch": 0.281, "grad_norm": 0.0983344167470932, "learning_rate": 4.2765431003176015e-05, "loss": 0.0386, "step": 57200 }, { "epoch": 0.28105, "grad_norm": 0.10947199165821075, "learning_rate": 4.276252239453373e-05, "loss": 0.0383, "step": 57210 }, { "epoch": 0.2811, "grad_norm": 0.08485335111618042, "learning_rate": 4.275961330027076e-05, "loss": 0.0373, "step": 57220 }, { "epoch": 0.28115, "grad_norm": 0.12160183489322662, "learning_rate": 4.2756703720466626e-05, "loss": 0.0404, "step": 57230 }, { "epoch": 0.2812, "grad_norm": 0.09800686687231064, "learning_rate": 4.275379365520089e-05, "loss": 0.0391, "step": 57240 }, { "epoch": 0.28125, "grad_norm": 0.11026581376791, "learning_rate": 4.2750883104553096e-05, "loss": 0.041, "step": 57250 }, { "epoch": 0.2813, "grad_norm": 0.10947220027446747, "learning_rate": 4.274797206860284e-05, "loss": 0.04, "step": 57260 }, { "epoch": 0.28135, "grad_norm": 0.11520034074783325, "learning_rate": 4.2745060547429685e-05, "loss": 0.0381, "step": 57270 }, { "epoch": 0.2814, "grad_norm": 0.09699587523937225, "learning_rate": 4.274214854111324e-05, "loss": 0.0377, "step": 57280 }, { "epoch": 0.28145, "grad_norm": 0.08693718910217285, "learning_rate": 4.2739236049733124e-05, "loss": 0.0378, "step": 57290 }, { "epoch": 0.2815, "grad_norm": 0.09276087582111359, "learning_rate": 4.273632307336896e-05, "loss": 0.0374, "step": 57300 }, { "epoch": 0.28155, "grad_norm": 0.08943061530590057, "learning_rate": 4.273340961210038e-05, "loss": 0.0366, "step": 57310 }, { "epoch": 0.2816, "grad_norm": 0.09189813584089279, "learning_rate": 4.273049566600705e-05, "loss": 0.0365, "step": 57320 }, { "epoch": 0.28165, "grad_norm": 0.09366247057914734, "learning_rate": 4.272758123516863e-05, "loss": 0.0389, "step": 57330 }, { "epoch": 0.2817, "grad_norm": 0.10908438265323639, "learning_rate": 4.2724666319664794e-05, "loss": 0.0388, "step": 57340 }, { "epoch": 0.28175, "grad_norm": 0.10651381313800812, "learning_rate": 4.2721750919575246e-05, "loss": 0.0384, "step": 57350 }, { "epoch": 0.2818, "grad_norm": 0.13720935583114624, "learning_rate": 4.271883503497967e-05, "loss": 0.0409, "step": 57360 }, { "epoch": 0.28185, "grad_norm": 0.1090945228934288, "learning_rate": 4.271591866595782e-05, "loss": 0.0377, "step": 57370 }, { "epoch": 0.2819, "grad_norm": 0.11343448609113693, "learning_rate": 4.27130018125894e-05, "loss": 0.0387, "step": 57380 }, { "epoch": 0.28195, "grad_norm": 0.12336157262325287, "learning_rate": 4.271008447495417e-05, "loss": 0.0391, "step": 57390 }, { "epoch": 0.282, "grad_norm": 0.0998220294713974, "learning_rate": 4.270716665313188e-05, "loss": 0.0383, "step": 57400 }, { "epoch": 0.28205, "grad_norm": 0.10613575577735901, "learning_rate": 4.27042483472023e-05, "loss": 0.0362, "step": 57410 }, { "epoch": 0.2821, "grad_norm": 0.10241192579269409, "learning_rate": 4.2701329557245225e-05, "loss": 0.0382, "step": 57420 }, { "epoch": 0.28215, "grad_norm": 0.09192397445440292, "learning_rate": 4.269841028334046e-05, "loss": 0.0384, "step": 57430 }, { "epoch": 0.2822, "grad_norm": 0.10546057671308517, "learning_rate": 4.26954905255678e-05, "loss": 0.0386, "step": 57440 }, { "epoch": 0.28225, "grad_norm": 0.0989101231098175, "learning_rate": 4.2692570284007074e-05, "loss": 0.0385, "step": 57450 }, { "epoch": 0.2823, "grad_norm": 0.10276032984256744, "learning_rate": 4.268964955873813e-05, "loss": 0.0377, "step": 57460 }, { "epoch": 0.28235, "grad_norm": 0.0878804475069046, "learning_rate": 4.2686728349840805e-05, "loss": 0.0371, "step": 57470 }, { "epoch": 0.2824, "grad_norm": 0.09753140062093735, "learning_rate": 4.268380665739498e-05, "loss": 0.0377, "step": 57480 }, { "epoch": 0.28245, "grad_norm": 0.09146375954151154, "learning_rate": 4.268088448148051e-05, "loss": 0.038, "step": 57490 }, { "epoch": 0.2825, "grad_norm": 0.08938898146152496, "learning_rate": 4.2677961822177315e-05, "loss": 0.0381, "step": 57500 }, { "epoch": 0.28255, "grad_norm": 0.11164992302656174, "learning_rate": 4.267503867956528e-05, "loss": 0.0373, "step": 57510 }, { "epoch": 0.2826, "grad_norm": 0.09699451923370361, "learning_rate": 4.267211505372433e-05, "loss": 0.0425, "step": 57520 }, { "epoch": 0.28265, "grad_norm": 0.09400709718465805, "learning_rate": 4.26691909447344e-05, "loss": 0.0376, "step": 57530 }, { "epoch": 0.2827, "grad_norm": 0.09006451070308685, "learning_rate": 4.266626635267541e-05, "loss": 0.0383, "step": 57540 }, { "epoch": 0.28275, "grad_norm": 0.08708031475543976, "learning_rate": 4.266334127762734e-05, "loss": 0.0395, "step": 57550 }, { "epoch": 0.2828, "grad_norm": 0.09977874159812927, "learning_rate": 4.266041571967016e-05, "loss": 0.0371, "step": 57560 }, { "epoch": 0.28285, "grad_norm": 0.11364461481571198, "learning_rate": 4.265748967888385e-05, "loss": 0.0382, "step": 57570 }, { "epoch": 0.2829, "grad_norm": 0.08485016971826553, "learning_rate": 4.2654563155348406e-05, "loss": 0.0396, "step": 57580 }, { "epoch": 0.28295, "grad_norm": 0.10243216156959534, "learning_rate": 4.2651636149143835e-05, "loss": 0.0381, "step": 57590 }, { "epoch": 0.283, "grad_norm": 0.12172680348157883, "learning_rate": 4.2648708660350164e-05, "loss": 0.0382, "step": 57600 }, { "epoch": 0.28305, "grad_norm": 0.10779182612895966, "learning_rate": 4.264578068904742e-05, "loss": 0.0387, "step": 57610 }, { "epoch": 0.2831, "grad_norm": 0.0988893210887909, "learning_rate": 4.264285223531568e-05, "loss": 0.0369, "step": 57620 }, { "epoch": 0.28315, "grad_norm": 0.0954919382929802, "learning_rate": 4.2639923299234976e-05, "loss": 0.0379, "step": 57630 }, { "epoch": 0.2832, "grad_norm": 0.1120079979300499, "learning_rate": 4.263699388088539e-05, "loss": 0.0402, "step": 57640 }, { "epoch": 0.28325, "grad_norm": 0.10489992797374725, "learning_rate": 4.263406398034703e-05, "loss": 0.0373, "step": 57650 }, { "epoch": 0.2833, "grad_norm": 0.09630849212408066, "learning_rate": 4.263113359769998e-05, "loss": 0.0407, "step": 57660 }, { "epoch": 0.28335, "grad_norm": 0.0975610762834549, "learning_rate": 4.262820273302436e-05, "loss": 0.0382, "step": 57670 }, { "epoch": 0.2834, "grad_norm": 0.10288140922784805, "learning_rate": 4.2625271386400304e-05, "loss": 0.0385, "step": 57680 }, { "epoch": 0.28345, "grad_norm": 0.1018190085887909, "learning_rate": 4.262233955790794e-05, "loss": 0.0381, "step": 57690 }, { "epoch": 0.2835, "grad_norm": 0.09703332185745239, "learning_rate": 4.261940724762744e-05, "loss": 0.0392, "step": 57700 }, { "epoch": 0.28355, "grad_norm": 0.08421315252780914, "learning_rate": 4.261647445563897e-05, "loss": 0.0377, "step": 57710 }, { "epoch": 0.2836, "grad_norm": 0.08458781242370605, "learning_rate": 4.26135411820227e-05, "loss": 0.038, "step": 57720 }, { "epoch": 0.28365, "grad_norm": 0.12038341164588928, "learning_rate": 4.261060742685883e-05, "loss": 0.0395, "step": 57730 }, { "epoch": 0.2837, "grad_norm": 0.10978465527296066, "learning_rate": 4.260767319022757e-05, "loss": 0.0397, "step": 57740 }, { "epoch": 0.28375, "grad_norm": 0.12968258559703827, "learning_rate": 4.260473847220915e-05, "loss": 0.0401, "step": 57750 }, { "epoch": 0.2838, "grad_norm": 0.11993337422609329, "learning_rate": 4.2601803272883784e-05, "loss": 0.0386, "step": 57760 }, { "epoch": 0.28385, "grad_norm": 0.12599441409111023, "learning_rate": 4.259886759233173e-05, "loss": 0.039, "step": 57770 }, { "epoch": 0.2839, "grad_norm": 0.0750201940536499, "learning_rate": 4.259593143063325e-05, "loss": 0.0378, "step": 57780 }, { "epoch": 0.28395, "grad_norm": 0.0985063910484314, "learning_rate": 4.259299478786861e-05, "loss": 0.0394, "step": 57790 }, { "epoch": 0.284, "grad_norm": 0.09771952033042908, "learning_rate": 4.2590057664118106e-05, "loss": 0.0376, "step": 57800 }, { "epoch": 0.28405, "grad_norm": 0.1153845489025116, "learning_rate": 4.258712005946204e-05, "loss": 0.0406, "step": 57810 }, { "epoch": 0.2841, "grad_norm": 0.09112930297851562, "learning_rate": 4.258418197398071e-05, "loss": 0.0398, "step": 57820 }, { "epoch": 0.28415, "grad_norm": 0.10500326007604599, "learning_rate": 4.258124340775445e-05, "loss": 0.0411, "step": 57830 }, { "epoch": 0.2842, "grad_norm": 0.10515421628952026, "learning_rate": 4.257830436086361e-05, "loss": 0.0452, "step": 57840 }, { "epoch": 0.28425, "grad_norm": 0.09916632622480392, "learning_rate": 4.257536483338852e-05, "loss": 0.0395, "step": 57850 }, { "epoch": 0.2843, "grad_norm": 0.09431524574756622, "learning_rate": 4.257242482540956e-05, "loss": 0.0404, "step": 57860 }, { "epoch": 0.28435, "grad_norm": 0.12175924330949783, "learning_rate": 4.256948433700712e-05, "loss": 0.0417, "step": 57870 }, { "epoch": 0.2844, "grad_norm": 0.09342382103204727, "learning_rate": 4.2566543368261564e-05, "loss": 0.0431, "step": 57880 }, { "epoch": 0.28445, "grad_norm": 0.09715968370437622, "learning_rate": 4.256360191925332e-05, "loss": 0.0387, "step": 57890 }, { "epoch": 0.2845, "grad_norm": 0.09505413472652435, "learning_rate": 4.256065999006279e-05, "loss": 0.0379, "step": 57900 }, { "epoch": 0.28455, "grad_norm": 0.0904054269194603, "learning_rate": 4.255771758077042e-05, "loss": 0.0387, "step": 57910 }, { "epoch": 0.2846, "grad_norm": 0.0966554805636406, "learning_rate": 4.255477469145665e-05, "loss": 0.0395, "step": 57920 }, { "epoch": 0.28465, "grad_norm": 0.09629569947719574, "learning_rate": 4.255183132220192e-05, "loss": 0.04, "step": 57930 }, { "epoch": 0.2847, "grad_norm": 0.10692469030618668, "learning_rate": 4.254888747308673e-05, "loss": 0.0395, "step": 57940 }, { "epoch": 0.28475, "grad_norm": 0.10003534704446793, "learning_rate": 4.254594314419155e-05, "loss": 0.0387, "step": 57950 }, { "epoch": 0.2848, "grad_norm": 0.09970128536224365, "learning_rate": 4.254299833559687e-05, "loss": 0.0395, "step": 57960 }, { "epoch": 0.28485, "grad_norm": 0.11436878144741058, "learning_rate": 4.2540053047383214e-05, "loss": 0.0403, "step": 57970 }, { "epoch": 0.2849, "grad_norm": 0.10870227962732315, "learning_rate": 4.2537107279631084e-05, "loss": 0.0385, "step": 57980 }, { "epoch": 0.28495, "grad_norm": 0.09994117170572281, "learning_rate": 4.2534161032421037e-05, "loss": 0.04, "step": 57990 }, { "epoch": 0.285, "grad_norm": 0.11786071956157684, "learning_rate": 4.2531214305833614e-05, "loss": 0.0401, "step": 58000 }, { "epoch": 0.28505, "grad_norm": 0.09488601982593536, "learning_rate": 4.252826709994938e-05, "loss": 0.0396, "step": 58010 }, { "epoch": 0.2851, "grad_norm": 0.10463389754295349, "learning_rate": 4.252531941484891e-05, "loss": 0.0393, "step": 58020 }, { "epoch": 0.28515, "grad_norm": 0.10832806676626205, "learning_rate": 4.252237125061279e-05, "loss": 0.0405, "step": 58030 }, { "epoch": 0.2852, "grad_norm": 0.11303062736988068, "learning_rate": 4.251942260732161e-05, "loss": 0.0422, "step": 58040 }, { "epoch": 0.28525, "grad_norm": 0.10420338064432144, "learning_rate": 4.251647348505601e-05, "loss": 0.0391, "step": 58050 }, { "epoch": 0.2853, "grad_norm": 0.14655640721321106, "learning_rate": 4.25135238838966e-05, "loss": 0.0394, "step": 58060 }, { "epoch": 0.28535, "grad_norm": 0.09848672896623611, "learning_rate": 4.251057380392404e-05, "loss": 0.0408, "step": 58070 }, { "epoch": 0.2854, "grad_norm": 0.09891166538000107, "learning_rate": 4.250762324521896e-05, "loss": 0.0392, "step": 58080 }, { "epoch": 0.28545, "grad_norm": 0.0940348282456398, "learning_rate": 4.250467220786204e-05, "loss": 0.0384, "step": 58090 }, { "epoch": 0.2855, "grad_norm": 0.09893086552619934, "learning_rate": 4.250172069193395e-05, "loss": 0.0378, "step": 58100 }, { "epoch": 0.28555, "grad_norm": 0.1129058450460434, "learning_rate": 4.24987686975154e-05, "loss": 0.0369, "step": 58110 }, { "epoch": 0.2856, "grad_norm": 0.1151210144162178, "learning_rate": 4.249581622468709e-05, "loss": 0.0389, "step": 58120 }, { "epoch": 0.28565, "grad_norm": 0.09511999785900116, "learning_rate": 4.2492863273529734e-05, "loss": 0.0367, "step": 58130 }, { "epoch": 0.2857, "grad_norm": 0.12105721980333328, "learning_rate": 4.2489909844124066e-05, "loss": 0.0398, "step": 58140 }, { "epoch": 0.28575, "grad_norm": 0.11214801669120789, "learning_rate": 4.248695593655083e-05, "loss": 0.0407, "step": 58150 }, { "epoch": 0.2858, "grad_norm": 0.10821537673473358, "learning_rate": 4.248400155089079e-05, "loss": 0.0377, "step": 58160 }, { "epoch": 0.28585, "grad_norm": 0.10529662668704987, "learning_rate": 4.2481046687224726e-05, "loss": 0.0382, "step": 58170 }, { "epoch": 0.2859, "grad_norm": 0.10574857145547867, "learning_rate": 4.2478091345633405e-05, "loss": 0.0388, "step": 58180 }, { "epoch": 0.28595, "grad_norm": 0.11062496155500412, "learning_rate": 4.247513552619763e-05, "loss": 0.0376, "step": 58190 }, { "epoch": 0.286, "grad_norm": 0.10402261465787888, "learning_rate": 4.247217922899822e-05, "loss": 0.0382, "step": 58200 }, { "epoch": 0.28605, "grad_norm": 0.10761887580156326, "learning_rate": 4.2469222454115996e-05, "loss": 0.0392, "step": 58210 }, { "epoch": 0.2861, "grad_norm": 0.09106829017400742, "learning_rate": 4.246626520163179e-05, "loss": 0.0376, "step": 58220 }, { "epoch": 0.28615, "grad_norm": 0.08656405657529831, "learning_rate": 4.246330747162646e-05, "loss": 0.0373, "step": 58230 }, { "epoch": 0.2862, "grad_norm": 0.09331633150577545, "learning_rate": 4.246034926418085e-05, "loss": 0.0396, "step": 58240 }, { "epoch": 0.28625, "grad_norm": 0.13578954339027405, "learning_rate": 4.245739057937586e-05, "loss": 0.0388, "step": 58250 }, { "epoch": 0.2863, "grad_norm": 0.1069922149181366, "learning_rate": 4.245443141729237e-05, "loss": 0.0383, "step": 58260 }, { "epoch": 0.28635, "grad_norm": 0.09611210972070694, "learning_rate": 4.245147177801129e-05, "loss": 0.0386, "step": 58270 }, { "epoch": 0.2864, "grad_norm": 0.11979424953460693, "learning_rate": 4.2448511661613514e-05, "loss": 0.0383, "step": 58280 }, { "epoch": 0.28645, "grad_norm": 0.11691362410783768, "learning_rate": 4.244555106817999e-05, "loss": 0.0414, "step": 58290 }, { "epoch": 0.2865, "grad_norm": 0.11565831303596497, "learning_rate": 4.2442589997791655e-05, "loss": 0.0392, "step": 58300 }, { "epoch": 0.28655, "grad_norm": 0.12460681796073914, "learning_rate": 4.2439628450529455e-05, "loss": 0.0406, "step": 58310 }, { "epoch": 0.2866, "grad_norm": 0.11439589411020279, "learning_rate": 4.2436666426474374e-05, "loss": 0.0398, "step": 58320 }, { "epoch": 0.28665, "grad_norm": 0.11800641566514969, "learning_rate": 4.243370392570738e-05, "loss": 0.0434, "step": 58330 }, { "epoch": 0.2867, "grad_norm": 0.11225130409002304, "learning_rate": 4.2430740948309475e-05, "loss": 0.0391, "step": 58340 }, { "epoch": 0.28675, "grad_norm": 0.12273237109184265, "learning_rate": 4.2427777494361656e-05, "loss": 0.0404, "step": 58350 }, { "epoch": 0.2868, "grad_norm": 0.11912668496370316, "learning_rate": 4.242481356394495e-05, "loss": 0.0397, "step": 58360 }, { "epoch": 0.28685, "grad_norm": 0.1070471778512001, "learning_rate": 4.242184915714038e-05, "loss": 0.039, "step": 58370 }, { "epoch": 0.2869, "grad_norm": 0.09075344353914261, "learning_rate": 4.241888427402901e-05, "loss": 0.0382, "step": 58380 }, { "epoch": 0.28695, "grad_norm": 0.09854944050312042, "learning_rate": 4.2415918914691877e-05, "loss": 0.0386, "step": 58390 }, { "epoch": 0.287, "grad_norm": 0.09506674110889435, "learning_rate": 4.241295307921007e-05, "loss": 0.0365, "step": 58400 }, { "epoch": 0.28705, "grad_norm": 0.1081521064043045, "learning_rate": 4.240998676766467e-05, "loss": 0.0374, "step": 58410 }, { "epoch": 0.2871, "grad_norm": 0.0927954763174057, "learning_rate": 4.240701998013677e-05, "loss": 0.0387, "step": 58420 }, { "epoch": 0.28715, "grad_norm": 0.08340541273355484, "learning_rate": 4.240405271670749e-05, "loss": 0.0364, "step": 58430 }, { "epoch": 0.2872, "grad_norm": 0.07858669012784958, "learning_rate": 4.240108497745793e-05, "loss": 0.0377, "step": 58440 }, { "epoch": 0.28725, "grad_norm": 0.09130463004112244, "learning_rate": 4.239811676246925e-05, "loss": 0.0378, "step": 58450 }, { "epoch": 0.2873, "grad_norm": 0.09747853130102158, "learning_rate": 4.23951480718226e-05, "loss": 0.0379, "step": 58460 }, { "epoch": 0.28735, "grad_norm": 0.11840526014566422, "learning_rate": 4.239217890559914e-05, "loss": 0.0384, "step": 58470 }, { "epoch": 0.2874, "grad_norm": 0.114509217441082, "learning_rate": 4.238920926388004e-05, "loss": 0.0375, "step": 58480 }, { "epoch": 0.28745, "grad_norm": 0.10365968942642212, "learning_rate": 4.2386239146746484e-05, "loss": 0.0389, "step": 58490 }, { "epoch": 0.2875, "grad_norm": 0.11859627813100815, "learning_rate": 4.238326855427969e-05, "loss": 0.037, "step": 58500 }, { "epoch": 0.28755, "grad_norm": 0.10479515790939331, "learning_rate": 4.2380297486560855e-05, "loss": 0.0398, "step": 58510 }, { "epoch": 0.2876, "grad_norm": 0.09964556246995926, "learning_rate": 4.237732594367122e-05, "loss": 0.038, "step": 58520 }, { "epoch": 0.28765, "grad_norm": 0.11377781629562378, "learning_rate": 4.237435392569203e-05, "loss": 0.0381, "step": 58530 }, { "epoch": 0.2877, "grad_norm": 0.10320976376533508, "learning_rate": 4.2371381432704525e-05, "loss": 0.0374, "step": 58540 }, { "epoch": 0.28775, "grad_norm": 0.10899948328733444, "learning_rate": 4.236840846478998e-05, "loss": 0.0376, "step": 58550 }, { "epoch": 0.2878, "grad_norm": 0.10679149627685547, "learning_rate": 4.236543502202966e-05, "loss": 0.0387, "step": 58560 }, { "epoch": 0.28785, "grad_norm": 0.1140972375869751, "learning_rate": 4.236246110450488e-05, "loss": 0.0391, "step": 58570 }, { "epoch": 0.2879, "grad_norm": 0.1361556500196457, "learning_rate": 4.235948671229694e-05, "loss": 0.0376, "step": 58580 }, { "epoch": 0.28795, "grad_norm": 0.11948247998952866, "learning_rate": 4.2356511845487156e-05, "loss": 0.0387, "step": 58590 }, { "epoch": 0.288, "grad_norm": 0.10225441306829453, "learning_rate": 4.2353536504156855e-05, "loss": 0.0386, "step": 58600 }, { "epoch": 0.28805, "grad_norm": 0.10306891798973083, "learning_rate": 4.235056068838738e-05, "loss": 0.0373, "step": 58610 }, { "epoch": 0.2881, "grad_norm": 0.0958067774772644, "learning_rate": 4.2347584398260096e-05, "loss": 0.0374, "step": 58620 }, { "epoch": 0.28815, "grad_norm": 0.11552973836660385, "learning_rate": 4.234460763385638e-05, "loss": 0.0376, "step": 58630 }, { "epoch": 0.2882, "grad_norm": 0.10933344811201096, "learning_rate": 4.2341630395257594e-05, "loss": 0.037, "step": 58640 }, { "epoch": 0.28825, "grad_norm": 0.08934877812862396, "learning_rate": 4.233865268254516e-05, "loss": 0.0366, "step": 58650 }, { "epoch": 0.2883, "grad_norm": 0.09421779215335846, "learning_rate": 4.233567449580047e-05, "loss": 0.0365, "step": 58660 }, { "epoch": 0.28835, "grad_norm": 0.09506764262914658, "learning_rate": 4.233269583510495e-05, "loss": 0.0376, "step": 58670 }, { "epoch": 0.2884, "grad_norm": 0.09250709414482117, "learning_rate": 4.232971670054005e-05, "loss": 0.0362, "step": 58680 }, { "epoch": 0.28845, "grad_norm": 0.08452494442462921, "learning_rate": 4.2326737092187194e-05, "loss": 0.0372, "step": 58690 }, { "epoch": 0.2885, "grad_norm": 0.09364336729049683, "learning_rate": 4.232375701012785e-05, "loss": 0.036, "step": 58700 }, { "epoch": 0.28855, "grad_norm": 0.08511223644018173, "learning_rate": 4.2320776454443514e-05, "loss": 0.0369, "step": 58710 }, { "epoch": 0.2886, "grad_norm": 0.08995082229375839, "learning_rate": 4.2317795425215645e-05, "loss": 0.0355, "step": 58720 }, { "epoch": 0.28865, "grad_norm": 0.09742347151041031, "learning_rate": 4.231481392252576e-05, "loss": 0.0423, "step": 58730 }, { "epoch": 0.2887, "grad_norm": 0.09668943285942078, "learning_rate": 4.2311831946455366e-05, "loss": 0.0376, "step": 58740 }, { "epoch": 0.28875, "grad_norm": 0.11909755319356918, "learning_rate": 4.230884949708599e-05, "loss": 0.0362, "step": 58750 }, { "epoch": 0.2888, "grad_norm": 0.09919518232345581, "learning_rate": 4.2305866574499166e-05, "loss": 0.0367, "step": 58760 }, { "epoch": 0.28885, "grad_norm": 0.11793835461139679, "learning_rate": 4.230288317877646e-05, "loss": 0.0414, "step": 58770 }, { "epoch": 0.2889, "grad_norm": 0.13174089789390564, "learning_rate": 4.2299899309999424e-05, "loss": 0.038, "step": 58780 }, { "epoch": 0.28895, "grad_norm": 0.08976200968027115, "learning_rate": 4.229691496824965e-05, "loss": 0.0369, "step": 58790 }, { "epoch": 0.289, "grad_norm": 0.0908234715461731, "learning_rate": 4.229393015360871e-05, "loss": 0.0365, "step": 58800 }, { "epoch": 0.28905, "grad_norm": 0.07467728108167648, "learning_rate": 4.229094486615821e-05, "loss": 0.0371, "step": 58810 }, { "epoch": 0.2891, "grad_norm": 0.08888581395149231, "learning_rate": 4.228795910597978e-05, "loss": 0.0385, "step": 58820 }, { "epoch": 0.28915, "grad_norm": 0.08418140560388565, "learning_rate": 4.228497287315504e-05, "loss": 0.0373, "step": 58830 }, { "epoch": 0.2892, "grad_norm": 0.10168585926294327, "learning_rate": 4.2281986167765644e-05, "loss": 0.0364, "step": 58840 }, { "epoch": 0.28925, "grad_norm": 0.08546897768974304, "learning_rate": 4.227899898989323e-05, "loss": 0.0369, "step": 58850 }, { "epoch": 0.2893, "grad_norm": 0.10331574082374573, "learning_rate": 4.2276011339619476e-05, "loss": 0.0366, "step": 58860 }, { "epoch": 0.28935, "grad_norm": 0.08570695668458939, "learning_rate": 4.2273023217026066e-05, "loss": 0.0351, "step": 58870 }, { "epoch": 0.2894, "grad_norm": 0.10039761662483215, "learning_rate": 4.2270034622194685e-05, "loss": 0.0367, "step": 58880 }, { "epoch": 0.28945, "grad_norm": 0.09401549398899078, "learning_rate": 4.226704555520705e-05, "loss": 0.0345, "step": 58890 }, { "epoch": 0.2895, "grad_norm": 0.10001719743013382, "learning_rate": 4.226405601614487e-05, "loss": 0.0386, "step": 58900 }, { "epoch": 0.28955, "grad_norm": 0.1016169935464859, "learning_rate": 4.22610660050899e-05, "loss": 0.0369, "step": 58910 }, { "epoch": 0.2896, "grad_norm": 0.09658100455999374, "learning_rate": 4.2258075522123854e-05, "loss": 0.0353, "step": 58920 }, { "epoch": 0.28965, "grad_norm": 0.09994114190340042, "learning_rate": 4.225508456732851e-05, "loss": 0.0366, "step": 58930 }, { "epoch": 0.2897, "grad_norm": 0.0924081951379776, "learning_rate": 4.225209314078564e-05, "loss": 0.0396, "step": 58940 }, { "epoch": 0.28975, "grad_norm": 0.10897225141525269, "learning_rate": 4.224910124257702e-05, "loss": 0.0411, "step": 58950 }, { "epoch": 0.2898, "grad_norm": 0.08845224976539612, "learning_rate": 4.224610887278446e-05, "loss": 0.0387, "step": 58960 }, { "epoch": 0.28985, "grad_norm": 0.10787303745746613, "learning_rate": 4.224311603148976e-05, "loss": 0.0412, "step": 58970 }, { "epoch": 0.2899, "grad_norm": 0.08971309661865234, "learning_rate": 4.2240122718774747e-05, "loss": 0.0374, "step": 58980 }, { "epoch": 0.28995, "grad_norm": 0.08946974575519562, "learning_rate": 4.2237128934721246e-05, "loss": 0.0381, "step": 58990 }, { "epoch": 0.29, "grad_norm": 0.08195340633392334, "learning_rate": 4.223413467941113e-05, "loss": 0.0377, "step": 59000 }, { "epoch": 0.29005, "grad_norm": 0.0776698887348175, "learning_rate": 4.223113995292624e-05, "loss": 0.0378, "step": 59010 }, { "epoch": 0.2901, "grad_norm": 0.07512795925140381, "learning_rate": 4.2228144755348444e-05, "loss": 0.039, "step": 59020 }, { "epoch": 0.29015, "grad_norm": 0.097700335085392, "learning_rate": 4.2225149086759664e-05, "loss": 0.038, "step": 59030 }, { "epoch": 0.2902, "grad_norm": 0.10088939964771271, "learning_rate": 4.222215294724177e-05, "loss": 0.0388, "step": 59040 }, { "epoch": 0.29025, "grad_norm": 0.09535989910364151, "learning_rate": 4.221915633687668e-05, "loss": 0.039, "step": 59050 }, { "epoch": 0.2903, "grad_norm": 0.10953981429338455, "learning_rate": 4.221615925574633e-05, "loss": 0.039, "step": 59060 }, { "epoch": 0.29035, "grad_norm": 0.0990564301609993, "learning_rate": 4.2213161703932644e-05, "loss": 0.0361, "step": 59070 }, { "epoch": 0.2904, "grad_norm": 0.08613920211791992, "learning_rate": 4.2210163681517603e-05, "loss": 0.0392, "step": 59080 }, { "epoch": 0.29045, "grad_norm": 0.08893074840307236, "learning_rate": 4.220716518858314e-05, "loss": 0.0381, "step": 59090 }, { "epoch": 0.2905, "grad_norm": 0.08558481186628342, "learning_rate": 4.2204166225211246e-05, "loss": 0.0373, "step": 59100 }, { "epoch": 0.29055, "grad_norm": 0.09346235543489456, "learning_rate": 4.2201166791483915e-05, "loss": 0.0373, "step": 59110 }, { "epoch": 0.2906, "grad_norm": 0.09380681067705154, "learning_rate": 4.219816688748314e-05, "loss": 0.038, "step": 59120 }, { "epoch": 0.29065, "grad_norm": 0.08615744858980179, "learning_rate": 4.219516651329095e-05, "loss": 0.0381, "step": 59130 }, { "epoch": 0.2907, "grad_norm": 0.08506422489881516, "learning_rate": 4.2192165668989356e-05, "loss": 0.037, "step": 59140 }, { "epoch": 0.29075, "grad_norm": 0.10388914495706558, "learning_rate": 4.218916435466042e-05, "loss": 0.0418, "step": 59150 }, { "epoch": 0.2908, "grad_norm": 0.09521077573299408, "learning_rate": 4.218616257038619e-05, "loss": 0.038, "step": 59160 }, { "epoch": 0.29085, "grad_norm": 0.08011168241500854, "learning_rate": 4.2183160316248726e-05, "loss": 0.0383, "step": 59170 }, { "epoch": 0.2909, "grad_norm": 0.09949786216020584, "learning_rate": 4.218015759233012e-05, "loss": 0.0383, "step": 59180 }, { "epoch": 0.29095, "grad_norm": 0.10403574258089066, "learning_rate": 4.2177154398712456e-05, "loss": 0.0377, "step": 59190 }, { "epoch": 0.291, "grad_norm": 0.10081654787063599, "learning_rate": 4.2174150735477844e-05, "loss": 0.0374, "step": 59200 }, { "epoch": 0.29105, "grad_norm": 0.090993233025074, "learning_rate": 4.217114660270841e-05, "loss": 0.0378, "step": 59210 }, { "epoch": 0.2911, "grad_norm": 0.10115541517734528, "learning_rate": 4.2168142000486267e-05, "loss": 0.0388, "step": 59220 }, { "epoch": 0.29115, "grad_norm": 0.11779844015836716, "learning_rate": 4.216513692889358e-05, "loss": 0.0401, "step": 59230 }, { "epoch": 0.2912, "grad_norm": 0.12547267973423004, "learning_rate": 4.216213138801249e-05, "loss": 0.0375, "step": 59240 }, { "epoch": 0.29125, "grad_norm": 0.10295387357473373, "learning_rate": 4.215912537792519e-05, "loss": 0.0367, "step": 59250 }, { "epoch": 0.2913, "grad_norm": 0.1021818071603775, "learning_rate": 4.215611889871384e-05, "loss": 0.0375, "step": 59260 }, { "epoch": 0.29135, "grad_norm": 0.10994240641593933, "learning_rate": 4.215311195046064e-05, "loss": 0.0384, "step": 59270 }, { "epoch": 0.2914, "grad_norm": 0.10220217704772949, "learning_rate": 4.21501045332478e-05, "loss": 0.0392, "step": 59280 }, { "epoch": 0.29145, "grad_norm": 0.10150808095932007, "learning_rate": 4.214709664715756e-05, "loss": 0.0391, "step": 59290 }, { "epoch": 0.2915, "grad_norm": 0.1190982237458229, "learning_rate": 4.214408829227213e-05, "loss": 0.0392, "step": 59300 }, { "epoch": 0.29155, "grad_norm": 0.10309015214443207, "learning_rate": 4.214107946867377e-05, "loss": 0.0371, "step": 59310 }, { "epoch": 0.2916, "grad_norm": 0.11924099177122116, "learning_rate": 4.2138070176444736e-05, "loss": 0.0378, "step": 59320 }, { "epoch": 0.29165, "grad_norm": 0.115843266248703, "learning_rate": 4.21350604156673e-05, "loss": 0.0366, "step": 59330 }, { "epoch": 0.2917, "grad_norm": 0.09781511127948761, "learning_rate": 4.213205018642375e-05, "loss": 0.0378, "step": 59340 }, { "epoch": 0.29175, "grad_norm": 0.10209403187036514, "learning_rate": 4.2129039488796384e-05, "loss": 0.0379, "step": 59350 }, { "epoch": 0.2918, "grad_norm": 0.10987348854541779, "learning_rate": 4.212602832286752e-05, "loss": 0.0381, "step": 59360 }, { "epoch": 0.29185, "grad_norm": 0.09803928434848785, "learning_rate": 4.212301668871946e-05, "loss": 0.0364, "step": 59370 }, { "epoch": 0.2919, "grad_norm": 0.09594839811325073, "learning_rate": 4.212000458643457e-05, "loss": 0.0365, "step": 59380 }, { "epoch": 0.29195, "grad_norm": 0.14262939989566803, "learning_rate": 4.211699201609518e-05, "loss": 0.0385, "step": 59390 }, { "epoch": 0.292, "grad_norm": 0.16029612720012665, "learning_rate": 4.211397897778366e-05, "loss": 0.0402, "step": 59400 }, { "epoch": 0.29205, "grad_norm": 0.11073275655508041, "learning_rate": 4.211096547158239e-05, "loss": 0.0395, "step": 59410 }, { "epoch": 0.2921, "grad_norm": 0.11205213516950607, "learning_rate": 4.210795149757375e-05, "loss": 0.0384, "step": 59420 }, { "epoch": 0.29215, "grad_norm": 0.1112198457121849, "learning_rate": 4.2104937055840144e-05, "loss": 0.0411, "step": 59430 }, { "epoch": 0.2922, "grad_norm": 0.09706633538007736, "learning_rate": 4.210192214646398e-05, "loss": 0.037, "step": 59440 }, { "epoch": 0.29225, "grad_norm": 0.08984319865703583, "learning_rate": 4.209890676952769e-05, "loss": 0.0378, "step": 59450 }, { "epoch": 0.2923, "grad_norm": 0.09882747381925583, "learning_rate": 4.2095890925113715e-05, "loss": 0.0377, "step": 59460 }, { "epoch": 0.29235, "grad_norm": 0.10908032208681107, "learning_rate": 4.2092874613304506e-05, "loss": 0.0362, "step": 59470 }, { "epoch": 0.2924, "grad_norm": 0.11629695445299149, "learning_rate": 4.208985783418252e-05, "loss": 0.0376, "step": 59480 }, { "epoch": 0.29245, "grad_norm": 0.10098670423030853, "learning_rate": 4.2086840587830255e-05, "loss": 0.0382, "step": 59490 }, { "epoch": 0.2925, "grad_norm": 0.1128614991903305, "learning_rate": 4.2083822874330175e-05, "loss": 0.037, "step": 59500 }, { "epoch": 0.29255, "grad_norm": 0.09345178306102753, "learning_rate": 4.2080804693764805e-05, "loss": 0.0379, "step": 59510 }, { "epoch": 0.2926, "grad_norm": 0.09811430424451828, "learning_rate": 4.207778604621664e-05, "loss": 0.0366, "step": 59520 }, { "epoch": 0.29265, "grad_norm": 0.09692283719778061, "learning_rate": 4.2074766931768225e-05, "loss": 0.0376, "step": 59530 }, { "epoch": 0.2927, "grad_norm": 0.09886656701564789, "learning_rate": 4.20717473505021e-05, "loss": 0.0375, "step": 59540 }, { "epoch": 0.29275, "grad_norm": 0.0967152789235115, "learning_rate": 4.2068727302500815e-05, "loss": 0.0374, "step": 59550 }, { "epoch": 0.2928, "grad_norm": 0.0911996141076088, "learning_rate": 4.2065706787846936e-05, "loss": 0.0375, "step": 59560 }, { "epoch": 0.29285, "grad_norm": 0.09192916005849838, "learning_rate": 4.206268580662305e-05, "loss": 0.0361, "step": 59570 }, { "epoch": 0.2929, "grad_norm": 0.07742298394441605, "learning_rate": 4.2059664358911734e-05, "loss": 0.0376, "step": 59580 }, { "epoch": 0.29295, "grad_norm": 0.09748908132314682, "learning_rate": 4.2056642444795616e-05, "loss": 0.0376, "step": 59590 }, { "epoch": 0.293, "grad_norm": 0.09673763811588287, "learning_rate": 4.2053620064357294e-05, "loss": 0.037, "step": 59600 }, { "epoch": 0.29305, "grad_norm": 0.10387150943279266, "learning_rate": 4.20505972176794e-05, "loss": 0.0387, "step": 59610 }, { "epoch": 0.2931, "grad_norm": 0.08987797051668167, "learning_rate": 4.204757390484459e-05, "loss": 0.0393, "step": 59620 }, { "epoch": 0.29315, "grad_norm": 0.10657423734664917, "learning_rate": 4.2044550125935514e-05, "loss": 0.0384, "step": 59630 }, { "epoch": 0.2932, "grad_norm": 0.10422495007514954, "learning_rate": 4.204152588103485e-05, "loss": 0.0383, "step": 59640 }, { "epoch": 0.29325, "grad_norm": 0.09066640585660934, "learning_rate": 4.2038501170225254e-05, "loss": 0.0398, "step": 59650 }, { "epoch": 0.2933, "grad_norm": 0.08937875926494598, "learning_rate": 4.2035475993589447e-05, "loss": 0.0389, "step": 59660 }, { "epoch": 0.29335, "grad_norm": 0.10382039844989777, "learning_rate": 4.203245035121012e-05, "loss": 0.041, "step": 59670 }, { "epoch": 0.2934, "grad_norm": 0.10572951287031174, "learning_rate": 4.202942424317001e-05, "loss": 0.0403, "step": 59680 }, { "epoch": 0.29345, "grad_norm": 0.09443110972642899, "learning_rate": 4.202639766955183e-05, "loss": 0.0398, "step": 59690 }, { "epoch": 0.2935, "grad_norm": 0.10440312325954437, "learning_rate": 4.202337063043834e-05, "loss": 0.0377, "step": 59700 }, { "epoch": 0.29355, "grad_norm": 0.0963246151804924, "learning_rate": 4.202034312591229e-05, "loss": 0.0377, "step": 59710 }, { "epoch": 0.2936, "grad_norm": 0.08949219435453415, "learning_rate": 4.2017315156056445e-05, "loss": 0.0377, "step": 59720 }, { "epoch": 0.29365, "grad_norm": 0.10338666290044785, "learning_rate": 4.201428672095361e-05, "loss": 0.0377, "step": 59730 }, { "epoch": 0.2937, "grad_norm": 0.08472144603729248, "learning_rate": 4.2011257820686554e-05, "loss": 0.0389, "step": 59740 }, { "epoch": 0.29375, "grad_norm": 0.0787237361073494, "learning_rate": 4.200822845533812e-05, "loss": 0.0378, "step": 59750 }, { "epoch": 0.2938, "grad_norm": 0.08679846674203873, "learning_rate": 4.200519862499109e-05, "loss": 0.037, "step": 59760 }, { "epoch": 0.29385, "grad_norm": 0.09616634249687195, "learning_rate": 4.2002168329728325e-05, "loss": 0.0393, "step": 59770 }, { "epoch": 0.2939, "grad_norm": 0.11898311227560043, "learning_rate": 4.199913756963267e-05, "loss": 0.0384, "step": 59780 }, { "epoch": 0.29395, "grad_norm": 0.10533025115728378, "learning_rate": 4.1996106344786976e-05, "loss": 0.038, "step": 59790 }, { "epoch": 0.294, "grad_norm": 0.09638418257236481, "learning_rate": 4.1993074655274126e-05, "loss": 0.0383, "step": 59800 }, { "epoch": 0.29405, "grad_norm": 0.10283535718917847, "learning_rate": 4.1990042501176985e-05, "loss": 0.0391, "step": 59810 }, { "epoch": 0.2941, "grad_norm": 0.10665203630924225, "learning_rate": 4.1987009882578476e-05, "loss": 0.038, "step": 59820 }, { "epoch": 0.29415, "grad_norm": 0.09289417415857315, "learning_rate": 4.198397679956149e-05, "loss": 0.0388, "step": 59830 }, { "epoch": 0.2942, "grad_norm": 0.09182269871234894, "learning_rate": 4.198094325220897e-05, "loss": 0.0376, "step": 59840 }, { "epoch": 0.29425, "grad_norm": 0.10201194882392883, "learning_rate": 4.197790924060383e-05, "loss": 0.0376, "step": 59850 }, { "epoch": 0.2943, "grad_norm": 0.08951577544212341, "learning_rate": 4.197487476482903e-05, "loss": 0.0382, "step": 59860 }, { "epoch": 0.29435, "grad_norm": 0.10263777524232864, "learning_rate": 4.197183982496754e-05, "loss": 0.0405, "step": 59870 }, { "epoch": 0.2944, "grad_norm": 0.12024150788784027, "learning_rate": 4.196880442110232e-05, "loss": 0.0408, "step": 59880 }, { "epoch": 0.29445, "grad_norm": 0.09682836383581161, "learning_rate": 4.196576855331637e-05, "loss": 0.0386, "step": 59890 }, { "epoch": 0.2945, "grad_norm": 0.09489523619413376, "learning_rate": 4.196273222169267e-05, "loss": 0.0378, "step": 59900 }, { "epoch": 0.29455, "grad_norm": 0.09791134297847748, "learning_rate": 4.195969542631425e-05, "loss": 0.0384, "step": 59910 }, { "epoch": 0.2946, "grad_norm": 0.08972064405679703, "learning_rate": 4.195665816726412e-05, "loss": 0.0372, "step": 59920 }, { "epoch": 0.29465, "grad_norm": 0.07786723971366882, "learning_rate": 4.195362044462534e-05, "loss": 0.0375, "step": 59930 }, { "epoch": 0.2947, "grad_norm": 0.08396299183368683, "learning_rate": 4.195058225848094e-05, "loss": 0.0373, "step": 59940 }, { "epoch": 0.29475, "grad_norm": 0.07986466586589813, "learning_rate": 4.194754360891398e-05, "loss": 0.038, "step": 59950 }, { "epoch": 0.2948, "grad_norm": 0.08281272649765015, "learning_rate": 4.1944504496007555e-05, "loss": 0.0407, "step": 59960 }, { "epoch": 0.29485, "grad_norm": 0.09473776817321777, "learning_rate": 4.194146491984474e-05, "loss": 0.0369, "step": 59970 }, { "epoch": 0.2949, "grad_norm": 0.10042299330234528, "learning_rate": 4.193842488050864e-05, "loss": 0.0401, "step": 59980 }, { "epoch": 0.29495, "grad_norm": 0.10005058348178864, "learning_rate": 4.1935384378082366e-05, "loss": 0.0396, "step": 59990 }, { "epoch": 0.295, "grad_norm": 0.08495144546031952, "learning_rate": 4.193234341264905e-05, "loss": 0.0392, "step": 60000 }, { "epoch": 0.29505, "grad_norm": 0.09953828155994415, "learning_rate": 4.1929301984291825e-05, "loss": 0.0396, "step": 60010 }, { "epoch": 0.2951, "grad_norm": 0.08857406675815582, "learning_rate": 4.1926260093093836e-05, "loss": 0.0375, "step": 60020 }, { "epoch": 0.29515, "grad_norm": 0.0942673310637474, "learning_rate": 4.192321773913826e-05, "loss": 0.0386, "step": 60030 }, { "epoch": 0.2952, "grad_norm": 0.0972483903169632, "learning_rate": 4.192017492250827e-05, "loss": 0.0401, "step": 60040 }, { "epoch": 0.29525, "grad_norm": 0.09403170645236969, "learning_rate": 4.1917131643287056e-05, "loss": 0.0374, "step": 60050 }, { "epoch": 0.2953, "grad_norm": 0.12300402671098709, "learning_rate": 4.191408790155781e-05, "loss": 0.0395, "step": 60060 }, { "epoch": 0.29535, "grad_norm": 0.13763009011745453, "learning_rate": 4.191104369740376e-05, "loss": 0.0392, "step": 60070 }, { "epoch": 0.2954, "grad_norm": 0.09596195816993713, "learning_rate": 4.190799903090813e-05, "loss": 0.0402, "step": 60080 }, { "epoch": 0.29545, "grad_norm": 0.10017865896224976, "learning_rate": 4.1904953902154156e-05, "loss": 0.0395, "step": 60090 }, { "epoch": 0.2955, "grad_norm": 0.10523372888565063, "learning_rate": 4.1901908311225094e-05, "loss": 0.0395, "step": 60100 }, { "epoch": 0.29555, "grad_norm": 0.1086510494351387, "learning_rate": 4.189886225820421e-05, "loss": 0.0386, "step": 60110 }, { "epoch": 0.2956, "grad_norm": 0.09998047351837158, "learning_rate": 4.189581574317478e-05, "loss": 0.042, "step": 60120 }, { "epoch": 0.29565, "grad_norm": 0.09882159531116486, "learning_rate": 4.1892768766220094e-05, "loss": 0.0375, "step": 60130 }, { "epoch": 0.2957, "grad_norm": 0.12305544316768646, "learning_rate": 4.1889721327423456e-05, "loss": 0.0392, "step": 60140 }, { "epoch": 0.29575, "grad_norm": 0.1286933273077011, "learning_rate": 4.188667342686818e-05, "loss": 0.0385, "step": 60150 }, { "epoch": 0.2958, "grad_norm": 0.11229319125413895, "learning_rate": 4.188362506463761e-05, "loss": 0.0387, "step": 60160 }, { "epoch": 0.29585, "grad_norm": 0.09351322799921036, "learning_rate": 4.188057624081506e-05, "loss": 0.038, "step": 60170 }, { "epoch": 0.2959, "grad_norm": 0.10067510604858398, "learning_rate": 4.18775269554839e-05, "loss": 0.0406, "step": 60180 }, { "epoch": 0.29595, "grad_norm": 0.10245190560817719, "learning_rate": 4.187447720872749e-05, "loss": 0.0382, "step": 60190 }, { "epoch": 0.296, "grad_norm": 0.10589483380317688, "learning_rate": 4.187142700062922e-05, "loss": 0.0391, "step": 60200 }, { "epoch": 0.29605, "grad_norm": 0.10421047359704971, "learning_rate": 4.186837633127247e-05, "loss": 0.0357, "step": 60210 }, { "epoch": 0.2961, "grad_norm": 0.09676158428192139, "learning_rate": 4.1865325200740644e-05, "loss": 0.0362, "step": 60220 }, { "epoch": 0.29615, "grad_norm": 0.1064562052488327, "learning_rate": 4.1862273609117174e-05, "loss": 0.0373, "step": 60230 }, { "epoch": 0.2962, "grad_norm": 0.09847971051931381, "learning_rate": 4.185922155648547e-05, "loss": 0.0378, "step": 60240 }, { "epoch": 0.29625, "grad_norm": 0.1274549812078476, "learning_rate": 4.185616904292898e-05, "loss": 0.0395, "step": 60250 }, { "epoch": 0.2963, "grad_norm": 0.1018499881029129, "learning_rate": 4.185311606853117e-05, "loss": 0.0389, "step": 60260 }, { "epoch": 0.29635, "grad_norm": 0.11894772946834564, "learning_rate": 4.18500626333755e-05, "loss": 0.037, "step": 60270 }, { "epoch": 0.2964, "grad_norm": 0.08830223232507706, "learning_rate": 4.184700873754544e-05, "loss": 0.0369, "step": 60280 }, { "epoch": 0.29645, "grad_norm": 0.10243300348520279, "learning_rate": 4.184395438112449e-05, "loss": 0.0363, "step": 60290 }, { "epoch": 0.2965, "grad_norm": 0.1040797159075737, "learning_rate": 4.1840899564196156e-05, "loss": 0.0416, "step": 60300 }, { "epoch": 0.29655, "grad_norm": 0.1132705807685852, "learning_rate": 4.1837844286843955e-05, "loss": 0.038, "step": 60310 }, { "epoch": 0.2966, "grad_norm": 0.10648635029792786, "learning_rate": 4.183478854915142e-05, "loss": 0.0378, "step": 60320 }, { "epoch": 0.29665, "grad_norm": 0.12978768348693848, "learning_rate": 4.183173235120209e-05, "loss": 0.0382, "step": 60330 }, { "epoch": 0.2967, "grad_norm": 0.10505112260580063, "learning_rate": 4.182867569307952e-05, "loss": 0.0374, "step": 60340 }, { "epoch": 0.29675, "grad_norm": 0.10616233944892883, "learning_rate": 4.182561857486727e-05, "loss": 0.037, "step": 60350 }, { "epoch": 0.2968, "grad_norm": 0.0917801707983017, "learning_rate": 4.182256099664894e-05, "loss": 0.0367, "step": 60360 }, { "epoch": 0.29685, "grad_norm": 0.13801418244838715, "learning_rate": 4.181950295850811e-05, "loss": 0.0379, "step": 60370 }, { "epoch": 0.2969, "grad_norm": 0.10303942114114761, "learning_rate": 4.1816444460528393e-05, "loss": 0.0365, "step": 60380 }, { "epoch": 0.29695, "grad_norm": 0.10049451142549515, "learning_rate": 4.181338550279339e-05, "loss": 0.0375, "step": 60390 }, { "epoch": 0.297, "grad_norm": 0.09753605723381042, "learning_rate": 4.1810326085386755e-05, "loss": 0.0368, "step": 60400 }, { "epoch": 0.29705, "grad_norm": 0.09278786927461624, "learning_rate": 4.180726620839212e-05, "loss": 0.0377, "step": 60410 }, { "epoch": 0.2971, "grad_norm": 0.10221299529075623, "learning_rate": 4.180420587189313e-05, "loss": 0.0377, "step": 60420 }, { "epoch": 0.29715, "grad_norm": 0.09834998100996017, "learning_rate": 4.180114507597347e-05, "loss": 0.0379, "step": 60430 }, { "epoch": 0.2972, "grad_norm": 0.08894462883472443, "learning_rate": 4.1798083820716815e-05, "loss": 0.0387, "step": 60440 }, { "epoch": 0.29725, "grad_norm": 0.092961885035038, "learning_rate": 4.179502210620687e-05, "loss": 0.0371, "step": 60450 }, { "epoch": 0.2973, "grad_norm": 0.10489032417535782, "learning_rate": 4.179195993252731e-05, "loss": 0.0381, "step": 60460 }, { "epoch": 0.29735, "grad_norm": 0.09162574261426926, "learning_rate": 4.1788897299761884e-05, "loss": 0.0385, "step": 60470 }, { "epoch": 0.2974, "grad_norm": 0.08949447423219681, "learning_rate": 4.1785834207994316e-05, "loss": 0.036, "step": 60480 }, { "epoch": 0.29745, "grad_norm": 0.0965668112039566, "learning_rate": 4.178277065730835e-05, "loss": 0.0395, "step": 60490 }, { "epoch": 0.2975, "grad_norm": 0.06762854009866714, "learning_rate": 4.177970664778773e-05, "loss": 0.0358, "step": 60500 }, { "epoch": 0.29755, "grad_norm": 0.1038472130894661, "learning_rate": 4.177664217951624e-05, "loss": 0.0367, "step": 60510 }, { "epoch": 0.2976, "grad_norm": 0.08165507763624191, "learning_rate": 4.1773577252577656e-05, "loss": 0.038, "step": 60520 }, { "epoch": 0.29765, "grad_norm": 0.11089900881052017, "learning_rate": 4.1770511867055776e-05, "loss": 0.0395, "step": 60530 }, { "epoch": 0.2977, "grad_norm": 0.07820553332567215, "learning_rate": 4.1767446023034385e-05, "loss": 0.0364, "step": 60540 }, { "epoch": 0.29775, "grad_norm": 0.09922687709331512, "learning_rate": 4.176437972059733e-05, "loss": 0.0371, "step": 60550 }, { "epoch": 0.2978, "grad_norm": 0.0939578264951706, "learning_rate": 4.176131295982843e-05, "loss": 0.0366, "step": 60560 }, { "epoch": 0.29785, "grad_norm": 0.09753625839948654, "learning_rate": 4.175824574081153e-05, "loss": 0.037, "step": 60570 }, { "epoch": 0.2979, "grad_norm": 0.09230932593345642, "learning_rate": 4.1755178063630493e-05, "loss": 0.0367, "step": 60580 }, { "epoch": 0.29795, "grad_norm": 0.10648012906312943, "learning_rate": 4.175210992836918e-05, "loss": 0.0365, "step": 60590 }, { "epoch": 0.298, "grad_norm": 0.10602240264415741, "learning_rate": 4.1749041335111464e-05, "loss": 0.0365, "step": 60600 }, { "epoch": 0.29805, "grad_norm": 0.10287895053625107, "learning_rate": 4.174597228394126e-05, "loss": 0.0366, "step": 60610 }, { "epoch": 0.2981, "grad_norm": 0.10624495148658752, "learning_rate": 4.174290277494246e-05, "loss": 0.0378, "step": 60620 }, { "epoch": 0.29815, "grad_norm": 0.09934336692094803, "learning_rate": 4.1739832808199e-05, "loss": 0.0362, "step": 60630 }, { "epoch": 0.2982, "grad_norm": 0.1049598678946495, "learning_rate": 4.1736762383794795e-05, "loss": 0.0386, "step": 60640 }, { "epoch": 0.29825, "grad_norm": 0.08692259341478348, "learning_rate": 4.1733691501813786e-05, "loss": 0.0376, "step": 60650 }, { "epoch": 0.2983, "grad_norm": 0.094350665807724, "learning_rate": 4.173062016233994e-05, "loss": 0.0385, "step": 60660 }, { "epoch": 0.29835, "grad_norm": 0.09607906639575958, "learning_rate": 4.172754836545723e-05, "loss": 0.0373, "step": 60670 }, { "epoch": 0.2984, "grad_norm": 0.10610980540513992, "learning_rate": 4.172447611124963e-05, "loss": 0.0368, "step": 60680 }, { "epoch": 0.29845, "grad_norm": 0.11003467440605164, "learning_rate": 4.172140339980114e-05, "loss": 0.0394, "step": 60690 }, { "epoch": 0.2985, "grad_norm": 0.11963500082492828, "learning_rate": 4.171833023119576e-05, "loss": 0.0419, "step": 60700 }, { "epoch": 0.29855, "grad_norm": 0.08699667453765869, "learning_rate": 4.1715256605517504e-05, "loss": 0.0375, "step": 60710 }, { "epoch": 0.2986, "grad_norm": 0.09019137918949127, "learning_rate": 4.171218252285042e-05, "loss": 0.0395, "step": 60720 }, { "epoch": 0.29865, "grad_norm": 0.09844812005758286, "learning_rate": 4.170910798327854e-05, "loss": 0.039, "step": 60730 }, { "epoch": 0.2987, "grad_norm": 0.11068392544984818, "learning_rate": 4.170603298688593e-05, "loss": 0.0394, "step": 60740 }, { "epoch": 0.29875, "grad_norm": 0.09484200179576874, "learning_rate": 4.170295753375665e-05, "loss": 0.0368, "step": 60750 }, { "epoch": 0.2988, "grad_norm": 0.09472393989562988, "learning_rate": 4.169988162397479e-05, "loss": 0.0372, "step": 60760 }, { "epoch": 0.29885, "grad_norm": 0.09127482771873474, "learning_rate": 4.169680525762444e-05, "loss": 0.0376, "step": 60770 }, { "epoch": 0.2989, "grad_norm": 0.09734153747558594, "learning_rate": 4.169372843478971e-05, "loss": 0.0374, "step": 60780 }, { "epoch": 0.29895, "grad_norm": 0.11162359267473221, "learning_rate": 4.1690651155554704e-05, "loss": 0.037, "step": 60790 }, { "epoch": 0.299, "grad_norm": 0.11358215659856796, "learning_rate": 4.168757342000358e-05, "loss": 0.0379, "step": 60800 }, { "epoch": 0.29905, "grad_norm": 0.10406457632780075, "learning_rate": 4.1684495228220454e-05, "loss": 0.0366, "step": 60810 }, { "epoch": 0.2991, "grad_norm": 0.1159927248954773, "learning_rate": 4.168141658028951e-05, "loss": 0.0413, "step": 60820 }, { "epoch": 0.29915, "grad_norm": 0.09065935015678406, "learning_rate": 4.16783374762949e-05, "loss": 0.0374, "step": 60830 }, { "epoch": 0.2992, "grad_norm": 0.08514414727687836, "learning_rate": 4.1675257916320804e-05, "loss": 0.0381, "step": 60840 }, { "epoch": 0.29925, "grad_norm": 0.10681270807981491, "learning_rate": 4.167217790045143e-05, "loss": 0.0383, "step": 60850 }, { "epoch": 0.2993, "grad_norm": 0.09594421088695526, "learning_rate": 4.166909742877097e-05, "loss": 0.038, "step": 60860 }, { "epoch": 0.29935, "grad_norm": 0.10875742882490158, "learning_rate": 4.166601650136364e-05, "loss": 0.0369, "step": 60870 }, { "epoch": 0.2994, "grad_norm": 0.1058308482170105, "learning_rate": 4.166293511831369e-05, "loss": 0.0374, "step": 60880 }, { "epoch": 0.29945, "grad_norm": 0.12089107185602188, "learning_rate": 4.165985327970535e-05, "loss": 0.0371, "step": 60890 }, { "epoch": 0.2995, "grad_norm": 0.10122127830982208, "learning_rate": 4.165677098562288e-05, "loss": 0.0402, "step": 60900 }, { "epoch": 0.29955, "grad_norm": 0.10833931714296341, "learning_rate": 4.1653688236150554e-05, "loss": 0.0381, "step": 60910 }, { "epoch": 0.2996, "grad_norm": 0.10205668210983276, "learning_rate": 4.165060503137265e-05, "loss": 0.0386, "step": 60920 }, { "epoch": 0.29965, "grad_norm": 0.10592308640480042, "learning_rate": 4.164752137137345e-05, "loss": 0.0376, "step": 60930 }, { "epoch": 0.2997, "grad_norm": 0.11171428859233856, "learning_rate": 4.164443725623728e-05, "loss": 0.0392, "step": 60940 }, { "epoch": 0.29975, "grad_norm": 0.11343561112880707, "learning_rate": 4.164135268604844e-05, "loss": 0.0382, "step": 60950 }, { "epoch": 0.2998, "grad_norm": 0.12067008018493652, "learning_rate": 4.163826766089127e-05, "loss": 0.0369, "step": 60960 }, { "epoch": 0.29985, "grad_norm": 0.11105100810527802, "learning_rate": 4.163518218085012e-05, "loss": 0.0414, "step": 60970 }, { "epoch": 0.2999, "grad_norm": 0.11540385335683823, "learning_rate": 4.1632096246009335e-05, "loss": 0.0377, "step": 60980 }, { "epoch": 0.29995, "grad_norm": 0.08897673338651657, "learning_rate": 4.1629009856453284e-05, "loss": 0.039, "step": 60990 }, { "epoch": 0.3, "grad_norm": 0.09491023421287537, "learning_rate": 4.162592301226635e-05, "loss": 0.038, "step": 61000 }, { "epoch": 0.30005, "grad_norm": 0.09792555868625641, "learning_rate": 4.162283571353293e-05, "loss": 0.0419, "step": 61010 }, { "epoch": 0.3001, "grad_norm": 0.10005024075508118, "learning_rate": 4.161974796033743e-05, "loss": 0.0379, "step": 61020 }, { "epoch": 0.30015, "grad_norm": 0.09802912175655365, "learning_rate": 4.161665975276426e-05, "loss": 0.0375, "step": 61030 }, { "epoch": 0.3002, "grad_norm": 0.11515079438686371, "learning_rate": 4.1613571090897855e-05, "loss": 0.0381, "step": 61040 }, { "epoch": 0.30025, "grad_norm": 0.10093576461076736, "learning_rate": 4.161048197482266e-05, "loss": 0.0387, "step": 61050 }, { "epoch": 0.3003, "grad_norm": 0.10900209844112396, "learning_rate": 4.160739240462312e-05, "loss": 0.0384, "step": 61060 }, { "epoch": 0.30035, "grad_norm": 0.08780990540981293, "learning_rate": 4.160430238038372e-05, "loss": 0.0382, "step": 61070 }, { "epoch": 0.3004, "grad_norm": 0.09186938405036926, "learning_rate": 4.160121190218893e-05, "loss": 0.0387, "step": 61080 }, { "epoch": 0.30045, "grad_norm": 0.11051493883132935, "learning_rate": 4.1598120970123245e-05, "loss": 0.0389, "step": 61090 }, { "epoch": 0.3005, "grad_norm": 0.093411386013031, "learning_rate": 4.159502958427116e-05, "loss": 0.0368, "step": 61100 }, { "epoch": 0.30055, "grad_norm": 0.11050494015216827, "learning_rate": 4.159193774471721e-05, "loss": 0.0405, "step": 61110 }, { "epoch": 0.3006, "grad_norm": 0.10686563700437546, "learning_rate": 4.158884545154591e-05, "loss": 0.0391, "step": 61120 }, { "epoch": 0.30065, "grad_norm": 0.09879803657531738, "learning_rate": 4.158575270484181e-05, "loss": 0.038, "step": 61130 }, { "epoch": 0.3007, "grad_norm": 0.10461480170488358, "learning_rate": 4.1582659504689456e-05, "loss": 0.0376, "step": 61140 }, { "epoch": 0.30075, "grad_norm": 0.08467607200145721, "learning_rate": 4.157956585117343e-05, "loss": 0.0404, "step": 61150 }, { "epoch": 0.3008, "grad_norm": 0.0974237322807312, "learning_rate": 4.15764717443783e-05, "loss": 0.0382, "step": 61160 }, { "epoch": 0.30085, "grad_norm": 0.09864193946123123, "learning_rate": 4.157337718438865e-05, "loss": 0.0407, "step": 61170 }, { "epoch": 0.3009, "grad_norm": 0.0968838632106781, "learning_rate": 4.157028217128911e-05, "loss": 0.0384, "step": 61180 }, { "epoch": 0.30095, "grad_norm": 0.10413113236427307, "learning_rate": 4.1567186705164265e-05, "loss": 0.0422, "step": 61190 }, { "epoch": 0.301, "grad_norm": 0.0874795988202095, "learning_rate": 4.1564090786098776e-05, "loss": 0.0374, "step": 61200 }, { "epoch": 0.30105, "grad_norm": 0.13343235850334167, "learning_rate": 4.156099441417726e-05, "loss": 0.0412, "step": 61210 }, { "epoch": 0.3011, "grad_norm": 0.08971703797578812, "learning_rate": 4.1557897589484376e-05, "loss": 0.0375, "step": 61220 }, { "epoch": 0.30115, "grad_norm": 0.10634627938270569, "learning_rate": 4.155480031210479e-05, "loss": 0.0373, "step": 61230 }, { "epoch": 0.3012, "grad_norm": 0.08652137964963913, "learning_rate": 4.1551702582123186e-05, "loss": 0.0384, "step": 61240 }, { "epoch": 0.30125, "grad_norm": 0.09820088744163513, "learning_rate": 4.154860439962425e-05, "loss": 0.0382, "step": 61250 }, { "epoch": 0.3013, "grad_norm": 0.08968545496463776, "learning_rate": 4.154550576469269e-05, "loss": 0.0392, "step": 61260 }, { "epoch": 0.30135, "grad_norm": 0.11606442183256149, "learning_rate": 4.154240667741322e-05, "loss": 0.0396, "step": 61270 }, { "epoch": 0.3014, "grad_norm": 0.09368910640478134, "learning_rate": 4.1539307137870567e-05, "loss": 0.0379, "step": 61280 }, { "epoch": 0.30145, "grad_norm": 0.09235761314630508, "learning_rate": 4.1536207146149467e-05, "loss": 0.0368, "step": 61290 }, { "epoch": 0.3015, "grad_norm": 0.11601971834897995, "learning_rate": 4.153310670233467e-05, "loss": 0.0398, "step": 61300 }, { "epoch": 0.30155, "grad_norm": 0.10703108459711075, "learning_rate": 4.153000580651095e-05, "loss": 0.0405, "step": 61310 }, { "epoch": 0.3016, "grad_norm": 0.09087284654378891, "learning_rate": 4.152690445876308e-05, "loss": 0.0383, "step": 61320 }, { "epoch": 0.30165, "grad_norm": 0.09212212264537811, "learning_rate": 4.152380265917586e-05, "loss": 0.0393, "step": 61330 }, { "epoch": 0.3017, "grad_norm": 0.11663784086704254, "learning_rate": 4.1520700407834076e-05, "loss": 0.0426, "step": 61340 }, { "epoch": 0.30175, "grad_norm": 0.10482209920883179, "learning_rate": 4.1517597704822555e-05, "loss": 0.0385, "step": 61350 }, { "epoch": 0.3018, "grad_norm": 0.10460387915372849, "learning_rate": 4.151449455022611e-05, "loss": 0.0391, "step": 61360 }, { "epoch": 0.30185, "grad_norm": 0.08725716918706894, "learning_rate": 4.151139094412959e-05, "loss": 0.0379, "step": 61370 }, { "epoch": 0.3019, "grad_norm": 0.08314737677574158, "learning_rate": 4.150828688661785e-05, "loss": 0.0384, "step": 61380 }, { "epoch": 0.30195, "grad_norm": 0.09015455842018127, "learning_rate": 4.150518237777575e-05, "loss": 0.038, "step": 61390 }, { "epoch": 0.302, "grad_norm": 0.07633396238088608, "learning_rate": 4.1502077417688156e-05, "loss": 0.0405, "step": 61400 }, { "epoch": 0.30205, "grad_norm": 0.10376927256584167, "learning_rate": 4.149897200643997e-05, "loss": 0.0393, "step": 61410 }, { "epoch": 0.3021, "grad_norm": 0.09429501742124557, "learning_rate": 4.1495866144116094e-05, "loss": 0.0374, "step": 61420 }, { "epoch": 0.30215, "grad_norm": 0.10373689979314804, "learning_rate": 4.149275983080142e-05, "loss": 0.0396, "step": 61430 }, { "epoch": 0.3022, "grad_norm": 0.08532211929559708, "learning_rate": 4.148965306658089e-05, "loss": 0.0394, "step": 61440 }, { "epoch": 0.30225, "grad_norm": 0.11602317541837692, "learning_rate": 4.148654585153945e-05, "loss": 0.0383, "step": 61450 }, { "epoch": 0.3023, "grad_norm": 0.08118918538093567, "learning_rate": 4.148343818576204e-05, "loss": 0.0385, "step": 61460 }, { "epoch": 0.30235, "grad_norm": 0.10779253393411636, "learning_rate": 4.1480330069333616e-05, "loss": 0.037, "step": 61470 }, { "epoch": 0.3024, "grad_norm": 0.08785419166088104, "learning_rate": 4.147722150233916e-05, "loss": 0.041, "step": 61480 }, { "epoch": 0.30245, "grad_norm": 0.09721635282039642, "learning_rate": 4.147411248486366e-05, "loss": 0.0402, "step": 61490 }, { "epoch": 0.3025, "grad_norm": 0.10442981123924255, "learning_rate": 4.1471003016992116e-05, "loss": 0.0395, "step": 61500 }, { "epoch": 0.30255, "grad_norm": 0.0965740829706192, "learning_rate": 4.146789309880953e-05, "loss": 0.0387, "step": 61510 }, { "epoch": 0.3026, "grad_norm": 0.11614526808261871, "learning_rate": 4.146478273040094e-05, "loss": 0.0374, "step": 61520 }, { "epoch": 0.30265, "grad_norm": 0.12318149209022522, "learning_rate": 4.1461671911851375e-05, "loss": 0.0378, "step": 61530 }, { "epoch": 0.3027, "grad_norm": 0.1019563376903534, "learning_rate": 4.145856064324589e-05, "loss": 0.0368, "step": 61540 }, { "epoch": 0.30275, "grad_norm": 0.0958062931895256, "learning_rate": 4.145544892466953e-05, "loss": 0.0365, "step": 61550 }, { "epoch": 0.3028, "grad_norm": 0.11094732582569122, "learning_rate": 4.1452336756207374e-05, "loss": 0.038, "step": 61560 }, { "epoch": 0.30285, "grad_norm": 0.10732755064964294, "learning_rate": 4.144922413794453e-05, "loss": 0.036, "step": 61570 }, { "epoch": 0.3029, "grad_norm": 0.09656397253274918, "learning_rate": 4.1446111069966066e-05, "loss": 0.0374, "step": 61580 }, { "epoch": 0.30295, "grad_norm": 0.09352206438779831, "learning_rate": 4.1442997552357105e-05, "loss": 0.0373, "step": 61590 }, { "epoch": 0.303, "grad_norm": 0.09997440129518509, "learning_rate": 4.143988358520277e-05, "loss": 0.0382, "step": 61600 }, { "epoch": 0.30305, "grad_norm": 0.11431118845939636, "learning_rate": 4.14367691685882e-05, "loss": 0.0411, "step": 61610 }, { "epoch": 0.3031, "grad_norm": 0.1606724113225937, "learning_rate": 4.143365430259852e-05, "loss": 0.0388, "step": 61620 }, { "epoch": 0.30315, "grad_norm": 0.14557507634162903, "learning_rate": 4.143053898731891e-05, "loss": 0.0372, "step": 61630 }, { "epoch": 0.3032, "grad_norm": 0.11492512375116348, "learning_rate": 4.1427423222834547e-05, "loss": 0.0369, "step": 61640 }, { "epoch": 0.30325, "grad_norm": 0.08114670217037201, "learning_rate": 4.1424307009230594e-05, "loss": 0.0369, "step": 61650 }, { "epoch": 0.3033, "grad_norm": 0.09293445199728012, "learning_rate": 4.1421190346592263e-05, "loss": 0.0359, "step": 61660 }, { "epoch": 0.30335, "grad_norm": 0.09854632616043091, "learning_rate": 4.141807323500476e-05, "loss": 0.0379, "step": 61670 }, { "epoch": 0.3034, "grad_norm": 0.10344306379556656, "learning_rate": 4.141495567455329e-05, "loss": 0.0372, "step": 61680 }, { "epoch": 0.30345, "grad_norm": 0.10848110914230347, "learning_rate": 4.141183766532312e-05, "loss": 0.0376, "step": 61690 }, { "epoch": 0.3035, "grad_norm": 0.10579539835453033, "learning_rate": 4.1408719207399453e-05, "loss": 0.0391, "step": 61700 }, { "epoch": 0.30355, "grad_norm": 0.09526227414608002, "learning_rate": 4.1405600300867575e-05, "loss": 0.0376, "step": 61710 }, { "epoch": 0.3036, "grad_norm": 0.0759005919098854, "learning_rate": 4.140248094581275e-05, "loss": 0.0374, "step": 61720 }, { "epoch": 0.30365, "grad_norm": 0.08883017301559448, "learning_rate": 4.139936114232026e-05, "loss": 0.0369, "step": 61730 }, { "epoch": 0.3037, "grad_norm": 0.09480241686105728, "learning_rate": 4.139624089047539e-05, "loss": 0.0386, "step": 61740 }, { "epoch": 0.30375, "grad_norm": 0.09421525150537491, "learning_rate": 4.139312019036346e-05, "loss": 0.0379, "step": 61750 }, { "epoch": 0.3038, "grad_norm": 0.09258712083101273, "learning_rate": 4.138999904206978e-05, "loss": 0.0383, "step": 61760 }, { "epoch": 0.30385, "grad_norm": 0.08552666008472443, "learning_rate": 4.1386877445679686e-05, "loss": 0.0371, "step": 61770 }, { "epoch": 0.3039, "grad_norm": 0.08578373491764069, "learning_rate": 4.138375540127852e-05, "loss": 0.0396, "step": 61780 }, { "epoch": 0.30395, "grad_norm": 0.10112480074167252, "learning_rate": 4.1380632908951634e-05, "loss": 0.0409, "step": 61790 }, { "epoch": 0.304, "grad_norm": 0.11652129888534546, "learning_rate": 4.137750996878439e-05, "loss": 0.0403, "step": 61800 }, { "epoch": 0.30405, "grad_norm": 0.11524508148431778, "learning_rate": 4.137438658086219e-05, "loss": 0.0384, "step": 61810 }, { "epoch": 0.3041, "grad_norm": 0.10093747079372406, "learning_rate": 4.13712627452704e-05, "loss": 0.0375, "step": 61820 }, { "epoch": 0.30415, "grad_norm": 0.09673056751489639, "learning_rate": 4.1368138462094445e-05, "loss": 0.0381, "step": 61830 }, { "epoch": 0.3042, "grad_norm": 0.0936247855424881, "learning_rate": 4.136501373141973e-05, "loss": 0.0382, "step": 61840 }, { "epoch": 0.30425, "grad_norm": 0.11220092326402664, "learning_rate": 4.1361888553331695e-05, "loss": 0.0375, "step": 61850 }, { "epoch": 0.3043, "grad_norm": 0.1188722476363182, "learning_rate": 4.1358762927915775e-05, "loss": 0.0386, "step": 61860 }, { "epoch": 0.30435, "grad_norm": 0.12074930220842361, "learning_rate": 4.1355636855257406e-05, "loss": 0.0431, "step": 61870 }, { "epoch": 0.3044, "grad_norm": 0.10390708595514297, "learning_rate": 4.1352510335442084e-05, "loss": 0.042, "step": 61880 }, { "epoch": 0.30445, "grad_norm": 0.09604662656784058, "learning_rate": 4.1349383368555265e-05, "loss": 0.039, "step": 61890 }, { "epoch": 0.3045, "grad_norm": 0.09382264316082001, "learning_rate": 4.134625595468246e-05, "loss": 0.0359, "step": 61900 }, { "epoch": 0.30455, "grad_norm": 0.1026986688375473, "learning_rate": 4.1343128093909144e-05, "loss": 0.0435, "step": 61910 }, { "epoch": 0.3046, "grad_norm": 0.10203906893730164, "learning_rate": 4.133999978632085e-05, "loss": 0.0384, "step": 61920 }, { "epoch": 0.30465, "grad_norm": 0.1025913804769516, "learning_rate": 4.13368710320031e-05, "loss": 0.0384, "step": 61930 }, { "epoch": 0.3047, "grad_norm": 0.07931026816368103, "learning_rate": 4.1333741831041425e-05, "loss": 0.0374, "step": 61940 }, { "epoch": 0.30475, "grad_norm": 0.11639677733182907, "learning_rate": 4.13306121835214e-05, "loss": 0.038, "step": 61950 }, { "epoch": 0.3048, "grad_norm": 0.10649111866950989, "learning_rate": 4.132748208952857e-05, "loss": 0.0375, "step": 61960 }, { "epoch": 0.30485, "grad_norm": 0.09318245202302933, "learning_rate": 4.132435154914851e-05, "loss": 0.037, "step": 61970 }, { "epoch": 0.3049, "grad_norm": 0.09532459825277328, "learning_rate": 4.132122056246681e-05, "loss": 0.0373, "step": 61980 }, { "epoch": 0.30495, "grad_norm": 0.09357064962387085, "learning_rate": 4.131808912956907e-05, "loss": 0.0371, "step": 61990 }, { "epoch": 0.305, "grad_norm": 0.10159306228160858, "learning_rate": 4.131495725054091e-05, "loss": 0.0362, "step": 62000 }, { "epoch": 0.30505, "grad_norm": 0.10437101870775223, "learning_rate": 4.1311824925467946e-05, "loss": 0.0372, "step": 62010 }, { "epoch": 0.3051, "grad_norm": 0.0874381735920906, "learning_rate": 4.1308692154435815e-05, "loss": 0.0378, "step": 62020 }, { "epoch": 0.30515, "grad_norm": 0.09519375115633011, "learning_rate": 4.130555893753016e-05, "loss": 0.0362, "step": 62030 }, { "epoch": 0.3052, "grad_norm": 0.10409439355134964, "learning_rate": 4.1302425274836666e-05, "loss": 0.037, "step": 62040 }, { "epoch": 0.30525, "grad_norm": 0.11408756673336029, "learning_rate": 4.129929116644098e-05, "loss": 0.0373, "step": 62050 }, { "epoch": 0.3053, "grad_norm": 0.10046551376581192, "learning_rate": 4.1296156612428794e-05, "loss": 0.0367, "step": 62060 }, { "epoch": 0.30535, "grad_norm": 0.10562136769294739, "learning_rate": 4.129302161288582e-05, "loss": 0.038, "step": 62070 }, { "epoch": 0.3054, "grad_norm": 0.11253020167350769, "learning_rate": 4.128988616789774e-05, "loss": 0.0402, "step": 62080 }, { "epoch": 0.30545, "grad_norm": 0.10092043876647949, "learning_rate": 4.1286750277550304e-05, "loss": 0.0367, "step": 62090 }, { "epoch": 0.3055, "grad_norm": 0.09704675525426865, "learning_rate": 4.1283613941929234e-05, "loss": 0.037, "step": 62100 }, { "epoch": 0.30555, "grad_norm": 0.09652874618768692, "learning_rate": 4.1280477161120265e-05, "loss": 0.0372, "step": 62110 }, { "epoch": 0.3056, "grad_norm": 0.11451993137598038, "learning_rate": 4.127733993520918e-05, "loss": 0.037, "step": 62120 }, { "epoch": 0.30565, "grad_norm": 0.1000591367483139, "learning_rate": 4.1274202264281724e-05, "loss": 0.0378, "step": 62130 }, { "epoch": 0.3057, "grad_norm": 0.11066377907991409, "learning_rate": 4.12710641484237e-05, "loss": 0.0391, "step": 62140 }, { "epoch": 0.30575, "grad_norm": 0.08408109843730927, "learning_rate": 4.12679255877209e-05, "loss": 0.0366, "step": 62150 }, { "epoch": 0.3058, "grad_norm": 0.08459489792585373, "learning_rate": 4.126478658225912e-05, "loss": 0.0352, "step": 62160 }, { "epoch": 0.30585, "grad_norm": 0.08361006528139114, "learning_rate": 4.1261647132124184e-05, "loss": 0.0361, "step": 62170 }, { "epoch": 0.3059, "grad_norm": 0.1002361848950386, "learning_rate": 4.125850723740192e-05, "loss": 0.0369, "step": 62180 }, { "epoch": 0.30595, "grad_norm": 0.12000847607851028, "learning_rate": 4.1255366898178184e-05, "loss": 0.0377, "step": 62190 }, { "epoch": 0.306, "grad_norm": 0.10912597924470901, "learning_rate": 4.125222611453882e-05, "loss": 0.0377, "step": 62200 }, { "epoch": 0.30605, "grad_norm": 0.09594231098890305, "learning_rate": 4.12490848865697e-05, "loss": 0.0364, "step": 62210 }, { "epoch": 0.3061, "grad_norm": 0.09830238670110703, "learning_rate": 4.1245943214356705e-05, "loss": 0.0376, "step": 62220 }, { "epoch": 0.30615, "grad_norm": 0.1080409362912178, "learning_rate": 4.124280109798573e-05, "loss": 0.0362, "step": 62230 }, { "epoch": 0.3062, "grad_norm": 0.10195433348417282, "learning_rate": 4.123965853754267e-05, "loss": 0.0357, "step": 62240 }, { "epoch": 0.30625, "grad_norm": 0.113552026450634, "learning_rate": 4.123651553311345e-05, "loss": 0.0379, "step": 62250 }, { "epoch": 0.3063, "grad_norm": 0.10659865289926529, "learning_rate": 4.123337208478399e-05, "loss": 0.0385, "step": 62260 }, { "epoch": 0.30635, "grad_norm": 0.09573084115982056, "learning_rate": 4.1230228192640236e-05, "loss": 0.0377, "step": 62270 }, { "epoch": 0.3064, "grad_norm": 0.09120679646730423, "learning_rate": 4.1227083856768145e-05, "loss": 0.039, "step": 62280 }, { "epoch": 0.30645, "grad_norm": 0.09591097384691238, "learning_rate": 4.122393907725368e-05, "loss": 0.0365, "step": 62290 }, { "epoch": 0.3065, "grad_norm": 0.09726311266422272, "learning_rate": 4.1220793854182804e-05, "loss": 0.0389, "step": 62300 }, { "epoch": 0.30655, "grad_norm": 0.10283922404050827, "learning_rate": 4.121764818764153e-05, "loss": 0.0378, "step": 62310 }, { "epoch": 0.3066, "grad_norm": 0.09351787716150284, "learning_rate": 4.121450207771584e-05, "loss": 0.0374, "step": 62320 }, { "epoch": 0.30665, "grad_norm": 0.09554977715015411, "learning_rate": 4.121135552449176e-05, "loss": 0.0371, "step": 62330 }, { "epoch": 0.3067, "grad_norm": 0.09455087035894394, "learning_rate": 4.1208208528055306e-05, "loss": 0.0378, "step": 62340 }, { "epoch": 0.30675, "grad_norm": 0.11537139862775803, "learning_rate": 4.1205061088492517e-05, "loss": 0.0404, "step": 62350 }, { "epoch": 0.3068, "grad_norm": 0.11361251771450043, "learning_rate": 4.1201913205889455e-05, "loss": 0.0385, "step": 62360 }, { "epoch": 0.30685, "grad_norm": 0.0990777239203453, "learning_rate": 4.119876488033216e-05, "loss": 0.0378, "step": 62370 }, { "epoch": 0.3069, "grad_norm": 0.09623073786497116, "learning_rate": 4.119561611190673e-05, "loss": 0.0378, "step": 62380 }, { "epoch": 0.30695, "grad_norm": 0.11010082811117172, "learning_rate": 4.1192466900699236e-05, "loss": 0.0388, "step": 62390 }, { "epoch": 0.307, "grad_norm": 0.09047936648130417, "learning_rate": 4.1189317246795784e-05, "loss": 0.0384, "step": 62400 }, { "epoch": 0.30705, "grad_norm": 0.11480408161878586, "learning_rate": 4.1186167150282475e-05, "loss": 0.0399, "step": 62410 }, { "epoch": 0.3071, "grad_norm": 0.11480441689491272, "learning_rate": 4.118301661124544e-05, "loss": 0.0375, "step": 62420 }, { "epoch": 0.30715, "grad_norm": 0.10686799138784409, "learning_rate": 4.117986562977081e-05, "loss": 0.0386, "step": 62430 }, { "epoch": 0.3072, "grad_norm": 0.09445648640394211, "learning_rate": 4.117671420594473e-05, "loss": 0.0382, "step": 62440 }, { "epoch": 0.30725, "grad_norm": 0.11411401629447937, "learning_rate": 4.117356233985337e-05, "loss": 0.0404, "step": 62450 }, { "epoch": 0.3073, "grad_norm": 0.09053072333335876, "learning_rate": 4.117041003158288e-05, "loss": 0.0385, "step": 62460 }, { "epoch": 0.30735, "grad_norm": 0.07754967361688614, "learning_rate": 4.1167257281219455e-05, "loss": 0.0383, "step": 62470 }, { "epoch": 0.3074, "grad_norm": 0.08269201964139938, "learning_rate": 4.1164104088849296e-05, "loss": 0.0392, "step": 62480 }, { "epoch": 0.30745, "grad_norm": 0.10286622494459152, "learning_rate": 4.11609504545586e-05, "loss": 0.0402, "step": 62490 }, { "epoch": 0.3075, "grad_norm": 0.09792700409889221, "learning_rate": 4.1157796378433596e-05, "loss": 0.0385, "step": 62500 }, { "epoch": 0.30755, "grad_norm": 0.08387406915426254, "learning_rate": 4.11546418605605e-05, "loss": 0.0359, "step": 62510 }, { "epoch": 0.3076, "grad_norm": 0.08836230635643005, "learning_rate": 4.1151486901025574e-05, "loss": 0.0397, "step": 62520 }, { "epoch": 0.30765, "grad_norm": 0.09530341625213623, "learning_rate": 4.1148331499915056e-05, "loss": 0.0373, "step": 62530 }, { "epoch": 0.3077, "grad_norm": 0.09608582407236099, "learning_rate": 4.1145175657315225e-05, "loss": 0.0372, "step": 62540 }, { "epoch": 0.30775, "grad_norm": 0.09119854867458344, "learning_rate": 4.1142019373312355e-05, "loss": 0.0402, "step": 62550 }, { "epoch": 0.3078, "grad_norm": 0.10277434438467026, "learning_rate": 4.113886264799275e-05, "loss": 0.0365, "step": 62560 }, { "epoch": 0.30785, "grad_norm": 0.10991762578487396, "learning_rate": 4.1135705481442685e-05, "loss": 0.0375, "step": 62570 }, { "epoch": 0.3079, "grad_norm": 0.13433682918548584, "learning_rate": 4.1132547873748503e-05, "loss": 0.0379, "step": 62580 }, { "epoch": 0.30795, "grad_norm": 0.1160324215888977, "learning_rate": 4.112938982499652e-05, "loss": 0.0366, "step": 62590 }, { "epoch": 0.308, "grad_norm": 0.09958013147115707, "learning_rate": 4.1126231335273094e-05, "loss": 0.039, "step": 62600 }, { "epoch": 0.30805, "grad_norm": 0.10945141315460205, "learning_rate": 4.1123072404664545e-05, "loss": 0.0366, "step": 62610 }, { "epoch": 0.3081, "grad_norm": 0.08697067946195602, "learning_rate": 4.111991303325726e-05, "loss": 0.0374, "step": 62620 }, { "epoch": 0.30815, "grad_norm": 0.0961507260799408, "learning_rate": 4.1116753221137606e-05, "loss": 0.0364, "step": 62630 }, { "epoch": 0.3082, "grad_norm": 0.09394659847021103, "learning_rate": 4.1113592968391976e-05, "loss": 0.037, "step": 62640 }, { "epoch": 0.30825, "grad_norm": 0.09779747575521469, "learning_rate": 4.1110432275106767e-05, "loss": 0.0365, "step": 62650 }, { "epoch": 0.3083, "grad_norm": 0.08492156118154526, "learning_rate": 4.110727114136839e-05, "loss": 0.0372, "step": 62660 }, { "epoch": 0.30835, "grad_norm": 0.1093636080622673, "learning_rate": 4.1104109567263274e-05, "loss": 0.0377, "step": 62670 }, { "epoch": 0.3084, "grad_norm": 0.12288608402013779, "learning_rate": 4.110094755287785e-05, "loss": 0.0367, "step": 62680 }, { "epoch": 0.30845, "grad_norm": 0.10244695097208023, "learning_rate": 4.109778509829857e-05, "loss": 0.04, "step": 62690 }, { "epoch": 0.3085, "grad_norm": 0.10393958538770676, "learning_rate": 4.109462220361189e-05, "loss": 0.0397, "step": 62700 }, { "epoch": 0.30855, "grad_norm": 0.11706296354532242, "learning_rate": 4.109145886890429e-05, "loss": 0.0397, "step": 62710 }, { "epoch": 0.3086, "grad_norm": 0.11853691935539246, "learning_rate": 4.1088295094262255e-05, "loss": 0.0395, "step": 62720 }, { "epoch": 0.30865, "grad_norm": 0.12326949834823608, "learning_rate": 4.108513087977227e-05, "loss": 0.0379, "step": 62730 }, { "epoch": 0.3087, "grad_norm": 0.10332608968019485, "learning_rate": 4.1081966225520846e-05, "loss": 0.037, "step": 62740 }, { "epoch": 0.30875, "grad_norm": 0.11756819486618042, "learning_rate": 4.107880113159451e-05, "loss": 0.0384, "step": 62750 }, { "epoch": 0.3088, "grad_norm": 0.11053383350372314, "learning_rate": 4.107563559807979e-05, "loss": 0.0359, "step": 62760 }, { "epoch": 0.30885, "grad_norm": 0.09963206201791763, "learning_rate": 4.107246962506324e-05, "loss": 0.0369, "step": 62770 }, { "epoch": 0.3089, "grad_norm": 0.09865821152925491, "learning_rate": 4.106930321263139e-05, "loss": 0.0376, "step": 62780 }, { "epoch": 0.30895, "grad_norm": 0.10817153006792068, "learning_rate": 4.106613636087085e-05, "loss": 0.0368, "step": 62790 }, { "epoch": 0.309, "grad_norm": 0.10829388350248337, "learning_rate": 4.106296906986816e-05, "loss": 0.0376, "step": 62800 }, { "epoch": 0.30905, "grad_norm": 0.10261911153793335, "learning_rate": 4.105980133970995e-05, "loss": 0.0362, "step": 62810 }, { "epoch": 0.3091, "grad_norm": 0.09937945753335953, "learning_rate": 4.105663317048278e-05, "loss": 0.0363, "step": 62820 }, { "epoch": 0.30915, "grad_norm": 0.10094765573740005, "learning_rate": 4.10534645622733e-05, "loss": 0.0377, "step": 62830 }, { "epoch": 0.3092, "grad_norm": 0.11168407648801804, "learning_rate": 4.1050295515168144e-05, "loss": 0.0373, "step": 62840 }, { "epoch": 0.30925, "grad_norm": 0.09110624343156815, "learning_rate": 4.104712602925392e-05, "loss": 0.0375, "step": 62850 }, { "epoch": 0.3093, "grad_norm": 0.08740352094173431, "learning_rate": 4.104395610461731e-05, "loss": 0.0415, "step": 62860 }, { "epoch": 0.30935, "grad_norm": 0.08553145825862885, "learning_rate": 4.104078574134497e-05, "loss": 0.0381, "step": 62870 }, { "epoch": 0.3094, "grad_norm": 0.09509292244911194, "learning_rate": 4.103761493952357e-05, "loss": 0.0362, "step": 62880 }, { "epoch": 0.30945, "grad_norm": 0.09452133625745773, "learning_rate": 4.10344436992398e-05, "loss": 0.0381, "step": 62890 }, { "epoch": 0.3095, "grad_norm": 0.10518445074558258, "learning_rate": 4.103127202058036e-05, "loss": 0.0384, "step": 62900 }, { "epoch": 0.30955, "grad_norm": 0.11385580897331238, "learning_rate": 4.102809990363197e-05, "loss": 0.039, "step": 62910 }, { "epoch": 0.3096, "grad_norm": 0.09979929029941559, "learning_rate": 4.102492734848136e-05, "loss": 0.0381, "step": 62920 }, { "epoch": 0.30965, "grad_norm": 0.08985389769077301, "learning_rate": 4.1021754355215235e-05, "loss": 0.0361, "step": 62930 }, { "epoch": 0.3097, "grad_norm": 0.0899069532752037, "learning_rate": 4.101858092392038e-05, "loss": 0.0379, "step": 62940 }, { "epoch": 0.30975, "grad_norm": 0.09491459280252457, "learning_rate": 4.101540705468354e-05, "loss": 0.0396, "step": 62950 }, { "epoch": 0.3098, "grad_norm": 0.10776514559984207, "learning_rate": 4.1012232747591484e-05, "loss": 0.0375, "step": 62960 }, { "epoch": 0.30985, "grad_norm": 0.09491831809282303, "learning_rate": 4.1009058002730995e-05, "loss": 0.0422, "step": 62970 }, { "epoch": 0.3099, "grad_norm": 0.11386746913194656, "learning_rate": 4.1005882820188885e-05, "loss": 0.0389, "step": 62980 }, { "epoch": 0.30995, "grad_norm": 0.10596064478158951, "learning_rate": 4.100270720005195e-05, "loss": 0.0375, "step": 62990 }, { "epoch": 0.31, "grad_norm": 0.1110059916973114, "learning_rate": 4.099953114240701e-05, "loss": 0.0362, "step": 63000 }, { "epoch": 0.31005, "grad_norm": 0.1058363988995552, "learning_rate": 4.09963546473409e-05, "loss": 0.0379, "step": 63010 }, { "epoch": 0.3101, "grad_norm": 0.10297834128141403, "learning_rate": 4.099317771494046e-05, "loss": 0.0382, "step": 63020 }, { "epoch": 0.31015, "grad_norm": 0.10393298417329788, "learning_rate": 4.0990000345292546e-05, "loss": 0.0367, "step": 63030 }, { "epoch": 0.3102, "grad_norm": 0.11371879279613495, "learning_rate": 4.098682253848404e-05, "loss": 0.0381, "step": 63040 }, { "epoch": 0.31025, "grad_norm": 0.11789979785680771, "learning_rate": 4.0983644294601805e-05, "loss": 0.0376, "step": 63050 }, { "epoch": 0.3103, "grad_norm": 0.10600556433200836, "learning_rate": 4.098046561373274e-05, "loss": 0.0383, "step": 63060 }, { "epoch": 0.31035, "grad_norm": 0.11378031224012375, "learning_rate": 4.097728649596376e-05, "loss": 0.0378, "step": 63070 }, { "epoch": 0.3104, "grad_norm": 0.1041090115904808, "learning_rate": 4.097410694138175e-05, "loss": 0.0379, "step": 63080 }, { "epoch": 0.31045, "grad_norm": 0.11633723229169846, "learning_rate": 4.0970926950073674e-05, "loss": 0.0374, "step": 63090 }, { "epoch": 0.3105, "grad_norm": 0.10892637819051743, "learning_rate": 4.096774652212645e-05, "loss": 0.037, "step": 63100 }, { "epoch": 0.31055, "grad_norm": 0.09778828173875809, "learning_rate": 4.096456565762703e-05, "loss": 0.0367, "step": 63110 }, { "epoch": 0.3106, "grad_norm": 0.10582304745912552, "learning_rate": 4.096138435666239e-05, "loss": 0.0375, "step": 63120 }, { "epoch": 0.31065, "grad_norm": 0.09552489221096039, "learning_rate": 4.09582026193195e-05, "loss": 0.0382, "step": 63130 }, { "epoch": 0.3107, "grad_norm": 0.11684516072273254, "learning_rate": 4.095502044568533e-05, "loss": 0.0391, "step": 63140 }, { "epoch": 0.31075, "grad_norm": 0.09883775562047958, "learning_rate": 4.0951837835846906e-05, "loss": 0.0382, "step": 63150 }, { "epoch": 0.3108, "grad_norm": 0.08349844813346863, "learning_rate": 4.0948654789891235e-05, "loss": 0.0394, "step": 63160 }, { "epoch": 0.31085, "grad_norm": 0.1069011241197586, "learning_rate": 4.094547130790532e-05, "loss": 0.0392, "step": 63170 }, { "epoch": 0.3109, "grad_norm": 0.11482026427984238, "learning_rate": 4.094228738997622e-05, "loss": 0.038, "step": 63180 }, { "epoch": 0.31095, "grad_norm": 0.08558998256921768, "learning_rate": 4.093910303619097e-05, "loss": 0.0377, "step": 63190 }, { "epoch": 0.311, "grad_norm": 0.0929851084947586, "learning_rate": 4.0935918246636626e-05, "loss": 0.0394, "step": 63200 }, { "epoch": 0.31105, "grad_norm": 0.09259190410375595, "learning_rate": 4.0932733021400266e-05, "loss": 0.0368, "step": 63210 }, { "epoch": 0.3111, "grad_norm": 0.11861695349216461, "learning_rate": 4.092954736056897e-05, "loss": 0.0415, "step": 63220 }, { "epoch": 0.31115, "grad_norm": 0.09823275357484818, "learning_rate": 4.092636126422984e-05, "loss": 0.0384, "step": 63230 }, { "epoch": 0.3112, "grad_norm": 0.11202265322208405, "learning_rate": 4.092317473246997e-05, "loss": 0.0388, "step": 63240 }, { "epoch": 0.31125, "grad_norm": 0.09932032227516174, "learning_rate": 4.091998776537649e-05, "loss": 0.0373, "step": 63250 }, { "epoch": 0.3113, "grad_norm": 0.10629276931285858, "learning_rate": 4.091680036303652e-05, "loss": 0.0387, "step": 63260 }, { "epoch": 0.31135, "grad_norm": 0.08321334421634674, "learning_rate": 4.091361252553721e-05, "loss": 0.037, "step": 63270 }, { "epoch": 0.3114, "grad_norm": 0.08534108102321625, "learning_rate": 4.091042425296571e-05, "loss": 0.0366, "step": 63280 }, { "epoch": 0.31145, "grad_norm": 0.07628346979618073, "learning_rate": 4.090723554540919e-05, "loss": 0.039, "step": 63290 }, { "epoch": 0.3115, "grad_norm": 0.08024024963378906, "learning_rate": 4.090404640295483e-05, "loss": 0.0353, "step": 63300 }, { "epoch": 0.31155, "grad_norm": 0.08957171440124512, "learning_rate": 4.090085682568982e-05, "loss": 0.0372, "step": 63310 }, { "epoch": 0.3116, "grad_norm": 0.0850076675415039, "learning_rate": 4.089766681370135e-05, "loss": 0.0363, "step": 63320 }, { "epoch": 0.31165, "grad_norm": 0.115052230656147, "learning_rate": 4.089447636707664e-05, "loss": 0.0365, "step": 63330 }, { "epoch": 0.3117, "grad_norm": 0.07677609473466873, "learning_rate": 4.0891285485902915e-05, "loss": 0.0357, "step": 63340 }, { "epoch": 0.31175, "grad_norm": 0.09936800599098206, "learning_rate": 4.088809417026742e-05, "loss": 0.0366, "step": 63350 }, { "epoch": 0.3118, "grad_norm": 0.09300931543111801, "learning_rate": 4.08849024202574e-05, "loss": 0.0369, "step": 63360 }, { "epoch": 0.31185, "grad_norm": 0.09681177884340286, "learning_rate": 4.0881710235960115e-05, "loss": 0.0381, "step": 63370 }, { "epoch": 0.3119, "grad_norm": 0.08285442739725113, "learning_rate": 4.087851761746284e-05, "loss": 0.0361, "step": 63380 }, { "epoch": 0.31195, "grad_norm": 0.11278854310512543, "learning_rate": 4.0875324564852856e-05, "loss": 0.0383, "step": 63390 }, { "epoch": 0.312, "grad_norm": 0.10942468047142029, "learning_rate": 4.0872131078217465e-05, "loss": 0.0361, "step": 63400 }, { "epoch": 0.31205, "grad_norm": 0.08108315616846085, "learning_rate": 4.086893715764397e-05, "loss": 0.0371, "step": 63410 }, { "epoch": 0.3121, "grad_norm": 0.09134598076343536, "learning_rate": 4.08657428032197e-05, "loss": 0.0376, "step": 63420 }, { "epoch": 0.31215, "grad_norm": 0.07536963373422623, "learning_rate": 4.086254801503198e-05, "loss": 0.0371, "step": 63430 }, { "epoch": 0.3122, "grad_norm": 0.09576810896396637, "learning_rate": 4.085935279316815e-05, "loss": 0.0373, "step": 63440 }, { "epoch": 0.31225, "grad_norm": 0.09800707548856735, "learning_rate": 4.0856157137715576e-05, "loss": 0.0386, "step": 63450 }, { "epoch": 0.3123, "grad_norm": 0.10401442646980286, "learning_rate": 4.085296104876163e-05, "loss": 0.0389, "step": 63460 }, { "epoch": 0.31235, "grad_norm": 0.14090527594089508, "learning_rate": 4.084976452639367e-05, "loss": 0.0391, "step": 63470 }, { "epoch": 0.3124, "grad_norm": 0.10894068330526352, "learning_rate": 4.084656757069911e-05, "loss": 0.0393, "step": 63480 }, { "epoch": 0.31245, "grad_norm": 0.09572970867156982, "learning_rate": 4.084337018176535e-05, "loss": 0.0379, "step": 63490 }, { "epoch": 0.3125, "grad_norm": 0.10507770627737045, "learning_rate": 4.08401723596798e-05, "loss": 0.0379, "step": 63500 }, { "epoch": 0.31255, "grad_norm": 0.10381949692964554, "learning_rate": 4.083697410452989e-05, "loss": 0.039, "step": 63510 }, { "epoch": 0.3126, "grad_norm": 0.09521272033452988, "learning_rate": 4.083377541640305e-05, "loss": 0.0379, "step": 63520 }, { "epoch": 0.31265, "grad_norm": 0.10659544169902802, "learning_rate": 4.0830576295386744e-05, "loss": 0.0378, "step": 63530 }, { "epoch": 0.3127, "grad_norm": 0.09491292387247086, "learning_rate": 4.082737674156844e-05, "loss": 0.0357, "step": 63540 }, { "epoch": 0.31275, "grad_norm": 0.08107301592826843, "learning_rate": 4.082417675503558e-05, "loss": 0.0367, "step": 63550 }, { "epoch": 0.3128, "grad_norm": 0.0824798122048378, "learning_rate": 4.082097633587569e-05, "loss": 0.0385, "step": 63560 }, { "epoch": 0.31285, "grad_norm": 0.10407048463821411, "learning_rate": 4.081777548417625e-05, "loss": 0.0386, "step": 63570 }, { "epoch": 0.3129, "grad_norm": 0.09542399644851685, "learning_rate": 4.081457420002476e-05, "loss": 0.0393, "step": 63580 }, { "epoch": 0.31295, "grad_norm": 0.07698172330856323, "learning_rate": 4.0811372483508745e-05, "loss": 0.0368, "step": 63590 }, { "epoch": 0.313, "grad_norm": 0.08906321972608566, "learning_rate": 4.080817033471577e-05, "loss": 0.0376, "step": 63600 }, { "epoch": 0.31305, "grad_norm": 0.07813603430986404, "learning_rate": 4.080496775373334e-05, "loss": 0.0358, "step": 63610 }, { "epoch": 0.3131, "grad_norm": 0.0924825519323349, "learning_rate": 4.080176474064904e-05, "loss": 0.0378, "step": 63620 }, { "epoch": 0.31315, "grad_norm": 0.10021001100540161, "learning_rate": 4.079856129555042e-05, "loss": 0.0374, "step": 63630 }, { "epoch": 0.3132, "grad_norm": 0.11586814373731613, "learning_rate": 4.079535741852507e-05, "loss": 0.0394, "step": 63640 }, { "epoch": 0.31325, "grad_norm": 0.12628233432769775, "learning_rate": 4.079215310966059e-05, "loss": 0.038, "step": 63650 }, { "epoch": 0.3133, "grad_norm": 0.10121853649616241, "learning_rate": 4.078894836904457e-05, "loss": 0.0386, "step": 63660 }, { "epoch": 0.31335, "grad_norm": 0.09781887382268906, "learning_rate": 4.078574319676463e-05, "loss": 0.0375, "step": 63670 }, { "epoch": 0.3134, "grad_norm": 0.1199178546667099, "learning_rate": 4.078253759290841e-05, "loss": 0.0387, "step": 63680 }, { "epoch": 0.31345, "grad_norm": 0.12737081944942474, "learning_rate": 4.077933155756354e-05, "loss": 0.0379, "step": 63690 }, { "epoch": 0.3135, "grad_norm": 0.11249277740716934, "learning_rate": 4.0776125090817666e-05, "loss": 0.0392, "step": 63700 }, { "epoch": 0.31355, "grad_norm": 0.1020110547542572, "learning_rate": 4.077291819275847e-05, "loss": 0.0384, "step": 63710 }, { "epoch": 0.3136, "grad_norm": 0.11463608592748642, "learning_rate": 4.0769710863473604e-05, "loss": 0.0394, "step": 63720 }, { "epoch": 0.31365, "grad_norm": 0.10257559269666672, "learning_rate": 4.076650310305077e-05, "loss": 0.0418, "step": 63730 }, { "epoch": 0.3137, "grad_norm": 0.10356997698545456, "learning_rate": 4.076329491157768e-05, "loss": 0.0393, "step": 63740 }, { "epoch": 0.31375, "grad_norm": 0.12238980829715729, "learning_rate": 4.0760086289142006e-05, "loss": 0.0374, "step": 63750 }, { "epoch": 0.3138, "grad_norm": 0.10171011835336685, "learning_rate": 4.075687723583151e-05, "loss": 0.0386, "step": 63760 }, { "epoch": 0.31385, "grad_norm": 0.15728776156902313, "learning_rate": 4.0753667751733906e-05, "loss": 0.0376, "step": 63770 }, { "epoch": 0.3139, "grad_norm": 0.12389139831066132, "learning_rate": 4.075045783693694e-05, "loss": 0.0383, "step": 63780 }, { "epoch": 0.31395, "grad_norm": 0.12089716643095016, "learning_rate": 4.074724749152837e-05, "loss": 0.0377, "step": 63790 }, { "epoch": 0.314, "grad_norm": 0.08855020999908447, "learning_rate": 4.074403671559598e-05, "loss": 0.0391, "step": 63800 }, { "epoch": 0.31405, "grad_norm": 0.11645334213972092, "learning_rate": 4.0740825509227544e-05, "loss": 0.0407, "step": 63810 }, { "epoch": 0.3141, "grad_norm": 0.10966397821903229, "learning_rate": 4.073761387251084e-05, "loss": 0.0385, "step": 63820 }, { "epoch": 0.31415, "grad_norm": 0.10489311069250107, "learning_rate": 4.0734401805533696e-05, "loss": 0.0373, "step": 63830 }, { "epoch": 0.3142, "grad_norm": 0.1026468500494957, "learning_rate": 4.073118930838391e-05, "loss": 0.0367, "step": 63840 }, { "epoch": 0.31425, "grad_norm": 0.09668806195259094, "learning_rate": 4.072797638114931e-05, "loss": 0.037, "step": 63850 }, { "epoch": 0.3143, "grad_norm": 0.0955677479505539, "learning_rate": 4.072476302391776e-05, "loss": 0.0377, "step": 63860 }, { "epoch": 0.31435, "grad_norm": 0.11412930488586426, "learning_rate": 4.072154923677709e-05, "loss": 0.036, "step": 63870 }, { "epoch": 0.3144, "grad_norm": 0.10534878075122833, "learning_rate": 4.071833501981517e-05, "loss": 0.0385, "step": 63880 }, { "epoch": 0.31445, "grad_norm": 0.10487980395555496, "learning_rate": 4.071512037311988e-05, "loss": 0.0378, "step": 63890 }, { "epoch": 0.3145, "grad_norm": 0.10983464121818542, "learning_rate": 4.07119052967791e-05, "loss": 0.0384, "step": 63900 }, { "epoch": 0.31455, "grad_norm": 0.08953792601823807, "learning_rate": 4.070868979088073e-05, "loss": 0.0373, "step": 63910 }, { "epoch": 0.3146, "grad_norm": 0.10473524034023285, "learning_rate": 4.070547385551269e-05, "loss": 0.0386, "step": 63920 }, { "epoch": 0.31465, "grad_norm": 0.08791254460811615, "learning_rate": 4.070225749076289e-05, "loss": 0.0369, "step": 63930 }, { "epoch": 0.3147, "grad_norm": 0.09786912798881531, "learning_rate": 4.069904069671927e-05, "loss": 0.0372, "step": 63940 }, { "epoch": 0.31475, "grad_norm": 0.09092038869857788, "learning_rate": 4.069582347346977e-05, "loss": 0.0379, "step": 63950 }, { "epoch": 0.3148, "grad_norm": 0.10886016488075256, "learning_rate": 4.069260582110236e-05, "loss": 0.0379, "step": 63960 }, { "epoch": 0.31485, "grad_norm": 0.09442399442195892, "learning_rate": 4.0689387739705e-05, "loss": 0.0396, "step": 63970 }, { "epoch": 0.3149, "grad_norm": 0.07843463867902756, "learning_rate": 4.0686169229365665e-05, "loss": 0.0392, "step": 63980 }, { "epoch": 0.31495, "grad_norm": 0.089840367436409, "learning_rate": 4.068295029017236e-05, "loss": 0.0391, "step": 63990 }, { "epoch": 0.315, "grad_norm": 0.08948429673910141, "learning_rate": 4.0679730922213096e-05, "loss": 0.0398, "step": 64000 }, { "epoch": 0.31505, "grad_norm": 0.08338318765163422, "learning_rate": 4.067651112557587e-05, "loss": 0.0381, "step": 64010 }, { "epoch": 0.3151, "grad_norm": 0.12214217334985733, "learning_rate": 4.0673290900348726e-05, "loss": 0.0402, "step": 64020 }, { "epoch": 0.31515, "grad_norm": 0.0753837451338768, "learning_rate": 4.0670070246619694e-05, "loss": 0.0362, "step": 64030 }, { "epoch": 0.3152, "grad_norm": 0.09924887120723724, "learning_rate": 4.066684916447682e-05, "loss": 0.0396, "step": 64040 }, { "epoch": 0.31525, "grad_norm": 0.11461825668811798, "learning_rate": 4.066362765400819e-05, "loss": 0.0408, "step": 64050 }, { "epoch": 0.3153, "grad_norm": 0.11488715559244156, "learning_rate": 4.066040571530185e-05, "loss": 0.0398, "step": 64060 }, { "epoch": 0.31535, "grad_norm": 0.11786787211894989, "learning_rate": 4.0657183348445917e-05, "loss": 0.0383, "step": 64070 }, { "epoch": 0.3154, "grad_norm": 0.10072103142738342, "learning_rate": 4.065396055352846e-05, "loss": 0.0386, "step": 64080 }, { "epoch": 0.31545, "grad_norm": 0.10096098482608795, "learning_rate": 4.065073733063761e-05, "loss": 0.0366, "step": 64090 }, { "epoch": 0.3155, "grad_norm": 0.08004684001207352, "learning_rate": 4.064751367986148e-05, "loss": 0.0386, "step": 64100 }, { "epoch": 0.31555, "grad_norm": 0.10719065368175507, "learning_rate": 4.06442896012882e-05, "loss": 0.0375, "step": 64110 }, { "epoch": 0.3156, "grad_norm": 0.11431484669446945, "learning_rate": 4.064106509500592e-05, "loss": 0.0381, "step": 64120 }, { "epoch": 0.31565, "grad_norm": 0.09725914150476456, "learning_rate": 4.06378401611028e-05, "loss": 0.0382, "step": 64130 }, { "epoch": 0.3157, "grad_norm": 0.09252629429101944, "learning_rate": 4.0634614799667004e-05, "loss": 0.0374, "step": 64140 }, { "epoch": 0.31575, "grad_norm": 0.13072961568832397, "learning_rate": 4.0631389010786716e-05, "loss": 0.0384, "step": 64150 }, { "epoch": 0.3158, "grad_norm": 0.10663977265357971, "learning_rate": 4.0628162794550116e-05, "loss": 0.0385, "step": 64160 }, { "epoch": 0.31585, "grad_norm": 0.09481567144393921, "learning_rate": 4.0624936151045426e-05, "loss": 0.0389, "step": 64170 }, { "epoch": 0.3159, "grad_norm": 0.1195489764213562, "learning_rate": 4.062170908036085e-05, "loss": 0.0365, "step": 64180 }, { "epoch": 0.31595, "grad_norm": 0.1233643889427185, "learning_rate": 4.061848158258461e-05, "loss": 0.0379, "step": 64190 }, { "epoch": 0.316, "grad_norm": 0.09934791922569275, "learning_rate": 4.0615253657804955e-05, "loss": 0.0367, "step": 64200 }, { "epoch": 0.31605, "grad_norm": 0.09175673127174377, "learning_rate": 4.0612025306110137e-05, "loss": 0.0385, "step": 64210 }, { "epoch": 0.3161, "grad_norm": 0.09873286634683609, "learning_rate": 4.06087965275884e-05, "loss": 0.0395, "step": 64220 }, { "epoch": 0.31615, "grad_norm": 0.07790535688400269, "learning_rate": 4.060556732232804e-05, "loss": 0.0381, "step": 64230 }, { "epoch": 0.3162, "grad_norm": 0.10746274143457413, "learning_rate": 4.0602337690417325e-05, "loss": 0.0368, "step": 64240 }, { "epoch": 0.31625, "grad_norm": 0.0758485198020935, "learning_rate": 4.0599107631944565e-05, "loss": 0.0364, "step": 64250 }, { "epoch": 0.3163, "grad_norm": 0.08690139651298523, "learning_rate": 4.059587714699806e-05, "loss": 0.037, "step": 64260 }, { "epoch": 0.31635, "grad_norm": 0.0894489660859108, "learning_rate": 4.0592646235666136e-05, "loss": 0.0386, "step": 64270 }, { "epoch": 0.3164, "grad_norm": 0.10154829174280167, "learning_rate": 4.0589414898037124e-05, "loss": 0.0377, "step": 64280 }, { "epoch": 0.31645, "grad_norm": 0.09624433517456055, "learning_rate": 4.0586183134199355e-05, "loss": 0.0385, "step": 64290 }, { "epoch": 0.3165, "grad_norm": 0.09763739258050919, "learning_rate": 4.0582950944241204e-05, "loss": 0.0376, "step": 64300 }, { "epoch": 0.31655, "grad_norm": 0.11039437353610992, "learning_rate": 4.057971832825103e-05, "loss": 0.0402, "step": 64310 }, { "epoch": 0.3166, "grad_norm": 0.09372076392173767, "learning_rate": 4.05764852863172e-05, "loss": 0.0405, "step": 64320 }, { "epoch": 0.31665, "grad_norm": 0.08872032910585403, "learning_rate": 4.057325181852812e-05, "loss": 0.0383, "step": 64330 }, { "epoch": 0.3167, "grad_norm": 0.11525247991085052, "learning_rate": 4.057001792497218e-05, "loss": 0.0399, "step": 64340 }, { "epoch": 0.31675, "grad_norm": 0.08598694950342178, "learning_rate": 4.0566783605737804e-05, "loss": 0.0379, "step": 64350 }, { "epoch": 0.3168, "grad_norm": 0.08638902008533478, "learning_rate": 4.0563548860913415e-05, "loss": 0.037, "step": 64360 }, { "epoch": 0.31685, "grad_norm": 0.09434332698583603, "learning_rate": 4.056031369058745e-05, "loss": 0.0367, "step": 64370 }, { "epoch": 0.3169, "grad_norm": 0.10789995640516281, "learning_rate": 4.055707809484834e-05, "loss": 0.0361, "step": 64380 }, { "epoch": 0.31695, "grad_norm": 0.09658708423376083, "learning_rate": 4.055384207378457e-05, "loss": 0.0364, "step": 64390 }, { "epoch": 0.317, "grad_norm": 0.10439179092645645, "learning_rate": 4.05506056274846e-05, "loss": 0.0376, "step": 64400 }, { "epoch": 0.31705, "grad_norm": 0.11971355229616165, "learning_rate": 4.054736875603692e-05, "loss": 0.036, "step": 64410 }, { "epoch": 0.3171, "grad_norm": 0.09868525713682175, "learning_rate": 4.0544131459530006e-05, "loss": 0.0366, "step": 64420 }, { "epoch": 0.31715, "grad_norm": 0.0924951508641243, "learning_rate": 4.0540893738052385e-05, "loss": 0.0378, "step": 64430 }, { "epoch": 0.3172, "grad_norm": 0.11395670473575592, "learning_rate": 4.053765559169257e-05, "loss": 0.0382, "step": 64440 }, { "epoch": 0.31725, "grad_norm": 0.09016165137290955, "learning_rate": 4.053441702053908e-05, "loss": 0.0371, "step": 64450 }, { "epoch": 0.3173, "grad_norm": 0.09708821773529053, "learning_rate": 4.053117802468047e-05, "loss": 0.0369, "step": 64460 }, { "epoch": 0.31735, "grad_norm": 0.08717033267021179, "learning_rate": 4.05279386042053e-05, "loss": 0.0361, "step": 64470 }, { "epoch": 0.3174, "grad_norm": 0.1037134975194931, "learning_rate": 4.05246987592021e-05, "loss": 0.0377, "step": 64480 }, { "epoch": 0.31745, "grad_norm": 0.09291183948516846, "learning_rate": 4.052145848975948e-05, "loss": 0.0362, "step": 64490 }, { "epoch": 0.3175, "grad_norm": 0.1136283352971077, "learning_rate": 4.051821779596601e-05, "loss": 0.0365, "step": 64500 }, { "epoch": 0.31755, "grad_norm": 0.09991339594125748, "learning_rate": 4.05149766779103e-05, "loss": 0.0406, "step": 64510 }, { "epoch": 0.3176, "grad_norm": 0.09657027572393417, "learning_rate": 4.051173513568096e-05, "loss": 0.0377, "step": 64520 }, { "epoch": 0.31765, "grad_norm": 0.11914631724357605, "learning_rate": 4.0508493169366604e-05, "loss": 0.0397, "step": 64530 }, { "epoch": 0.3177, "grad_norm": 0.11885827034711838, "learning_rate": 4.050525077905587e-05, "loss": 0.0397, "step": 64540 }, { "epoch": 0.31775, "grad_norm": 0.09189107269048691, "learning_rate": 4.050200796483741e-05, "loss": 0.0365, "step": 64550 }, { "epoch": 0.3178, "grad_norm": 0.09286625683307648, "learning_rate": 4.049876472679987e-05, "loss": 0.0384, "step": 64560 }, { "epoch": 0.31785, "grad_norm": 0.1071949303150177, "learning_rate": 4.0495521065031926e-05, "loss": 0.0399, "step": 64570 }, { "epoch": 0.3179, "grad_norm": 0.08723582327365875, "learning_rate": 4.049227697962226e-05, "loss": 0.0394, "step": 64580 }, { "epoch": 0.31795, "grad_norm": 0.1001315489411354, "learning_rate": 4.048903247065956e-05, "loss": 0.0359, "step": 64590 }, { "epoch": 0.318, "grad_norm": 0.09844236075878143, "learning_rate": 4.048578753823253e-05, "loss": 0.0369, "step": 64600 }, { "epoch": 0.31805, "grad_norm": 0.10941297560930252, "learning_rate": 4.048254218242989e-05, "loss": 0.0378, "step": 64610 }, { "epoch": 0.3181, "grad_norm": 0.10665342956781387, "learning_rate": 4.047929640334036e-05, "loss": 0.0381, "step": 64620 }, { "epoch": 0.31815, "grad_norm": 0.0897960439324379, "learning_rate": 4.047605020105268e-05, "loss": 0.0377, "step": 64630 }, { "epoch": 0.3182, "grad_norm": 0.07692000269889832, "learning_rate": 4.04728035756556e-05, "loss": 0.0372, "step": 64640 }, { "epoch": 0.31825, "grad_norm": 0.09068204462528229, "learning_rate": 4.04695565272379e-05, "loss": 0.0369, "step": 64650 }, { "epoch": 0.3183, "grad_norm": 0.09132274240255356, "learning_rate": 4.046630905588832e-05, "loss": 0.0376, "step": 64660 }, { "epoch": 0.31835, "grad_norm": 0.10166731476783752, "learning_rate": 4.046306116169567e-05, "loss": 0.0387, "step": 64670 }, { "epoch": 0.3184, "grad_norm": 0.10080720484256744, "learning_rate": 4.0459812844748724e-05, "loss": 0.0388, "step": 64680 }, { "epoch": 0.31845, "grad_norm": 0.09786974638700485, "learning_rate": 4.045656410513631e-05, "loss": 0.0371, "step": 64690 }, { "epoch": 0.3185, "grad_norm": 0.12344319373369217, "learning_rate": 4.045331494294724e-05, "loss": 0.0365, "step": 64700 }, { "epoch": 0.31855, "grad_norm": 0.10499534755945206, "learning_rate": 4.045006535827035e-05, "loss": 0.0372, "step": 64710 }, { "epoch": 0.3186, "grad_norm": 0.1006593182682991, "learning_rate": 4.044681535119447e-05, "loss": 0.04, "step": 64720 }, { "epoch": 0.31865, "grad_norm": 0.10956310480833054, "learning_rate": 4.044356492180847e-05, "loss": 0.0373, "step": 64730 }, { "epoch": 0.3187, "grad_norm": 0.10918961465358734, "learning_rate": 4.0440314070201194e-05, "loss": 0.0372, "step": 64740 }, { "epoch": 0.31875, "grad_norm": 0.09064657986164093, "learning_rate": 4.0437062796461545e-05, "loss": 0.037, "step": 64750 }, { "epoch": 0.3188, "grad_norm": 0.08278244733810425, "learning_rate": 4.043381110067839e-05, "loss": 0.0379, "step": 64760 }, { "epoch": 0.31885, "grad_norm": 0.08891545236110687, "learning_rate": 4.043055898294064e-05, "loss": 0.0382, "step": 64770 }, { "epoch": 0.3189, "grad_norm": 0.1029762253165245, "learning_rate": 4.042730644333721e-05, "loss": 0.0368, "step": 64780 }, { "epoch": 0.31895, "grad_norm": 0.12306119501590729, "learning_rate": 4.042405348195701e-05, "loss": 0.0376, "step": 64790 }, { "epoch": 0.319, "grad_norm": 0.12125366926193237, "learning_rate": 4.042080009888899e-05, "loss": 0.0368, "step": 64800 }, { "epoch": 0.31905, "grad_norm": 0.12127557396888733, "learning_rate": 4.0417546294222074e-05, "loss": 0.0396, "step": 64810 }, { "epoch": 0.3191, "grad_norm": 0.13900983333587646, "learning_rate": 4.041429206804525e-05, "loss": 0.0374, "step": 64820 }, { "epoch": 0.31915, "grad_norm": 0.12804409861564636, "learning_rate": 4.041103742044746e-05, "loss": 0.0385, "step": 64830 }, { "epoch": 0.3192, "grad_norm": 0.11414000391960144, "learning_rate": 4.040778235151771e-05, "loss": 0.0372, "step": 64840 }, { "epoch": 0.31925, "grad_norm": 0.11278680711984634, "learning_rate": 4.040452686134497e-05, "loss": 0.0368, "step": 64850 }, { "epoch": 0.3193, "grad_norm": 0.11015470325946808, "learning_rate": 4.0401270950018256e-05, "loss": 0.037, "step": 64860 }, { "epoch": 0.31935, "grad_norm": 0.12102688103914261, "learning_rate": 4.0398014617626576e-05, "loss": 0.0377, "step": 64870 }, { "epoch": 0.3194, "grad_norm": 0.10942655801773071, "learning_rate": 4.039475786425896e-05, "loss": 0.0374, "step": 64880 }, { "epoch": 0.31945, "grad_norm": 0.1215989887714386, "learning_rate": 4.039150069000445e-05, "loss": 0.0372, "step": 64890 }, { "epoch": 0.3195, "grad_norm": 0.13428235054016113, "learning_rate": 4.03882430949521e-05, "loss": 0.0379, "step": 64900 }, { "epoch": 0.31955, "grad_norm": 0.09986699372529984, "learning_rate": 4.038498507919096e-05, "loss": 0.0369, "step": 64910 }, { "epoch": 0.3196, "grad_norm": 0.12329811602830887, "learning_rate": 4.03817266428101e-05, "loss": 0.0374, "step": 64920 }, { "epoch": 0.31965, "grad_norm": 0.1049598753452301, "learning_rate": 4.037846778589862e-05, "loss": 0.0372, "step": 64930 }, { "epoch": 0.3197, "grad_norm": 0.10766629129648209, "learning_rate": 4.037520850854561e-05, "loss": 0.0378, "step": 64940 }, { "epoch": 0.31975, "grad_norm": 0.130364328622818, "learning_rate": 4.0371948810840175e-05, "loss": 0.0373, "step": 64950 }, { "epoch": 0.3198, "grad_norm": 0.12801463901996613, "learning_rate": 4.036868869287144e-05, "loss": 0.0362, "step": 64960 }, { "epoch": 0.31985, "grad_norm": 0.10812928527593613, "learning_rate": 4.036542815472851e-05, "loss": 0.0366, "step": 64970 }, { "epoch": 0.3199, "grad_norm": 0.09909074008464813, "learning_rate": 4.0362167196500566e-05, "loss": 0.0367, "step": 64980 }, { "epoch": 0.31995, "grad_norm": 0.0915384441614151, "learning_rate": 4.035890581827673e-05, "loss": 0.0356, "step": 64990 }, { "epoch": 0.32, "grad_norm": 0.09630433470010757, "learning_rate": 4.035564402014619e-05, "loss": 0.0373, "step": 65000 }, { "epoch": 0.32005, "grad_norm": 0.10088414698839188, "learning_rate": 4.03523818021981e-05, "loss": 0.0372, "step": 65010 }, { "epoch": 0.3201, "grad_norm": 0.08994408696889877, "learning_rate": 4.034911916452167e-05, "loss": 0.0366, "step": 65020 }, { "epoch": 0.32015, "grad_norm": 0.10887617617845535, "learning_rate": 4.034585610720608e-05, "loss": 0.0383, "step": 65030 }, { "epoch": 0.3202, "grad_norm": 0.087371826171875, "learning_rate": 4.034259263034056e-05, "loss": 0.0378, "step": 65040 }, { "epoch": 0.32025, "grad_norm": 0.09730211645364761, "learning_rate": 4.033932873401431e-05, "loss": 0.0376, "step": 65050 }, { "epoch": 0.3203, "grad_norm": 0.10706541687250137, "learning_rate": 4.0336064418316575e-05, "loss": 0.0365, "step": 65060 }, { "epoch": 0.32035, "grad_norm": 0.09982568770647049, "learning_rate": 4.0332799683336605e-05, "loss": 0.0374, "step": 65070 }, { "epoch": 0.3204, "grad_norm": 0.08777441084384918, "learning_rate": 4.0329534529163654e-05, "loss": 0.0358, "step": 65080 }, { "epoch": 0.32045, "grad_norm": 0.10409170389175415, "learning_rate": 4.032626895588698e-05, "loss": 0.0358, "step": 65090 }, { "epoch": 0.3205, "grad_norm": 0.09802231192588806, "learning_rate": 4.032300296359588e-05, "loss": 0.0365, "step": 65100 }, { "epoch": 0.32055, "grad_norm": 0.09521409869194031, "learning_rate": 4.031973655237963e-05, "loss": 0.0402, "step": 65110 }, { "epoch": 0.3206, "grad_norm": 0.09686526656150818, "learning_rate": 4.031646972232754e-05, "loss": 0.0378, "step": 65120 }, { "epoch": 0.32065, "grad_norm": 0.1024775430560112, "learning_rate": 4.031320247352892e-05, "loss": 0.0367, "step": 65130 }, { "epoch": 0.3207, "grad_norm": 0.07837732136249542, "learning_rate": 4.03099348060731e-05, "loss": 0.0361, "step": 65140 }, { "epoch": 0.32075, "grad_norm": 0.07916653156280518, "learning_rate": 4.030666672004941e-05, "loss": 0.0387, "step": 65150 }, { "epoch": 0.3208, "grad_norm": 0.08275351673364639, "learning_rate": 4.030339821554721e-05, "loss": 0.0352, "step": 65160 }, { "epoch": 0.32085, "grad_norm": 0.10535655915737152, "learning_rate": 4.030012929265585e-05, "loss": 0.0372, "step": 65170 }, { "epoch": 0.3209, "grad_norm": 0.0935136154294014, "learning_rate": 4.0296859951464695e-05, "loss": 0.0362, "step": 65180 }, { "epoch": 0.32095, "grad_norm": 0.09714420139789581, "learning_rate": 4.0293590192063145e-05, "loss": 0.0367, "step": 65190 }, { "epoch": 0.321, "grad_norm": 0.0949711725115776, "learning_rate": 4.029032001454058e-05, "loss": 0.0398, "step": 65200 }, { "epoch": 0.32105, "grad_norm": 0.09688550233840942, "learning_rate": 4.028704941898641e-05, "loss": 0.0378, "step": 65210 }, { "epoch": 0.3211, "grad_norm": 0.10137657821178436, "learning_rate": 4.028377840549005e-05, "loss": 0.0364, "step": 65220 }, { "epoch": 0.32115, "grad_norm": 0.1072549819946289, "learning_rate": 4.028050697414094e-05, "loss": 0.0395, "step": 65230 }, { "epoch": 0.3212, "grad_norm": 0.10294804722070694, "learning_rate": 4.02772351250285e-05, "loss": 0.0393, "step": 65240 }, { "epoch": 0.32125, "grad_norm": 0.11168816685676575, "learning_rate": 4.027396285824219e-05, "loss": 0.0362, "step": 65250 }, { "epoch": 0.3213, "grad_norm": 0.08691563457250595, "learning_rate": 4.027069017387148e-05, "loss": 0.0372, "step": 65260 }, { "epoch": 0.32135, "grad_norm": 0.08215939253568649, "learning_rate": 4.026741707200584e-05, "loss": 0.0372, "step": 65270 }, { "epoch": 0.3214, "grad_norm": 0.09381280094385147, "learning_rate": 4.026414355273475e-05, "loss": 0.0374, "step": 65280 }, { "epoch": 0.32145, "grad_norm": 0.09605218470096588, "learning_rate": 4.02608696161477e-05, "loss": 0.0376, "step": 65290 }, { "epoch": 0.3215, "grad_norm": 0.09198276698589325, "learning_rate": 4.0257595262334214e-05, "loss": 0.0396, "step": 65300 }, { "epoch": 0.32155, "grad_norm": 0.10652513802051544, "learning_rate": 4.025432049138381e-05, "loss": 0.0373, "step": 65310 }, { "epoch": 0.3216, "grad_norm": 0.09276243299245834, "learning_rate": 4.0251045303386013e-05, "loss": 0.0368, "step": 65320 }, { "epoch": 0.32165, "grad_norm": 0.08338119834661484, "learning_rate": 4.024776969843037e-05, "loss": 0.0369, "step": 65330 }, { "epoch": 0.3217, "grad_norm": 0.07244552671909332, "learning_rate": 4.024449367660642e-05, "loss": 0.0366, "step": 65340 }, { "epoch": 0.32175, "grad_norm": 0.08666352927684784, "learning_rate": 4.0241217238003746e-05, "loss": 0.0377, "step": 65350 }, { "epoch": 0.3218, "grad_norm": 0.08412259817123413, "learning_rate": 4.023794038271193e-05, "loss": 0.0356, "step": 65360 }, { "epoch": 0.32185, "grad_norm": 0.10810157656669617, "learning_rate": 4.0234663110820534e-05, "loss": 0.037, "step": 65370 }, { "epoch": 0.3219, "grad_norm": 0.08649826794862747, "learning_rate": 4.0231385422419174e-05, "loss": 0.0369, "step": 65380 }, { "epoch": 0.32195, "grad_norm": 0.09026186913251877, "learning_rate": 4.0228107317597464e-05, "loss": 0.0353, "step": 65390 }, { "epoch": 0.322, "grad_norm": 0.08323583006858826, "learning_rate": 4.0224828796445014e-05, "loss": 0.036, "step": 65400 }, { "epoch": 0.32205, "grad_norm": 0.08100961893796921, "learning_rate": 4.022154985905147e-05, "loss": 0.036, "step": 65410 }, { "epoch": 0.3221, "grad_norm": 0.08172884583473206, "learning_rate": 4.021827050550647e-05, "loss": 0.0371, "step": 65420 }, { "epoch": 0.32215, "grad_norm": 0.09196305274963379, "learning_rate": 4.021499073589967e-05, "loss": 0.0369, "step": 65430 }, { "epoch": 0.3222, "grad_norm": 0.10631976276636124, "learning_rate": 4.021171055032074e-05, "loss": 0.0363, "step": 65440 }, { "epoch": 0.32225, "grad_norm": 0.08054377138614655, "learning_rate": 4.0208429948859364e-05, "loss": 0.0366, "step": 65450 }, { "epoch": 0.3223, "grad_norm": 0.09362701326608658, "learning_rate": 4.020514893160522e-05, "loss": 0.0372, "step": 65460 }, { "epoch": 0.32235, "grad_norm": 0.08705075085163116, "learning_rate": 4.020186749864802e-05, "loss": 0.0354, "step": 65470 }, { "epoch": 0.3224, "grad_norm": 0.11856447160243988, "learning_rate": 4.019858565007747e-05, "loss": 0.0364, "step": 65480 }, { "epoch": 0.32245, "grad_norm": 0.12213760614395142, "learning_rate": 4.0195303385983305e-05, "loss": 0.0359, "step": 65490 }, { "epoch": 0.3225, "grad_norm": 0.10082264244556427, "learning_rate": 4.0192020706455245e-05, "loss": 0.0373, "step": 65500 }, { "epoch": 0.32255, "grad_norm": 0.09375528991222382, "learning_rate": 4.018873761158305e-05, "loss": 0.0359, "step": 65510 }, { "epoch": 0.3226, "grad_norm": 0.09272167086601257, "learning_rate": 4.018545410145648e-05, "loss": 0.0361, "step": 65520 }, { "epoch": 0.32265, "grad_norm": 0.10465720295906067, "learning_rate": 4.018217017616529e-05, "loss": 0.0379, "step": 65530 }, { "epoch": 0.3227, "grad_norm": 0.12206566333770752, "learning_rate": 4.017888583579928e-05, "loss": 0.0377, "step": 65540 }, { "epoch": 0.32275, "grad_norm": 0.1141703650355339, "learning_rate": 4.017560108044823e-05, "loss": 0.0366, "step": 65550 }, { "epoch": 0.3228, "grad_norm": 0.10635753720998764, "learning_rate": 4.017231591020194e-05, "loss": 0.0386, "step": 65560 }, { "epoch": 0.32285, "grad_norm": 0.11412060260772705, "learning_rate": 4.016903032515025e-05, "loss": 0.0388, "step": 65570 }, { "epoch": 0.3229, "grad_norm": 0.09945717453956604, "learning_rate": 4.016574432538296e-05, "loss": 0.0366, "step": 65580 }, { "epoch": 0.32295, "grad_norm": 0.11253881454467773, "learning_rate": 4.0162457910989914e-05, "loss": 0.0374, "step": 65590 }, { "epoch": 0.323, "grad_norm": 0.09585455060005188, "learning_rate": 4.015917108206097e-05, "loss": 0.0393, "step": 65600 }, { "epoch": 0.32305, "grad_norm": 0.08950097113847733, "learning_rate": 4.015588383868598e-05, "loss": 0.0381, "step": 65610 }, { "epoch": 0.3231, "grad_norm": 0.09589240700006485, "learning_rate": 4.015259618095483e-05, "loss": 0.0385, "step": 65620 }, { "epoch": 0.32315, "grad_norm": 0.10538499057292938, "learning_rate": 4.014930810895738e-05, "loss": 0.0388, "step": 65630 }, { "epoch": 0.3232, "grad_norm": 0.11811120808124542, "learning_rate": 4.014601962278354e-05, "loss": 0.0388, "step": 65640 }, { "epoch": 0.32325, "grad_norm": 0.1357794851064682, "learning_rate": 4.014273072252322e-05, "loss": 0.0386, "step": 65650 }, { "epoch": 0.3233, "grad_norm": 0.12051770836114883, "learning_rate": 4.0139441408266326e-05, "loss": 0.0388, "step": 65660 }, { "epoch": 0.32335, "grad_norm": 0.12320932745933533, "learning_rate": 4.0136151680102794e-05, "loss": 0.0372, "step": 65670 }, { "epoch": 0.3234, "grad_norm": 0.10769601166248322, "learning_rate": 4.013286153812256e-05, "loss": 0.0383, "step": 65680 }, { "epoch": 0.32345, "grad_norm": 0.10716307908296585, "learning_rate": 4.012957098241558e-05, "loss": 0.0394, "step": 65690 }, { "epoch": 0.3235, "grad_norm": 0.10903730243444443, "learning_rate": 4.0126280013071806e-05, "loss": 0.0389, "step": 65700 }, { "epoch": 0.32355, "grad_norm": 0.11699903011322021, "learning_rate": 4.0122988630181226e-05, "loss": 0.038, "step": 65710 }, { "epoch": 0.3236, "grad_norm": 0.09041581302881241, "learning_rate": 4.011969683383381e-05, "loss": 0.0401, "step": 65720 }, { "epoch": 0.32365, "grad_norm": 0.0852922797203064, "learning_rate": 4.0116404624119576e-05, "loss": 0.0372, "step": 65730 }, { "epoch": 0.3237, "grad_norm": 0.10173098742961884, "learning_rate": 4.0113112001128505e-05, "loss": 0.0376, "step": 65740 }, { "epoch": 0.32375, "grad_norm": 0.11126632243394852, "learning_rate": 4.010981896495064e-05, "loss": 0.0364, "step": 65750 }, { "epoch": 0.3238, "grad_norm": 0.09293050318956375, "learning_rate": 4.0106525515676e-05, "loss": 0.0362, "step": 65760 }, { "epoch": 0.32385, "grad_norm": 0.08062175661325455, "learning_rate": 4.010323165339462e-05, "loss": 0.0357, "step": 65770 }, { "epoch": 0.3239, "grad_norm": 0.07949662208557129, "learning_rate": 4.009993737819656e-05, "loss": 0.0362, "step": 65780 }, { "epoch": 0.32395, "grad_norm": 0.08509853482246399, "learning_rate": 4.009664269017189e-05, "loss": 0.0376, "step": 65790 }, { "epoch": 0.324, "grad_norm": 0.09357540309429169, "learning_rate": 4.009334758941068e-05, "loss": 0.0357, "step": 65800 }, { "epoch": 0.32405, "grad_norm": 0.08863934129476547, "learning_rate": 4.009005207600302e-05, "loss": 0.0367, "step": 65810 }, { "epoch": 0.3241, "grad_norm": 0.11384686082601547, "learning_rate": 4.008675615003901e-05, "loss": 0.0372, "step": 65820 }, { "epoch": 0.32415, "grad_norm": 0.08933109045028687, "learning_rate": 4.008345981160874e-05, "loss": 0.0356, "step": 65830 }, { "epoch": 0.3242, "grad_norm": 0.09838990122079849, "learning_rate": 4.008016306080236e-05, "loss": 0.0371, "step": 65840 }, { "epoch": 0.32425, "grad_norm": 0.09985664486885071, "learning_rate": 4.007686589770997e-05, "loss": 0.0356, "step": 65850 }, { "epoch": 0.3243, "grad_norm": 0.10246867686510086, "learning_rate": 4.0073568322421744e-05, "loss": 0.0398, "step": 65860 }, { "epoch": 0.32435, "grad_norm": 0.09228799492120743, "learning_rate": 4.007027033502782e-05, "loss": 0.0375, "step": 65870 }, { "epoch": 0.3244, "grad_norm": 0.0932326391339302, "learning_rate": 4.006697193561837e-05, "loss": 0.0364, "step": 65880 }, { "epoch": 0.32445, "grad_norm": 0.11617203056812286, "learning_rate": 4.006367312428356e-05, "loss": 0.0387, "step": 65890 }, { "epoch": 0.3245, "grad_norm": 0.0983414575457573, "learning_rate": 4.006037390111359e-05, "loss": 0.0375, "step": 65900 }, { "epoch": 0.32455, "grad_norm": 0.08532964438199997, "learning_rate": 4.005707426619866e-05, "loss": 0.0369, "step": 65910 }, { "epoch": 0.3246, "grad_norm": 0.08981236815452576, "learning_rate": 4.005377421962897e-05, "loss": 0.0367, "step": 65920 }, { "epoch": 0.32465, "grad_norm": 0.08878609538078308, "learning_rate": 4.005047376149475e-05, "loss": 0.0377, "step": 65930 }, { "epoch": 0.3247, "grad_norm": 0.11349914222955704, "learning_rate": 4.004717289188623e-05, "loss": 0.0384, "step": 65940 }, { "epoch": 0.32475, "grad_norm": 0.1028319001197815, "learning_rate": 4.004387161089365e-05, "loss": 0.0383, "step": 65950 }, { "epoch": 0.3248, "grad_norm": 0.0931456908583641, "learning_rate": 4.0040569918607285e-05, "loss": 0.0371, "step": 65960 }, { "epoch": 0.32485, "grad_norm": 0.10074255615472794, "learning_rate": 4.003726781511738e-05, "loss": 0.038, "step": 65970 }, { "epoch": 0.3249, "grad_norm": 0.10310529172420502, "learning_rate": 4.0033965300514226e-05, "loss": 0.0379, "step": 65980 }, { "epoch": 0.32495, "grad_norm": 0.08930498361587524, "learning_rate": 4.003066237488811e-05, "loss": 0.0373, "step": 65990 }, { "epoch": 0.325, "grad_norm": 0.12736685574054718, "learning_rate": 4.002735903832933e-05, "loss": 0.0419, "step": 66000 }, { "epoch": 0.32505, "grad_norm": 0.10972797870635986, "learning_rate": 4.0024055290928196e-05, "loss": 0.0367, "step": 66010 }, { "epoch": 0.3251, "grad_norm": 0.10036025196313858, "learning_rate": 4.002075113277504e-05, "loss": 0.0377, "step": 66020 }, { "epoch": 0.32515, "grad_norm": 0.09857061505317688, "learning_rate": 4.001744656396019e-05, "loss": 0.0391, "step": 66030 }, { "epoch": 0.3252, "grad_norm": 0.09945893287658691, "learning_rate": 4.001414158457399e-05, "loss": 0.0374, "step": 66040 }, { "epoch": 0.32525, "grad_norm": 0.0812176913022995, "learning_rate": 4.00108361947068e-05, "loss": 0.0372, "step": 66050 }, { "epoch": 0.3253, "grad_norm": 0.08450525254011154, "learning_rate": 4.000753039444899e-05, "loss": 0.0356, "step": 66060 }, { "epoch": 0.32535, "grad_norm": 0.09813328087329865, "learning_rate": 4.000422418389094e-05, "loss": 0.0389, "step": 66070 }, { "epoch": 0.3254, "grad_norm": 0.09411191940307617, "learning_rate": 4.000091756312302e-05, "loss": 0.0365, "step": 66080 }, { "epoch": 0.32545, "grad_norm": 0.10968726128339767, "learning_rate": 3.9997610532235665e-05, "loss": 0.0379, "step": 66090 }, { "epoch": 0.3255, "grad_norm": 0.1049523800611496, "learning_rate": 3.999430309131927e-05, "loss": 0.0364, "step": 66100 }, { "epoch": 0.32555, "grad_norm": 0.11498141288757324, "learning_rate": 3.999099524046427e-05, "loss": 0.0367, "step": 66110 }, { "epoch": 0.3256, "grad_norm": 0.11578301340341568, "learning_rate": 3.998768697976108e-05, "loss": 0.0388, "step": 66120 }, { "epoch": 0.32565, "grad_norm": 0.11545883119106293, "learning_rate": 3.998437830930016e-05, "loss": 0.0374, "step": 66130 }, { "epoch": 0.3257, "grad_norm": 0.11564458906650543, "learning_rate": 3.9981069229171965e-05, "loss": 0.0366, "step": 66140 }, { "epoch": 0.32575, "grad_norm": 0.11431770771741867, "learning_rate": 3.997775973946697e-05, "loss": 0.0395, "step": 66150 }, { "epoch": 0.3258, "grad_norm": 0.1111539676785469, "learning_rate": 3.997444984027565e-05, "loss": 0.0371, "step": 66160 }, { "epoch": 0.32585, "grad_norm": 0.08865462243556976, "learning_rate": 3.99711395316885e-05, "loss": 0.0355, "step": 66170 }, { "epoch": 0.3259, "grad_norm": 0.0973217785358429, "learning_rate": 3.9967828813796015e-05, "loss": 0.0372, "step": 66180 }, { "epoch": 0.32595, "grad_norm": 0.08040197193622589, "learning_rate": 3.9964517686688716e-05, "loss": 0.0374, "step": 66190 }, { "epoch": 0.326, "grad_norm": 0.10195370018482208, "learning_rate": 3.996120615045712e-05, "loss": 0.0377, "step": 66200 }, { "epoch": 0.32605, "grad_norm": 0.09814627468585968, "learning_rate": 3.9957894205191776e-05, "loss": 0.0378, "step": 66210 }, { "epoch": 0.3261, "grad_norm": 0.07224318385124207, "learning_rate": 3.995458185098322e-05, "loss": 0.0382, "step": 66220 }, { "epoch": 0.32615, "grad_norm": 0.09805803000926971, "learning_rate": 3.995126908792201e-05, "loss": 0.0376, "step": 66230 }, { "epoch": 0.3262, "grad_norm": 0.1111101359128952, "learning_rate": 3.9947955916098734e-05, "loss": 0.0373, "step": 66240 }, { "epoch": 0.32625, "grad_norm": 0.11266911774873734, "learning_rate": 3.9944642335603946e-05, "loss": 0.0377, "step": 66250 }, { "epoch": 0.3263, "grad_norm": 0.12269485741853714, "learning_rate": 3.994132834652825e-05, "loss": 0.039, "step": 66260 }, { "epoch": 0.32635, "grad_norm": 0.08595847338438034, "learning_rate": 3.993801394896226e-05, "loss": 0.0364, "step": 66270 }, { "epoch": 0.3264, "grad_norm": 0.08507047593593597, "learning_rate": 3.9934699142996576e-05, "loss": 0.0376, "step": 66280 }, { "epoch": 0.32645, "grad_norm": 0.09251835197210312, "learning_rate": 3.993138392872182e-05, "loss": 0.037, "step": 66290 }, { "epoch": 0.3265, "grad_norm": 0.10652598738670349, "learning_rate": 3.992806830622865e-05, "loss": 0.0398, "step": 66300 }, { "epoch": 0.32655, "grad_norm": 0.08765865862369537, "learning_rate": 3.99247522756077e-05, "loss": 0.037, "step": 66310 }, { "epoch": 0.3266, "grad_norm": 0.07849414646625519, "learning_rate": 3.992143583694962e-05, "loss": 0.0367, "step": 66320 }, { "epoch": 0.32665, "grad_norm": 0.09718433022499084, "learning_rate": 3.99181189903451e-05, "loss": 0.0385, "step": 66330 }, { "epoch": 0.3267, "grad_norm": 0.11125581711530685, "learning_rate": 3.99148017358848e-05, "loss": 0.0376, "step": 66340 }, { "epoch": 0.32675, "grad_norm": 0.09389445185661316, "learning_rate": 3.991148407365943e-05, "loss": 0.0373, "step": 66350 }, { "epoch": 0.3268, "grad_norm": 0.07964462786912918, "learning_rate": 3.990816600375969e-05, "loss": 0.0382, "step": 66360 }, { "epoch": 0.32685, "grad_norm": 0.08663036674261093, "learning_rate": 3.990484752627629e-05, "loss": 0.0372, "step": 66370 }, { "epoch": 0.3269, "grad_norm": 0.09641426801681519, "learning_rate": 3.9901528641299955e-05, "loss": 0.0385, "step": 66380 }, { "epoch": 0.32695, "grad_norm": 0.08874489367008209, "learning_rate": 3.989820934892143e-05, "loss": 0.036, "step": 66390 }, { "epoch": 0.327, "grad_norm": 0.08502322435379028, "learning_rate": 3.9894889649231455e-05, "loss": 0.0383, "step": 66400 }, { "epoch": 0.32705, "grad_norm": 0.12889431416988373, "learning_rate": 3.98915695423208e-05, "loss": 0.0377, "step": 66410 }, { "epoch": 0.3271, "grad_norm": 0.10460399091243744, "learning_rate": 3.988824902828022e-05, "loss": 0.0397, "step": 66420 }, { "epoch": 0.32715, "grad_norm": 0.1179177463054657, "learning_rate": 3.98849281072005e-05, "loss": 0.0377, "step": 66430 }, { "epoch": 0.3272, "grad_norm": 0.10219012945890427, "learning_rate": 3.988160677917245e-05, "loss": 0.0378, "step": 66440 }, { "epoch": 0.32725, "grad_norm": 0.11003606021404266, "learning_rate": 3.987828504428685e-05, "loss": 0.0378, "step": 66450 }, { "epoch": 0.3273, "grad_norm": 0.10524841398000717, "learning_rate": 3.987496290263454e-05, "loss": 0.0389, "step": 66460 }, { "epoch": 0.32735, "grad_norm": 0.09106507152318954, "learning_rate": 3.987164035430632e-05, "loss": 0.0364, "step": 66470 }, { "epoch": 0.3274, "grad_norm": 0.07917270809412003, "learning_rate": 3.986831739939305e-05, "loss": 0.0374, "step": 66480 }, { "epoch": 0.32745, "grad_norm": 0.08859952539205551, "learning_rate": 3.986499403798556e-05, "loss": 0.0366, "step": 66490 }, { "epoch": 0.3275, "grad_norm": 0.1100822240114212, "learning_rate": 3.986167027017472e-05, "loss": 0.0398, "step": 66500 }, { "epoch": 0.32755, "grad_norm": 0.10014413297176361, "learning_rate": 3.9858346096051405e-05, "loss": 0.0352, "step": 66510 }, { "epoch": 0.3276, "grad_norm": 0.08480783551931381, "learning_rate": 3.985502151570648e-05, "loss": 0.0366, "step": 66520 }, { "epoch": 0.32765, "grad_norm": 0.10877691209316254, "learning_rate": 3.9851696529230847e-05, "loss": 0.0384, "step": 66530 }, { "epoch": 0.3277, "grad_norm": 0.10374827682971954, "learning_rate": 3.984837113671541e-05, "loss": 0.037, "step": 66540 }, { "epoch": 0.32775, "grad_norm": 0.09818617254495621, "learning_rate": 3.984504533825109e-05, "loss": 0.0363, "step": 66550 }, { "epoch": 0.3278, "grad_norm": 0.1003548726439476, "learning_rate": 3.984171913392881e-05, "loss": 0.0395, "step": 66560 }, { "epoch": 0.32785, "grad_norm": 0.09324333071708679, "learning_rate": 3.9838392523839496e-05, "loss": 0.0376, "step": 66570 }, { "epoch": 0.3279, "grad_norm": 0.07978849112987518, "learning_rate": 3.983506550807411e-05, "loss": 0.0365, "step": 66580 }, { "epoch": 0.32795, "grad_norm": 0.09464648365974426, "learning_rate": 3.98317380867236e-05, "loss": 0.0357, "step": 66590 }, { "epoch": 0.328, "grad_norm": 0.08341842144727707, "learning_rate": 3.982841025987896e-05, "loss": 0.035, "step": 66600 }, { "epoch": 0.32805, "grad_norm": 0.07485797256231308, "learning_rate": 3.982508202763114e-05, "loss": 0.0348, "step": 66610 }, { "epoch": 0.3281, "grad_norm": 0.09925837814807892, "learning_rate": 3.982175339007115e-05, "loss": 0.0356, "step": 66620 }, { "epoch": 0.32815, "grad_norm": 0.10949580371379852, "learning_rate": 3.981842434728999e-05, "loss": 0.0373, "step": 66630 }, { "epoch": 0.3282, "grad_norm": 0.08591295033693314, "learning_rate": 3.981509489937868e-05, "loss": 0.0375, "step": 66640 }, { "epoch": 0.32825, "grad_norm": 0.10751231759786606, "learning_rate": 3.981176504642823e-05, "loss": 0.0384, "step": 66650 }, { "epoch": 0.3283, "grad_norm": 0.10512945801019669, "learning_rate": 3.980843478852969e-05, "loss": 0.0417, "step": 66660 }, { "epoch": 0.32835, "grad_norm": 0.09879367053508759, "learning_rate": 3.980510412577412e-05, "loss": 0.0364, "step": 66670 }, { "epoch": 0.3284, "grad_norm": 0.10238294303417206, "learning_rate": 3.980177305825256e-05, "loss": 0.036, "step": 66680 }, { "epoch": 0.32845, "grad_norm": 0.09616518765687943, "learning_rate": 3.979844158605608e-05, "loss": 0.0369, "step": 66690 }, { "epoch": 0.3285, "grad_norm": 0.08645855635404587, "learning_rate": 3.979510970927577e-05, "loss": 0.0374, "step": 66700 }, { "epoch": 0.32855, "grad_norm": 0.11920953541994095, "learning_rate": 3.979177742800271e-05, "loss": 0.0393, "step": 66710 }, { "epoch": 0.3286, "grad_norm": 0.11412829160690308, "learning_rate": 3.978844474232802e-05, "loss": 0.042, "step": 66720 }, { "epoch": 0.32865, "grad_norm": 0.10297642648220062, "learning_rate": 3.978511165234281e-05, "loss": 0.0403, "step": 66730 }, { "epoch": 0.3287, "grad_norm": 0.07714718580245972, "learning_rate": 3.97817781581382e-05, "loss": 0.0387, "step": 66740 }, { "epoch": 0.32875, "grad_norm": 0.12992724776268005, "learning_rate": 3.977844425980532e-05, "loss": 0.0427, "step": 66750 }, { "epoch": 0.3288, "grad_norm": 0.12231067568063736, "learning_rate": 3.977510995743533e-05, "loss": 0.041, "step": 66760 }, { "epoch": 0.32885, "grad_norm": 0.09669715911149979, "learning_rate": 3.977177525111939e-05, "loss": 0.0374, "step": 66770 }, { "epoch": 0.3289, "grad_norm": 0.12006594240665436, "learning_rate": 3.976844014094866e-05, "loss": 0.0393, "step": 66780 }, { "epoch": 0.32895, "grad_norm": 0.1673240065574646, "learning_rate": 3.9765104627014324e-05, "loss": 0.0383, "step": 66790 }, { "epoch": 0.329, "grad_norm": 0.10522366315126419, "learning_rate": 3.976176870940758e-05, "loss": 0.0381, "step": 66800 }, { "epoch": 0.32905, "grad_norm": 0.07761319726705551, "learning_rate": 3.975843238821961e-05, "loss": 0.0377, "step": 66810 }, { "epoch": 0.3291, "grad_norm": 0.08520624786615372, "learning_rate": 3.9755095663541665e-05, "loss": 0.0405, "step": 66820 }, { "epoch": 0.32915, "grad_norm": 0.09112141281366348, "learning_rate": 3.9751758535464935e-05, "loss": 0.038, "step": 66830 }, { "epoch": 0.3292, "grad_norm": 0.08923102170228958, "learning_rate": 3.9748421004080664e-05, "loss": 0.0367, "step": 66840 }, { "epoch": 0.32925, "grad_norm": 0.0915558859705925, "learning_rate": 3.9745083069480114e-05, "loss": 0.0372, "step": 66850 }, { "epoch": 0.3293, "grad_norm": 0.10059615224599838, "learning_rate": 3.974174473175453e-05, "loss": 0.0374, "step": 66860 }, { "epoch": 0.32935, "grad_norm": 0.10322733968496323, "learning_rate": 3.973840599099518e-05, "loss": 0.0377, "step": 66870 }, { "epoch": 0.3294, "grad_norm": 0.08271342515945435, "learning_rate": 3.973506684729335e-05, "loss": 0.0384, "step": 66880 }, { "epoch": 0.32945, "grad_norm": 0.10284489393234253, "learning_rate": 3.973172730074033e-05, "loss": 0.0375, "step": 66890 }, { "epoch": 0.3295, "grad_norm": 0.13020607829093933, "learning_rate": 3.972838735142741e-05, "loss": 0.0397, "step": 66900 }, { "epoch": 0.32955, "grad_norm": 0.10525212436914444, "learning_rate": 3.9725046999445924e-05, "loss": 0.0369, "step": 66910 }, { "epoch": 0.3296, "grad_norm": 0.0913553461432457, "learning_rate": 3.972170624488718e-05, "loss": 0.0373, "step": 66920 }, { "epoch": 0.32965, "grad_norm": 0.10340815782546997, "learning_rate": 3.971836508784252e-05, "loss": 0.0382, "step": 66930 }, { "epoch": 0.3297, "grad_norm": 0.11414355039596558, "learning_rate": 3.971502352840328e-05, "loss": 0.0373, "step": 66940 }, { "epoch": 0.32975, "grad_norm": 0.0910719633102417, "learning_rate": 3.971168156666084e-05, "loss": 0.0373, "step": 66950 }, { "epoch": 0.3298, "grad_norm": 0.10548502206802368, "learning_rate": 3.9708339202706545e-05, "loss": 0.0413, "step": 66960 }, { "epoch": 0.32985, "grad_norm": 0.10033611953258514, "learning_rate": 3.970499643663178e-05, "loss": 0.0366, "step": 66970 }, { "epoch": 0.3299, "grad_norm": 0.10521698743104935, "learning_rate": 3.970165326852794e-05, "loss": 0.0374, "step": 66980 }, { "epoch": 0.32995, "grad_norm": 0.08181363344192505, "learning_rate": 3.969830969848642e-05, "loss": 0.0377, "step": 66990 }, { "epoch": 0.33, "grad_norm": 0.08763346076011658, "learning_rate": 3.9694965726598634e-05, "loss": 0.0365, "step": 67000 }, { "epoch": 0.33005, "grad_norm": 0.07829241454601288, "learning_rate": 3.969162135295601e-05, "loss": 0.0365, "step": 67010 }, { "epoch": 0.3301, "grad_norm": 0.09384607523679733, "learning_rate": 3.968827657764997e-05, "loss": 0.0351, "step": 67020 }, { "epoch": 0.33015, "grad_norm": 0.09687677025794983, "learning_rate": 3.9684931400771974e-05, "loss": 0.0357, "step": 67030 }, { "epoch": 0.3302, "grad_norm": 0.09295211732387543, "learning_rate": 3.968158582241347e-05, "loss": 0.0358, "step": 67040 }, { "epoch": 0.33025, "grad_norm": 0.07670729607343674, "learning_rate": 3.967823984266592e-05, "loss": 0.0366, "step": 67050 }, { "epoch": 0.3303, "grad_norm": 0.07735750824213028, "learning_rate": 3.967489346162081e-05, "loss": 0.0387, "step": 67060 }, { "epoch": 0.33035, "grad_norm": 0.0834031030535698, "learning_rate": 3.967154667936963e-05, "loss": 0.036, "step": 67070 }, { "epoch": 0.3304, "grad_norm": 0.08556082099676132, "learning_rate": 3.966819949600387e-05, "loss": 0.0362, "step": 67080 }, { "epoch": 0.33045, "grad_norm": 0.07740224897861481, "learning_rate": 3.9664851911615055e-05, "loss": 0.0369, "step": 67090 }, { "epoch": 0.3305, "grad_norm": 0.08293741941452026, "learning_rate": 3.966150392629469e-05, "loss": 0.0357, "step": 67100 }, { "epoch": 0.33055, "grad_norm": 0.09485205262899399, "learning_rate": 3.965815554013431e-05, "loss": 0.0365, "step": 67110 }, { "epoch": 0.3306, "grad_norm": 0.07694607973098755, "learning_rate": 3.965480675322547e-05, "loss": 0.0367, "step": 67120 }, { "epoch": 0.33065, "grad_norm": 0.10260697454214096, "learning_rate": 3.965145756565972e-05, "loss": 0.0373, "step": 67130 }, { "epoch": 0.3307, "grad_norm": 0.09534306079149246, "learning_rate": 3.964810797752863e-05, "loss": 0.0366, "step": 67140 }, { "epoch": 0.33075, "grad_norm": 0.09196845442056656, "learning_rate": 3.9644757988923766e-05, "loss": 0.0364, "step": 67150 }, { "epoch": 0.3308, "grad_norm": 0.08703939616680145, "learning_rate": 3.9641407599936715e-05, "loss": 0.0367, "step": 67160 }, { "epoch": 0.33085, "grad_norm": 0.09351355582475662, "learning_rate": 3.9638056810659085e-05, "loss": 0.038, "step": 67170 }, { "epoch": 0.3309, "grad_norm": 0.09776463359594345, "learning_rate": 3.963470562118248e-05, "loss": 0.0378, "step": 67180 }, { "epoch": 0.33095, "grad_norm": 0.12001438438892365, "learning_rate": 3.9631354031598526e-05, "loss": 0.0382, "step": 67190 }, { "epoch": 0.331, "grad_norm": 0.10254254937171936, "learning_rate": 3.962800204199885e-05, "loss": 0.0385, "step": 67200 }, { "epoch": 0.33105, "grad_norm": 0.09995129704475403, "learning_rate": 3.962464965247509e-05, "loss": 0.0381, "step": 67210 }, { "epoch": 0.3311, "grad_norm": 0.08643165230751038, "learning_rate": 3.96212968631189e-05, "loss": 0.0379, "step": 67220 }, { "epoch": 0.33115, "grad_norm": 0.10039006918668747, "learning_rate": 3.961794367402195e-05, "loss": 0.0371, "step": 67230 }, { "epoch": 0.3312, "grad_norm": 0.0936351865530014, "learning_rate": 3.9614590085275914e-05, "loss": 0.039, "step": 67240 }, { "epoch": 0.33125, "grad_norm": 0.09664653241634369, "learning_rate": 3.961123609697247e-05, "loss": 0.0379, "step": 67250 }, { "epoch": 0.3313, "grad_norm": 0.08333840221166611, "learning_rate": 3.960788170920332e-05, "loss": 0.0379, "step": 67260 }, { "epoch": 0.33135, "grad_norm": 0.08787233382463455, "learning_rate": 3.960452692206018e-05, "loss": 0.0378, "step": 67270 }, { "epoch": 0.3314, "grad_norm": 0.09200208634138107, "learning_rate": 3.9601171735634756e-05, "loss": 0.0382, "step": 67280 }, { "epoch": 0.33145, "grad_norm": 0.08844379335641861, "learning_rate": 3.959781615001878e-05, "loss": 0.0381, "step": 67290 }, { "epoch": 0.3315, "grad_norm": 0.09280557930469513, "learning_rate": 3.9594460165303995e-05, "loss": 0.0372, "step": 67300 }, { "epoch": 0.33155, "grad_norm": 0.07958632707595825, "learning_rate": 3.959110378158216e-05, "loss": 0.0376, "step": 67310 }, { "epoch": 0.3316, "grad_norm": 0.09166911244392395, "learning_rate": 3.958774699894502e-05, "loss": 0.0384, "step": 67320 }, { "epoch": 0.33165, "grad_norm": 0.08712979406118393, "learning_rate": 3.9584389817484355e-05, "loss": 0.0369, "step": 67330 }, { "epoch": 0.3317, "grad_norm": 0.09324368089437485, "learning_rate": 3.958103223729196e-05, "loss": 0.0378, "step": 67340 }, { "epoch": 0.33175, "grad_norm": 0.07274219393730164, "learning_rate": 3.9577674258459616e-05, "loss": 0.0363, "step": 67350 }, { "epoch": 0.3318, "grad_norm": 0.09377812594175339, "learning_rate": 3.957431588107914e-05, "loss": 0.0414, "step": 67360 }, { "epoch": 0.33185, "grad_norm": 0.08584894984960556, "learning_rate": 3.957095710524233e-05, "loss": 0.0374, "step": 67370 }, { "epoch": 0.3319, "grad_norm": 0.10434996336698532, "learning_rate": 3.956759793104105e-05, "loss": 0.0376, "step": 67380 }, { "epoch": 0.33195, "grad_norm": 0.10280624777078629, "learning_rate": 3.95642383585671e-05, "loss": 0.0368, "step": 67390 }, { "epoch": 0.332, "grad_norm": 0.08698102086782455, "learning_rate": 3.956087838791235e-05, "loss": 0.0376, "step": 67400 }, { "epoch": 0.33205, "grad_norm": 0.0837353840470314, "learning_rate": 3.9557518019168645e-05, "loss": 0.0372, "step": 67410 }, { "epoch": 0.3321, "grad_norm": 0.08556614071130753, "learning_rate": 3.955415725242787e-05, "loss": 0.0375, "step": 67420 }, { "epoch": 0.33215, "grad_norm": 0.10648448020219803, "learning_rate": 3.955079608778191e-05, "loss": 0.0368, "step": 67430 }, { "epoch": 0.3322, "grad_norm": 0.09454721957445145, "learning_rate": 3.9547434525322644e-05, "loss": 0.0374, "step": 67440 }, { "epoch": 0.33225, "grad_norm": 0.09219378978013992, "learning_rate": 3.954407256514199e-05, "loss": 0.0364, "step": 67450 }, { "epoch": 0.3323, "grad_norm": 0.08805567026138306, "learning_rate": 3.954071020733185e-05, "loss": 0.0372, "step": 67460 }, { "epoch": 0.33235, "grad_norm": 0.08742700517177582, "learning_rate": 3.953734745198416e-05, "loss": 0.0369, "step": 67470 }, { "epoch": 0.3324, "grad_norm": 0.12774163484573364, "learning_rate": 3.953398429919085e-05, "loss": 0.0384, "step": 67480 }, { "epoch": 0.33245, "grad_norm": 0.10126247256994247, "learning_rate": 3.953062074904388e-05, "loss": 0.0364, "step": 67490 }, { "epoch": 0.3325, "grad_norm": 0.0901947095990181, "learning_rate": 3.952725680163518e-05, "loss": 0.0371, "step": 67500 }, { "epoch": 0.33255, "grad_norm": 0.12030135095119476, "learning_rate": 3.952389245705674e-05, "loss": 0.0381, "step": 67510 }, { "epoch": 0.3326, "grad_norm": 0.10265327990055084, "learning_rate": 3.952052771540055e-05, "loss": 0.0374, "step": 67520 }, { "epoch": 0.33265, "grad_norm": 0.08405707776546478, "learning_rate": 3.951716257675858e-05, "loss": 0.0364, "step": 67530 }, { "epoch": 0.3327, "grad_norm": 0.10454193502664566, "learning_rate": 3.951379704122283e-05, "loss": 0.0404, "step": 67540 }, { "epoch": 0.33275, "grad_norm": 0.14861930906772614, "learning_rate": 3.951043110888533e-05, "loss": 0.0378, "step": 67550 }, { "epoch": 0.3328, "grad_norm": 0.10401389747858047, "learning_rate": 3.9507064779838096e-05, "loss": 0.0386, "step": 67560 }, { "epoch": 0.33285, "grad_norm": 0.10092278569936752, "learning_rate": 3.950369805417316e-05, "loss": 0.0396, "step": 67570 }, { "epoch": 0.3329, "grad_norm": 0.09623225033283234, "learning_rate": 3.9500330931982567e-05, "loss": 0.0381, "step": 67580 }, { "epoch": 0.33295, "grad_norm": 0.10712605714797974, "learning_rate": 3.949696341335838e-05, "loss": 0.0381, "step": 67590 }, { "epoch": 0.333, "grad_norm": 0.10331765562295914, "learning_rate": 3.9493595498392645e-05, "loss": 0.0383, "step": 67600 }, { "epoch": 0.33305, "grad_norm": 0.09916549921035767, "learning_rate": 3.949022718717747e-05, "loss": 0.0371, "step": 67610 }, { "epoch": 0.3331, "grad_norm": 0.08550255000591278, "learning_rate": 3.948685847980491e-05, "loss": 0.0376, "step": 67620 }, { "epoch": 0.33315, "grad_norm": 0.08405419439077377, "learning_rate": 3.94834893763671e-05, "loss": 0.0385, "step": 67630 }, { "epoch": 0.3332, "grad_norm": 0.12460258603096008, "learning_rate": 3.948011987695612e-05, "loss": 0.0383, "step": 67640 }, { "epoch": 0.33325, "grad_norm": 0.09573324024677277, "learning_rate": 3.9476749981664106e-05, "loss": 0.037, "step": 67650 }, { "epoch": 0.3333, "grad_norm": 0.08493708819150925, "learning_rate": 3.947337969058319e-05, "loss": 0.037, "step": 67660 }, { "epoch": 0.33335, "grad_norm": 0.095456063747406, "learning_rate": 3.94700090038055e-05, "loss": 0.0375, "step": 67670 }, { "epoch": 0.3334, "grad_norm": 0.08934903144836426, "learning_rate": 3.946663792142321e-05, "loss": 0.0372, "step": 67680 }, { "epoch": 0.33345, "grad_norm": 0.10696164518594742, "learning_rate": 3.9463266443528466e-05, "loss": 0.0393, "step": 67690 }, { "epoch": 0.3335, "grad_norm": 0.09020401537418365, "learning_rate": 3.9459894570213454e-05, "loss": 0.0374, "step": 67700 }, { "epoch": 0.33355, "grad_norm": 0.07880588620901108, "learning_rate": 3.9456522301570364e-05, "loss": 0.0365, "step": 67710 }, { "epoch": 0.3336, "grad_norm": 0.07263457030057907, "learning_rate": 3.945314963769138e-05, "loss": 0.0381, "step": 67720 }, { "epoch": 0.33365, "grad_norm": 0.06893815100193024, "learning_rate": 3.944977657866871e-05, "loss": 0.0377, "step": 67730 }, { "epoch": 0.3337, "grad_norm": 0.1003556102514267, "learning_rate": 3.9446403124594586e-05, "loss": 0.0391, "step": 67740 }, { "epoch": 0.33375, "grad_norm": 0.0821806788444519, "learning_rate": 3.944302927556122e-05, "loss": 0.0379, "step": 67750 }, { "epoch": 0.3338, "grad_norm": 0.09835135191679001, "learning_rate": 3.943965503166086e-05, "loss": 0.0372, "step": 67760 }, { "epoch": 0.33385, "grad_norm": 0.0932500883936882, "learning_rate": 3.943628039298576e-05, "loss": 0.0384, "step": 67770 }, { "epoch": 0.3339, "grad_norm": 0.09037882089614868, "learning_rate": 3.943290535962818e-05, "loss": 0.0379, "step": 67780 }, { "epoch": 0.33395, "grad_norm": 0.07879551500082016, "learning_rate": 3.9429529931680384e-05, "loss": 0.0349, "step": 67790 }, { "epoch": 0.334, "grad_norm": 0.08956615626811981, "learning_rate": 3.9426154109234656e-05, "loss": 0.0359, "step": 67800 }, { "epoch": 0.33405, "grad_norm": 0.09419052302837372, "learning_rate": 3.94227778923833e-05, "loss": 0.0387, "step": 67810 }, { "epoch": 0.3341, "grad_norm": 0.08166324347257614, "learning_rate": 3.941940128121862e-05, "loss": 0.0366, "step": 67820 }, { "epoch": 0.33415, "grad_norm": 0.09600416570901871, "learning_rate": 3.941602427583292e-05, "loss": 0.0374, "step": 67830 }, { "epoch": 0.3342, "grad_norm": 0.10011889785528183, "learning_rate": 3.941264687631854e-05, "loss": 0.0361, "step": 67840 }, { "epoch": 0.33425, "grad_norm": 0.09820140898227692, "learning_rate": 3.94092690827678e-05, "loss": 0.0379, "step": 67850 }, { "epoch": 0.3343, "grad_norm": 0.1091008335351944, "learning_rate": 3.9405890895273053e-05, "loss": 0.0373, "step": 67860 }, { "epoch": 0.33435, "grad_norm": 0.11982117593288422, "learning_rate": 3.9402512313926674e-05, "loss": 0.039, "step": 67870 }, { "epoch": 0.3344, "grad_norm": 0.11701437830924988, "learning_rate": 3.9399133338821016e-05, "loss": 0.0367, "step": 67880 }, { "epoch": 0.33445, "grad_norm": 0.09619835019111633, "learning_rate": 3.9395753970048455e-05, "loss": 0.0369, "step": 67890 }, { "epoch": 0.3345, "grad_norm": 0.13266895711421967, "learning_rate": 3.939237420770139e-05, "loss": 0.0379, "step": 67900 }, { "epoch": 0.33455, "grad_norm": 0.10785192251205444, "learning_rate": 3.938899405187223e-05, "loss": 0.0395, "step": 67910 }, { "epoch": 0.3346, "grad_norm": 0.09762826561927795, "learning_rate": 3.938561350265336e-05, "loss": 0.0383, "step": 67920 }, { "epoch": 0.33465, "grad_norm": 0.09617773443460464, "learning_rate": 3.938223256013724e-05, "loss": 0.0367, "step": 67930 }, { "epoch": 0.3347, "grad_norm": 0.11434216797351837, "learning_rate": 3.937885122441628e-05, "loss": 0.0371, "step": 67940 }, { "epoch": 0.33475, "grad_norm": 0.09391623735427856, "learning_rate": 3.937546949558293e-05, "loss": 0.0374, "step": 67950 }, { "epoch": 0.3348, "grad_norm": 0.10000456124544144, "learning_rate": 3.937208737372964e-05, "loss": 0.0362, "step": 67960 }, { "epoch": 0.33485, "grad_norm": 0.10160036385059357, "learning_rate": 3.936870485894888e-05, "loss": 0.0375, "step": 67970 }, { "epoch": 0.3349, "grad_norm": 0.09816861897706985, "learning_rate": 3.9365321951333127e-05, "loss": 0.0377, "step": 67980 }, { "epoch": 0.33495, "grad_norm": 0.08815471082925797, "learning_rate": 3.936193865097487e-05, "loss": 0.0363, "step": 67990 }, { "epoch": 0.335, "grad_norm": 0.19023734331130981, "learning_rate": 3.935855495796661e-05, "loss": 0.0384, "step": 68000 }, { "epoch": 0.33505, "grad_norm": 0.13372373580932617, "learning_rate": 3.935517087240085e-05, "loss": 0.0392, "step": 68010 }, { "epoch": 0.3351, "grad_norm": 0.10422195494174957, "learning_rate": 3.9351786394370104e-05, "loss": 0.0369, "step": 68020 }, { "epoch": 0.33515, "grad_norm": 0.09163201600313187, "learning_rate": 3.9348401523966924e-05, "loss": 0.0365, "step": 68030 }, { "epoch": 0.3352, "grad_norm": 0.10625466704368591, "learning_rate": 3.934501626128383e-05, "loss": 0.0375, "step": 68040 }, { "epoch": 0.33525, "grad_norm": 0.11696978658437729, "learning_rate": 3.934163060641337e-05, "loss": 0.0379, "step": 68050 }, { "epoch": 0.3353, "grad_norm": 0.09626693278551102, "learning_rate": 3.933824455944813e-05, "loss": 0.0376, "step": 68060 }, { "epoch": 0.33535, "grad_norm": 0.07733645290136337, "learning_rate": 3.9334858120480666e-05, "loss": 0.038, "step": 68070 }, { "epoch": 0.3354, "grad_norm": 0.09125781804323196, "learning_rate": 3.9331471289603575e-05, "loss": 0.0354, "step": 68080 }, { "epoch": 0.33545, "grad_norm": 0.08020053058862686, "learning_rate": 3.932808406690943e-05, "loss": 0.0363, "step": 68090 }, { "epoch": 0.3355, "grad_norm": 0.13144421577453613, "learning_rate": 3.932469645249086e-05, "loss": 0.0387, "step": 68100 }, { "epoch": 0.33555, "grad_norm": 0.09514191746711731, "learning_rate": 3.932130844644045e-05, "loss": 0.0362, "step": 68110 }, { "epoch": 0.3356, "grad_norm": 0.0922185406088829, "learning_rate": 3.931792004885086e-05, "loss": 0.0388, "step": 68120 }, { "epoch": 0.33565, "grad_norm": 0.08498401194810867, "learning_rate": 3.931453125981472e-05, "loss": 0.0358, "step": 68130 }, { "epoch": 0.3357, "grad_norm": 0.07472465932369232, "learning_rate": 3.931114207942468e-05, "loss": 0.0357, "step": 68140 }, { "epoch": 0.33575, "grad_norm": 0.10260728746652603, "learning_rate": 3.930775250777338e-05, "loss": 0.0364, "step": 68150 }, { "epoch": 0.3358, "grad_norm": 0.10084313899278641, "learning_rate": 3.9304362544953506e-05, "loss": 0.0363, "step": 68160 }, { "epoch": 0.33585, "grad_norm": 0.09109058231115341, "learning_rate": 3.9300972191057726e-05, "loss": 0.0359, "step": 68170 }, { "epoch": 0.3359, "grad_norm": 0.09064600616693497, "learning_rate": 3.929758144617874e-05, "loss": 0.037, "step": 68180 }, { "epoch": 0.33595, "grad_norm": 0.10729323327541351, "learning_rate": 3.9294190310409264e-05, "loss": 0.0375, "step": 68190 }, { "epoch": 0.336, "grad_norm": 0.10835836082696915, "learning_rate": 3.929079878384198e-05, "loss": 0.0367, "step": 68200 }, { "epoch": 0.33605, "grad_norm": 0.09931907802820206, "learning_rate": 3.928740686656963e-05, "loss": 0.0358, "step": 68210 }, { "epoch": 0.3361, "grad_norm": 0.10069522261619568, "learning_rate": 3.9284014558684945e-05, "loss": 0.0373, "step": 68220 }, { "epoch": 0.33615, "grad_norm": 0.0890309140086174, "learning_rate": 3.928062186028067e-05, "loss": 0.0371, "step": 68230 }, { "epoch": 0.3362, "grad_norm": 0.10961360484361649, "learning_rate": 3.9277228771449555e-05, "loss": 0.0358, "step": 68240 }, { "epoch": 0.33625, "grad_norm": 0.08743534982204437, "learning_rate": 3.9273835292284364e-05, "loss": 0.0373, "step": 68250 }, { "epoch": 0.3363, "grad_norm": 0.09339070320129395, "learning_rate": 3.9270441422877894e-05, "loss": 0.0404, "step": 68260 }, { "epoch": 0.33635, "grad_norm": 0.11863364279270172, "learning_rate": 3.92670471633229e-05, "loss": 0.0376, "step": 68270 }, { "epoch": 0.3364, "grad_norm": 0.10614442825317383, "learning_rate": 3.9263652513712205e-05, "loss": 0.0371, "step": 68280 }, { "epoch": 0.33645, "grad_norm": 0.09804320335388184, "learning_rate": 3.926025747413861e-05, "loss": 0.038, "step": 68290 }, { "epoch": 0.3365, "grad_norm": 0.08753801882266998, "learning_rate": 3.925686204469492e-05, "loss": 0.043, "step": 68300 }, { "epoch": 0.33655, "grad_norm": 0.08489872515201569, "learning_rate": 3.9253466225474e-05, "loss": 0.0368, "step": 68310 }, { "epoch": 0.3366, "grad_norm": 0.09966583549976349, "learning_rate": 3.925007001656865e-05, "loss": 0.0376, "step": 68320 }, { "epoch": 0.33665, "grad_norm": 0.09853795915842056, "learning_rate": 3.9246673418071743e-05, "loss": 0.0392, "step": 68330 }, { "epoch": 0.3367, "grad_norm": 0.08462931960821152, "learning_rate": 3.9243276430076146e-05, "loss": 0.0368, "step": 68340 }, { "epoch": 0.33675, "grad_norm": 0.08958669006824493, "learning_rate": 3.9239879052674715e-05, "loss": 0.0361, "step": 68350 }, { "epoch": 0.3368, "grad_norm": 0.11544979363679886, "learning_rate": 3.9236481285960347e-05, "loss": 0.037, "step": 68360 }, { "epoch": 0.33685, "grad_norm": 0.08902397751808167, "learning_rate": 3.9233083130025916e-05, "loss": 0.0376, "step": 68370 }, { "epoch": 0.3369, "grad_norm": 0.08821864426136017, "learning_rate": 3.9229684584964346e-05, "loss": 0.0359, "step": 68380 }, { "epoch": 0.33695, "grad_norm": 0.07459808886051178, "learning_rate": 3.9226285650868546e-05, "loss": 0.0381, "step": 68390 }, { "epoch": 0.337, "grad_norm": 0.09547564387321472, "learning_rate": 3.9222886327831446e-05, "loss": 0.0363, "step": 68400 }, { "epoch": 0.33705, "grad_norm": 0.09121499210596085, "learning_rate": 3.921948661594597e-05, "loss": 0.0361, "step": 68410 }, { "epoch": 0.3371, "grad_norm": 0.0961172953248024, "learning_rate": 3.921608651530507e-05, "loss": 0.0378, "step": 68420 }, { "epoch": 0.33715, "grad_norm": 0.09326346218585968, "learning_rate": 3.921268602600171e-05, "loss": 0.038, "step": 68430 }, { "epoch": 0.3372, "grad_norm": 0.09295550733804703, "learning_rate": 3.9209285148128854e-05, "loss": 0.0362, "step": 68440 }, { "epoch": 0.33725, "grad_norm": 0.08030777424573898, "learning_rate": 3.920588388177948e-05, "loss": 0.035, "step": 68450 }, { "epoch": 0.3373, "grad_norm": 0.08462897688150406, "learning_rate": 3.920248222704658e-05, "loss": 0.0356, "step": 68460 }, { "epoch": 0.33735, "grad_norm": 0.08583926409482956, "learning_rate": 3.919908018402314e-05, "loss": 0.0374, "step": 68470 }, { "epoch": 0.3374, "grad_norm": 0.10724081099033356, "learning_rate": 3.91956777528022e-05, "loss": 0.0371, "step": 68480 }, { "epoch": 0.33745, "grad_norm": 0.09373513609170914, "learning_rate": 3.919227493347675e-05, "loss": 0.0356, "step": 68490 }, { "epoch": 0.3375, "grad_norm": 0.08826597779989243, "learning_rate": 3.918887172613983e-05, "loss": 0.0376, "step": 68500 }, { "epoch": 0.33755, "grad_norm": 0.10178831964731216, "learning_rate": 3.91854681308845e-05, "loss": 0.0359, "step": 68510 }, { "epoch": 0.3376, "grad_norm": 0.08865686506032944, "learning_rate": 3.91820641478038e-05, "loss": 0.035, "step": 68520 }, { "epoch": 0.33765, "grad_norm": 0.09183824807405472, "learning_rate": 3.917865977699079e-05, "loss": 0.0362, "step": 68530 }, { "epoch": 0.3377, "grad_norm": 0.097674161195755, "learning_rate": 3.917525501853855e-05, "loss": 0.0375, "step": 68540 }, { "epoch": 0.33775, "grad_norm": 0.10785385221242905, "learning_rate": 3.917184987254016e-05, "loss": 0.0371, "step": 68550 }, { "epoch": 0.3378, "grad_norm": 0.09957551211118698, "learning_rate": 3.916844433908872e-05, "loss": 0.0366, "step": 68560 }, { "epoch": 0.33785, "grad_norm": 0.09474816173315048, "learning_rate": 3.916503841827733e-05, "loss": 0.0357, "step": 68570 }, { "epoch": 0.3379, "grad_norm": 0.0891641154885292, "learning_rate": 3.916163211019912e-05, "loss": 0.0366, "step": 68580 }, { "epoch": 0.33795, "grad_norm": 0.08394962549209595, "learning_rate": 3.9158225414947206e-05, "loss": 0.037, "step": 68590 }, { "epoch": 0.338, "grad_norm": 0.09305551648139954, "learning_rate": 3.915481833261473e-05, "loss": 0.0367, "step": 68600 }, { "epoch": 0.33805, "grad_norm": 0.09584067761898041, "learning_rate": 3.9151410863294835e-05, "loss": 0.0374, "step": 68610 }, { "epoch": 0.3381, "grad_norm": 0.09269727021455765, "learning_rate": 3.914800300708068e-05, "loss": 0.0369, "step": 68620 }, { "epoch": 0.33815, "grad_norm": 0.09044751524925232, "learning_rate": 3.914459476406545e-05, "loss": 0.0388, "step": 68630 }, { "epoch": 0.3382, "grad_norm": 0.09777285903692245, "learning_rate": 3.91411861343423e-05, "loss": 0.0366, "step": 68640 }, { "epoch": 0.33825, "grad_norm": 0.10088469088077545, "learning_rate": 3.913777711800444e-05, "loss": 0.0376, "step": 68650 }, { "epoch": 0.3383, "grad_norm": 0.08271917700767517, "learning_rate": 3.9134367715145065e-05, "loss": 0.0379, "step": 68660 }, { "epoch": 0.33835, "grad_norm": 0.07132899016141891, "learning_rate": 3.9130957925857384e-05, "loss": 0.0379, "step": 68670 }, { "epoch": 0.3384, "grad_norm": 0.09053319692611694, "learning_rate": 3.912754775023463e-05, "loss": 0.0374, "step": 68680 }, { "epoch": 0.33845, "grad_norm": 0.10277561098337173, "learning_rate": 3.912413718837001e-05, "loss": 0.0368, "step": 68690 }, { "epoch": 0.3385, "grad_norm": 0.0785021260380745, "learning_rate": 3.9120726240356804e-05, "loss": 0.0374, "step": 68700 }, { "epoch": 0.33855, "grad_norm": 0.08113619685173035, "learning_rate": 3.911731490628824e-05, "loss": 0.0374, "step": 68710 }, { "epoch": 0.3386, "grad_norm": 0.11385636776685715, "learning_rate": 3.911390318625759e-05, "loss": 0.0371, "step": 68720 }, { "epoch": 0.33865, "grad_norm": 0.08692467212677002, "learning_rate": 3.911049108035813e-05, "loss": 0.037, "step": 68730 }, { "epoch": 0.3387, "grad_norm": 0.08841327577829361, "learning_rate": 3.9107078588683145e-05, "loss": 0.0371, "step": 68740 }, { "epoch": 0.33875, "grad_norm": 0.09725911170244217, "learning_rate": 3.910366571132593e-05, "loss": 0.0364, "step": 68750 }, { "epoch": 0.3388, "grad_norm": 0.0884573757648468, "learning_rate": 3.9100252448379795e-05, "loss": 0.0368, "step": 68760 }, { "epoch": 0.33885, "grad_norm": 0.08143701404333115, "learning_rate": 3.909683879993805e-05, "loss": 0.0359, "step": 68770 }, { "epoch": 0.3389, "grad_norm": 0.10178311169147491, "learning_rate": 3.9093424766094036e-05, "loss": 0.0373, "step": 68780 }, { "epoch": 0.33895, "grad_norm": 0.09373073279857635, "learning_rate": 3.909001034694108e-05, "loss": 0.0364, "step": 68790 }, { "epoch": 0.339, "grad_norm": 0.0956706777215004, "learning_rate": 3.908659554257254e-05, "loss": 0.0367, "step": 68800 }, { "epoch": 0.33905, "grad_norm": 0.1166117936372757, "learning_rate": 3.908318035308176e-05, "loss": 0.0359, "step": 68810 }, { "epoch": 0.3391, "grad_norm": 0.09963870793581009, "learning_rate": 3.9079764778562124e-05, "loss": 0.0367, "step": 68820 }, { "epoch": 0.33915, "grad_norm": 0.08719000965356827, "learning_rate": 3.9076348819107005e-05, "loss": 0.036, "step": 68830 }, { "epoch": 0.3392, "grad_norm": 0.08710069954395294, "learning_rate": 3.9072932474809805e-05, "loss": 0.0376, "step": 68840 }, { "epoch": 0.33925, "grad_norm": 0.09574553370475769, "learning_rate": 3.9069515745763914e-05, "loss": 0.0368, "step": 68850 }, { "epoch": 0.3393, "grad_norm": 0.08620046824216843, "learning_rate": 3.9066098632062744e-05, "loss": 0.0376, "step": 68860 }, { "epoch": 0.33935, "grad_norm": 0.10227379202842712, "learning_rate": 3.9062681133799726e-05, "loss": 0.0368, "step": 68870 }, { "epoch": 0.3394, "grad_norm": 0.08720734715461731, "learning_rate": 3.905926325106829e-05, "loss": 0.0381, "step": 68880 }, { "epoch": 0.33945, "grad_norm": 0.10980436950922012, "learning_rate": 3.905584498396188e-05, "loss": 0.0366, "step": 68890 }, { "epoch": 0.3395, "grad_norm": 0.09383834153413773, "learning_rate": 3.9052426332573945e-05, "loss": 0.0375, "step": 68900 }, { "epoch": 0.33955, "grad_norm": 0.08845657110214233, "learning_rate": 3.904900729699796e-05, "loss": 0.0368, "step": 68910 }, { "epoch": 0.3396, "grad_norm": 0.08985838294029236, "learning_rate": 3.904558787732738e-05, "loss": 0.0378, "step": 68920 }, { "epoch": 0.33965, "grad_norm": 0.07143373042345047, "learning_rate": 3.904216807365572e-05, "loss": 0.0378, "step": 68930 }, { "epoch": 0.3397, "grad_norm": 0.07896678149700165, "learning_rate": 3.903874788607645e-05, "loss": 0.0354, "step": 68940 }, { "epoch": 0.33975, "grad_norm": 0.10684600472450256, "learning_rate": 3.903532731468309e-05, "loss": 0.038, "step": 68950 }, { "epoch": 0.3398, "grad_norm": 0.08521618694067001, "learning_rate": 3.903190635956915e-05, "loss": 0.0366, "step": 68960 }, { "epoch": 0.33985, "grad_norm": 0.09305830299854279, "learning_rate": 3.902848502082817e-05, "loss": 0.0378, "step": 68970 }, { "epoch": 0.3399, "grad_norm": 0.09355901181697845, "learning_rate": 3.902506329855367e-05, "loss": 0.0361, "step": 68980 }, { "epoch": 0.33995, "grad_norm": 0.09021936357021332, "learning_rate": 3.902164119283922e-05, "loss": 0.0343, "step": 68990 }, { "epoch": 0.34, "grad_norm": 0.10385162383317947, "learning_rate": 3.901821870377836e-05, "loss": 0.0389, "step": 69000 }, { "epoch": 0.34005, "grad_norm": 0.09473024308681488, "learning_rate": 3.901479583146466e-05, "loss": 0.0361, "step": 69010 }, { "epoch": 0.3401, "grad_norm": 0.10816251486539841, "learning_rate": 3.9011372575991715e-05, "loss": 0.0366, "step": 69020 }, { "epoch": 0.34015, "grad_norm": 0.09471640735864639, "learning_rate": 3.900794893745311e-05, "loss": 0.0344, "step": 69030 }, { "epoch": 0.3402, "grad_norm": 0.09427373856306076, "learning_rate": 3.900452491594244e-05, "loss": 0.0369, "step": 69040 }, { "epoch": 0.34025, "grad_norm": 0.08891633152961731, "learning_rate": 3.9001100511553326e-05, "loss": 0.0368, "step": 69050 }, { "epoch": 0.3403, "grad_norm": 0.10900906473398209, "learning_rate": 3.899767572437938e-05, "loss": 0.0385, "step": 69060 }, { "epoch": 0.34035, "grad_norm": 0.11046279221773148, "learning_rate": 3.8994250554514236e-05, "loss": 0.0401, "step": 69070 }, { "epoch": 0.3404, "grad_norm": 0.08577143400907516, "learning_rate": 3.899082500205154e-05, "loss": 0.0383, "step": 69080 }, { "epoch": 0.34045, "grad_norm": 0.09973526746034622, "learning_rate": 3.8987399067084944e-05, "loss": 0.0365, "step": 69090 }, { "epoch": 0.3405, "grad_norm": 0.09257543087005615, "learning_rate": 3.898397274970811e-05, "loss": 0.0365, "step": 69100 }, { "epoch": 0.34055, "grad_norm": 0.07047468423843384, "learning_rate": 3.8980546050014724e-05, "loss": 0.0364, "step": 69110 }, { "epoch": 0.3406, "grad_norm": 0.09412377327680588, "learning_rate": 3.897711896809846e-05, "loss": 0.0352, "step": 69120 }, { "epoch": 0.34065, "grad_norm": 0.08309981226921082, "learning_rate": 3.8973691504053e-05, "loss": 0.0382, "step": 69130 }, { "epoch": 0.3407, "grad_norm": 0.09098193794488907, "learning_rate": 3.897026365797208e-05, "loss": 0.0357, "step": 69140 }, { "epoch": 0.34075, "grad_norm": 0.08813491463661194, "learning_rate": 3.896683542994939e-05, "loss": 0.039, "step": 69150 }, { "epoch": 0.3408, "grad_norm": 0.09963098168373108, "learning_rate": 3.8963406820078675e-05, "loss": 0.0391, "step": 69160 }, { "epoch": 0.34085, "grad_norm": 0.09404154866933823, "learning_rate": 3.8959977828453656e-05, "loss": 0.0375, "step": 69170 }, { "epoch": 0.3409, "grad_norm": 0.08720036596059799, "learning_rate": 3.895654845516809e-05, "loss": 0.0373, "step": 69180 }, { "epoch": 0.34095, "grad_norm": 0.10500133782625198, "learning_rate": 3.8953118700315735e-05, "loss": 0.0371, "step": 69190 }, { "epoch": 0.341, "grad_norm": 0.09468196332454681, "learning_rate": 3.894968856399035e-05, "loss": 0.0374, "step": 69200 }, { "epoch": 0.34105, "grad_norm": 0.09113126993179321, "learning_rate": 3.8946258046285724e-05, "loss": 0.0372, "step": 69210 }, { "epoch": 0.3411, "grad_norm": 0.11324943602085114, "learning_rate": 3.8942827147295645e-05, "loss": 0.0369, "step": 69220 }, { "epoch": 0.34115, "grad_norm": 0.11532092839479446, "learning_rate": 3.893939586711391e-05, "loss": 0.0366, "step": 69230 }, { "epoch": 0.3412, "grad_norm": 0.10300014168024063, "learning_rate": 3.893596420583433e-05, "loss": 0.0375, "step": 69240 }, { "epoch": 0.34125, "grad_norm": 0.09133999794721603, "learning_rate": 3.893253216355072e-05, "loss": 0.0376, "step": 69250 }, { "epoch": 0.3413, "grad_norm": 0.0787564367055893, "learning_rate": 3.892909974035691e-05, "loss": 0.038, "step": 69260 }, { "epoch": 0.34135, "grad_norm": 0.08994283527135849, "learning_rate": 3.892566693634675e-05, "loss": 0.0365, "step": 69270 }, { "epoch": 0.3414, "grad_norm": 0.09112702310085297, "learning_rate": 3.892223375161409e-05, "loss": 0.0357, "step": 69280 }, { "epoch": 0.34145, "grad_norm": 0.0852375328540802, "learning_rate": 3.891880018625279e-05, "loss": 0.0412, "step": 69290 }, { "epoch": 0.3415, "grad_norm": 0.09685932844877243, "learning_rate": 3.891536624035672e-05, "loss": 0.0396, "step": 69300 }, { "epoch": 0.34155, "grad_norm": 0.10497764497995377, "learning_rate": 3.891193191401977e-05, "loss": 0.0352, "step": 69310 }, { "epoch": 0.3416, "grad_norm": 0.09369406849145889, "learning_rate": 3.890849720733582e-05, "loss": 0.0364, "step": 69320 }, { "epoch": 0.34165, "grad_norm": 0.10982207208871841, "learning_rate": 3.8905062120398785e-05, "loss": 0.0363, "step": 69330 }, { "epoch": 0.3417, "grad_norm": 0.10184473544359207, "learning_rate": 3.890162665330258e-05, "loss": 0.0367, "step": 69340 }, { "epoch": 0.34175, "grad_norm": 0.09807814657688141, "learning_rate": 3.889819080614112e-05, "loss": 0.0376, "step": 69350 }, { "epoch": 0.3418, "grad_norm": 0.09532096236944199, "learning_rate": 3.8894754579008344e-05, "loss": 0.039, "step": 69360 }, { "epoch": 0.34185, "grad_norm": 0.08966340869665146, "learning_rate": 3.8891317971998196e-05, "loss": 0.0368, "step": 69370 }, { "epoch": 0.3419, "grad_norm": 0.07751365005970001, "learning_rate": 3.888788098520464e-05, "loss": 0.0365, "step": 69380 }, { "epoch": 0.34195, "grad_norm": 0.08647938817739487, "learning_rate": 3.8884443618721634e-05, "loss": 0.0365, "step": 69390 }, { "epoch": 0.342, "grad_norm": 0.09409962594509125, "learning_rate": 3.888100587264315e-05, "loss": 0.0366, "step": 69400 }, { "epoch": 0.34205, "grad_norm": 0.08813067525625229, "learning_rate": 3.887756774706318e-05, "loss": 0.0357, "step": 69410 }, { "epoch": 0.3421, "grad_norm": 0.07670162618160248, "learning_rate": 3.887412924207573e-05, "loss": 0.0362, "step": 69420 }, { "epoch": 0.34215, "grad_norm": 0.07312174886465073, "learning_rate": 3.887069035777479e-05, "loss": 0.0368, "step": 69430 }, { "epoch": 0.3422, "grad_norm": 0.09502144902944565, "learning_rate": 3.886725109425439e-05, "loss": 0.0368, "step": 69440 }, { "epoch": 0.34225, "grad_norm": 0.08605118095874786, "learning_rate": 3.8863811451608554e-05, "loss": 0.037, "step": 69450 }, { "epoch": 0.3423, "grad_norm": 0.08746170252561569, "learning_rate": 3.886037142993132e-05, "loss": 0.0372, "step": 69460 }, { "epoch": 0.34235, "grad_norm": 0.08239756524562836, "learning_rate": 3.885693102931675e-05, "loss": 0.0367, "step": 69470 }, { "epoch": 0.3424, "grad_norm": 0.08249850571155548, "learning_rate": 3.885349024985888e-05, "loss": 0.0363, "step": 69480 }, { "epoch": 0.34245, "grad_norm": 0.11191985756158829, "learning_rate": 3.8850049091651794e-05, "loss": 0.0378, "step": 69490 }, { "epoch": 0.3425, "grad_norm": 0.11292050033807755, "learning_rate": 3.8846607554789566e-05, "loss": 0.0406, "step": 69500 }, { "epoch": 0.34255, "grad_norm": 0.0940447673201561, "learning_rate": 3.8843165639366285e-05, "loss": 0.0382, "step": 69510 }, { "epoch": 0.3426, "grad_norm": 0.1097087562084198, "learning_rate": 3.8839723345476065e-05, "loss": 0.0376, "step": 69520 }, { "epoch": 0.34265, "grad_norm": 0.12192397564649582, "learning_rate": 3.883628067321301e-05, "loss": 0.0388, "step": 69530 }, { "epoch": 0.3427, "grad_norm": 0.10179516673088074, "learning_rate": 3.883283762267124e-05, "loss": 0.0385, "step": 69540 }, { "epoch": 0.34275, "grad_norm": 0.10461835563182831, "learning_rate": 3.882939419394488e-05, "loss": 0.0385, "step": 69550 }, { "epoch": 0.3428, "grad_norm": 0.10870037972927094, "learning_rate": 3.8825950387128074e-05, "loss": 0.0364, "step": 69560 }, { "epoch": 0.34285, "grad_norm": 0.09890930354595184, "learning_rate": 3.882250620231499e-05, "loss": 0.0381, "step": 69570 }, { "epoch": 0.3429, "grad_norm": 0.09251926839351654, "learning_rate": 3.8819061639599765e-05, "loss": 0.038, "step": 69580 }, { "epoch": 0.34295, "grad_norm": 0.09087437391281128, "learning_rate": 3.88156166990766e-05, "loss": 0.0368, "step": 69590 }, { "epoch": 0.343, "grad_norm": 0.09862448275089264, "learning_rate": 3.8812171380839655e-05, "loss": 0.0373, "step": 69600 }, { "epoch": 0.34305, "grad_norm": 0.08722679316997528, "learning_rate": 3.880872568498314e-05, "loss": 0.0367, "step": 69610 }, { "epoch": 0.3431, "grad_norm": 0.07909327000379562, "learning_rate": 3.880527961160125e-05, "loss": 0.0375, "step": 69620 }, { "epoch": 0.34315, "grad_norm": 0.07764890044927597, "learning_rate": 3.88018331607882e-05, "loss": 0.0366, "step": 69630 }, { "epoch": 0.3432, "grad_norm": 0.0877370536327362, "learning_rate": 3.879838633263822e-05, "loss": 0.0382, "step": 69640 }, { "epoch": 0.34325, "grad_norm": 0.09489338099956512, "learning_rate": 3.879493912724554e-05, "loss": 0.0365, "step": 69650 }, { "epoch": 0.3433, "grad_norm": 0.12813325226306915, "learning_rate": 3.87914915447044e-05, "loss": 0.0366, "step": 69660 }, { "epoch": 0.34335, "grad_norm": 0.10898727178573608, "learning_rate": 3.878804358510908e-05, "loss": 0.0368, "step": 69670 }, { "epoch": 0.3434, "grad_norm": 0.08357562869787216, "learning_rate": 3.878459524855381e-05, "loss": 0.0375, "step": 69680 }, { "epoch": 0.34345, "grad_norm": 0.10898575931787491, "learning_rate": 3.87811465351329e-05, "loss": 0.0364, "step": 69690 }, { "epoch": 0.3435, "grad_norm": 0.10167691111564636, "learning_rate": 3.877769744494061e-05, "loss": 0.0369, "step": 69700 }, { "epoch": 0.34355, "grad_norm": 0.1086648479104042, "learning_rate": 3.877424797807125e-05, "loss": 0.0363, "step": 69710 }, { "epoch": 0.3436, "grad_norm": 0.09656655043363571, "learning_rate": 3.877079813461912e-05, "loss": 0.0359, "step": 69720 }, { "epoch": 0.34365, "grad_norm": 0.1367729753255844, "learning_rate": 3.8767347914678556e-05, "loss": 0.0363, "step": 69730 }, { "epoch": 0.3437, "grad_norm": 0.11441062390804291, "learning_rate": 3.8763897318343864e-05, "loss": 0.0387, "step": 69740 }, { "epoch": 0.34375, "grad_norm": 0.09865527600049973, "learning_rate": 3.876044634570939e-05, "loss": 0.039, "step": 69750 }, { "epoch": 0.3438, "grad_norm": 0.09027211368083954, "learning_rate": 3.875699499686949e-05, "loss": 0.0348, "step": 69760 }, { "epoch": 0.34385, "grad_norm": 0.08302388340234756, "learning_rate": 3.87535432719185e-05, "loss": 0.0364, "step": 69770 }, { "epoch": 0.3439, "grad_norm": 0.12716825306415558, "learning_rate": 3.8750091170950814e-05, "loss": 0.0383, "step": 69780 }, { "epoch": 0.34395, "grad_norm": 0.10267481207847595, "learning_rate": 3.87466386940608e-05, "loss": 0.0358, "step": 69790 }, { "epoch": 0.344, "grad_norm": 0.10430262982845306, "learning_rate": 3.874318584134285e-05, "loss": 0.0368, "step": 69800 }, { "epoch": 0.34405, "grad_norm": 0.10200727730989456, "learning_rate": 3.873973261289136e-05, "loss": 0.0379, "step": 69810 }, { "epoch": 0.3441, "grad_norm": 0.09183640778064728, "learning_rate": 3.8736279008800747e-05, "loss": 0.038, "step": 69820 }, { "epoch": 0.34415, "grad_norm": 0.08767364174127579, "learning_rate": 3.8732825029165416e-05, "loss": 0.0364, "step": 69830 }, { "epoch": 0.3442, "grad_norm": 0.10637889057397842, "learning_rate": 3.872937067407981e-05, "loss": 0.0363, "step": 69840 }, { "epoch": 0.34425, "grad_norm": 0.11616005748510361, "learning_rate": 3.872591594363837e-05, "loss": 0.0379, "step": 69850 }, { "epoch": 0.3443, "grad_norm": 0.09851089864969254, "learning_rate": 3.872246083793555e-05, "loss": 0.0364, "step": 69860 }, { "epoch": 0.34435, "grad_norm": 0.0919380709528923, "learning_rate": 3.8719005357065804e-05, "loss": 0.0351, "step": 69870 }, { "epoch": 0.3444, "grad_norm": 0.0871756300330162, "learning_rate": 3.8715549501123604e-05, "loss": 0.0393, "step": 69880 }, { "epoch": 0.34445, "grad_norm": 0.10097227245569229, "learning_rate": 3.871209327020343e-05, "loss": 0.0365, "step": 69890 }, { "epoch": 0.3445, "grad_norm": 0.1065862774848938, "learning_rate": 3.870863666439978e-05, "loss": 0.0379, "step": 69900 }, { "epoch": 0.34455, "grad_norm": 0.08766256272792816, "learning_rate": 3.870517968380715e-05, "loss": 0.0378, "step": 69910 }, { "epoch": 0.3446, "grad_norm": 0.07498373836278915, "learning_rate": 3.8701722328520064e-05, "loss": 0.0366, "step": 69920 }, { "epoch": 0.34465, "grad_norm": 0.0901317447423935, "learning_rate": 3.869826459863303e-05, "loss": 0.0373, "step": 69930 }, { "epoch": 0.3447, "grad_norm": 0.09419835358858109, "learning_rate": 3.86948064942406e-05, "loss": 0.0386, "step": 69940 }, { "epoch": 0.34475, "grad_norm": 0.0863010361790657, "learning_rate": 3.869134801543729e-05, "loss": 0.0373, "step": 69950 }, { "epoch": 0.3448, "grad_norm": 0.08560824394226074, "learning_rate": 3.868788916231767e-05, "loss": 0.0379, "step": 69960 }, { "epoch": 0.34485, "grad_norm": 0.09437470883131027, "learning_rate": 3.868442993497631e-05, "loss": 0.0368, "step": 69970 }, { "epoch": 0.3449, "grad_norm": 0.10649093985557556, "learning_rate": 3.8680970333507774e-05, "loss": 0.0392, "step": 69980 }, { "epoch": 0.34495, "grad_norm": 0.10162971168756485, "learning_rate": 3.867751035800665e-05, "loss": 0.0369, "step": 69990 }, { "epoch": 0.345, "grad_norm": 0.09044211357831955, "learning_rate": 3.8674050008567534e-05, "loss": 0.0373, "step": 70000 }, { "epoch": 0.34505, "grad_norm": 0.10452600568532944, "learning_rate": 3.8670589285285025e-05, "loss": 0.0367, "step": 70010 }, { "epoch": 0.3451, "grad_norm": 0.09597063809633255, "learning_rate": 3.8667128188253734e-05, "loss": 0.037, "step": 70020 }, { "epoch": 0.34515, "grad_norm": 0.08981821686029434, "learning_rate": 3.8663666717568306e-05, "loss": 0.0365, "step": 70030 }, { "epoch": 0.3452, "grad_norm": 0.07237569987773895, "learning_rate": 3.8660204873323356e-05, "loss": 0.0354, "step": 70040 }, { "epoch": 0.34525, "grad_norm": 0.08156467229127884, "learning_rate": 3.865674265561353e-05, "loss": 0.0349, "step": 70050 }, { "epoch": 0.3453, "grad_norm": 0.08877598494291306, "learning_rate": 3.8653280064533506e-05, "loss": 0.036, "step": 70060 }, { "epoch": 0.34535, "grad_norm": 0.08256573975086212, "learning_rate": 3.864981710017792e-05, "loss": 0.0354, "step": 70070 }, { "epoch": 0.3454, "grad_norm": 0.07784508168697357, "learning_rate": 3.864635376264148e-05, "loss": 0.0367, "step": 70080 }, { "epoch": 0.34545, "grad_norm": 0.08144626766443253, "learning_rate": 3.864289005201883e-05, "loss": 0.0372, "step": 70090 }, { "epoch": 0.3455, "grad_norm": 0.08631348609924316, "learning_rate": 3.863942596840471e-05, "loss": 0.0363, "step": 70100 }, { "epoch": 0.34555, "grad_norm": 0.09809038043022156, "learning_rate": 3.8635961511893805e-05, "loss": 0.0366, "step": 70110 }, { "epoch": 0.3456, "grad_norm": 0.07484088093042374, "learning_rate": 3.8632496682580825e-05, "loss": 0.0373, "step": 70120 }, { "epoch": 0.34565, "grad_norm": 0.08303824067115784, "learning_rate": 3.862903148056052e-05, "loss": 0.0364, "step": 70130 }, { "epoch": 0.3457, "grad_norm": 0.07585174590349197, "learning_rate": 3.8625565905927605e-05, "loss": 0.0377, "step": 70140 }, { "epoch": 0.34575, "grad_norm": 0.10004965960979462, "learning_rate": 3.8622099958776835e-05, "loss": 0.036, "step": 70150 }, { "epoch": 0.3458, "grad_norm": 0.09887797385454178, "learning_rate": 3.861863363920298e-05, "loss": 0.0363, "step": 70160 }, { "epoch": 0.34585, "grad_norm": 0.09083712846040726, "learning_rate": 3.8615166947300794e-05, "loss": 0.0375, "step": 70170 }, { "epoch": 0.3459, "grad_norm": 0.09992069005966187, "learning_rate": 3.861169988316506e-05, "loss": 0.0369, "step": 70180 }, { "epoch": 0.34595, "grad_norm": 0.1044621393084526, "learning_rate": 3.860823244689056e-05, "loss": 0.0387, "step": 70190 }, { "epoch": 0.346, "grad_norm": 0.10451257228851318, "learning_rate": 3.86047646385721e-05, "loss": 0.0379, "step": 70200 }, { "epoch": 0.34605, "grad_norm": 0.1116936206817627, "learning_rate": 3.860129645830449e-05, "loss": 0.0395, "step": 70210 }, { "epoch": 0.3461, "grad_norm": 0.10671953111886978, "learning_rate": 3.859782790618254e-05, "loss": 0.0371, "step": 70220 }, { "epoch": 0.34615, "grad_norm": 0.09391382336616516, "learning_rate": 3.859435898230108e-05, "loss": 0.0366, "step": 70230 }, { "epoch": 0.3462, "grad_norm": 0.10093817859888077, "learning_rate": 3.859088968675496e-05, "loss": 0.0382, "step": 70240 }, { "epoch": 0.34625, "grad_norm": 0.09761158376932144, "learning_rate": 3.858742001963902e-05, "loss": 0.0376, "step": 70250 }, { "epoch": 0.3463, "grad_norm": 0.08548399806022644, "learning_rate": 3.858394998104812e-05, "loss": 0.0368, "step": 70260 }, { "epoch": 0.34635, "grad_norm": 0.12837553024291992, "learning_rate": 3.858047957107713e-05, "loss": 0.039, "step": 70270 }, { "epoch": 0.3464, "grad_norm": 0.09966098517179489, "learning_rate": 3.857700878982092e-05, "loss": 0.0386, "step": 70280 }, { "epoch": 0.34645, "grad_norm": 0.09091837704181671, "learning_rate": 3.857353763737441e-05, "loss": 0.0366, "step": 70290 }, { "epoch": 0.3465, "grad_norm": 0.0735870823264122, "learning_rate": 3.857006611383247e-05, "loss": 0.0369, "step": 70300 }, { "epoch": 0.34655, "grad_norm": 0.08942056447267532, "learning_rate": 3.856659421929003e-05, "loss": 0.0364, "step": 70310 }, { "epoch": 0.3466, "grad_norm": 0.09885366261005402, "learning_rate": 3.856312195384199e-05, "loss": 0.0368, "step": 70320 }, { "epoch": 0.34665, "grad_norm": 0.09341978281736374, "learning_rate": 3.855964931758329e-05, "loss": 0.0366, "step": 70330 }, { "epoch": 0.3467, "grad_norm": 0.09163305908441544, "learning_rate": 3.855617631060887e-05, "loss": 0.0377, "step": 70340 }, { "epoch": 0.34675, "grad_norm": 0.12091311812400818, "learning_rate": 3.8552702933013696e-05, "loss": 0.0361, "step": 70350 }, { "epoch": 0.3468, "grad_norm": 0.07424789667129517, "learning_rate": 3.854922918489271e-05, "loss": 0.036, "step": 70360 }, { "epoch": 0.34685, "grad_norm": 0.08816181868314743, "learning_rate": 3.8545755066340884e-05, "loss": 0.036, "step": 70370 }, { "epoch": 0.3469, "grad_norm": 0.09558789432048798, "learning_rate": 3.85422805774532e-05, "loss": 0.0361, "step": 70380 }, { "epoch": 0.34695, "grad_norm": 0.09851736575365067, "learning_rate": 3.853880571832466e-05, "loss": 0.0399, "step": 70390 }, { "epoch": 0.347, "grad_norm": 0.09676993638277054, "learning_rate": 3.853533048905026e-05, "loss": 0.0374, "step": 70400 }, { "epoch": 0.34705, "grad_norm": 0.1089349016547203, "learning_rate": 3.8531854889725e-05, "loss": 0.0364, "step": 70410 }, { "epoch": 0.3471, "grad_norm": 0.1273079216480255, "learning_rate": 3.852837892044392e-05, "loss": 0.0378, "step": 70420 }, { "epoch": 0.34715, "grad_norm": 0.11691662669181824, "learning_rate": 3.8524902581302035e-05, "loss": 0.0388, "step": 70430 }, { "epoch": 0.3472, "grad_norm": 0.09867251664400101, "learning_rate": 3.85214258723944e-05, "loss": 0.0391, "step": 70440 }, { "epoch": 0.34725, "grad_norm": 0.1259452998638153, "learning_rate": 3.851794879381606e-05, "loss": 0.0402, "step": 70450 }, { "epoch": 0.3473, "grad_norm": 0.09362509101629257, "learning_rate": 3.851447134566208e-05, "loss": 0.038, "step": 70460 }, { "epoch": 0.34735, "grad_norm": 0.08717688918113708, "learning_rate": 3.851099352802753e-05, "loss": 0.0379, "step": 70470 }, { "epoch": 0.3474, "grad_norm": 0.08880654722452164, "learning_rate": 3.8507515341007494e-05, "loss": 0.0397, "step": 70480 }, { "epoch": 0.34745, "grad_norm": 0.11957395076751709, "learning_rate": 3.8504036784697056e-05, "loss": 0.039, "step": 70490 }, { "epoch": 0.3475, "grad_norm": 0.09604611247777939, "learning_rate": 3.850055785919133e-05, "loss": 0.0376, "step": 70500 }, { "epoch": 0.34755, "grad_norm": 0.07652097940444946, "learning_rate": 3.8497078564585434e-05, "loss": 0.037, "step": 70510 }, { "epoch": 0.3476, "grad_norm": 0.09587130695581436, "learning_rate": 3.849359890097446e-05, "loss": 0.0408, "step": 70520 }, { "epoch": 0.34765, "grad_norm": 0.08766037970781326, "learning_rate": 3.849011886845357e-05, "loss": 0.0369, "step": 70530 }, { "epoch": 0.3477, "grad_norm": 0.09857996553182602, "learning_rate": 3.8486638467117904e-05, "loss": 0.0369, "step": 70540 }, { "epoch": 0.34775, "grad_norm": 0.0813198834657669, "learning_rate": 3.848315769706261e-05, "loss": 0.0356, "step": 70550 }, { "epoch": 0.3478, "grad_norm": 0.08867272734642029, "learning_rate": 3.847967655838284e-05, "loss": 0.0373, "step": 70560 }, { "epoch": 0.34785, "grad_norm": 0.09040363878011703, "learning_rate": 3.847619505117379e-05, "loss": 0.0387, "step": 70570 }, { "epoch": 0.3479, "grad_norm": 0.09970448166131973, "learning_rate": 3.8472713175530615e-05, "loss": 0.0369, "step": 70580 }, { "epoch": 0.34795, "grad_norm": 0.0712309181690216, "learning_rate": 3.8469230931548536e-05, "loss": 0.0367, "step": 70590 }, { "epoch": 0.348, "grad_norm": 0.1036083847284317, "learning_rate": 3.846574831932274e-05, "loss": 0.037, "step": 70600 }, { "epoch": 0.34805, "grad_norm": 0.08488594740629196, "learning_rate": 3.846226533894844e-05, "loss": 0.0361, "step": 70610 }, { "epoch": 0.3481, "grad_norm": 0.0831713154911995, "learning_rate": 3.8458781990520864e-05, "loss": 0.0361, "step": 70620 }, { "epoch": 0.34815, "grad_norm": 0.08196142315864563, "learning_rate": 3.8455298274135246e-05, "loss": 0.0365, "step": 70630 }, { "epoch": 0.3482, "grad_norm": 0.09471315890550613, "learning_rate": 3.8451814189886825e-05, "loss": 0.0378, "step": 70640 }, { "epoch": 0.34825, "grad_norm": 0.08712545782327652, "learning_rate": 3.8448329737870867e-05, "loss": 0.0371, "step": 70650 }, { "epoch": 0.3483, "grad_norm": 0.08729460090398788, "learning_rate": 3.844484491818261e-05, "loss": 0.0395, "step": 70660 }, { "epoch": 0.34835, "grad_norm": 0.0900210365653038, "learning_rate": 3.8441359730917357e-05, "loss": 0.0362, "step": 70670 }, { "epoch": 0.3484, "grad_norm": 0.09874347597360611, "learning_rate": 3.8437874176170373e-05, "loss": 0.0373, "step": 70680 }, { "epoch": 0.34845, "grad_norm": 0.11273519694805145, "learning_rate": 3.843438825403697e-05, "loss": 0.0391, "step": 70690 }, { "epoch": 0.3485, "grad_norm": 0.09694775193929672, "learning_rate": 3.8430901964612424e-05, "loss": 0.0359, "step": 70700 }, { "epoch": 0.34855, "grad_norm": 0.09418394416570663, "learning_rate": 3.842741530799207e-05, "loss": 0.037, "step": 70710 }, { "epoch": 0.3486, "grad_norm": 0.09429769963026047, "learning_rate": 3.842392828427123e-05, "loss": 0.0369, "step": 70720 }, { "epoch": 0.34865, "grad_norm": 0.0869753360748291, "learning_rate": 3.8420440893545226e-05, "loss": 0.0392, "step": 70730 }, { "epoch": 0.3487, "grad_norm": 0.086611308157444, "learning_rate": 3.8416953135909404e-05, "loss": 0.0363, "step": 70740 }, { "epoch": 0.34875, "grad_norm": 0.08878615498542786, "learning_rate": 3.8413465011459134e-05, "loss": 0.0363, "step": 70750 }, { "epoch": 0.3488, "grad_norm": 0.08758122473955154, "learning_rate": 3.840997652028978e-05, "loss": 0.0382, "step": 70760 }, { "epoch": 0.34885, "grad_norm": 0.0743941143155098, "learning_rate": 3.8406487662496686e-05, "loss": 0.0357, "step": 70770 }, { "epoch": 0.3489, "grad_norm": 0.08691000938415527, "learning_rate": 3.840299843817527e-05, "loss": 0.0364, "step": 70780 }, { "epoch": 0.34895, "grad_norm": 0.0793720930814743, "learning_rate": 3.8399508847420894e-05, "loss": 0.0367, "step": 70790 }, { "epoch": 0.349, "grad_norm": 0.09652606397867203, "learning_rate": 3.8396018890329e-05, "loss": 0.039, "step": 70800 }, { "epoch": 0.34905, "grad_norm": 0.08942319452762604, "learning_rate": 3.839252856699497e-05, "loss": 0.0362, "step": 70810 }, { "epoch": 0.3491, "grad_norm": 0.09653190523386002, "learning_rate": 3.838903787751425e-05, "loss": 0.0381, "step": 70820 }, { "epoch": 0.34915, "grad_norm": 0.10607529431581497, "learning_rate": 3.838554682198225e-05, "loss": 0.0376, "step": 70830 }, { "epoch": 0.3492, "grad_norm": 0.1037462130188942, "learning_rate": 3.838205540049445e-05, "loss": 0.0394, "step": 70840 }, { "epoch": 0.34925, "grad_norm": 0.10811994969844818, "learning_rate": 3.8378563613146264e-05, "loss": 0.037, "step": 70850 }, { "epoch": 0.3493, "grad_norm": 0.08250528573989868, "learning_rate": 3.837507146003319e-05, "loss": 0.038, "step": 70860 }, { "epoch": 0.34935, "grad_norm": 0.07942062616348267, "learning_rate": 3.837157894125067e-05, "loss": 0.0378, "step": 70870 }, { "epoch": 0.3494, "grad_norm": 0.09695404022932053, "learning_rate": 3.836808605689421e-05, "loss": 0.0358, "step": 70880 }, { "epoch": 0.34945, "grad_norm": 0.08719910681247711, "learning_rate": 3.836459280705931e-05, "loss": 0.0371, "step": 70890 }, { "epoch": 0.3495, "grad_norm": 0.10089585185050964, "learning_rate": 3.8361099191841455e-05, "loss": 0.0372, "step": 70900 }, { "epoch": 0.34955, "grad_norm": 0.0855243131518364, "learning_rate": 3.8357605211336164e-05, "loss": 0.037, "step": 70910 }, { "epoch": 0.3496, "grad_norm": 0.09252673387527466, "learning_rate": 3.835411086563897e-05, "loss": 0.0386, "step": 70920 }, { "epoch": 0.34965, "grad_norm": 0.08542817831039429, "learning_rate": 3.8350616154845404e-05, "loss": 0.0377, "step": 70930 }, { "epoch": 0.3497, "grad_norm": 0.1125243529677391, "learning_rate": 3.8347121079051005e-05, "loss": 0.0364, "step": 70940 }, { "epoch": 0.34975, "grad_norm": 0.08776703476905823, "learning_rate": 3.8343625638351336e-05, "loss": 0.0403, "step": 70950 }, { "epoch": 0.3498, "grad_norm": 0.08082476258277893, "learning_rate": 3.834012983284194e-05, "loss": 0.0372, "step": 70960 }, { "epoch": 0.34985, "grad_norm": 0.08643815666437149, "learning_rate": 3.833663366261842e-05, "loss": 0.0367, "step": 70970 }, { "epoch": 0.3499, "grad_norm": 0.08524535596370697, "learning_rate": 3.8333137127776345e-05, "loss": 0.0359, "step": 70980 }, { "epoch": 0.34995, "grad_norm": 0.09941412508487701, "learning_rate": 3.83296402284113e-05, "loss": 0.037, "step": 70990 }, { "epoch": 0.35, "grad_norm": 0.09036926180124283, "learning_rate": 3.832614296461891e-05, "loss": 0.0367, "step": 71000 }, { "epoch": 0.35005, "grad_norm": 0.10156653821468353, "learning_rate": 3.832264533649477e-05, "loss": 0.0362, "step": 71010 }, { "epoch": 0.3501, "grad_norm": 0.09857209026813507, "learning_rate": 3.8319147344134523e-05, "loss": 0.0385, "step": 71020 }, { "epoch": 0.35015, "grad_norm": 0.0842348113656044, "learning_rate": 3.831564898763378e-05, "loss": 0.0357, "step": 71030 }, { "epoch": 0.3502, "grad_norm": 0.0971158817410469, "learning_rate": 3.831215026708819e-05, "loss": 0.0371, "step": 71040 }, { "epoch": 0.35025, "grad_norm": 0.08740291744470596, "learning_rate": 3.830865118259342e-05, "loss": 0.0365, "step": 71050 }, { "epoch": 0.3503, "grad_norm": 0.09465018659830093, "learning_rate": 3.8305151734245136e-05, "loss": 0.0367, "step": 71060 }, { "epoch": 0.35035, "grad_norm": 0.09075374156236649, "learning_rate": 3.8301651922139e-05, "loss": 0.0368, "step": 71070 }, { "epoch": 0.3504, "grad_norm": 0.09688808023929596, "learning_rate": 3.829815174637069e-05, "loss": 0.0378, "step": 71080 }, { "epoch": 0.35045, "grad_norm": 0.09882215410470963, "learning_rate": 3.829465120703592e-05, "loss": 0.0376, "step": 71090 }, { "epoch": 0.3505, "grad_norm": 0.08773043751716614, "learning_rate": 3.829115030423036e-05, "loss": 0.0377, "step": 71100 }, { "epoch": 0.35055, "grad_norm": 0.09482423216104507, "learning_rate": 3.828764903804975e-05, "loss": 0.0401, "step": 71110 }, { "epoch": 0.3506, "grad_norm": 0.08655227720737457, "learning_rate": 3.828414740858981e-05, "loss": 0.0369, "step": 71120 }, { "epoch": 0.35065, "grad_norm": 0.09375464171171188, "learning_rate": 3.828064541594627e-05, "loss": 0.0379, "step": 71130 }, { "epoch": 0.3507, "grad_norm": 0.08788594603538513, "learning_rate": 3.827714306021488e-05, "loss": 0.041, "step": 71140 }, { "epoch": 0.35075, "grad_norm": 0.09271853417158127, "learning_rate": 3.8273640341491384e-05, "loss": 0.0387, "step": 71150 }, { "epoch": 0.3508, "grad_norm": 0.10908250510692596, "learning_rate": 3.8270137259871544e-05, "loss": 0.0394, "step": 71160 }, { "epoch": 0.35085, "grad_norm": 0.12239451706409454, "learning_rate": 3.8266633815451135e-05, "loss": 0.0389, "step": 71170 }, { "epoch": 0.3509, "grad_norm": 0.1414772868156433, "learning_rate": 3.8263130008325946e-05, "loss": 0.0399, "step": 71180 }, { "epoch": 0.35095, "grad_norm": 0.1427716463804245, "learning_rate": 3.8259625838591766e-05, "loss": 0.0377, "step": 71190 }, { "epoch": 0.351, "grad_norm": 0.11323218792676926, "learning_rate": 3.825612130634439e-05, "loss": 0.0383, "step": 71200 }, { "epoch": 0.35105, "grad_norm": 0.09754623472690582, "learning_rate": 3.8252616411679646e-05, "loss": 0.0379, "step": 71210 }, { "epoch": 0.3511, "grad_norm": 0.12253044545650482, "learning_rate": 3.824911115469335e-05, "loss": 0.0385, "step": 71220 }, { "epoch": 0.35115, "grad_norm": 0.07869323343038559, "learning_rate": 3.824560553548132e-05, "loss": 0.0373, "step": 71230 }, { "epoch": 0.3512, "grad_norm": 0.09849068522453308, "learning_rate": 3.824209955413942e-05, "loss": 0.0374, "step": 71240 }, { "epoch": 0.35125, "grad_norm": 0.08278708904981613, "learning_rate": 3.823859321076349e-05, "loss": 0.037, "step": 71250 }, { "epoch": 0.3513, "grad_norm": 0.09109234064817429, "learning_rate": 3.82350865054494e-05, "loss": 0.0364, "step": 71260 }, { "epoch": 0.35135, "grad_norm": 0.08854912966489792, "learning_rate": 3.8231579438293015e-05, "loss": 0.0381, "step": 71270 }, { "epoch": 0.3514, "grad_norm": 0.09258077293634415, "learning_rate": 3.822807200939022e-05, "loss": 0.0375, "step": 71280 }, { "epoch": 0.35145, "grad_norm": 0.08642219007015228, "learning_rate": 3.82245642188369e-05, "loss": 0.0368, "step": 71290 }, { "epoch": 0.3515, "grad_norm": 0.09005814790725708, "learning_rate": 3.822105606672897e-05, "loss": 0.0372, "step": 71300 }, { "epoch": 0.35155, "grad_norm": 0.08447232097387314, "learning_rate": 3.821754755316233e-05, "loss": 0.035, "step": 71310 }, { "epoch": 0.3516, "grad_norm": 0.08680635690689087, "learning_rate": 3.82140386782329e-05, "loss": 0.0368, "step": 71320 }, { "epoch": 0.35165, "grad_norm": 0.09256864339113235, "learning_rate": 3.821052944203663e-05, "loss": 0.037, "step": 71330 }, { "epoch": 0.3517, "grad_norm": 0.12238368391990662, "learning_rate": 3.8207019844669435e-05, "loss": 0.0364, "step": 71340 }, { "epoch": 0.35175, "grad_norm": 0.1358606368303299, "learning_rate": 3.820350988622728e-05, "loss": 0.038, "step": 71350 }, { "epoch": 0.3518, "grad_norm": 0.12239467352628708, "learning_rate": 3.8199999566806134e-05, "loss": 0.0382, "step": 71360 }, { "epoch": 0.35185, "grad_norm": 0.09452878683805466, "learning_rate": 3.8196488886501945e-05, "loss": 0.0374, "step": 71370 }, { "epoch": 0.3519, "grad_norm": 0.09258794039487839, "learning_rate": 3.8192977845410725e-05, "loss": 0.0395, "step": 71380 }, { "epoch": 0.35195, "grad_norm": 0.09202852100133896, "learning_rate": 3.818946644362844e-05, "loss": 0.037, "step": 71390 }, { "epoch": 0.352, "grad_norm": 0.08467541635036469, "learning_rate": 3.8185954681251094e-05, "loss": 0.0362, "step": 71400 }, { "epoch": 0.35205, "grad_norm": 0.10234204679727554, "learning_rate": 3.81824425583747e-05, "loss": 0.0375, "step": 71410 }, { "epoch": 0.3521, "grad_norm": 0.09726942330598831, "learning_rate": 3.817893007509529e-05, "loss": 0.0384, "step": 71420 }, { "epoch": 0.35215, "grad_norm": 0.08774566650390625, "learning_rate": 3.817541723150887e-05, "loss": 0.0372, "step": 71430 }, { "epoch": 0.3522, "grad_norm": 0.09790322929620743, "learning_rate": 3.81719040277115e-05, "loss": 0.0374, "step": 71440 }, { "epoch": 0.35225, "grad_norm": 0.10679417103528976, "learning_rate": 3.816839046379922e-05, "loss": 0.038, "step": 71450 }, { "epoch": 0.3523, "grad_norm": 0.0922967866063118, "learning_rate": 3.816487653986809e-05, "loss": 0.0374, "step": 71460 }, { "epoch": 0.35235, "grad_norm": 0.08391154557466507, "learning_rate": 3.816136225601418e-05, "loss": 0.0361, "step": 71470 }, { "epoch": 0.3524, "grad_norm": 0.1068667471408844, "learning_rate": 3.815784761233357e-05, "loss": 0.0362, "step": 71480 }, { "epoch": 0.35245, "grad_norm": 0.11418743431568146, "learning_rate": 3.815433260892235e-05, "loss": 0.0389, "step": 71490 }, { "epoch": 0.3525, "grad_norm": 0.10949188470840454, "learning_rate": 3.815081724587662e-05, "loss": 0.0387, "step": 71500 }, { "epoch": 0.35255, "grad_norm": 0.08463648706674576, "learning_rate": 3.81473015232925e-05, "loss": 0.0365, "step": 71510 }, { "epoch": 0.3526, "grad_norm": 0.08453572541475296, "learning_rate": 3.814378544126608e-05, "loss": 0.0353, "step": 71520 }, { "epoch": 0.35265, "grad_norm": 0.0713263601064682, "learning_rate": 3.814026899989351e-05, "loss": 0.0353, "step": 71530 }, { "epoch": 0.3527, "grad_norm": 0.08317417651414871, "learning_rate": 3.813675219927092e-05, "loss": 0.0361, "step": 71540 }, { "epoch": 0.35275, "grad_norm": 0.10706380009651184, "learning_rate": 3.8133235039494455e-05, "loss": 0.0347, "step": 71550 }, { "epoch": 0.3528, "grad_norm": 0.086525097489357, "learning_rate": 3.812971752066028e-05, "loss": 0.0338, "step": 71560 }, { "epoch": 0.35285, "grad_norm": 0.08943665027618408, "learning_rate": 3.812619964286457e-05, "loss": 0.0353, "step": 71570 }, { "epoch": 0.3529, "grad_norm": 0.11737718433141708, "learning_rate": 3.812268140620349e-05, "loss": 0.036, "step": 71580 }, { "epoch": 0.35295, "grad_norm": 0.09202095121145248, "learning_rate": 3.8119162810773224e-05, "loss": 0.0359, "step": 71590 }, { "epoch": 0.353, "grad_norm": 0.09099660068750381, "learning_rate": 3.8115643856669976e-05, "loss": 0.0353, "step": 71600 }, { "epoch": 0.35305, "grad_norm": 0.08286184817552567, "learning_rate": 3.811212454398996e-05, "loss": 0.0354, "step": 71610 }, { "epoch": 0.3531, "grad_norm": 0.08152265846729279, "learning_rate": 3.810860487282937e-05, "loss": 0.0382, "step": 71620 }, { "epoch": 0.35315, "grad_norm": 0.09711956232786179, "learning_rate": 3.810508484328446e-05, "loss": 0.0363, "step": 71630 }, { "epoch": 0.3532, "grad_norm": 0.09203144907951355, "learning_rate": 3.810156445545145e-05, "loss": 0.0373, "step": 71640 }, { "epoch": 0.35325, "grad_norm": 0.09962151199579239, "learning_rate": 3.809804370942659e-05, "loss": 0.0374, "step": 71650 }, { "epoch": 0.3533, "grad_norm": 0.10103832185268402, "learning_rate": 3.8094522605306135e-05, "loss": 0.0384, "step": 71660 }, { "epoch": 0.35335, "grad_norm": 0.09982626140117645, "learning_rate": 3.8091001143186354e-05, "loss": 0.0375, "step": 71670 }, { "epoch": 0.3534, "grad_norm": 0.12380947172641754, "learning_rate": 3.8087479323163513e-05, "loss": 0.0396, "step": 71680 }, { "epoch": 0.35345, "grad_norm": 0.10245148837566376, "learning_rate": 3.808395714533391e-05, "loss": 0.0376, "step": 71690 }, { "epoch": 0.3535, "grad_norm": 0.10948903858661652, "learning_rate": 3.8080434609793834e-05, "loss": 0.0376, "step": 71700 }, { "epoch": 0.35355, "grad_norm": 0.09909265488386154, "learning_rate": 3.807691171663959e-05, "loss": 0.0368, "step": 71710 }, { "epoch": 0.3536, "grad_norm": 0.09816741943359375, "learning_rate": 3.8073388465967496e-05, "loss": 0.0362, "step": 71720 }, { "epoch": 0.35365, "grad_norm": 0.08939754217863083, "learning_rate": 3.8069864857873866e-05, "loss": 0.0361, "step": 71730 }, { "epoch": 0.3537, "grad_norm": 0.11072148382663727, "learning_rate": 3.806634089245504e-05, "loss": 0.0388, "step": 71740 }, { "epoch": 0.35375, "grad_norm": 0.09038572758436203, "learning_rate": 3.8062816569807366e-05, "loss": 0.0359, "step": 71750 }, { "epoch": 0.3538, "grad_norm": 0.08297697454690933, "learning_rate": 3.80592918900272e-05, "loss": 0.035, "step": 71760 }, { "epoch": 0.35385, "grad_norm": 0.07661821693181992, "learning_rate": 3.805576685321089e-05, "loss": 0.0356, "step": 71770 }, { "epoch": 0.3539, "grad_norm": 0.08732740581035614, "learning_rate": 3.805224145945483e-05, "loss": 0.037, "step": 71780 }, { "epoch": 0.35395, "grad_norm": 0.09262505173683167, "learning_rate": 3.804871570885538e-05, "loss": 0.0358, "step": 71790 }, { "epoch": 0.354, "grad_norm": 0.0871446430683136, "learning_rate": 3.804518960150896e-05, "loss": 0.0366, "step": 71800 }, { "epoch": 0.35405, "grad_norm": 0.1006646603345871, "learning_rate": 3.8041663137511934e-05, "loss": 0.0363, "step": 71810 }, { "epoch": 0.3541, "grad_norm": 0.09728217124938965, "learning_rate": 3.8038136316960755e-05, "loss": 0.0383, "step": 71820 }, { "epoch": 0.35415, "grad_norm": 0.09145552664995193, "learning_rate": 3.803460913995182e-05, "loss": 0.0367, "step": 71830 }, { "epoch": 0.3542, "grad_norm": 0.10197024792432785, "learning_rate": 3.8031081606581575e-05, "loss": 0.0369, "step": 71840 }, { "epoch": 0.35425, "grad_norm": 0.1022845134139061, "learning_rate": 3.8027553716946454e-05, "loss": 0.0408, "step": 71850 }, { "epoch": 0.3543, "grad_norm": 0.09232233464717865, "learning_rate": 3.80240254711429e-05, "loss": 0.0363, "step": 71860 }, { "epoch": 0.35435, "grad_norm": 0.07501015812158585, "learning_rate": 3.802049686926739e-05, "loss": 0.0365, "step": 71870 }, { "epoch": 0.3544, "grad_norm": 0.09139897674322128, "learning_rate": 3.801696791141638e-05, "loss": 0.0379, "step": 71880 }, { "epoch": 0.35445, "grad_norm": 0.08283814787864685, "learning_rate": 3.8013438597686365e-05, "loss": 0.0369, "step": 71890 }, { "epoch": 0.3545, "grad_norm": 0.08500596880912781, "learning_rate": 3.800990892817382e-05, "loss": 0.0358, "step": 71900 }, { "epoch": 0.35455, "grad_norm": 0.08232578635215759, "learning_rate": 3.800637890297526e-05, "loss": 0.0375, "step": 71910 }, { "epoch": 0.3546, "grad_norm": 0.08956071734428406, "learning_rate": 3.8002848522187185e-05, "loss": 0.0375, "step": 71920 }, { "epoch": 0.35465, "grad_norm": 0.09098503738641739, "learning_rate": 3.799931778590611e-05, "loss": 0.0375, "step": 71930 }, { "epoch": 0.3547, "grad_norm": 0.08801445364952087, "learning_rate": 3.7995786694228584e-05, "loss": 0.0393, "step": 71940 }, { "epoch": 0.35475, "grad_norm": 0.0851890817284584, "learning_rate": 3.7992255247251115e-05, "loss": 0.0373, "step": 71950 }, { "epoch": 0.3548, "grad_norm": 0.1011219173669815, "learning_rate": 3.7988723445070285e-05, "loss": 0.0392, "step": 71960 }, { "epoch": 0.35485, "grad_norm": 0.08286673575639725, "learning_rate": 3.798519128778263e-05, "loss": 0.0373, "step": 71970 }, { "epoch": 0.3549, "grad_norm": 0.08837667107582092, "learning_rate": 3.798165877548472e-05, "loss": 0.0368, "step": 71980 }, { "epoch": 0.35495, "grad_norm": 0.08247746527194977, "learning_rate": 3.797812590827314e-05, "loss": 0.037, "step": 71990 }, { "epoch": 0.355, "grad_norm": 0.08544211834669113, "learning_rate": 3.797459268624446e-05, "loss": 0.0377, "step": 72000 }, { "epoch": 0.35505, "grad_norm": 0.08988383412361145, "learning_rate": 3.797105910949531e-05, "loss": 0.0378, "step": 72010 }, { "epoch": 0.3551, "grad_norm": 0.08067493885755539, "learning_rate": 3.796752517812227e-05, "loss": 0.0359, "step": 72020 }, { "epoch": 0.35515, "grad_norm": 0.08864334225654602, "learning_rate": 3.796399089222196e-05, "loss": 0.0367, "step": 72030 }, { "epoch": 0.3552, "grad_norm": 0.10649200528860092, "learning_rate": 3.796045625189101e-05, "loss": 0.0386, "step": 72040 }, { "epoch": 0.35525, "grad_norm": 0.0983104333281517, "learning_rate": 3.7956921257226064e-05, "loss": 0.0375, "step": 72050 }, { "epoch": 0.3553, "grad_norm": 0.10972260683774948, "learning_rate": 3.7953385908323744e-05, "loss": 0.0361, "step": 72060 }, { "epoch": 0.35535, "grad_norm": 0.09170223027467728, "learning_rate": 3.794985020528072e-05, "loss": 0.0363, "step": 72070 }, { "epoch": 0.3554, "grad_norm": 0.1001339927315712, "learning_rate": 3.794631414819367e-05, "loss": 0.0378, "step": 72080 }, { "epoch": 0.35545, "grad_norm": 0.07550124824047089, "learning_rate": 3.794277773715925e-05, "loss": 0.0375, "step": 72090 }, { "epoch": 0.3555, "grad_norm": 0.09928284585475922, "learning_rate": 3.793924097227414e-05, "loss": 0.0389, "step": 72100 }, { "epoch": 0.35555, "grad_norm": 0.09975890815258026, "learning_rate": 3.793570385363506e-05, "loss": 0.0386, "step": 72110 }, { "epoch": 0.3556, "grad_norm": 0.10257852077484131, "learning_rate": 3.793216638133869e-05, "loss": 0.0365, "step": 72120 }, { "epoch": 0.35565, "grad_norm": 0.09048584848642349, "learning_rate": 3.792862855548174e-05, "loss": 0.0378, "step": 72130 }, { "epoch": 0.3557, "grad_norm": 0.0968620702624321, "learning_rate": 3.792509037616094e-05, "loss": 0.0393, "step": 72140 }, { "epoch": 0.35575, "grad_norm": 0.1037416085600853, "learning_rate": 3.7921551843473036e-05, "loss": 0.0395, "step": 72150 }, { "epoch": 0.3558, "grad_norm": 0.0851578339934349, "learning_rate": 3.791801295751476e-05, "loss": 0.0372, "step": 72160 }, { "epoch": 0.35585, "grad_norm": 0.09236549586057663, "learning_rate": 3.791447371838285e-05, "loss": 0.0386, "step": 72170 }, { "epoch": 0.3559, "grad_norm": 0.10009481757879257, "learning_rate": 3.791093412617409e-05, "loss": 0.0377, "step": 72180 }, { "epoch": 0.35595, "grad_norm": 0.09812429547309875, "learning_rate": 3.7907394180985244e-05, "loss": 0.0362, "step": 72190 }, { "epoch": 0.356, "grad_norm": 0.0721142441034317, "learning_rate": 3.790385388291308e-05, "loss": 0.0354, "step": 72200 }, { "epoch": 0.35605, "grad_norm": 0.10122848302125931, "learning_rate": 3.790031323205441e-05, "loss": 0.0381, "step": 72210 }, { "epoch": 0.3561, "grad_norm": 0.08593422174453735, "learning_rate": 3.789677222850602e-05, "loss": 0.0377, "step": 72220 }, { "epoch": 0.35615, "grad_norm": 0.09920187294483185, "learning_rate": 3.7893230872364715e-05, "loss": 0.0373, "step": 72230 }, { "epoch": 0.3562, "grad_norm": 0.07431119680404663, "learning_rate": 3.788968916372733e-05, "loss": 0.0352, "step": 72240 }, { "epoch": 0.35625, "grad_norm": 0.0823153406381607, "learning_rate": 3.7886147102690675e-05, "loss": 0.0359, "step": 72250 }, { "epoch": 0.3563, "grad_norm": 0.08599826693534851, "learning_rate": 3.788260468935161e-05, "loss": 0.0367, "step": 72260 }, { "epoch": 0.35635, "grad_norm": 0.08760447800159454, "learning_rate": 3.787906192380697e-05, "loss": 0.0359, "step": 72270 }, { "epoch": 0.3564, "grad_norm": 0.09040172398090363, "learning_rate": 3.787551880615362e-05, "loss": 0.0351, "step": 72280 }, { "epoch": 0.35645, "grad_norm": 0.08009956032037735, "learning_rate": 3.7871975336488417e-05, "loss": 0.0357, "step": 72290 }, { "epoch": 0.3565, "grad_norm": 0.08367157727479935, "learning_rate": 3.786843151490824e-05, "loss": 0.0364, "step": 72300 }, { "epoch": 0.35655, "grad_norm": 0.08577068150043488, "learning_rate": 3.7864887341509984e-05, "loss": 0.0355, "step": 72310 }, { "epoch": 0.3566, "grad_norm": 0.0743073970079422, "learning_rate": 3.7861342816390546e-05, "loss": 0.0342, "step": 72320 }, { "epoch": 0.35665, "grad_norm": 0.10048245638608932, "learning_rate": 3.785779793964682e-05, "loss": 0.0356, "step": 72330 }, { "epoch": 0.3567, "grad_norm": 0.07976207137107849, "learning_rate": 3.785425271137573e-05, "loss": 0.0349, "step": 72340 }, { "epoch": 0.35675, "grad_norm": 0.10690147429704666, "learning_rate": 3.78507071316742e-05, "loss": 0.036, "step": 72350 }, { "epoch": 0.3568, "grad_norm": 0.08670288324356079, "learning_rate": 3.784716120063917e-05, "loss": 0.0352, "step": 72360 }, { "epoch": 0.35685, "grad_norm": 0.09053745865821838, "learning_rate": 3.784361491836758e-05, "loss": 0.0367, "step": 72370 }, { "epoch": 0.3569, "grad_norm": 0.07990558445453644, "learning_rate": 3.7840068284956374e-05, "loss": 0.0352, "step": 72380 }, { "epoch": 0.35695, "grad_norm": 0.10328347980976105, "learning_rate": 3.783652130050252e-05, "loss": 0.0384, "step": 72390 }, { "epoch": 0.357, "grad_norm": 0.09670775383710861, "learning_rate": 3.783297396510301e-05, "loss": 0.0373, "step": 72400 }, { "epoch": 0.35705, "grad_norm": 0.09959684312343597, "learning_rate": 3.782942627885482e-05, "loss": 0.0422, "step": 72410 }, { "epoch": 0.3571, "grad_norm": 0.09338296949863434, "learning_rate": 3.7825878241854916e-05, "loss": 0.0367, "step": 72420 }, { "epoch": 0.35715, "grad_norm": 0.09818318486213684, "learning_rate": 3.7822329854200335e-05, "loss": 0.0374, "step": 72430 }, { "epoch": 0.3572, "grad_norm": 0.10129484534263611, "learning_rate": 3.781878111598806e-05, "loss": 0.0347, "step": 72440 }, { "epoch": 0.35725, "grad_norm": 0.09188708662986755, "learning_rate": 3.781523202731513e-05, "loss": 0.0366, "step": 72450 }, { "epoch": 0.3573, "grad_norm": 0.07468585669994354, "learning_rate": 3.781168258827857e-05, "loss": 0.0374, "step": 72460 }, { "epoch": 0.35735, "grad_norm": 0.08601050078868866, "learning_rate": 3.7808132798975424e-05, "loss": 0.0368, "step": 72470 }, { "epoch": 0.3574, "grad_norm": 0.07531297206878662, "learning_rate": 3.7804582659502744e-05, "loss": 0.0366, "step": 72480 }, { "epoch": 0.35745, "grad_norm": 0.08619679510593414, "learning_rate": 3.7801032169957575e-05, "loss": 0.0361, "step": 72490 }, { "epoch": 0.3575, "grad_norm": 0.09112090617418289, "learning_rate": 3.7797481330437e-05, "loss": 0.0387, "step": 72500 }, { "epoch": 0.35755, "grad_norm": 0.08577617257833481, "learning_rate": 3.779393014103809e-05, "loss": 0.036, "step": 72510 }, { "epoch": 0.3576, "grad_norm": 0.08866028487682343, "learning_rate": 3.7790378601857936e-05, "loss": 0.0378, "step": 72520 }, { "epoch": 0.35765, "grad_norm": 0.10137222707271576, "learning_rate": 3.778682671299364e-05, "loss": 0.0356, "step": 72530 }, { "epoch": 0.3577, "grad_norm": 0.09635225683450699, "learning_rate": 3.7783274474542304e-05, "loss": 0.0364, "step": 72540 }, { "epoch": 0.35775, "grad_norm": 0.07238588482141495, "learning_rate": 3.777972188660105e-05, "loss": 0.0373, "step": 72550 }, { "epoch": 0.3578, "grad_norm": 0.0964500829577446, "learning_rate": 3.7776168949267e-05, "loss": 0.0392, "step": 72560 }, { "epoch": 0.35785, "grad_norm": 0.10754073411226273, "learning_rate": 3.7772615662637276e-05, "loss": 0.0374, "step": 72570 }, { "epoch": 0.3579, "grad_norm": 0.08237577229738235, "learning_rate": 3.7769062026809054e-05, "loss": 0.037, "step": 72580 }, { "epoch": 0.35795, "grad_norm": 0.07473357766866684, "learning_rate": 3.776550804187947e-05, "loss": 0.0376, "step": 72590 }, { "epoch": 0.358, "grad_norm": 0.09661019593477249, "learning_rate": 3.7761953707945685e-05, "loss": 0.0365, "step": 72600 }, { "epoch": 0.35805, "grad_norm": 0.08514625579118729, "learning_rate": 3.7758399025104896e-05, "loss": 0.0373, "step": 72610 }, { "epoch": 0.3581, "grad_norm": 0.07970965653657913, "learning_rate": 3.775484399345426e-05, "loss": 0.0387, "step": 72620 }, { "epoch": 0.35815, "grad_norm": 0.0916329026222229, "learning_rate": 3.775128861309097e-05, "loss": 0.0373, "step": 72630 }, { "epoch": 0.3582, "grad_norm": 0.08543266355991364, "learning_rate": 3.774773288411226e-05, "loss": 0.039, "step": 72640 }, { "epoch": 0.35825, "grad_norm": 0.09130579233169556, "learning_rate": 3.774417680661532e-05, "loss": 0.0368, "step": 72650 }, { "epoch": 0.3583, "grad_norm": 0.10344947874546051, "learning_rate": 3.7740620380697356e-05, "loss": 0.0369, "step": 72660 }, { "epoch": 0.35835, "grad_norm": 0.09073154628276825, "learning_rate": 3.773706360645563e-05, "loss": 0.0399, "step": 72670 }, { "epoch": 0.3584, "grad_norm": 0.09521952271461487, "learning_rate": 3.773350648398737e-05, "loss": 0.0367, "step": 72680 }, { "epoch": 0.35845, "grad_norm": 0.129849374294281, "learning_rate": 3.772994901338983e-05, "loss": 0.0369, "step": 72690 }, { "epoch": 0.3585, "grad_norm": 0.09686827659606934, "learning_rate": 3.772639119476026e-05, "loss": 0.0371, "step": 72700 }, { "epoch": 0.35855, "grad_norm": 0.10051210969686508, "learning_rate": 3.772283302819594e-05, "loss": 0.0369, "step": 72710 }, { "epoch": 0.3586, "grad_norm": 0.09047259390354156, "learning_rate": 3.771927451379414e-05, "loss": 0.0373, "step": 72720 }, { "epoch": 0.35865, "grad_norm": 0.08867276459932327, "learning_rate": 3.771571565165215e-05, "loss": 0.0379, "step": 72730 }, { "epoch": 0.3587, "grad_norm": 0.08804894238710403, "learning_rate": 3.771215644186729e-05, "loss": 0.0378, "step": 72740 }, { "epoch": 0.35875, "grad_norm": 0.08896362036466599, "learning_rate": 3.770859688453683e-05, "loss": 0.0366, "step": 72750 }, { "epoch": 0.3588, "grad_norm": 0.08998987823724747, "learning_rate": 3.770503697975811e-05, "loss": 0.0368, "step": 72760 }, { "epoch": 0.35885, "grad_norm": 0.09973672777414322, "learning_rate": 3.7701476727628447e-05, "loss": 0.0399, "step": 72770 }, { "epoch": 0.3589, "grad_norm": 0.08832769840955734, "learning_rate": 3.7697916128245194e-05, "loss": 0.0375, "step": 72780 }, { "epoch": 0.35895, "grad_norm": 0.09316246956586838, "learning_rate": 3.769435518170568e-05, "loss": 0.0379, "step": 72790 }, { "epoch": 0.359, "grad_norm": 0.08372035622596741, "learning_rate": 3.769079388810726e-05, "loss": 0.0371, "step": 72800 }, { "epoch": 0.35905, "grad_norm": 0.07352810353040695, "learning_rate": 3.7687232247547305e-05, "loss": 0.0389, "step": 72810 }, { "epoch": 0.3591, "grad_norm": 0.08349832147359848, "learning_rate": 3.768367026012319e-05, "loss": 0.0363, "step": 72820 }, { "epoch": 0.35915, "grad_norm": 0.0870920792222023, "learning_rate": 3.768010792593228e-05, "loss": 0.04, "step": 72830 }, { "epoch": 0.3592, "grad_norm": 0.0967041403055191, "learning_rate": 3.7676545245072e-05, "loss": 0.038, "step": 72840 }, { "epoch": 0.35925, "grad_norm": 0.08353424817323685, "learning_rate": 3.767298221763973e-05, "loss": 0.0367, "step": 72850 }, { "epoch": 0.3593, "grad_norm": 0.09391330182552338, "learning_rate": 3.76694188437329e-05, "loss": 0.038, "step": 72860 }, { "epoch": 0.35935, "grad_norm": 0.08620428293943405, "learning_rate": 3.7665855123448904e-05, "loss": 0.0365, "step": 72870 }, { "epoch": 0.3594, "grad_norm": 0.0925588607788086, "learning_rate": 3.766229105688518e-05, "loss": 0.0372, "step": 72880 }, { "epoch": 0.35945, "grad_norm": 0.0822024792432785, "learning_rate": 3.7658726644139185e-05, "loss": 0.0399, "step": 72890 }, { "epoch": 0.3595, "grad_norm": 0.08573483675718307, "learning_rate": 3.7655161885308365e-05, "loss": 0.0374, "step": 72900 }, { "epoch": 0.35955, "grad_norm": 0.08759527653455734, "learning_rate": 3.765159678049017e-05, "loss": 0.0367, "step": 72910 }, { "epoch": 0.3596, "grad_norm": 0.08639516681432724, "learning_rate": 3.764803132978206e-05, "loss": 0.037, "step": 72920 }, { "epoch": 0.35965, "grad_norm": 0.08695117384195328, "learning_rate": 3.764446553328154e-05, "loss": 0.0363, "step": 72930 }, { "epoch": 0.3597, "grad_norm": 0.11545746773481369, "learning_rate": 3.764089939108608e-05, "loss": 0.0395, "step": 72940 }, { "epoch": 0.35975, "grad_norm": 0.10340339690446854, "learning_rate": 3.7637332903293174e-05, "loss": 0.0377, "step": 72950 }, { "epoch": 0.3598, "grad_norm": 0.08980807662010193, "learning_rate": 3.763376607000034e-05, "loss": 0.0374, "step": 72960 }, { "epoch": 0.35985, "grad_norm": 0.0981464833021164, "learning_rate": 3.763019889130509e-05, "loss": 0.04, "step": 72970 }, { "epoch": 0.3599, "grad_norm": 0.09008200466632843, "learning_rate": 3.762663136730493e-05, "loss": 0.0361, "step": 72980 }, { "epoch": 0.35995, "grad_norm": 0.09525028616189957, "learning_rate": 3.7623063498097434e-05, "loss": 0.0384, "step": 72990 }, { "epoch": 0.36, "grad_norm": 0.10193130373954773, "learning_rate": 3.7619495283780114e-05, "loss": 0.0391, "step": 73000 }, { "epoch": 0.36005, "grad_norm": 0.10165940970182419, "learning_rate": 3.7615926724450534e-05, "loss": 0.0384, "step": 73010 }, { "epoch": 0.3601, "grad_norm": 0.10303997248411179, "learning_rate": 3.761235782020626e-05, "loss": 0.0365, "step": 73020 }, { "epoch": 0.36015, "grad_norm": 0.10546267777681351, "learning_rate": 3.7608788571144855e-05, "loss": 0.0364, "step": 73030 }, { "epoch": 0.3602, "grad_norm": 0.09056802093982697, "learning_rate": 3.760521897736391e-05, "loss": 0.0366, "step": 73040 }, { "epoch": 0.36025, "grad_norm": 0.11318932473659515, "learning_rate": 3.760164903896102e-05, "loss": 0.0374, "step": 73050 }, { "epoch": 0.3603, "grad_norm": 0.08870753645896912, "learning_rate": 3.7598078756033773e-05, "loss": 0.0391, "step": 73060 }, { "epoch": 0.36035, "grad_norm": 0.09608108550310135, "learning_rate": 3.7594508128679784e-05, "loss": 0.0383, "step": 73070 }, { "epoch": 0.3604, "grad_norm": 0.099686399102211, "learning_rate": 3.759093715699668e-05, "loss": 0.0387, "step": 73080 }, { "epoch": 0.36045, "grad_norm": 0.09866029024124146, "learning_rate": 3.7587365841082076e-05, "loss": 0.0355, "step": 73090 }, { "epoch": 0.3605, "grad_norm": 0.09272120893001556, "learning_rate": 3.758379418103363e-05, "loss": 0.0377, "step": 73100 }, { "epoch": 0.36055, "grad_norm": 0.08696601539850235, "learning_rate": 3.7580222176948974e-05, "loss": 0.0363, "step": 73110 }, { "epoch": 0.3606, "grad_norm": 0.10454379767179489, "learning_rate": 3.757664982892577e-05, "loss": 0.0361, "step": 73120 }, { "epoch": 0.36065, "grad_norm": 0.10213061422109604, "learning_rate": 3.757307713706168e-05, "loss": 0.037, "step": 73130 }, { "epoch": 0.3607, "grad_norm": 0.10697133094072342, "learning_rate": 3.7569504101454385e-05, "loss": 0.0365, "step": 73140 }, { "epoch": 0.36075, "grad_norm": 0.07808250933885574, "learning_rate": 3.7565930722201576e-05, "loss": 0.0361, "step": 73150 }, { "epoch": 0.3608, "grad_norm": 0.08140584081411362, "learning_rate": 3.756235699940094e-05, "loss": 0.0369, "step": 73160 }, { "epoch": 0.36085, "grad_norm": 0.08964422345161438, "learning_rate": 3.755878293315018e-05, "loss": 0.0376, "step": 73170 }, { "epoch": 0.3609, "grad_norm": 0.09899695217609406, "learning_rate": 3.755520852354702e-05, "loss": 0.0383, "step": 73180 }, { "epoch": 0.36095, "grad_norm": 0.09381364285945892, "learning_rate": 3.755163377068917e-05, "loss": 0.037, "step": 73190 }, { "epoch": 0.361, "grad_norm": 0.0851723849773407, "learning_rate": 3.7548058674674366e-05, "loss": 0.0377, "step": 73200 }, { "epoch": 0.36105, "grad_norm": 0.09449709206819534, "learning_rate": 3.754448323560035e-05, "loss": 0.0389, "step": 73210 }, { "epoch": 0.3611, "grad_norm": 0.08995748311281204, "learning_rate": 3.754090745356488e-05, "loss": 0.0382, "step": 73220 }, { "epoch": 0.36115, "grad_norm": 0.08525869995355606, "learning_rate": 3.753733132866571e-05, "loss": 0.0398, "step": 73230 }, { "epoch": 0.3612, "grad_norm": 0.09252028167247772, "learning_rate": 3.753375486100061e-05, "loss": 0.0382, "step": 73240 }, { "epoch": 0.36125, "grad_norm": 0.09120626747608185, "learning_rate": 3.753017805066737e-05, "loss": 0.0403, "step": 73250 }, { "epoch": 0.3613, "grad_norm": 0.08819838613271713, "learning_rate": 3.7526600897763764e-05, "loss": 0.0399, "step": 73260 }, { "epoch": 0.36135, "grad_norm": 0.11151785403490067, "learning_rate": 3.752302340238759e-05, "loss": 0.0385, "step": 73270 }, { "epoch": 0.3614, "grad_norm": 0.08899339288473129, "learning_rate": 3.751944556463667e-05, "loss": 0.0429, "step": 73280 }, { "epoch": 0.36145, "grad_norm": 0.09409180283546448, "learning_rate": 3.75158673846088e-05, "loss": 0.0403, "step": 73290 }, { "epoch": 0.3615, "grad_norm": 0.09247760474681854, "learning_rate": 3.7512288862401835e-05, "loss": 0.0391, "step": 73300 }, { "epoch": 0.36155, "grad_norm": 0.10189498215913773, "learning_rate": 3.750870999811358e-05, "loss": 0.0369, "step": 73310 }, { "epoch": 0.3616, "grad_norm": 0.08997152745723724, "learning_rate": 3.7505130791841896e-05, "loss": 0.0385, "step": 73320 }, { "epoch": 0.36165, "grad_norm": 0.09530501812696457, "learning_rate": 3.750155124368463e-05, "loss": 0.0376, "step": 73330 }, { "epoch": 0.3617, "grad_norm": 0.09209480881690979, "learning_rate": 3.749797135373966e-05, "loss": 0.0402, "step": 73340 }, { "epoch": 0.36175, "grad_norm": 0.09806963801383972, "learning_rate": 3.7494391122104834e-05, "loss": 0.0364, "step": 73350 }, { "epoch": 0.3618, "grad_norm": 0.09419302642345428, "learning_rate": 3.7490810548878066e-05, "loss": 0.038, "step": 73360 }, { "epoch": 0.36185, "grad_norm": 0.09023649245500565, "learning_rate": 3.748722963415722e-05, "loss": 0.0364, "step": 73370 }, { "epoch": 0.3619, "grad_norm": 0.07176271080970764, "learning_rate": 3.74836483780402e-05, "loss": 0.035, "step": 73380 }, { "epoch": 0.36195, "grad_norm": 0.09162287414073944, "learning_rate": 3.7480066780624935e-05, "loss": 0.0364, "step": 73390 }, { "epoch": 0.362, "grad_norm": 0.07958874106407166, "learning_rate": 3.7476484842009326e-05, "loss": 0.0359, "step": 73400 }, { "epoch": 0.36205, "grad_norm": 0.0949300155043602, "learning_rate": 3.747290256229131e-05, "loss": 0.0367, "step": 73410 }, { "epoch": 0.3621, "grad_norm": 0.08462786674499512, "learning_rate": 3.7469319941568827e-05, "loss": 0.0367, "step": 73420 }, { "epoch": 0.36215, "grad_norm": 0.10595318675041199, "learning_rate": 3.746573697993982e-05, "loss": 0.0364, "step": 73430 }, { "epoch": 0.3622, "grad_norm": 0.10279781371355057, "learning_rate": 3.7462153677502244e-05, "loss": 0.036, "step": 73440 }, { "epoch": 0.36225, "grad_norm": 0.10992176830768585, "learning_rate": 3.7458570034354076e-05, "loss": 0.0367, "step": 73450 }, { "epoch": 0.3623, "grad_norm": 0.10987858474254608, "learning_rate": 3.745498605059327e-05, "loss": 0.037, "step": 73460 }, { "epoch": 0.36235, "grad_norm": 0.10498002916574478, "learning_rate": 3.745140172631784e-05, "loss": 0.0355, "step": 73470 }, { "epoch": 0.3624, "grad_norm": 0.09899154305458069, "learning_rate": 3.744781706162576e-05, "loss": 0.0378, "step": 73480 }, { "epoch": 0.36245, "grad_norm": 0.10823096334934235, "learning_rate": 3.7444232056615036e-05, "loss": 0.0373, "step": 73490 }, { "epoch": 0.3625, "grad_norm": 0.11849300563335419, "learning_rate": 3.744064671138368e-05, "loss": 0.0371, "step": 73500 }, { "epoch": 0.36255, "grad_norm": 0.10961859673261642, "learning_rate": 3.7437061026029717e-05, "loss": 0.0364, "step": 73510 }, { "epoch": 0.3626, "grad_norm": 0.12412311881780624, "learning_rate": 3.7433475000651184e-05, "loss": 0.0384, "step": 73520 }, { "epoch": 0.36265, "grad_norm": 0.08983161300420761, "learning_rate": 3.7429888635346105e-05, "loss": 0.0361, "step": 73530 }, { "epoch": 0.3627, "grad_norm": 0.08294022083282471, "learning_rate": 3.7426301930212545e-05, "loss": 0.0385, "step": 73540 }, { "epoch": 0.36275, "grad_norm": 0.0846564844250679, "learning_rate": 3.7422714885348566e-05, "loss": 0.0375, "step": 73550 }, { "epoch": 0.3628, "grad_norm": 0.08093772828578949, "learning_rate": 3.7419127500852224e-05, "loss": 0.0367, "step": 73560 }, { "epoch": 0.36285, "grad_norm": 0.0944150909781456, "learning_rate": 3.74155397768216e-05, "loss": 0.0364, "step": 73570 }, { "epoch": 0.3629, "grad_norm": 0.09659276902675629, "learning_rate": 3.741195171335479e-05, "loss": 0.0358, "step": 73580 }, { "epoch": 0.36295, "grad_norm": 0.10250847786664963, "learning_rate": 3.740836331054987e-05, "loss": 0.0383, "step": 73590 }, { "epoch": 0.363, "grad_norm": 0.10492464154958725, "learning_rate": 3.740477456850496e-05, "loss": 0.0369, "step": 73600 }, { "epoch": 0.36305, "grad_norm": 0.08737901598215103, "learning_rate": 3.740118548731818e-05, "loss": 0.0368, "step": 73610 }, { "epoch": 0.3631, "grad_norm": 0.0963754653930664, "learning_rate": 3.739759606708765e-05, "loss": 0.0362, "step": 73620 }, { "epoch": 0.36315, "grad_norm": 0.09435886889696121, "learning_rate": 3.73940063079115e-05, "loss": 0.0355, "step": 73630 }, { "epoch": 0.3632, "grad_norm": 0.09186870604753494, "learning_rate": 3.739041620988788e-05, "loss": 0.0375, "step": 73640 }, { "epoch": 0.36325, "grad_norm": 0.10190404951572418, "learning_rate": 3.738682577311492e-05, "loss": 0.0385, "step": 73650 }, { "epoch": 0.3633, "grad_norm": 0.0959816426038742, "learning_rate": 3.7383234997690806e-05, "loss": 0.0365, "step": 73660 }, { "epoch": 0.36335, "grad_norm": 0.09658969938755035, "learning_rate": 3.73796438837137e-05, "loss": 0.0386, "step": 73670 }, { "epoch": 0.3634, "grad_norm": 0.10769996047019958, "learning_rate": 3.737605243128178e-05, "loss": 0.0372, "step": 73680 }, { "epoch": 0.36345, "grad_norm": 0.09856262058019638, "learning_rate": 3.737246064049323e-05, "loss": 0.038, "step": 73690 }, { "epoch": 0.3635, "grad_norm": 0.08360934257507324, "learning_rate": 3.7368868511446266e-05, "loss": 0.0365, "step": 73700 }, { "epoch": 0.36355, "grad_norm": 0.09821160137653351, "learning_rate": 3.7365276044239074e-05, "loss": 0.0375, "step": 73710 }, { "epoch": 0.3636, "grad_norm": 0.09756915271282196, "learning_rate": 3.736168323896988e-05, "loss": 0.0418, "step": 73720 }, { "epoch": 0.36365, "grad_norm": 0.09500919282436371, "learning_rate": 3.7358090095736905e-05, "loss": 0.0364, "step": 73730 }, { "epoch": 0.3637, "grad_norm": 0.08789382874965668, "learning_rate": 3.7354496614638405e-05, "loss": 0.0377, "step": 73740 }, { "epoch": 0.36375, "grad_norm": 0.09167249500751495, "learning_rate": 3.73509027957726e-05, "loss": 0.0361, "step": 73750 }, { "epoch": 0.3638, "grad_norm": 0.11411943286657333, "learning_rate": 3.734730863923776e-05, "loss": 0.0375, "step": 73760 }, { "epoch": 0.36385, "grad_norm": 0.09541446715593338, "learning_rate": 3.734371414513213e-05, "loss": 0.0369, "step": 73770 }, { "epoch": 0.3639, "grad_norm": 0.08876598626375198, "learning_rate": 3.7340119313554e-05, "loss": 0.0387, "step": 73780 }, { "epoch": 0.36395, "grad_norm": 0.10548962652683258, "learning_rate": 3.733652414460164e-05, "loss": 0.0377, "step": 73790 }, { "epoch": 0.364, "grad_norm": 0.10141220688819885, "learning_rate": 3.7332928638373346e-05, "loss": 0.0376, "step": 73800 }, { "epoch": 0.36405, "grad_norm": 0.09801637381315231, "learning_rate": 3.7329332794967414e-05, "loss": 0.0369, "step": 73810 }, { "epoch": 0.3641, "grad_norm": 0.10043822973966599, "learning_rate": 3.732573661448215e-05, "loss": 0.0379, "step": 73820 }, { "epoch": 0.36415, "grad_norm": 0.08055119961500168, "learning_rate": 3.73221400970159e-05, "loss": 0.0357, "step": 73830 }, { "epoch": 0.3642, "grad_norm": 0.09515772014856339, "learning_rate": 3.7318543242666946e-05, "loss": 0.0351, "step": 73840 }, { "epoch": 0.36425, "grad_norm": 0.09603604674339294, "learning_rate": 3.731494605153366e-05, "loss": 0.0359, "step": 73850 }, { "epoch": 0.3643, "grad_norm": 0.09765617549419403, "learning_rate": 3.731134852371436e-05, "loss": 0.0365, "step": 73860 }, { "epoch": 0.36435, "grad_norm": 0.09156297892332077, "learning_rate": 3.730775065930744e-05, "loss": 0.0361, "step": 73870 }, { "epoch": 0.3644, "grad_norm": 0.08625298738479614, "learning_rate": 3.7304152458411226e-05, "loss": 0.0383, "step": 73880 }, { "epoch": 0.36445, "grad_norm": 0.08274701237678528, "learning_rate": 3.730055392112411e-05, "loss": 0.0373, "step": 73890 }, { "epoch": 0.3645, "grad_norm": 0.0882805809378624, "learning_rate": 3.729695504754447e-05, "loss": 0.0368, "step": 73900 }, { "epoch": 0.36455, "grad_norm": 0.09052780270576477, "learning_rate": 3.729335583777069e-05, "loss": 0.0359, "step": 73910 }, { "epoch": 0.3646, "grad_norm": 0.0941757932305336, "learning_rate": 3.728975629190119e-05, "loss": 0.0377, "step": 73920 }, { "epoch": 0.36465, "grad_norm": 0.11285432428121567, "learning_rate": 3.7286156410034374e-05, "loss": 0.0397, "step": 73930 }, { "epoch": 0.3647, "grad_norm": 0.09500918537378311, "learning_rate": 3.7282556192268646e-05, "loss": 0.0376, "step": 73940 }, { "epoch": 0.36475, "grad_norm": 0.11643466353416443, "learning_rate": 3.727895563870245e-05, "loss": 0.0425, "step": 73950 }, { "epoch": 0.3648, "grad_norm": 0.10746884346008301, "learning_rate": 3.7275354749434226e-05, "loss": 0.0405, "step": 73960 }, { "epoch": 0.36485, "grad_norm": 0.11454451084136963, "learning_rate": 3.727175352456241e-05, "loss": 0.0391, "step": 73970 }, { "epoch": 0.3649, "grad_norm": 0.10189974308013916, "learning_rate": 3.726815196418546e-05, "loss": 0.0396, "step": 73980 }, { "epoch": 0.36495, "grad_norm": 0.08168889582157135, "learning_rate": 3.7264550068401846e-05, "loss": 0.0369, "step": 73990 }, { "epoch": 0.365, "grad_norm": 0.09755031764507294, "learning_rate": 3.726094783731004e-05, "loss": 0.037, "step": 74000 }, { "epoch": 0.36505, "grad_norm": 0.08322950452566147, "learning_rate": 3.725734527100854e-05, "loss": 0.0365, "step": 74010 }, { "epoch": 0.3651, "grad_norm": 0.091013602912426, "learning_rate": 3.725374236959581e-05, "loss": 0.0378, "step": 74020 }, { "epoch": 0.36515, "grad_norm": 0.09770730882883072, "learning_rate": 3.725013913317037e-05, "loss": 0.0369, "step": 74030 }, { "epoch": 0.3652, "grad_norm": 0.07589123398065567, "learning_rate": 3.7246535561830725e-05, "loss": 0.0376, "step": 74040 }, { "epoch": 0.36525, "grad_norm": 0.0765521451830864, "learning_rate": 3.7242931655675404e-05, "loss": 0.0363, "step": 74050 }, { "epoch": 0.3653, "grad_norm": 0.07517808675765991, "learning_rate": 3.7239327414802925e-05, "loss": 0.0362, "step": 74060 }, { "epoch": 0.36535, "grad_norm": 0.076322041451931, "learning_rate": 3.7235722839311835e-05, "loss": 0.0352, "step": 74070 }, { "epoch": 0.3654, "grad_norm": 0.07294822484254837, "learning_rate": 3.723211792930069e-05, "loss": 0.0369, "step": 74080 }, { "epoch": 0.36545, "grad_norm": 0.0938844084739685, "learning_rate": 3.722851268486802e-05, "loss": 0.0374, "step": 74090 }, { "epoch": 0.3655, "grad_norm": 0.09001727402210236, "learning_rate": 3.7224907106112414e-05, "loss": 0.0372, "step": 74100 }, { "epoch": 0.36555, "grad_norm": 0.0800582766532898, "learning_rate": 3.722130119313245e-05, "loss": 0.0419, "step": 74110 }, { "epoch": 0.3656, "grad_norm": 0.07695163041353226, "learning_rate": 3.7217694946026695e-05, "loss": 0.0356, "step": 74120 }, { "epoch": 0.36565, "grad_norm": 0.09566215425729752, "learning_rate": 3.7214088364893744e-05, "loss": 0.0403, "step": 74130 }, { "epoch": 0.3657, "grad_norm": 0.07777071744203568, "learning_rate": 3.7210481449832215e-05, "loss": 0.0382, "step": 74140 }, { "epoch": 0.36575, "grad_norm": 0.10269416868686676, "learning_rate": 3.7206874200940705e-05, "loss": 0.037, "step": 74150 }, { "epoch": 0.3658, "grad_norm": 0.08395794779062271, "learning_rate": 3.720326661831784e-05, "loss": 0.0375, "step": 74160 }, { "epoch": 0.36585, "grad_norm": 0.09718802571296692, "learning_rate": 3.719965870206224e-05, "loss": 0.0374, "step": 74170 }, { "epoch": 0.3659, "grad_norm": 0.08130740374326706, "learning_rate": 3.719605045227258e-05, "loss": 0.0385, "step": 74180 }, { "epoch": 0.36595, "grad_norm": 0.0812409371137619, "learning_rate": 3.719244186904747e-05, "loss": 0.0368, "step": 74190 }, { "epoch": 0.366, "grad_norm": 0.0834796354174614, "learning_rate": 3.7188832952485574e-05, "loss": 0.0357, "step": 74200 }, { "epoch": 0.36605, "grad_norm": 0.07309699803590775, "learning_rate": 3.718522370268557e-05, "loss": 0.0373, "step": 74210 }, { "epoch": 0.3661, "grad_norm": 0.09182523936033249, "learning_rate": 3.718161411974613e-05, "loss": 0.0407, "step": 74220 }, { "epoch": 0.36615, "grad_norm": 0.10953807830810547, "learning_rate": 3.7178004203765925e-05, "loss": 0.0371, "step": 74230 }, { "epoch": 0.3662, "grad_norm": 0.08421969413757324, "learning_rate": 3.7174393954843675e-05, "loss": 0.0351, "step": 74240 }, { "epoch": 0.36625, "grad_norm": 0.1005755364894867, "learning_rate": 3.7170783373078054e-05, "loss": 0.0353, "step": 74250 }, { "epoch": 0.3663, "grad_norm": 0.08802418410778046, "learning_rate": 3.7167172458567804e-05, "loss": 0.0355, "step": 74260 }, { "epoch": 0.36635, "grad_norm": 0.1034860908985138, "learning_rate": 3.7163561211411615e-05, "loss": 0.0378, "step": 74270 }, { "epoch": 0.3664, "grad_norm": 0.0703473761677742, "learning_rate": 3.715994963170824e-05, "loss": 0.0371, "step": 74280 }, { "epoch": 0.36645, "grad_norm": 0.09141998738050461, "learning_rate": 3.71563377195564e-05, "loss": 0.0381, "step": 74290 }, { "epoch": 0.3665, "grad_norm": 0.07497433573007584, "learning_rate": 3.715272547505487e-05, "loss": 0.0347, "step": 74300 }, { "epoch": 0.36655, "grad_norm": 0.08348723500967026, "learning_rate": 3.714911289830238e-05, "loss": 0.0344, "step": 74310 }, { "epoch": 0.3666, "grad_norm": 0.0912565067410469, "learning_rate": 3.71454999893977e-05, "loss": 0.0344, "step": 74320 }, { "epoch": 0.36665, "grad_norm": 0.07660182565450668, "learning_rate": 3.714188674843963e-05, "loss": 0.0346, "step": 74330 }, { "epoch": 0.3667, "grad_norm": 0.076512411236763, "learning_rate": 3.7138273175526934e-05, "loss": 0.0361, "step": 74340 }, { "epoch": 0.36675, "grad_norm": 0.07564838975667953, "learning_rate": 3.71346592707584e-05, "loss": 0.0355, "step": 74350 }, { "epoch": 0.3668, "grad_norm": 0.08536522090435028, "learning_rate": 3.713104503423285e-05, "loss": 0.0356, "step": 74360 }, { "epoch": 0.36685, "grad_norm": 0.06882265210151672, "learning_rate": 3.712743046604908e-05, "loss": 0.036, "step": 74370 }, { "epoch": 0.3669, "grad_norm": 0.07784570008516312, "learning_rate": 3.7123815566305926e-05, "loss": 0.0358, "step": 74380 }, { "epoch": 0.36695, "grad_norm": 0.08287815749645233, "learning_rate": 3.712020033510221e-05, "loss": 0.0359, "step": 74390 }, { "epoch": 0.367, "grad_norm": 0.08039284497499466, "learning_rate": 3.711658477253676e-05, "loss": 0.0374, "step": 74400 }, { "epoch": 0.36705, "grad_norm": 0.0996885672211647, "learning_rate": 3.711296887870844e-05, "loss": 0.0379, "step": 74410 }, { "epoch": 0.3671, "grad_norm": 0.09353972226381302, "learning_rate": 3.710935265371609e-05, "loss": 0.0359, "step": 74420 }, { "epoch": 0.36715, "grad_norm": 0.08280579000711441, "learning_rate": 3.710573609765861e-05, "loss": 0.0371, "step": 74430 }, { "epoch": 0.3672, "grad_norm": 0.0952754020690918, "learning_rate": 3.710211921063483e-05, "loss": 0.0362, "step": 74440 }, { "epoch": 0.36725, "grad_norm": 0.08442886918783188, "learning_rate": 3.7098501992743675e-05, "loss": 0.0359, "step": 74450 }, { "epoch": 0.3673, "grad_norm": 0.09808932989835739, "learning_rate": 3.709488444408401e-05, "loss": 0.0375, "step": 74460 }, { "epoch": 0.36735, "grad_norm": 0.10025553405284882, "learning_rate": 3.7091266564754754e-05, "loss": 0.0369, "step": 74470 }, { "epoch": 0.3674, "grad_norm": 0.1075684130191803, "learning_rate": 3.70876483548548e-05, "loss": 0.0386, "step": 74480 }, { "epoch": 0.36745, "grad_norm": 0.13738977909088135, "learning_rate": 3.70840298144831e-05, "loss": 0.0374, "step": 74490 }, { "epoch": 0.3675, "grad_norm": 0.11115943640470505, "learning_rate": 3.7080410943738555e-05, "loss": 0.0374, "step": 74500 }, { "epoch": 0.36755, "grad_norm": 0.12851819396018982, "learning_rate": 3.7076791742720114e-05, "loss": 0.0368, "step": 74510 }, { "epoch": 0.3676, "grad_norm": 0.12918046116828918, "learning_rate": 3.7073172211526725e-05, "loss": 0.0366, "step": 74520 }, { "epoch": 0.36765, "grad_norm": 0.10422967374324799, "learning_rate": 3.706955235025734e-05, "loss": 0.0358, "step": 74530 }, { "epoch": 0.3677, "grad_norm": 0.10385806113481522, "learning_rate": 3.706593215901093e-05, "loss": 0.036, "step": 74540 }, { "epoch": 0.36775, "grad_norm": 0.10638242214918137, "learning_rate": 3.706231163788647e-05, "loss": 0.0366, "step": 74550 }, { "epoch": 0.3678, "grad_norm": 0.1260419636964798, "learning_rate": 3.705869078698294e-05, "loss": 0.0356, "step": 74560 }, { "epoch": 0.36785, "grad_norm": 0.09771779179573059, "learning_rate": 3.705506960639933e-05, "loss": 0.0352, "step": 74570 }, { "epoch": 0.3679, "grad_norm": 0.1114879623055458, "learning_rate": 3.705144809623465e-05, "loss": 0.037, "step": 74580 }, { "epoch": 0.36795, "grad_norm": 0.11442957818508148, "learning_rate": 3.70478262565879e-05, "loss": 0.0374, "step": 74590 }, { "epoch": 0.368, "grad_norm": 0.08828095346689224, "learning_rate": 3.704420408755812e-05, "loss": 0.0349, "step": 74600 }, { "epoch": 0.36805, "grad_norm": 0.08895467966794968, "learning_rate": 3.704058158924431e-05, "loss": 0.0353, "step": 74610 }, { "epoch": 0.3681, "grad_norm": 0.09710677713155746, "learning_rate": 3.7036958761745535e-05, "loss": 0.0371, "step": 74620 }, { "epoch": 0.36815, "grad_norm": 0.09256210178136826, "learning_rate": 3.7033335605160825e-05, "loss": 0.0355, "step": 74630 }, { "epoch": 0.3682, "grad_norm": 0.09806990623474121, "learning_rate": 3.702971211958924e-05, "loss": 0.0347, "step": 74640 }, { "epoch": 0.36825, "grad_norm": 0.08915026485919952, "learning_rate": 3.7026088305129845e-05, "loss": 0.0368, "step": 74650 }, { "epoch": 0.3683, "grad_norm": 0.07760807871818542, "learning_rate": 3.702246416188171e-05, "loss": 0.0352, "step": 74660 }, { "epoch": 0.36835, "grad_norm": 0.08770003914833069, "learning_rate": 3.701883968994392e-05, "loss": 0.0349, "step": 74670 }, { "epoch": 0.3684, "grad_norm": 0.09773367643356323, "learning_rate": 3.7015214889415585e-05, "loss": 0.037, "step": 74680 }, { "epoch": 0.36845, "grad_norm": 0.10205940157175064, "learning_rate": 3.701158976039577e-05, "loss": 0.0359, "step": 74690 }, { "epoch": 0.3685, "grad_norm": 0.09153249114751816, "learning_rate": 3.7007964302983614e-05, "loss": 0.0388, "step": 74700 }, { "epoch": 0.36855, "grad_norm": 0.09360933303833008, "learning_rate": 3.700433851727822e-05, "loss": 0.0378, "step": 74710 }, { "epoch": 0.3686, "grad_norm": 0.10425516963005066, "learning_rate": 3.700071240337873e-05, "loss": 0.0379, "step": 74720 }, { "epoch": 0.36865, "grad_norm": 0.08516538888216019, "learning_rate": 3.6997085961384256e-05, "loss": 0.0351, "step": 74730 }, { "epoch": 0.3687, "grad_norm": 0.0863916203379631, "learning_rate": 3.699345919139397e-05, "loss": 0.0354, "step": 74740 }, { "epoch": 0.36875, "grad_norm": 0.08580009639263153, "learning_rate": 3.6989832093507007e-05, "loss": 0.0363, "step": 74750 }, { "epoch": 0.3688, "grad_norm": 0.08505256474018097, "learning_rate": 3.698620466782255e-05, "loss": 0.0343, "step": 74760 }, { "epoch": 0.36885, "grad_norm": 0.07350530475378036, "learning_rate": 3.6982576914439756e-05, "loss": 0.0355, "step": 74770 }, { "epoch": 0.3689, "grad_norm": 0.08322486281394958, "learning_rate": 3.6978948833457805e-05, "loss": 0.0366, "step": 74780 }, { "epoch": 0.36895, "grad_norm": 0.0836733728647232, "learning_rate": 3.6975320424975904e-05, "loss": 0.0356, "step": 74790 }, { "epoch": 0.369, "grad_norm": 0.08269577473402023, "learning_rate": 3.697169168909323e-05, "loss": 0.0359, "step": 74800 }, { "epoch": 0.36905, "grad_norm": 0.08767407387495041, "learning_rate": 3.6968062625909005e-05, "loss": 0.0352, "step": 74810 }, { "epoch": 0.3691, "grad_norm": 0.07590517401695251, "learning_rate": 3.696443323552244e-05, "loss": 0.0389, "step": 74820 }, { "epoch": 0.36915, "grad_norm": 0.08965615928173065, "learning_rate": 3.696080351803278e-05, "loss": 0.0361, "step": 74830 }, { "epoch": 0.3692, "grad_norm": 0.08823589980602264, "learning_rate": 3.6957173473539236e-05, "loss": 0.0359, "step": 74840 }, { "epoch": 0.36925, "grad_norm": 0.07415693253278732, "learning_rate": 3.695354310214106e-05, "loss": 0.0356, "step": 74850 }, { "epoch": 0.3693, "grad_norm": 0.08243182301521301, "learning_rate": 3.6949912403937507e-05, "loss": 0.0353, "step": 74860 }, { "epoch": 0.36935, "grad_norm": 0.08238965272903442, "learning_rate": 3.694628137902785e-05, "loss": 0.0361, "step": 74870 }, { "epoch": 0.3694, "grad_norm": 0.07500246912240982, "learning_rate": 3.694265002751133e-05, "loss": 0.0349, "step": 74880 }, { "epoch": 0.36945, "grad_norm": 0.09064202755689621, "learning_rate": 3.693901834948726e-05, "loss": 0.0375, "step": 74890 }, { "epoch": 0.3695, "grad_norm": 0.0864512175321579, "learning_rate": 3.6935386345054904e-05, "loss": 0.0369, "step": 74900 }, { "epoch": 0.36955, "grad_norm": 0.09356842935085297, "learning_rate": 3.6931754014313575e-05, "loss": 0.0358, "step": 74910 }, { "epoch": 0.3696, "grad_norm": 0.08547563850879669, "learning_rate": 3.6928121357362564e-05, "loss": 0.0349, "step": 74920 }, { "epoch": 0.36965, "grad_norm": 0.08710693567991257, "learning_rate": 3.6924488374301206e-05, "loss": 0.0366, "step": 74930 }, { "epoch": 0.3697, "grad_norm": 0.08611670881509781, "learning_rate": 3.692085506522881e-05, "loss": 0.0356, "step": 74940 }, { "epoch": 0.36975, "grad_norm": 0.0835421234369278, "learning_rate": 3.691722143024472e-05, "loss": 0.0355, "step": 74950 }, { "epoch": 0.3698, "grad_norm": 0.07917075604200363, "learning_rate": 3.691358746944827e-05, "loss": 0.0355, "step": 74960 }, { "epoch": 0.36985, "grad_norm": 0.08740229904651642, "learning_rate": 3.690995318293882e-05, "loss": 0.0361, "step": 74970 }, { "epoch": 0.3699, "grad_norm": 0.09352272748947144, "learning_rate": 3.690631857081572e-05, "loss": 0.0366, "step": 74980 }, { "epoch": 0.36995, "grad_norm": 0.10072267055511475, "learning_rate": 3.690268363317834e-05, "loss": 0.0367, "step": 74990 }, { "epoch": 0.37, "grad_norm": 0.08957238495349884, "learning_rate": 3.689904837012606e-05, "loss": 0.0346, "step": 75000 }, { "epoch": 0.37005, "grad_norm": 0.08622787892818451, "learning_rate": 3.6895412781758276e-05, "loss": 0.0365, "step": 75010 }, { "epoch": 0.3701, "grad_norm": 0.11911823600530624, "learning_rate": 3.689177686817437e-05, "loss": 0.0388, "step": 75020 }, { "epoch": 0.37015, "grad_norm": 0.11229342222213745, "learning_rate": 3.688814062947375e-05, "loss": 0.0372, "step": 75030 }, { "epoch": 0.3702, "grad_norm": 0.10843716561794281, "learning_rate": 3.688450406575584e-05, "loss": 0.0356, "step": 75040 }, { "epoch": 0.37025, "grad_norm": 0.09995651245117188, "learning_rate": 3.688086717712004e-05, "loss": 0.0355, "step": 75050 }, { "epoch": 0.3703, "grad_norm": 0.09323858469724655, "learning_rate": 3.6877229963665805e-05, "loss": 0.0347, "step": 75060 }, { "epoch": 0.37035, "grad_norm": 0.1024266853928566, "learning_rate": 3.6873592425492564e-05, "loss": 0.0363, "step": 75070 }, { "epoch": 0.3704, "grad_norm": 0.11296865344047546, "learning_rate": 3.686995456269977e-05, "loss": 0.0348, "step": 75080 }, { "epoch": 0.37045, "grad_norm": 0.087582528591156, "learning_rate": 3.686631637538687e-05, "loss": 0.0345, "step": 75090 }, { "epoch": 0.3705, "grad_norm": 0.11643913388252258, "learning_rate": 3.6862677863653345e-05, "loss": 0.0372, "step": 75100 }, { "epoch": 0.37055, "grad_norm": 0.08716107904911041, "learning_rate": 3.685903902759866e-05, "loss": 0.036, "step": 75110 }, { "epoch": 0.3706, "grad_norm": 0.0931491106748581, "learning_rate": 3.68553998673223e-05, "loss": 0.0361, "step": 75120 }, { "epoch": 0.37065, "grad_norm": 0.08382998406887054, "learning_rate": 3.6851760382923764e-05, "loss": 0.0344, "step": 75130 }, { "epoch": 0.3707, "grad_norm": 0.069289930164814, "learning_rate": 3.6848120574502555e-05, "loss": 0.0376, "step": 75140 }, { "epoch": 0.37075, "grad_norm": 0.08519791066646576, "learning_rate": 3.684448044215817e-05, "loss": 0.035, "step": 75150 }, { "epoch": 0.3708, "grad_norm": 0.10274061560630798, "learning_rate": 3.6840839985990154e-05, "loss": 0.037, "step": 75160 }, { "epoch": 0.37085, "grad_norm": 0.10534089058637619, "learning_rate": 3.6837199206098015e-05, "loss": 0.0396, "step": 75170 }, { "epoch": 0.3709, "grad_norm": 0.09910701960325241, "learning_rate": 3.683355810258129e-05, "loss": 0.0362, "step": 75180 }, { "epoch": 0.37095, "grad_norm": 0.07578156143426895, "learning_rate": 3.682991667553954e-05, "loss": 0.0359, "step": 75190 }, { "epoch": 0.371, "grad_norm": 0.09559353440999985, "learning_rate": 3.682627492507232e-05, "loss": 0.0378, "step": 75200 }, { "epoch": 0.37105, "grad_norm": 0.08370592445135117, "learning_rate": 3.6822632851279174e-05, "loss": 0.0363, "step": 75210 }, { "epoch": 0.3711, "grad_norm": 0.12570084631443024, "learning_rate": 3.68189904542597e-05, "loss": 0.0376, "step": 75220 }, { "epoch": 0.37115, "grad_norm": 0.07416093349456787, "learning_rate": 3.681534773411345e-05, "loss": 0.036, "step": 75230 }, { "epoch": 0.3712, "grad_norm": 0.08501870185136795, "learning_rate": 3.681170469094004e-05, "loss": 0.0383, "step": 75240 }, { "epoch": 0.37125, "grad_norm": 0.08055275678634644, "learning_rate": 3.680806132483906e-05, "loss": 0.0416, "step": 75250 }, { "epoch": 0.3713, "grad_norm": 0.08922593295574188, "learning_rate": 3.6804417635910123e-05, "loss": 0.0391, "step": 75260 }, { "epoch": 0.37135, "grad_norm": 0.11191854625940323, "learning_rate": 3.680077362425284e-05, "loss": 0.0388, "step": 75270 }, { "epoch": 0.3714, "grad_norm": 0.08514377474784851, "learning_rate": 3.6797129289966835e-05, "loss": 0.0373, "step": 75280 }, { "epoch": 0.37145, "grad_norm": 0.09239083528518677, "learning_rate": 3.679348463315176e-05, "loss": 0.0364, "step": 75290 }, { "epoch": 0.3715, "grad_norm": 0.0984504297375679, "learning_rate": 3.678983965390723e-05, "loss": 0.0366, "step": 75300 }, { "epoch": 0.37155, "grad_norm": 0.08384273201227188, "learning_rate": 3.678619435233292e-05, "loss": 0.0368, "step": 75310 }, { "epoch": 0.3716, "grad_norm": 0.07963405549526215, "learning_rate": 3.6782548728528485e-05, "loss": 0.0394, "step": 75320 }, { "epoch": 0.37165, "grad_norm": 0.10861895978450775, "learning_rate": 3.6778902782593594e-05, "loss": 0.0361, "step": 75330 }, { "epoch": 0.3717, "grad_norm": 0.10098570585250854, "learning_rate": 3.6775256514627925e-05, "loss": 0.0377, "step": 75340 }, { "epoch": 0.37175, "grad_norm": 0.08465471863746643, "learning_rate": 3.677160992473117e-05, "loss": 0.0385, "step": 75350 }, { "epoch": 0.3718, "grad_norm": 0.10758759826421738, "learning_rate": 3.676796301300302e-05, "loss": 0.0388, "step": 75360 }, { "epoch": 0.37185, "grad_norm": 0.08927234262228012, "learning_rate": 3.676431577954318e-05, "loss": 0.0363, "step": 75370 }, { "epoch": 0.3719, "grad_norm": 0.07932307571172714, "learning_rate": 3.6760668224451365e-05, "loss": 0.0385, "step": 75380 }, { "epoch": 0.37195, "grad_norm": 0.08853064477443695, "learning_rate": 3.675702034782731e-05, "loss": 0.0363, "step": 75390 }, { "epoch": 0.372, "grad_norm": 0.09992717951536179, "learning_rate": 3.675337214977073e-05, "loss": 0.0369, "step": 75400 }, { "epoch": 0.37205, "grad_norm": 0.13069744408130646, "learning_rate": 3.674972363038137e-05, "loss": 0.0369, "step": 75410 }, { "epoch": 0.3721, "grad_norm": 0.11567940562963486, "learning_rate": 3.674607478975898e-05, "loss": 0.0364, "step": 75420 }, { "epoch": 0.37215, "grad_norm": 0.08629253506660461, "learning_rate": 3.6742425628003316e-05, "loss": 0.0361, "step": 75430 }, { "epoch": 0.3722, "grad_norm": 0.0733034536242485, "learning_rate": 3.673877614521414e-05, "loss": 0.0361, "step": 75440 }, { "epoch": 0.37225, "grad_norm": 0.08365897089242935, "learning_rate": 3.6735126341491244e-05, "loss": 0.0363, "step": 75450 }, { "epoch": 0.3723, "grad_norm": 0.0908847376704216, "learning_rate": 3.67314762169344e-05, "loss": 0.0353, "step": 75460 }, { "epoch": 0.37235, "grad_norm": 0.10139103978872299, "learning_rate": 3.672782577164341e-05, "loss": 0.0346, "step": 75470 }, { "epoch": 0.3724, "grad_norm": 0.09700726717710495, "learning_rate": 3.672417500571806e-05, "loss": 0.0357, "step": 75480 }, { "epoch": 0.37245, "grad_norm": 0.09164400398731232, "learning_rate": 3.672052391925817e-05, "loss": 0.038, "step": 75490 }, { "epoch": 0.3725, "grad_norm": 0.1114017441868782, "learning_rate": 3.6716872512363566e-05, "loss": 0.0364, "step": 75500 }, { "epoch": 0.37255, "grad_norm": 0.09311135858297348, "learning_rate": 3.6713220785134064e-05, "loss": 0.0351, "step": 75510 }, { "epoch": 0.3726, "grad_norm": 0.09030146896839142, "learning_rate": 3.6709568737669505e-05, "loss": 0.0376, "step": 75520 }, { "epoch": 0.37265, "grad_norm": 0.08873016387224197, "learning_rate": 3.670591637006974e-05, "loss": 0.0362, "step": 75530 }, { "epoch": 0.3727, "grad_norm": 0.10129522532224655, "learning_rate": 3.6702263682434626e-05, "loss": 0.0371, "step": 75540 }, { "epoch": 0.37275, "grad_norm": 0.08039398491382599, "learning_rate": 3.6698610674864e-05, "loss": 0.037, "step": 75550 }, { "epoch": 0.3728, "grad_norm": 0.0955590084195137, "learning_rate": 3.669495734745777e-05, "loss": 0.0357, "step": 75560 }, { "epoch": 0.37285, "grad_norm": 0.08611071854829788, "learning_rate": 3.6691303700315796e-05, "loss": 0.0356, "step": 75570 }, { "epoch": 0.3729, "grad_norm": 0.08073154091835022, "learning_rate": 3.6687649733537964e-05, "loss": 0.0344, "step": 75580 }, { "epoch": 0.37295, "grad_norm": 0.10840560495853424, "learning_rate": 3.668399544722418e-05, "loss": 0.0367, "step": 75590 }, { "epoch": 0.373, "grad_norm": 0.07460062950849533, "learning_rate": 3.668034084147436e-05, "loss": 0.0363, "step": 75600 }, { "epoch": 0.37305, "grad_norm": 0.08653230220079422, "learning_rate": 3.667668591638841e-05, "loss": 0.0346, "step": 75610 }, { "epoch": 0.3731, "grad_norm": 0.08363914489746094, "learning_rate": 3.6673030672066245e-05, "loss": 0.0363, "step": 75620 }, { "epoch": 0.37315, "grad_norm": 0.08001896739006042, "learning_rate": 3.666937510860781e-05, "loss": 0.0354, "step": 75630 }, { "epoch": 0.3732, "grad_norm": 0.10220906883478165, "learning_rate": 3.6665719226113035e-05, "loss": 0.0364, "step": 75640 }, { "epoch": 0.37325, "grad_norm": 0.07813125103712082, "learning_rate": 3.666206302468189e-05, "loss": 0.0371, "step": 75650 }, { "epoch": 0.3733, "grad_norm": 0.08085132390260696, "learning_rate": 3.6658406504414325e-05, "loss": 0.0379, "step": 75660 }, { "epoch": 0.37335, "grad_norm": 0.10213213413953781, "learning_rate": 3.66547496654103e-05, "loss": 0.0393, "step": 75670 }, { "epoch": 0.3734, "grad_norm": 0.10202323645353317, "learning_rate": 3.66510925077698e-05, "loss": 0.0374, "step": 75680 }, { "epoch": 0.37345, "grad_norm": 0.08972320705652237, "learning_rate": 3.6647435031592804e-05, "loss": 0.0372, "step": 75690 }, { "epoch": 0.3735, "grad_norm": 0.10474187880754471, "learning_rate": 3.6643777236979314e-05, "loss": 0.0379, "step": 75700 }, { "epoch": 0.37355, "grad_norm": 0.10356242209672928, "learning_rate": 3.664011912402933e-05, "loss": 0.0379, "step": 75710 }, { "epoch": 0.3736, "grad_norm": 0.11297213286161423, "learning_rate": 3.6636460692842855e-05, "loss": 0.0382, "step": 75720 }, { "epoch": 0.37365, "grad_norm": 0.10405473411083221, "learning_rate": 3.663280194351992e-05, "loss": 0.0365, "step": 75730 }, { "epoch": 0.3737, "grad_norm": 0.07534322887659073, "learning_rate": 3.6629142876160546e-05, "loss": 0.0388, "step": 75740 }, { "epoch": 0.37375, "grad_norm": 0.0884098932147026, "learning_rate": 3.662548349086478e-05, "loss": 0.0369, "step": 75750 }, { "epoch": 0.3738, "grad_norm": 0.08216243982315063, "learning_rate": 3.662182378773267e-05, "loss": 0.0348, "step": 75760 }, { "epoch": 0.37385, "grad_norm": 0.07160743325948715, "learning_rate": 3.661816376686425e-05, "loss": 0.0363, "step": 75770 }, { "epoch": 0.3739, "grad_norm": 0.08498039096593857, "learning_rate": 3.6614503428359606e-05, "loss": 0.0339, "step": 75780 }, { "epoch": 0.37395, "grad_norm": 0.09113490581512451, "learning_rate": 3.66108427723188e-05, "loss": 0.0359, "step": 75790 }, { "epoch": 0.374, "grad_norm": 0.08843359351158142, "learning_rate": 3.660718179884191e-05, "loss": 0.0351, "step": 75800 }, { "epoch": 0.37405, "grad_norm": 0.09002623707056046, "learning_rate": 3.660352050802904e-05, "loss": 0.0373, "step": 75810 }, { "epoch": 0.3741, "grad_norm": 0.10079097002744675, "learning_rate": 3.6599858899980265e-05, "loss": 0.0362, "step": 75820 }, { "epoch": 0.37415, "grad_norm": 0.07080857455730438, "learning_rate": 3.6596196974795714e-05, "loss": 0.0348, "step": 75830 }, { "epoch": 0.3742, "grad_norm": 0.0856006070971489, "learning_rate": 3.65925347325755e-05, "loss": 0.0363, "step": 75840 }, { "epoch": 0.37425, "grad_norm": 0.07604727894067764, "learning_rate": 3.658887217341973e-05, "loss": 0.0351, "step": 75850 }, { "epoch": 0.3743, "grad_norm": 0.08135710656642914, "learning_rate": 3.658520929742855e-05, "loss": 0.0351, "step": 75860 }, { "epoch": 0.37435, "grad_norm": 0.07487417757511139, "learning_rate": 3.658154610470211e-05, "loss": 0.0346, "step": 75870 }, { "epoch": 0.3744, "grad_norm": 0.08480235934257507, "learning_rate": 3.657788259534054e-05, "loss": 0.0359, "step": 75880 }, { "epoch": 0.37445, "grad_norm": 0.07889080792665482, "learning_rate": 3.657421876944401e-05, "loss": 0.0352, "step": 75890 }, { "epoch": 0.3745, "grad_norm": 0.10762711614370346, "learning_rate": 3.6570554627112693e-05, "loss": 0.037, "step": 75900 }, { "epoch": 0.37455, "grad_norm": 0.11260473728179932, "learning_rate": 3.656689016844676e-05, "loss": 0.0369, "step": 75910 }, { "epoch": 0.3746, "grad_norm": 0.08627483248710632, "learning_rate": 3.656322539354639e-05, "loss": 0.0376, "step": 75920 }, { "epoch": 0.37465, "grad_norm": 0.08173470199108124, "learning_rate": 3.6559560302511785e-05, "loss": 0.0355, "step": 75930 }, { "epoch": 0.3747, "grad_norm": 0.07806728780269623, "learning_rate": 3.655589489544314e-05, "loss": 0.0355, "step": 75940 }, { "epoch": 0.37475, "grad_norm": 0.11050267517566681, "learning_rate": 3.655222917244068e-05, "loss": 0.0366, "step": 75950 }, { "epoch": 0.3748, "grad_norm": 0.08523990213871002, "learning_rate": 3.65485631336046e-05, "loss": 0.0359, "step": 75960 }, { "epoch": 0.37485, "grad_norm": 0.08433665335178375, "learning_rate": 3.6544896779035154e-05, "loss": 0.037, "step": 75970 }, { "epoch": 0.3749, "grad_norm": 0.09047384560108185, "learning_rate": 3.654123010883256e-05, "loss": 0.038, "step": 75980 }, { "epoch": 0.37495, "grad_norm": 0.08714261651039124, "learning_rate": 3.6537563123097075e-05, "loss": 0.037, "step": 75990 }, { "epoch": 0.375, "grad_norm": 0.08978958427906036, "learning_rate": 3.653389582192895e-05, "loss": 0.0396, "step": 76000 }, { "epoch": 0.37505, "grad_norm": 0.11172284185886383, "learning_rate": 3.653022820542844e-05, "loss": 0.0387, "step": 76010 }, { "epoch": 0.3751, "grad_norm": 0.08433420956134796, "learning_rate": 3.652656027369583e-05, "loss": 0.0369, "step": 76020 }, { "epoch": 0.37515, "grad_norm": 0.09933721274137497, "learning_rate": 3.652289202683138e-05, "loss": 0.0372, "step": 76030 }, { "epoch": 0.3752, "grad_norm": 0.09172473847866058, "learning_rate": 3.6519223464935406e-05, "loss": 0.0365, "step": 76040 }, { "epoch": 0.37525, "grad_norm": 0.08624282479286194, "learning_rate": 3.651555458810818e-05, "loss": 0.0371, "step": 76050 }, { "epoch": 0.3753, "grad_norm": 0.10207632929086685, "learning_rate": 3.651188539645002e-05, "loss": 0.0376, "step": 76060 }, { "epoch": 0.37535, "grad_norm": 0.08815249055624008, "learning_rate": 3.650821589006124e-05, "loss": 0.0369, "step": 76070 }, { "epoch": 0.3754, "grad_norm": 0.08788152039051056, "learning_rate": 3.650454606904216e-05, "loss": 0.0383, "step": 76080 }, { "epoch": 0.37545, "grad_norm": 0.08695410192012787, "learning_rate": 3.650087593349311e-05, "loss": 0.0359, "step": 76090 }, { "epoch": 0.3755, "grad_norm": 0.08386833220720291, "learning_rate": 3.649720548351444e-05, "loss": 0.0371, "step": 76100 }, { "epoch": 0.37555, "grad_norm": 0.13007734715938568, "learning_rate": 3.649353471920649e-05, "loss": 0.0388, "step": 76110 }, { "epoch": 0.3756, "grad_norm": 0.07977231591939926, "learning_rate": 3.648986364066962e-05, "loss": 0.0389, "step": 76120 }, { "epoch": 0.37565, "grad_norm": 0.08852393180131912, "learning_rate": 3.648619224800419e-05, "loss": 0.0372, "step": 76130 }, { "epoch": 0.3757, "grad_norm": 0.09893860667943954, "learning_rate": 3.648252054131057e-05, "loss": 0.0362, "step": 76140 }, { "epoch": 0.37575, "grad_norm": 0.0855567455291748, "learning_rate": 3.647884852068916e-05, "loss": 0.0367, "step": 76150 }, { "epoch": 0.3758, "grad_norm": 0.07578399777412415, "learning_rate": 3.647517618624035e-05, "loss": 0.0392, "step": 76160 }, { "epoch": 0.37585, "grad_norm": 0.07223352044820786, "learning_rate": 3.6471503538064527e-05, "loss": 0.0358, "step": 76170 }, { "epoch": 0.3759, "grad_norm": 0.07542915642261505, "learning_rate": 3.6467830576262114e-05, "loss": 0.0364, "step": 76180 }, { "epoch": 0.37595, "grad_norm": 0.07215103507041931, "learning_rate": 3.646415730093352e-05, "loss": 0.0364, "step": 76190 }, { "epoch": 0.376, "grad_norm": 0.08985480666160583, "learning_rate": 3.6460483712179164e-05, "loss": 0.0364, "step": 76200 }, { "epoch": 0.37605, "grad_norm": 0.09463430196046829, "learning_rate": 3.645680981009949e-05, "loss": 0.0366, "step": 76210 }, { "epoch": 0.3761, "grad_norm": 0.08468139916658401, "learning_rate": 3.645313559479495e-05, "loss": 0.0355, "step": 76220 }, { "epoch": 0.37615, "grad_norm": 0.10156545788049698, "learning_rate": 3.644946106636598e-05, "loss": 0.042, "step": 76230 }, { "epoch": 0.3762, "grad_norm": 0.09742777049541473, "learning_rate": 3.6445786224913036e-05, "loss": 0.0375, "step": 76240 }, { "epoch": 0.37625, "grad_norm": 0.08506327122449875, "learning_rate": 3.644211107053661e-05, "loss": 0.0359, "step": 76250 }, { "epoch": 0.3763, "grad_norm": 0.08412918448448181, "learning_rate": 3.643843560333716e-05, "loss": 0.0355, "step": 76260 }, { "epoch": 0.37635, "grad_norm": 0.09755247086286545, "learning_rate": 3.643475982341518e-05, "loss": 0.0368, "step": 76270 }, { "epoch": 0.3764, "grad_norm": 0.08067236840724945, "learning_rate": 3.6431083730871165e-05, "loss": 0.037, "step": 76280 }, { "epoch": 0.37645, "grad_norm": 0.09624320268630981, "learning_rate": 3.6427407325805615e-05, "loss": 0.0379, "step": 76290 }, { "epoch": 0.3765, "grad_norm": 0.07825248688459396, "learning_rate": 3.6423730608319036e-05, "loss": 0.0385, "step": 76300 }, { "epoch": 0.37655, "grad_norm": 0.08972030133008957, "learning_rate": 3.642005357851196e-05, "loss": 0.0359, "step": 76310 }, { "epoch": 0.3766, "grad_norm": 0.10110962390899658, "learning_rate": 3.64163762364849e-05, "loss": 0.0378, "step": 76320 }, { "epoch": 0.37665, "grad_norm": 0.09208884090185165, "learning_rate": 3.641269858233841e-05, "loss": 0.0372, "step": 76330 }, { "epoch": 0.3767, "grad_norm": 0.07630440592765808, "learning_rate": 3.6409020616173024e-05, "loss": 0.0361, "step": 76340 }, { "epoch": 0.37675, "grad_norm": 0.08968579024076462, "learning_rate": 3.640534233808931e-05, "loss": 0.0359, "step": 76350 }, { "epoch": 0.3768, "grad_norm": 0.07915177196264267, "learning_rate": 3.640166374818781e-05, "loss": 0.0371, "step": 76360 }, { "epoch": 0.37685, "grad_norm": 0.08181800693273544, "learning_rate": 3.6397984846569114e-05, "loss": 0.0369, "step": 76370 }, { "epoch": 0.3769, "grad_norm": 0.08259347826242447, "learning_rate": 3.639430563333379e-05, "loss": 0.035, "step": 76380 }, { "epoch": 0.37695, "grad_norm": 0.07583151012659073, "learning_rate": 3.639062610858243e-05, "loss": 0.0366, "step": 76390 }, { "epoch": 0.377, "grad_norm": 0.08552539348602295, "learning_rate": 3.6386946272415636e-05, "loss": 0.0366, "step": 76400 }, { "epoch": 0.37705, "grad_norm": 0.09559700638055801, "learning_rate": 3.638326612493401e-05, "loss": 0.0376, "step": 76410 }, { "epoch": 0.3771, "grad_norm": 0.08619485050439835, "learning_rate": 3.637958566623816e-05, "loss": 0.037, "step": 76420 }, { "epoch": 0.37715, "grad_norm": 0.080460324883461, "learning_rate": 3.637590489642871e-05, "loss": 0.0371, "step": 76430 }, { "epoch": 0.3772, "grad_norm": 0.07482301443815231, "learning_rate": 3.63722238156063e-05, "loss": 0.0361, "step": 76440 }, { "epoch": 0.37725, "grad_norm": 0.08932841569185257, "learning_rate": 3.636854242387156e-05, "loss": 0.0375, "step": 76450 }, { "epoch": 0.3773, "grad_norm": 0.10034222155809402, "learning_rate": 3.6364860721325145e-05, "loss": 0.0365, "step": 76460 }, { "epoch": 0.37735, "grad_norm": 0.11923787742853165, "learning_rate": 3.6361178708067705e-05, "loss": 0.039, "step": 76470 }, { "epoch": 0.3774, "grad_norm": 0.09061778336763382, "learning_rate": 3.635749638419991e-05, "loss": 0.0363, "step": 76480 }, { "epoch": 0.37745, "grad_norm": 0.08245281875133514, "learning_rate": 3.6353813749822425e-05, "loss": 0.0385, "step": 76490 }, { "epoch": 0.3775, "grad_norm": 0.0959947481751442, "learning_rate": 3.635013080503594e-05, "loss": 0.0379, "step": 76500 }, { "epoch": 0.37755, "grad_norm": 0.08744388073682785, "learning_rate": 3.6346447549941145e-05, "loss": 0.0365, "step": 76510 }, { "epoch": 0.3776, "grad_norm": 0.0963020771741867, "learning_rate": 3.634276398463873e-05, "loss": 0.0378, "step": 76520 }, { "epoch": 0.37765, "grad_norm": 0.09702512621879578, "learning_rate": 3.633908010922941e-05, "loss": 0.0392, "step": 76530 }, { "epoch": 0.3777, "grad_norm": 0.09394404292106628, "learning_rate": 3.6335395923813906e-05, "loss": 0.0381, "step": 76540 }, { "epoch": 0.37775, "grad_norm": 0.100669726729393, "learning_rate": 3.6331711428492934e-05, "loss": 0.0378, "step": 76550 }, { "epoch": 0.3778, "grad_norm": 0.10889852792024612, "learning_rate": 3.6328026623367236e-05, "loss": 0.0382, "step": 76560 }, { "epoch": 0.37785, "grad_norm": 0.09627027064561844, "learning_rate": 3.6324341508537534e-05, "loss": 0.0389, "step": 76570 }, { "epoch": 0.3779, "grad_norm": 0.11300645023584366, "learning_rate": 3.632065608410459e-05, "loss": 0.037, "step": 76580 }, { "epoch": 0.37795, "grad_norm": 0.09655319899320602, "learning_rate": 3.631697035016917e-05, "loss": 0.0367, "step": 76590 }, { "epoch": 0.378, "grad_norm": 0.09337843954563141, "learning_rate": 3.631328430683203e-05, "loss": 0.0378, "step": 76600 }, { "epoch": 0.37805, "grad_norm": 0.08331812173128128, "learning_rate": 3.630959795419394e-05, "loss": 0.0402, "step": 76610 }, { "epoch": 0.3781, "grad_norm": 0.08409467339515686, "learning_rate": 3.6305911292355696e-05, "loss": 0.0361, "step": 76620 }, { "epoch": 0.37815, "grad_norm": 0.08916690200567245, "learning_rate": 3.630222432141808e-05, "loss": 0.0381, "step": 76630 }, { "epoch": 0.3782, "grad_norm": 0.07716446369886398, "learning_rate": 3.6298537041481907e-05, "loss": 0.0352, "step": 76640 }, { "epoch": 0.37825, "grad_norm": 0.07186071574687958, "learning_rate": 3.629484945264797e-05, "loss": 0.0353, "step": 76650 }, { "epoch": 0.3783, "grad_norm": 0.09080082923173904, "learning_rate": 3.629116155501709e-05, "loss": 0.0341, "step": 76660 }, { "epoch": 0.37835, "grad_norm": 0.10306417942047119, "learning_rate": 3.628747334869009e-05, "loss": 0.0353, "step": 76670 }, { "epoch": 0.3784, "grad_norm": 0.0900692418217659, "learning_rate": 3.6283784833767824e-05, "loss": 0.0354, "step": 76680 }, { "epoch": 0.37845, "grad_norm": 0.0794641375541687, "learning_rate": 3.628009601035111e-05, "loss": 0.0344, "step": 76690 }, { "epoch": 0.3785, "grad_norm": 0.0730503723025322, "learning_rate": 3.627640687854081e-05, "loss": 0.0338, "step": 76700 }, { "epoch": 0.37855, "grad_norm": 0.07156781852245331, "learning_rate": 3.627271743843779e-05, "loss": 0.0344, "step": 76710 }, { "epoch": 0.3786, "grad_norm": 0.07406982779502869, "learning_rate": 3.62690276901429e-05, "loss": 0.0355, "step": 76720 }, { "epoch": 0.37865, "grad_norm": 0.06705506891012192, "learning_rate": 3.626533763375703e-05, "loss": 0.0375, "step": 76730 }, { "epoch": 0.3787, "grad_norm": 0.08933977782726288, "learning_rate": 3.626164726938106e-05, "loss": 0.0367, "step": 76740 }, { "epoch": 0.37875, "grad_norm": 0.0913522019982338, "learning_rate": 3.625795659711589e-05, "loss": 0.035, "step": 76750 }, { "epoch": 0.3788, "grad_norm": 0.08894678950309753, "learning_rate": 3.625426561706241e-05, "loss": 0.0366, "step": 76760 }, { "epoch": 0.37885, "grad_norm": 0.08685548603534698, "learning_rate": 3.6250574329321535e-05, "loss": 0.0356, "step": 76770 }, { "epoch": 0.3789, "grad_norm": 0.09826608002185822, "learning_rate": 3.624688273399419e-05, "loss": 0.0363, "step": 76780 }, { "epoch": 0.37895, "grad_norm": 0.08041828870773315, "learning_rate": 3.624319083118129e-05, "loss": 0.0363, "step": 76790 }, { "epoch": 0.379, "grad_norm": 0.10084494203329086, "learning_rate": 3.623949862098378e-05, "loss": 0.0352, "step": 76800 }, { "epoch": 0.37905, "grad_norm": 0.08826006203889847, "learning_rate": 3.623580610350261e-05, "loss": 0.0351, "step": 76810 }, { "epoch": 0.3791, "grad_norm": 0.0710434690117836, "learning_rate": 3.623211327883871e-05, "loss": 0.0352, "step": 76820 }, { "epoch": 0.37915, "grad_norm": 0.07930386066436768, "learning_rate": 3.622842014709305e-05, "loss": 0.0338, "step": 76830 }, { "epoch": 0.3792, "grad_norm": 0.10032349079847336, "learning_rate": 3.622472670836661e-05, "loss": 0.0365, "step": 76840 }, { "epoch": 0.37925, "grad_norm": 0.08352944254875183, "learning_rate": 3.6221032962760354e-05, "loss": 0.0355, "step": 76850 }, { "epoch": 0.3793, "grad_norm": 0.10487186908721924, "learning_rate": 3.621733891037527e-05, "loss": 0.0367, "step": 76860 }, { "epoch": 0.37935, "grad_norm": 0.08631854504346848, "learning_rate": 3.621364455131236e-05, "loss": 0.036, "step": 76870 }, { "epoch": 0.3794, "grad_norm": 0.08653370290994644, "learning_rate": 3.620994988567262e-05, "loss": 0.0375, "step": 76880 }, { "epoch": 0.37945, "grad_norm": 0.08340851217508316, "learning_rate": 3.6206254913557065e-05, "loss": 0.0372, "step": 76890 }, { "epoch": 0.3795, "grad_norm": 0.09805815666913986, "learning_rate": 3.62025596350667e-05, "loss": 0.0376, "step": 76900 }, { "epoch": 0.37955, "grad_norm": 0.08484770357608795, "learning_rate": 3.6198864050302574e-05, "loss": 0.0376, "step": 76910 }, { "epoch": 0.3796, "grad_norm": 0.07439166307449341, "learning_rate": 3.61951681593657e-05, "loss": 0.0396, "step": 76920 }, { "epoch": 0.37965, "grad_norm": 0.09397024661302567, "learning_rate": 3.619147196235715e-05, "loss": 0.0349, "step": 76930 }, { "epoch": 0.3797, "grad_norm": 0.09400299191474915, "learning_rate": 3.618777545937795e-05, "loss": 0.0387, "step": 76940 }, { "epoch": 0.37975, "grad_norm": 0.06988395750522614, "learning_rate": 3.6184078650529175e-05, "loss": 0.0357, "step": 76950 }, { "epoch": 0.3798, "grad_norm": 0.0905274897813797, "learning_rate": 3.618038153591189e-05, "loss": 0.0394, "step": 76960 }, { "epoch": 0.37985, "grad_norm": 0.1146807000041008, "learning_rate": 3.617668411562717e-05, "loss": 0.037, "step": 76970 }, { "epoch": 0.3799, "grad_norm": 0.08603715151548386, "learning_rate": 3.617298638977611e-05, "loss": 0.0379, "step": 76980 }, { "epoch": 0.37995, "grad_norm": 0.08420335501432419, "learning_rate": 3.61692883584598e-05, "loss": 0.0379, "step": 76990 }, { "epoch": 0.38, "grad_norm": 0.08034734427928925, "learning_rate": 3.616559002177935e-05, "loss": 0.0377, "step": 77000 }, { "epoch": 0.38005, "grad_norm": 0.1009376272559166, "learning_rate": 3.616189137983586e-05, "loss": 0.0381, "step": 77010 }, { "epoch": 0.3801, "grad_norm": 0.0851750373840332, "learning_rate": 3.6158192432730444e-05, "loss": 0.0357, "step": 77020 }, { "epoch": 0.38015, "grad_norm": 0.09277315437793732, "learning_rate": 3.615449318056424e-05, "loss": 0.0379, "step": 77030 }, { "epoch": 0.3802, "grad_norm": 0.07979048788547516, "learning_rate": 3.615079362343839e-05, "loss": 0.0365, "step": 77040 }, { "epoch": 0.38025, "grad_norm": 0.09772320091724396, "learning_rate": 3.614709376145402e-05, "loss": 0.0372, "step": 77050 }, { "epoch": 0.3803, "grad_norm": 0.07867799699306488, "learning_rate": 3.614339359471231e-05, "loss": 0.0368, "step": 77060 }, { "epoch": 0.38035, "grad_norm": 0.08486375212669373, "learning_rate": 3.61396931233144e-05, "loss": 0.0365, "step": 77070 }, { "epoch": 0.3804, "grad_norm": 0.09690821915864944, "learning_rate": 3.613599234736146e-05, "loss": 0.0368, "step": 77080 }, { "epoch": 0.38045, "grad_norm": 0.0905151292681694, "learning_rate": 3.613229126695467e-05, "loss": 0.0355, "step": 77090 }, { "epoch": 0.3805, "grad_norm": 0.08214130252599716, "learning_rate": 3.612858988219523e-05, "loss": 0.0354, "step": 77100 }, { "epoch": 0.38055, "grad_norm": 0.09367689490318298, "learning_rate": 3.612488819318431e-05, "loss": 0.0356, "step": 77110 }, { "epoch": 0.3806, "grad_norm": 0.08685409277677536, "learning_rate": 3.612118620002314e-05, "loss": 0.0354, "step": 77120 }, { "epoch": 0.38065, "grad_norm": 0.09349635243415833, "learning_rate": 3.6117483902812914e-05, "loss": 0.0369, "step": 77130 }, { "epoch": 0.3807, "grad_norm": 0.0709528923034668, "learning_rate": 3.611378130165486e-05, "loss": 0.0358, "step": 77140 }, { "epoch": 0.38075, "grad_norm": 0.07947801798582077, "learning_rate": 3.6110078396650186e-05, "loss": 0.0336, "step": 77150 }, { "epoch": 0.3808, "grad_norm": 0.06851892918348312, "learning_rate": 3.6106375187900146e-05, "loss": 0.0336, "step": 77160 }, { "epoch": 0.38085, "grad_norm": 0.0975281223654747, "learning_rate": 3.610267167550599e-05, "loss": 0.0362, "step": 77170 }, { "epoch": 0.3809, "grad_norm": 0.10145972669124603, "learning_rate": 3.609896785956896e-05, "loss": 0.0353, "step": 77180 }, { "epoch": 0.38095, "grad_norm": 0.08614010363817215, "learning_rate": 3.609526374019031e-05, "loss": 0.035, "step": 77190 }, { "epoch": 0.381, "grad_norm": 0.09822831302881241, "learning_rate": 3.6091559317471316e-05, "loss": 0.0351, "step": 77200 }, { "epoch": 0.38105, "grad_norm": 0.0786934494972229, "learning_rate": 3.608785459151327e-05, "loss": 0.0372, "step": 77210 }, { "epoch": 0.3811, "grad_norm": 0.09421947598457336, "learning_rate": 3.608414956241743e-05, "loss": 0.036, "step": 77220 }, { "epoch": 0.38115, "grad_norm": 0.08654823154211044, "learning_rate": 3.608044423028511e-05, "loss": 0.0358, "step": 77230 }, { "epoch": 0.3812, "grad_norm": 0.0809483677148819, "learning_rate": 3.607673859521762e-05, "loss": 0.0349, "step": 77240 }, { "epoch": 0.38125, "grad_norm": 0.10821547359228134, "learning_rate": 3.607303265731625e-05, "loss": 0.0373, "step": 77250 }, { "epoch": 0.3813, "grad_norm": 0.08119744807481766, "learning_rate": 3.606932641668232e-05, "loss": 0.0359, "step": 77260 }, { "epoch": 0.38135, "grad_norm": 0.15310417115688324, "learning_rate": 3.606561987341718e-05, "loss": 0.0369, "step": 77270 }, { "epoch": 0.3814, "grad_norm": 0.09876388311386108, "learning_rate": 3.606191302762213e-05, "loss": 0.0394, "step": 77280 }, { "epoch": 0.38145, "grad_norm": 0.09586326032876968, "learning_rate": 3.6058205879398544e-05, "loss": 0.0361, "step": 77290 }, { "epoch": 0.3815, "grad_norm": 0.07784045487642288, "learning_rate": 3.605449842884776e-05, "loss": 0.0372, "step": 77300 }, { "epoch": 0.38155, "grad_norm": 0.08723390102386475, "learning_rate": 3.605079067607115e-05, "loss": 0.0367, "step": 77310 }, { "epoch": 0.3816, "grad_norm": 0.09609778970479965, "learning_rate": 3.604708262117007e-05, "loss": 0.0377, "step": 77320 }, { "epoch": 0.38165, "grad_norm": 0.08643398433923721, "learning_rate": 3.6043374264245904e-05, "loss": 0.0352, "step": 77330 }, { "epoch": 0.3817, "grad_norm": 0.10599758476018906, "learning_rate": 3.603966560540003e-05, "loss": 0.0369, "step": 77340 }, { "epoch": 0.38175, "grad_norm": 0.10870613902807236, "learning_rate": 3.603595664473385e-05, "loss": 0.0369, "step": 77350 }, { "epoch": 0.3818, "grad_norm": 0.10345200449228287, "learning_rate": 3.603224738234875e-05, "loss": 0.0376, "step": 77360 }, { "epoch": 0.38185, "grad_norm": 0.09829385578632355, "learning_rate": 3.602853781834616e-05, "loss": 0.0383, "step": 77370 }, { "epoch": 0.3819, "grad_norm": 0.09776751697063446, "learning_rate": 3.6024827952827486e-05, "loss": 0.0345, "step": 77380 }, { "epoch": 0.38195, "grad_norm": 0.10886778682470322, "learning_rate": 3.602111778589417e-05, "loss": 0.0357, "step": 77390 }, { "epoch": 0.382, "grad_norm": 0.11069987714290619, "learning_rate": 3.6017407317647626e-05, "loss": 0.0373, "step": 77400 }, { "epoch": 0.38205, "grad_norm": 0.12858431041240692, "learning_rate": 3.60136965481893e-05, "loss": 0.0374, "step": 77410 }, { "epoch": 0.3821, "grad_norm": 0.0967574417591095, "learning_rate": 3.600998547762065e-05, "loss": 0.0404, "step": 77420 }, { "epoch": 0.38215, "grad_norm": 0.11484010517597198, "learning_rate": 3.6006274106043135e-05, "loss": 0.0388, "step": 77430 }, { "epoch": 0.3822, "grad_norm": 0.0994647741317749, "learning_rate": 3.600256243355822e-05, "loss": 0.0355, "step": 77440 }, { "epoch": 0.38225, "grad_norm": 0.11798536032438278, "learning_rate": 3.599885046026738e-05, "loss": 0.0362, "step": 77450 }, { "epoch": 0.3823, "grad_norm": 0.08284584432840347, "learning_rate": 3.599513818627211e-05, "loss": 0.037, "step": 77460 }, { "epoch": 0.38235, "grad_norm": 0.08958155661821365, "learning_rate": 3.5991425611673876e-05, "loss": 0.0375, "step": 77470 }, { "epoch": 0.3824, "grad_norm": 0.08036845177412033, "learning_rate": 3.598771273657421e-05, "loss": 0.0359, "step": 77480 }, { "epoch": 0.38245, "grad_norm": 0.09704770147800446, "learning_rate": 3.59839995610746e-05, "loss": 0.0355, "step": 77490 }, { "epoch": 0.3825, "grad_norm": 0.07127789407968521, "learning_rate": 3.5980286085276574e-05, "loss": 0.0345, "step": 77500 }, { "epoch": 0.38255, "grad_norm": 0.07797723263502121, "learning_rate": 3.597657230928164e-05, "loss": 0.0368, "step": 77510 }, { "epoch": 0.3826, "grad_norm": 0.08957751095294952, "learning_rate": 3.5972858233191356e-05, "loss": 0.0367, "step": 77520 }, { "epoch": 0.38265, "grad_norm": 0.07110311836004257, "learning_rate": 3.596914385710724e-05, "loss": 0.0349, "step": 77530 }, { "epoch": 0.3827, "grad_norm": 0.07258278876543045, "learning_rate": 3.596542918113085e-05, "loss": 0.0366, "step": 77540 }, { "epoch": 0.38275, "grad_norm": 0.07738789170980453, "learning_rate": 3.596171420536375e-05, "loss": 0.0352, "step": 77550 }, { "epoch": 0.3828, "grad_norm": 0.06839203089475632, "learning_rate": 3.595799892990751e-05, "loss": 0.0357, "step": 77560 }, { "epoch": 0.38285, "grad_norm": 0.08227694034576416, "learning_rate": 3.595428335486368e-05, "loss": 0.035, "step": 77570 }, { "epoch": 0.3829, "grad_norm": 0.0727728009223938, "learning_rate": 3.5950567480333876e-05, "loss": 0.0357, "step": 77580 }, { "epoch": 0.38295, "grad_norm": 0.08525092154741287, "learning_rate": 3.594685130641966e-05, "loss": 0.0371, "step": 77590 }, { "epoch": 0.383, "grad_norm": 0.08446183800697327, "learning_rate": 3.594313483322264e-05, "loss": 0.0367, "step": 77600 }, { "epoch": 0.38305, "grad_norm": 0.08879677951335907, "learning_rate": 3.593941806084443e-05, "loss": 0.0374, "step": 77610 }, { "epoch": 0.3831, "grad_norm": 0.08682093769311905, "learning_rate": 3.593570098938664e-05, "loss": 0.0381, "step": 77620 }, { "epoch": 0.38315, "grad_norm": 0.08730120211839676, "learning_rate": 3.5931983618950896e-05, "loss": 0.0369, "step": 77630 }, { "epoch": 0.3832, "grad_norm": 0.08197631686925888, "learning_rate": 3.5928265949638816e-05, "loss": 0.0368, "step": 77640 }, { "epoch": 0.38325, "grad_norm": 0.08786528557538986, "learning_rate": 3.592454798155206e-05, "loss": 0.0375, "step": 77650 }, { "epoch": 0.3833, "grad_norm": 0.08104783296585083, "learning_rate": 3.592082971479226e-05, "loss": 0.0364, "step": 77660 }, { "epoch": 0.38335, "grad_norm": 0.07689512521028519, "learning_rate": 3.591711114946108e-05, "loss": 0.0372, "step": 77670 }, { "epoch": 0.3834, "grad_norm": 0.11725229769945145, "learning_rate": 3.591339228566019e-05, "loss": 0.0411, "step": 77680 }, { "epoch": 0.38345, "grad_norm": 0.09530025720596313, "learning_rate": 3.590967312349125e-05, "loss": 0.0371, "step": 77690 }, { "epoch": 0.3835, "grad_norm": 0.0992274358868599, "learning_rate": 3.5905953663055944e-05, "loss": 0.0372, "step": 77700 }, { "epoch": 0.38355, "grad_norm": 0.08710917085409164, "learning_rate": 3.590223390445596e-05, "loss": 0.0372, "step": 77710 }, { "epoch": 0.3836, "grad_norm": 0.1039763018488884, "learning_rate": 3.5898513847793004e-05, "loss": 0.0362, "step": 77720 }, { "epoch": 0.38365, "grad_norm": 0.09274699538946152, "learning_rate": 3.589479349316877e-05, "loss": 0.0376, "step": 77730 }, { "epoch": 0.3837, "grad_norm": 0.11407878249883652, "learning_rate": 3.589107284068497e-05, "loss": 0.0374, "step": 77740 }, { "epoch": 0.38375, "grad_norm": 0.0961870476603508, "learning_rate": 3.5887351890443336e-05, "loss": 0.0371, "step": 77750 }, { "epoch": 0.3838, "grad_norm": 0.12014513462781906, "learning_rate": 3.5883630642545586e-05, "loss": 0.0379, "step": 77760 }, { "epoch": 0.38385, "grad_norm": 0.0941246747970581, "learning_rate": 3.5879909097093476e-05, "loss": 0.0364, "step": 77770 }, { "epoch": 0.3839, "grad_norm": 0.07910443842411041, "learning_rate": 3.587618725418872e-05, "loss": 0.0357, "step": 77780 }, { "epoch": 0.38395, "grad_norm": 0.09617926180362701, "learning_rate": 3.5872465113933104e-05, "loss": 0.0361, "step": 77790 }, { "epoch": 0.384, "grad_norm": 0.10334715247154236, "learning_rate": 3.586874267642837e-05, "loss": 0.0358, "step": 77800 }, { "epoch": 0.38405, "grad_norm": 0.09399249404668808, "learning_rate": 3.58650199417763e-05, "loss": 0.0364, "step": 77810 }, { "epoch": 0.3841, "grad_norm": 0.08352718502283096, "learning_rate": 3.5861296910078664e-05, "loss": 0.0361, "step": 77820 }, { "epoch": 0.38415, "grad_norm": 0.0855955183506012, "learning_rate": 3.585757358143725e-05, "loss": 0.0362, "step": 77830 }, { "epoch": 0.3842, "grad_norm": 0.10338416695594788, "learning_rate": 3.5853849955953855e-05, "loss": 0.0366, "step": 77840 }, { "epoch": 0.38425, "grad_norm": 0.09872971475124359, "learning_rate": 3.585012603373028e-05, "loss": 0.038, "step": 77850 }, { "epoch": 0.3843, "grad_norm": 0.09367211908102036, "learning_rate": 3.584640181486833e-05, "loss": 0.036, "step": 77860 }, { "epoch": 0.38435, "grad_norm": 0.09950553625822067, "learning_rate": 3.584267729946983e-05, "loss": 0.0377, "step": 77870 }, { "epoch": 0.3844, "grad_norm": 0.0915476605296135, "learning_rate": 3.583895248763661e-05, "loss": 0.0352, "step": 77880 }, { "epoch": 0.38445, "grad_norm": 0.09203360974788666, "learning_rate": 3.58352273794705e-05, "loss": 0.0364, "step": 77890 }, { "epoch": 0.3845, "grad_norm": 0.08658530563116074, "learning_rate": 3.583150197507335e-05, "loss": 0.0366, "step": 77900 }, { "epoch": 0.38455, "grad_norm": 0.1105838268995285, "learning_rate": 3.582777627454699e-05, "loss": 0.0355, "step": 77910 }, { "epoch": 0.3846, "grad_norm": 0.09125398844480515, "learning_rate": 3.5824050277993304e-05, "loss": 0.0378, "step": 77920 }, { "epoch": 0.38465, "grad_norm": 0.07785584032535553, "learning_rate": 3.5820323985514157e-05, "loss": 0.0351, "step": 77930 }, { "epoch": 0.3847, "grad_norm": 0.09558206051588058, "learning_rate": 3.58165973972114e-05, "loss": 0.0371, "step": 77940 }, { "epoch": 0.38475, "grad_norm": 0.08811909705400467, "learning_rate": 3.581287051318695e-05, "loss": 0.0357, "step": 77950 }, { "epoch": 0.3848, "grad_norm": 0.08579441159963608, "learning_rate": 3.5809143333542676e-05, "loss": 0.0361, "step": 77960 }, { "epoch": 0.38485, "grad_norm": 0.09081538766622543, "learning_rate": 3.580541585838049e-05, "loss": 0.0368, "step": 77970 }, { "epoch": 0.3849, "grad_norm": 0.10513069480657578, "learning_rate": 3.580168808780228e-05, "loss": 0.0372, "step": 77980 }, { "epoch": 0.38495, "grad_norm": 0.0886191725730896, "learning_rate": 3.579796002190998e-05, "loss": 0.0357, "step": 77990 }, { "epoch": 0.385, "grad_norm": 0.0755455270409584, "learning_rate": 3.579423166080552e-05, "loss": 0.0367, "step": 78000 }, { "epoch": 0.38505, "grad_norm": 0.10791072249412537, "learning_rate": 3.5790503004590814e-05, "loss": 0.0373, "step": 78010 }, { "epoch": 0.3851, "grad_norm": 0.1349969208240509, "learning_rate": 3.578677405336781e-05, "loss": 0.0386, "step": 78020 }, { "epoch": 0.38515, "grad_norm": 0.11925962567329407, "learning_rate": 3.578304480723845e-05, "loss": 0.0387, "step": 78030 }, { "epoch": 0.3852, "grad_norm": 0.09497813880443573, "learning_rate": 3.577931526630471e-05, "loss": 0.0392, "step": 78040 }, { "epoch": 0.38525, "grad_norm": 0.09541451185941696, "learning_rate": 3.5775585430668524e-05, "loss": 0.0368, "step": 78050 }, { "epoch": 0.3853, "grad_norm": 0.10846823453903198, "learning_rate": 3.577185530043189e-05, "loss": 0.0371, "step": 78060 }, { "epoch": 0.38535, "grad_norm": 0.14890998601913452, "learning_rate": 3.576812487569678e-05, "loss": 0.0375, "step": 78070 }, { "epoch": 0.3854, "grad_norm": 0.10274876654148102, "learning_rate": 3.576439415656519e-05, "loss": 0.0382, "step": 78080 }, { "epoch": 0.38545, "grad_norm": 0.08878965675830841, "learning_rate": 3.576066314313909e-05, "loss": 0.0376, "step": 78090 }, { "epoch": 0.3855, "grad_norm": 0.08467261493206024, "learning_rate": 3.575693183552051e-05, "loss": 0.0385, "step": 78100 }, { "epoch": 0.38555, "grad_norm": 0.10923632234334946, "learning_rate": 3.5753200233811446e-05, "loss": 0.036, "step": 78110 }, { "epoch": 0.3856, "grad_norm": 0.09093783795833588, "learning_rate": 3.574946833811394e-05, "loss": 0.0378, "step": 78120 }, { "epoch": 0.38565, "grad_norm": 0.08795181661844254, "learning_rate": 3.574573614853e-05, "loss": 0.0405, "step": 78130 }, { "epoch": 0.3857, "grad_norm": 0.10429956018924713, "learning_rate": 3.5742003665161684e-05, "loss": 0.0376, "step": 78140 }, { "epoch": 0.38575, "grad_norm": 0.0812741219997406, "learning_rate": 3.573827088811101e-05, "loss": 0.0361, "step": 78150 }, { "epoch": 0.3858, "grad_norm": 0.0807657390832901, "learning_rate": 3.573453781748004e-05, "loss": 0.0361, "step": 78160 }, { "epoch": 0.38585, "grad_norm": 0.08330751955509186, "learning_rate": 3.5730804453370846e-05, "loss": 0.0364, "step": 78170 }, { "epoch": 0.3859, "grad_norm": 0.08211551606655121, "learning_rate": 3.5727070795885496e-05, "loss": 0.0368, "step": 78180 }, { "epoch": 0.38595, "grad_norm": 0.08663376420736313, "learning_rate": 3.5723336845126055e-05, "loss": 0.0367, "step": 78190 }, { "epoch": 0.386, "grad_norm": 0.10587425529956818, "learning_rate": 3.5719602601194606e-05, "loss": 0.0423, "step": 78200 }, { "epoch": 0.38605, "grad_norm": 0.07704432308673859, "learning_rate": 3.571586806419326e-05, "loss": 0.0373, "step": 78210 }, { "epoch": 0.3861, "grad_norm": 0.08284270763397217, "learning_rate": 3.57121332342241e-05, "loss": 0.0365, "step": 78220 }, { "epoch": 0.38615, "grad_norm": 0.09966335445642471, "learning_rate": 3.570839811138925e-05, "loss": 0.0385, "step": 78230 }, { "epoch": 0.3862, "grad_norm": 0.09196805208921432, "learning_rate": 3.570466269579081e-05, "loss": 0.0375, "step": 78240 }, { "epoch": 0.38625, "grad_norm": 0.08560289442539215, "learning_rate": 3.570092698753091e-05, "loss": 0.0372, "step": 78250 }, { "epoch": 0.3863, "grad_norm": 0.08833914995193481, "learning_rate": 3.5697190986711696e-05, "loss": 0.0368, "step": 78260 }, { "epoch": 0.38635, "grad_norm": 0.09780129790306091, "learning_rate": 3.56934546934353e-05, "loss": 0.0362, "step": 78270 }, { "epoch": 0.3864, "grad_norm": 0.09053563326597214, "learning_rate": 3.568971810780386e-05, "loss": 0.0365, "step": 78280 }, { "epoch": 0.38645, "grad_norm": 0.08348232507705688, "learning_rate": 3.5685981229919557e-05, "loss": 0.0365, "step": 78290 }, { "epoch": 0.3865, "grad_norm": 0.0905507355928421, "learning_rate": 3.568224405988453e-05, "loss": 0.0372, "step": 78300 }, { "epoch": 0.38655, "grad_norm": 0.0896824523806572, "learning_rate": 3.567850659780097e-05, "loss": 0.0368, "step": 78310 }, { "epoch": 0.3866, "grad_norm": 0.08112866431474686, "learning_rate": 3.567476884377104e-05, "loss": 0.0363, "step": 78320 }, { "epoch": 0.38665, "grad_norm": 0.10949297994375229, "learning_rate": 3.567103079789695e-05, "loss": 0.0362, "step": 78330 }, { "epoch": 0.3867, "grad_norm": 0.09558262676000595, "learning_rate": 3.566729246028089e-05, "loss": 0.0356, "step": 78340 }, { "epoch": 0.38675, "grad_norm": 0.0879255011677742, "learning_rate": 3.566355383102506e-05, "loss": 0.037, "step": 78350 }, { "epoch": 0.3868, "grad_norm": 0.08192962408065796, "learning_rate": 3.565981491023167e-05, "loss": 0.036, "step": 78360 }, { "epoch": 0.38685, "grad_norm": 0.09210727363824844, "learning_rate": 3.5656075698002946e-05, "loss": 0.0351, "step": 78370 }, { "epoch": 0.3869, "grad_norm": 0.09961491823196411, "learning_rate": 3.565233619444111e-05, "loss": 0.0358, "step": 78380 }, { "epoch": 0.38695, "grad_norm": 0.09111356735229492, "learning_rate": 3.564859639964841e-05, "loss": 0.0354, "step": 78390 }, { "epoch": 0.387, "grad_norm": 0.10473669320344925, "learning_rate": 3.564485631372709e-05, "loss": 0.0348, "step": 78400 }, { "epoch": 0.38705, "grad_norm": 0.0872640311717987, "learning_rate": 3.564111593677939e-05, "loss": 0.036, "step": 78410 }, { "epoch": 0.3871, "grad_norm": 0.08784716576337814, "learning_rate": 3.563737526890759e-05, "loss": 0.0356, "step": 78420 }, { "epoch": 0.38715, "grad_norm": 0.09517963230609894, "learning_rate": 3.563363431021393e-05, "loss": 0.0361, "step": 78430 }, { "epoch": 0.3872, "grad_norm": 0.09568300098180771, "learning_rate": 3.562989306080071e-05, "loss": 0.0367, "step": 78440 }, { "epoch": 0.38725, "grad_norm": 0.07990391552448273, "learning_rate": 3.562615152077021e-05, "loss": 0.0354, "step": 78450 }, { "epoch": 0.3873, "grad_norm": 0.08299369364976883, "learning_rate": 3.562240969022471e-05, "loss": 0.0362, "step": 78460 }, { "epoch": 0.38735, "grad_norm": 0.07802381366491318, "learning_rate": 3.5618667569266525e-05, "loss": 0.0355, "step": 78470 }, { "epoch": 0.3874, "grad_norm": 0.08770909160375595, "learning_rate": 3.561492515799797e-05, "loss": 0.0371, "step": 78480 }, { "epoch": 0.38745, "grad_norm": 0.08272002637386322, "learning_rate": 3.561118245652133e-05, "loss": 0.0373, "step": 78490 }, { "epoch": 0.3875, "grad_norm": 0.08167009800672531, "learning_rate": 3.560743946493896e-05, "loss": 0.0366, "step": 78500 }, { "epoch": 0.38755, "grad_norm": 0.10057704895734787, "learning_rate": 3.560369618335317e-05, "loss": 0.0386, "step": 78510 }, { "epoch": 0.3876, "grad_norm": 0.09390366822481155, "learning_rate": 3.5599952611866325e-05, "loss": 0.0368, "step": 78520 }, { "epoch": 0.38765, "grad_norm": 0.09164083003997803, "learning_rate": 3.559620875058075e-05, "loss": 0.0368, "step": 78530 }, { "epoch": 0.3877, "grad_norm": 0.10917727649211884, "learning_rate": 3.5592464599598804e-05, "loss": 0.036, "step": 78540 }, { "epoch": 0.38775, "grad_norm": 0.11053070425987244, "learning_rate": 3.558872015902286e-05, "loss": 0.0403, "step": 78550 }, { "epoch": 0.3878, "grad_norm": 0.08020950853824615, "learning_rate": 3.558497542895528e-05, "loss": 0.0366, "step": 78560 }, { "epoch": 0.38785, "grad_norm": 0.09129568189382553, "learning_rate": 3.558123040949846e-05, "loss": 0.0365, "step": 78570 }, { "epoch": 0.3879, "grad_norm": 0.09383866935968399, "learning_rate": 3.557748510075477e-05, "loss": 0.0369, "step": 78580 }, { "epoch": 0.38795, "grad_norm": 0.07243139296770096, "learning_rate": 3.55737395028266e-05, "loss": 0.0361, "step": 78590 }, { "epoch": 0.388, "grad_norm": 0.07961437851190567, "learning_rate": 3.5569993615816386e-05, "loss": 0.0369, "step": 78600 }, { "epoch": 0.38805, "grad_norm": 0.08032999187707901, "learning_rate": 3.55662474398265e-05, "loss": 0.0367, "step": 78610 }, { "epoch": 0.3881, "grad_norm": 0.08743961155414581, "learning_rate": 3.556250097495938e-05, "loss": 0.036, "step": 78620 }, { "epoch": 0.38815, "grad_norm": 0.10869049280881882, "learning_rate": 3.5558754221317454e-05, "loss": 0.0388, "step": 78630 }, { "epoch": 0.3882, "grad_norm": 0.08218946307897568, "learning_rate": 3.555500717900316e-05, "loss": 0.0351, "step": 78640 }, { "epoch": 0.38825, "grad_norm": 0.0724598839879036, "learning_rate": 3.5551259848118926e-05, "loss": 0.035, "step": 78650 }, { "epoch": 0.3883, "grad_norm": 0.09266900271177292, "learning_rate": 3.554751222876722e-05, "loss": 0.0363, "step": 78660 }, { "epoch": 0.38835, "grad_norm": 0.08385990560054779, "learning_rate": 3.554376432105049e-05, "loss": 0.0368, "step": 78670 }, { "epoch": 0.3884, "grad_norm": 0.08530783653259277, "learning_rate": 3.55400161250712e-05, "loss": 0.0353, "step": 78680 }, { "epoch": 0.38845, "grad_norm": 0.07914736121892929, "learning_rate": 3.553626764093183e-05, "loss": 0.0354, "step": 78690 }, { "epoch": 0.3885, "grad_norm": 0.10238605737686157, "learning_rate": 3.553251886873486e-05, "loss": 0.0364, "step": 78700 }, { "epoch": 0.38855, "grad_norm": 0.1394563615322113, "learning_rate": 3.552876980858279e-05, "loss": 0.0363, "step": 78710 }, { "epoch": 0.3886, "grad_norm": 0.10636939108371735, "learning_rate": 3.552502046057809e-05, "loss": 0.0364, "step": 78720 }, { "epoch": 0.38865, "grad_norm": 0.08263157308101654, "learning_rate": 3.552127082482331e-05, "loss": 0.0345, "step": 78730 }, { "epoch": 0.3887, "grad_norm": 0.09180624783039093, "learning_rate": 3.551752090142093e-05, "loss": 0.0354, "step": 78740 }, { "epoch": 0.38875, "grad_norm": 0.09710685163736343, "learning_rate": 3.5513770690473466e-05, "loss": 0.0349, "step": 78750 }, { "epoch": 0.3888, "grad_norm": 0.08567915111780167, "learning_rate": 3.551002019208347e-05, "loss": 0.0349, "step": 78760 }, { "epoch": 0.38885, "grad_norm": 0.10945023596286774, "learning_rate": 3.5506269406353476e-05, "loss": 0.0369, "step": 78770 }, { "epoch": 0.3889, "grad_norm": 0.08565888553857803, "learning_rate": 3.550251833338601e-05, "loss": 0.0356, "step": 78780 }, { "epoch": 0.38895, "grad_norm": 0.09337818622589111, "learning_rate": 3.549876697328366e-05, "loss": 0.0354, "step": 78790 }, { "epoch": 0.389, "grad_norm": 0.07999156415462494, "learning_rate": 3.5495015326148945e-05, "loss": 0.0341, "step": 78800 }, { "epoch": 0.38905, "grad_norm": 0.07796759903430939, "learning_rate": 3.549126339208446e-05, "loss": 0.035, "step": 78810 }, { "epoch": 0.3891, "grad_norm": 0.09235970675945282, "learning_rate": 3.548751117119278e-05, "loss": 0.0347, "step": 78820 }, { "epoch": 0.38915, "grad_norm": 0.08895865827798843, "learning_rate": 3.5483758663576486e-05, "loss": 0.034, "step": 78830 }, { "epoch": 0.3892, "grad_norm": 0.0840078666806221, "learning_rate": 3.548000586933816e-05, "loss": 0.0349, "step": 78840 }, { "epoch": 0.38925, "grad_norm": 0.10279300808906555, "learning_rate": 3.547625278858041e-05, "loss": 0.0364, "step": 78850 }, { "epoch": 0.3893, "grad_norm": 0.11641772836446762, "learning_rate": 3.5472499421405844e-05, "loss": 0.036, "step": 78860 }, { "epoch": 0.38935, "grad_norm": 0.08842886239290237, "learning_rate": 3.5468745767917086e-05, "loss": 0.0355, "step": 78870 }, { "epoch": 0.3894, "grad_norm": 0.10167177766561508, "learning_rate": 3.546499182821675e-05, "loss": 0.0356, "step": 78880 }, { "epoch": 0.38945, "grad_norm": 0.08147701621055603, "learning_rate": 3.5461237602407474e-05, "loss": 0.0351, "step": 78890 }, { "epoch": 0.3895, "grad_norm": 0.0973169133067131, "learning_rate": 3.545748309059188e-05, "loss": 0.0358, "step": 78900 }, { "epoch": 0.38955, "grad_norm": 0.08725616335868835, "learning_rate": 3.545372829287263e-05, "loss": 0.0347, "step": 78910 }, { "epoch": 0.3896, "grad_norm": 0.09617139399051666, "learning_rate": 3.5449973209352386e-05, "loss": 0.0355, "step": 78920 }, { "epoch": 0.38965, "grad_norm": 0.10085910558700562, "learning_rate": 3.544621784013378e-05, "loss": 0.0353, "step": 78930 }, { "epoch": 0.3897, "grad_norm": 0.09641211479902267, "learning_rate": 3.544246218531952e-05, "loss": 0.0346, "step": 78940 }, { "epoch": 0.38975, "grad_norm": 0.09419838339090347, "learning_rate": 3.543870624501226e-05, "loss": 0.0346, "step": 78950 }, { "epoch": 0.3898, "grad_norm": 0.09785092622041702, "learning_rate": 3.5434950019314694e-05, "loss": 0.0353, "step": 78960 }, { "epoch": 0.38985, "grad_norm": 0.08658993244171143, "learning_rate": 3.543119350832952e-05, "loss": 0.0343, "step": 78970 }, { "epoch": 0.3899, "grad_norm": 0.11044695228338242, "learning_rate": 3.542743671215943e-05, "loss": 0.0366, "step": 78980 }, { "epoch": 0.38995, "grad_norm": 0.08109579980373383, "learning_rate": 3.542367963090714e-05, "loss": 0.0365, "step": 78990 }, { "epoch": 0.39, "grad_norm": 0.09335202723741531, "learning_rate": 3.5419922264675356e-05, "loss": 0.0352, "step": 79000 }, { "epoch": 0.39005, "grad_norm": 0.0846087858080864, "learning_rate": 3.541616461356682e-05, "loss": 0.0415, "step": 79010 }, { "epoch": 0.3901, "grad_norm": 0.09359367191791534, "learning_rate": 3.541240667768426e-05, "loss": 0.0343, "step": 79020 }, { "epoch": 0.39015, "grad_norm": 0.09477211534976959, "learning_rate": 3.54086484571304e-05, "loss": 0.0373, "step": 79030 }, { "epoch": 0.3902, "grad_norm": 0.09294027835130692, "learning_rate": 3.540488995200801e-05, "loss": 0.036, "step": 79040 }, { "epoch": 0.39025, "grad_norm": 0.08735240995883942, "learning_rate": 3.540113116241984e-05, "loss": 0.0352, "step": 79050 }, { "epoch": 0.3903, "grad_norm": 0.08991049975156784, "learning_rate": 3.539737208846865e-05, "loss": 0.0363, "step": 79060 }, { "epoch": 0.39035, "grad_norm": 0.08595713973045349, "learning_rate": 3.539361273025721e-05, "loss": 0.0367, "step": 79070 }, { "epoch": 0.3904, "grad_norm": 0.10878205299377441, "learning_rate": 3.538985308788831e-05, "loss": 0.0381, "step": 79080 }, { "epoch": 0.39045, "grad_norm": 0.08834918588399887, "learning_rate": 3.538609316146472e-05, "loss": 0.0367, "step": 79090 }, { "epoch": 0.3905, "grad_norm": 0.08705242723226547, "learning_rate": 3.538233295108925e-05, "loss": 0.0386, "step": 79100 }, { "epoch": 0.39055, "grad_norm": 0.09597699344158173, "learning_rate": 3.53785724568647e-05, "loss": 0.0376, "step": 79110 }, { "epoch": 0.3906, "grad_norm": 0.07092789560556412, "learning_rate": 3.5374811678893874e-05, "loss": 0.0386, "step": 79120 }, { "epoch": 0.39065, "grad_norm": 0.10917045921087265, "learning_rate": 3.537105061727959e-05, "loss": 0.0374, "step": 79130 }, { "epoch": 0.3907, "grad_norm": 0.09935753792524338, "learning_rate": 3.536728927212469e-05, "loss": 0.0362, "step": 79140 }, { "epoch": 0.39075, "grad_norm": 0.08676110953092575, "learning_rate": 3.536352764353198e-05, "loss": 0.0364, "step": 79150 }, { "epoch": 0.3908, "grad_norm": 0.08344744145870209, "learning_rate": 3.5359765731604336e-05, "loss": 0.036, "step": 79160 }, { "epoch": 0.39085, "grad_norm": 0.08429134637117386, "learning_rate": 3.535600353644458e-05, "loss": 0.0349, "step": 79170 }, { "epoch": 0.3909, "grad_norm": 0.10815691202878952, "learning_rate": 3.535224105815558e-05, "loss": 0.0383, "step": 79180 }, { "epoch": 0.39095, "grad_norm": 0.08904128521680832, "learning_rate": 3.534847829684019e-05, "loss": 0.0354, "step": 79190 }, { "epoch": 0.391, "grad_norm": 0.0823027566075325, "learning_rate": 3.53447152526013e-05, "loss": 0.0358, "step": 79200 }, { "epoch": 0.39105, "grad_norm": 0.08951200544834137, "learning_rate": 3.534095192554178e-05, "loss": 0.0351, "step": 79210 }, { "epoch": 0.3911, "grad_norm": 0.07517030835151672, "learning_rate": 3.5337188315764516e-05, "loss": 0.0349, "step": 79220 }, { "epoch": 0.39115, "grad_norm": 0.10150893032550812, "learning_rate": 3.533342442337241e-05, "loss": 0.0369, "step": 79230 }, { "epoch": 0.3912, "grad_norm": 0.08564486354589462, "learning_rate": 3.5329660248468366e-05, "loss": 0.0368, "step": 79240 }, { "epoch": 0.39125, "grad_norm": 0.08716096729040146, "learning_rate": 3.532589579115529e-05, "loss": 0.0358, "step": 79250 }, { "epoch": 0.3913, "grad_norm": 0.08219984173774719, "learning_rate": 3.53221310515361e-05, "loss": 0.0389, "step": 79260 }, { "epoch": 0.39135, "grad_norm": 0.08610589057207108, "learning_rate": 3.5318366029713724e-05, "loss": 0.0367, "step": 79270 }, { "epoch": 0.3914, "grad_norm": 0.06737655401229858, "learning_rate": 3.531460072579109e-05, "loss": 0.0351, "step": 79280 }, { "epoch": 0.39145, "grad_norm": 0.09205567836761475, "learning_rate": 3.5310835139871164e-05, "loss": 0.0379, "step": 79290 }, { "epoch": 0.3915, "grad_norm": 0.08664903789758682, "learning_rate": 3.530706927205687e-05, "loss": 0.0369, "step": 79300 }, { "epoch": 0.39155, "grad_norm": 0.08422086387872696, "learning_rate": 3.530330312245117e-05, "loss": 0.0386, "step": 79310 }, { "epoch": 0.3916, "grad_norm": 0.06627576798200607, "learning_rate": 3.529953669115703e-05, "loss": 0.036, "step": 79320 }, { "epoch": 0.39165, "grad_norm": 0.096906378865242, "learning_rate": 3.529576997827744e-05, "loss": 0.0384, "step": 79330 }, { "epoch": 0.3917, "grad_norm": 0.08024526387453079, "learning_rate": 3.529200298391536e-05, "loss": 0.0356, "step": 79340 }, { "epoch": 0.39175, "grad_norm": 0.08066499978303909, "learning_rate": 3.528823570817379e-05, "loss": 0.0367, "step": 79350 }, { "epoch": 0.3918, "grad_norm": 0.09209758043289185, "learning_rate": 3.5284468151155716e-05, "loss": 0.0391, "step": 79360 }, { "epoch": 0.39185, "grad_norm": 0.096739761531353, "learning_rate": 3.528070031296414e-05, "loss": 0.0382, "step": 79370 }, { "epoch": 0.3919, "grad_norm": 0.0762137770652771, "learning_rate": 3.527693219370209e-05, "loss": 0.0354, "step": 79380 }, { "epoch": 0.39195, "grad_norm": 0.07095940411090851, "learning_rate": 3.527316379347257e-05, "loss": 0.0367, "step": 79390 }, { "epoch": 0.392, "grad_norm": 0.07851160317659378, "learning_rate": 3.526939511237861e-05, "loss": 0.0361, "step": 79400 }, { "epoch": 0.39205, "grad_norm": 0.08974769711494446, "learning_rate": 3.526562615052325e-05, "loss": 0.0363, "step": 79410 }, { "epoch": 0.3921, "grad_norm": 0.0686379075050354, "learning_rate": 3.526185690800953e-05, "loss": 0.0358, "step": 79420 }, { "epoch": 0.39215, "grad_norm": 0.07639919221401215, "learning_rate": 3.525808738494049e-05, "loss": 0.0341, "step": 79430 }, { "epoch": 0.3922, "grad_norm": 0.09294627606868744, "learning_rate": 3.52543175814192e-05, "loss": 0.036, "step": 79440 }, { "epoch": 0.39225, "grad_norm": 0.10250154882669449, "learning_rate": 3.525054749754871e-05, "loss": 0.0383, "step": 79450 }, { "epoch": 0.3923, "grad_norm": 0.08065023273229599, "learning_rate": 3.524677713343212e-05, "loss": 0.0373, "step": 79460 }, { "epoch": 0.39235, "grad_norm": 0.07971781492233276, "learning_rate": 3.5243006489172475e-05, "loss": 0.0385, "step": 79470 }, { "epoch": 0.3924, "grad_norm": 0.08311796188354492, "learning_rate": 3.523923556487289e-05, "loss": 0.0356, "step": 79480 }, { "epoch": 0.39245, "grad_norm": 0.08565957099199295, "learning_rate": 3.523546436063645e-05, "loss": 0.0364, "step": 79490 }, { "epoch": 0.3925, "grad_norm": 0.08758903294801712, "learning_rate": 3.5231692876566264e-05, "loss": 0.0364, "step": 79500 }, { "epoch": 0.39255, "grad_norm": 0.07650836557149887, "learning_rate": 3.522792111276543e-05, "loss": 0.0376, "step": 79510 }, { "epoch": 0.3926, "grad_norm": 0.08982399851083755, "learning_rate": 3.522414906933708e-05, "loss": 0.0354, "step": 79520 }, { "epoch": 0.39265, "grad_norm": 0.09377458691596985, "learning_rate": 3.522037674638433e-05, "loss": 0.0366, "step": 79530 }, { "epoch": 0.3927, "grad_norm": 0.0800488144159317, "learning_rate": 3.521660414401033e-05, "loss": 0.0365, "step": 79540 }, { "epoch": 0.39275, "grad_norm": 0.16223900020122528, "learning_rate": 3.5212831262318204e-05, "loss": 0.041, "step": 79550 }, { "epoch": 0.3928, "grad_norm": 0.12490296363830566, "learning_rate": 3.5209058101411114e-05, "loss": 0.0371, "step": 79560 }, { "epoch": 0.39285, "grad_norm": 0.11788595467805862, "learning_rate": 3.52052846613922e-05, "loss": 0.0366, "step": 79570 }, { "epoch": 0.3929, "grad_norm": 0.1282113492488861, "learning_rate": 3.520151094236465e-05, "loss": 0.0383, "step": 79580 }, { "epoch": 0.39295, "grad_norm": 0.08661283552646637, "learning_rate": 3.519773694443161e-05, "loss": 0.0383, "step": 79590 }, { "epoch": 0.393, "grad_norm": 0.07716389745473862, "learning_rate": 3.519396266769628e-05, "loss": 0.0366, "step": 79600 }, { "epoch": 0.39305, "grad_norm": 0.09312919527292252, "learning_rate": 3.519018811226184e-05, "loss": 0.0387, "step": 79610 }, { "epoch": 0.3931, "grad_norm": 0.08408086746931076, "learning_rate": 3.5186413278231487e-05, "loss": 0.036, "step": 79620 }, { "epoch": 0.39315, "grad_norm": 0.08217661082744598, "learning_rate": 3.518263816570842e-05, "loss": 0.0368, "step": 79630 }, { "epoch": 0.3932, "grad_norm": 0.08453497290611267, "learning_rate": 3.517886277479585e-05, "loss": 0.0347, "step": 79640 }, { "epoch": 0.39325, "grad_norm": 0.08217451721429825, "learning_rate": 3.5175087105596995e-05, "loss": 0.0357, "step": 79650 }, { "epoch": 0.3933, "grad_norm": 0.09013067930936813, "learning_rate": 3.517131115821508e-05, "loss": 0.0364, "step": 79660 }, { "epoch": 0.39335, "grad_norm": 0.0868317037820816, "learning_rate": 3.5167534932753344e-05, "loss": 0.0369, "step": 79670 }, { "epoch": 0.3934, "grad_norm": 0.09239938110113144, "learning_rate": 3.516375842931502e-05, "loss": 0.0371, "step": 79680 }, { "epoch": 0.39345, "grad_norm": 0.08693993091583252, "learning_rate": 3.515998164800336e-05, "loss": 0.0351, "step": 79690 }, { "epoch": 0.3935, "grad_norm": 0.10249830782413483, "learning_rate": 3.515620458892162e-05, "loss": 0.0357, "step": 79700 }, { "epoch": 0.39355, "grad_norm": 0.08632895350456238, "learning_rate": 3.515242725217306e-05, "loss": 0.0357, "step": 79710 }, { "epoch": 0.3936, "grad_norm": 0.09208270907402039, "learning_rate": 3.514864963786095e-05, "loss": 0.0364, "step": 79720 }, { "epoch": 0.39365, "grad_norm": 0.08991848677396774, "learning_rate": 3.514487174608858e-05, "loss": 0.0351, "step": 79730 }, { "epoch": 0.3937, "grad_norm": 0.08170048147439957, "learning_rate": 3.5141093576959225e-05, "loss": 0.0357, "step": 79740 }, { "epoch": 0.39375, "grad_norm": 0.10216086357831955, "learning_rate": 3.5137315130576174e-05, "loss": 0.0356, "step": 79750 }, { "epoch": 0.3938, "grad_norm": 0.10113737732172012, "learning_rate": 3.5133536407042743e-05, "loss": 0.0353, "step": 79760 }, { "epoch": 0.39385, "grad_norm": 0.09250824898481369, "learning_rate": 3.512975740646223e-05, "loss": 0.0367, "step": 79770 }, { "epoch": 0.3939, "grad_norm": 0.10215970128774643, "learning_rate": 3.512597812893795e-05, "loss": 0.0363, "step": 79780 }, { "epoch": 0.39395, "grad_norm": 0.08758533746004105, "learning_rate": 3.512219857457325e-05, "loss": 0.0356, "step": 79790 }, { "epoch": 0.394, "grad_norm": 0.08108478039503098, "learning_rate": 3.511841874347143e-05, "loss": 0.0358, "step": 79800 }, { "epoch": 0.39405, "grad_norm": 0.07901604473590851, "learning_rate": 3.5114638635735843e-05, "loss": 0.0366, "step": 79810 }, { "epoch": 0.3941, "grad_norm": 0.09510098397731781, "learning_rate": 3.511085825146984e-05, "loss": 0.0353, "step": 79820 }, { "epoch": 0.39415, "grad_norm": 0.09487070143222809, "learning_rate": 3.510707759077677e-05, "loss": 0.0365, "step": 79830 }, { "epoch": 0.3942, "grad_norm": 0.09669601172208786, "learning_rate": 3.510329665375999e-05, "loss": 0.0345, "step": 79840 }, { "epoch": 0.39425, "grad_norm": 0.09767556935548782, "learning_rate": 3.509951544052288e-05, "loss": 0.0355, "step": 79850 }, { "epoch": 0.3943, "grad_norm": 0.11945126950740814, "learning_rate": 3.509573395116881e-05, "loss": 0.0354, "step": 79860 }, { "epoch": 0.39435, "grad_norm": 0.08112930506467819, "learning_rate": 3.509195218580117e-05, "loss": 0.0365, "step": 79870 }, { "epoch": 0.3944, "grad_norm": 0.07552963495254517, "learning_rate": 3.508817014452335e-05, "loss": 0.0351, "step": 79880 }, { "epoch": 0.39445, "grad_norm": 0.0858389362692833, "learning_rate": 3.5084387827438734e-05, "loss": 0.0352, "step": 79890 }, { "epoch": 0.3945, "grad_norm": 0.0821891501545906, "learning_rate": 3.508060523465076e-05, "loss": 0.0345, "step": 79900 }, { "epoch": 0.39455, "grad_norm": 0.08184736222028732, "learning_rate": 3.5076822366262816e-05, "loss": 0.034, "step": 79910 }, { "epoch": 0.3946, "grad_norm": 0.09934855997562408, "learning_rate": 3.5073039222378344e-05, "loss": 0.035, "step": 79920 }, { "epoch": 0.39465, "grad_norm": 0.10376731306314468, "learning_rate": 3.506925580310076e-05, "loss": 0.0353, "step": 79930 }, { "epoch": 0.3947, "grad_norm": 0.10761922597885132, "learning_rate": 3.5065472108533505e-05, "loss": 0.0346, "step": 79940 }, { "epoch": 0.39475, "grad_norm": 0.10313154757022858, "learning_rate": 3.506168813878002e-05, "loss": 0.0351, "step": 79950 }, { "epoch": 0.3948, "grad_norm": 0.091402068734169, "learning_rate": 3.505790389394377e-05, "loss": 0.0349, "step": 79960 }, { "epoch": 0.39485, "grad_norm": 0.08425392955541611, "learning_rate": 3.505411937412819e-05, "loss": 0.0348, "step": 79970 }, { "epoch": 0.3949, "grad_norm": 0.070528544485569, "learning_rate": 3.505033457943678e-05, "loss": 0.0361, "step": 79980 }, { "epoch": 0.39495, "grad_norm": 0.08500799536705017, "learning_rate": 3.504654950997299e-05, "loss": 0.0353, "step": 79990 }, { "epoch": 0.395, "grad_norm": 0.07569961994886398, "learning_rate": 3.5042764165840314e-05, "loss": 0.0368, "step": 80000 }, { "epoch": 5e-05, "grad_norm": 0.07905327528715134, "learning_rate": 3.5038978547142234e-05, "loss": 0.0368, "step": 80010 }, { "epoch": 0.0001, "grad_norm": 0.08307768404483795, "learning_rate": 3.503519265398226e-05, "loss": 0.0351, "step": 80020 }, { "epoch": 0.00015, "grad_norm": 0.07752068340778351, "learning_rate": 3.503140648646388e-05, "loss": 0.0345, "step": 80030 }, { "epoch": 0.0002, "grad_norm": 0.07796391099691391, "learning_rate": 3.502762004469062e-05, "loss": 0.0358, "step": 80040 }, { "epoch": 0.00025, "grad_norm": 0.0741119384765625, "learning_rate": 3.502383332876599e-05, "loss": 0.0373, "step": 80050 }, { "epoch": 0.0003, "grad_norm": 0.08147093653678894, "learning_rate": 3.502004633879353e-05, "loss": 0.035, "step": 80060 }, { "epoch": 0.00035, "grad_norm": 0.08216322958469391, "learning_rate": 3.5016259074876764e-05, "loss": 0.035, "step": 80070 }, { "epoch": 0.0004, "grad_norm": 0.07296330481767654, "learning_rate": 3.501247153711924e-05, "loss": 0.0356, "step": 80080 }, { "epoch": 0.00045, "grad_norm": 0.09399837255477905, "learning_rate": 3.5008683725624506e-05, "loss": 0.0352, "step": 80090 }, { "epoch": 0.0005, "grad_norm": 0.0898161381483078, "learning_rate": 3.5004895640496113e-05, "loss": 0.0351, "step": 80100 }, { "epoch": 0.00055, "grad_norm": 0.09958512336015701, "learning_rate": 3.5001107281837635e-05, "loss": 0.0355, "step": 80110 }, { "epoch": 0.0006, "grad_norm": 0.0894375592470169, "learning_rate": 3.499731864975264e-05, "loss": 0.0405, "step": 80120 }, { "epoch": 0.00065, "grad_norm": 0.1024933010339737, "learning_rate": 3.499352974434472e-05, "loss": 0.0381, "step": 80130 }, { "epoch": 0.0007, "grad_norm": 0.07921604067087173, "learning_rate": 3.498974056571744e-05, "loss": 0.0367, "step": 80140 }, { "epoch": 0.00075, "grad_norm": 0.09752549976110458, "learning_rate": 3.49859511139744e-05, "loss": 0.0367, "step": 80150 }, { "epoch": 0.0008, "grad_norm": 0.09751075506210327, "learning_rate": 3.4982161389219214e-05, "loss": 0.0393, "step": 80160 }, { "epoch": 0.00085, "grad_norm": 0.08204904943704605, "learning_rate": 3.49783713915555e-05, "loss": 0.0363, "step": 80170 }, { "epoch": 0.0009, "grad_norm": 0.09174011647701263, "learning_rate": 3.497458112108684e-05, "loss": 0.0348, "step": 80180 }, { "epoch": 0.00095, "grad_norm": 0.12619981169700623, "learning_rate": 3.49707905779169e-05, "loss": 0.0387, "step": 80190 }, { "epoch": 0.001, "grad_norm": 0.09585075080394745, "learning_rate": 3.496699976214927e-05, "loss": 0.038, "step": 80200 }, { "epoch": 0.00105, "grad_norm": 0.09432283043861389, "learning_rate": 3.496320867388762e-05, "loss": 0.0357, "step": 80210 }, { "epoch": 0.0011, "grad_norm": 0.09583482891321182, "learning_rate": 3.4959417313235585e-05, "loss": 0.0366, "step": 80220 }, { "epoch": 0.00115, "grad_norm": 0.09143300354480743, "learning_rate": 3.495562568029683e-05, "loss": 0.0375, "step": 80230 }, { "epoch": 0.0012, "grad_norm": 0.08289297670125961, "learning_rate": 3.4951833775175005e-05, "loss": 0.0367, "step": 80240 }, { "epoch": 0.00125, "grad_norm": 0.07733283191919327, "learning_rate": 3.494804159797378e-05, "loss": 0.0375, "step": 80250 }, { "epoch": 0.0013, "grad_norm": 0.0847741961479187, "learning_rate": 3.4944249148796845e-05, "loss": 0.04, "step": 80260 }, { "epoch": 0.00135, "grad_norm": 0.08987481892108917, "learning_rate": 3.4940456427747866e-05, "loss": 0.0353, "step": 80270 }, { "epoch": 0.0014, "grad_norm": 0.0879490077495575, "learning_rate": 3.493666343493054e-05, "loss": 0.0368, "step": 80280 }, { "epoch": 0.00145, "grad_norm": 0.09618838876485825, "learning_rate": 3.493287017044857e-05, "loss": 0.0369, "step": 80290 }, { "epoch": 0.0015, "grad_norm": 0.0725872665643692, "learning_rate": 3.4929076634405667e-05, "loss": 0.0345, "step": 80300 }, { "epoch": 0.00155, "grad_norm": 0.08574867993593216, "learning_rate": 3.4925282826905533e-05, "loss": 0.0349, "step": 80310 }, { "epoch": 0.0016, "grad_norm": 0.0960630550980568, "learning_rate": 3.49214887480519e-05, "loss": 0.0364, "step": 80320 }, { "epoch": 0.00165, "grad_norm": 0.08120320737361908, "learning_rate": 3.491769439794849e-05, "loss": 0.035, "step": 80330 }, { "epoch": 0.0017, "grad_norm": 0.09600193053483963, "learning_rate": 3.491389977669904e-05, "loss": 0.0357, "step": 80340 }, { "epoch": 0.00175, "grad_norm": 0.07628503441810608, "learning_rate": 3.4910104884407294e-05, "loss": 0.0355, "step": 80350 }, { "epoch": 0.0018, "grad_norm": 0.08768440037965775, "learning_rate": 3.490630972117701e-05, "loss": 0.0362, "step": 80360 }, { "epoch": 0.00185, "grad_norm": 0.09640726447105408, "learning_rate": 3.490251428711193e-05, "loss": 0.0387, "step": 80370 }, { "epoch": 0.0019, "grad_norm": 0.08039938658475876, "learning_rate": 3.489871858231584e-05, "loss": 0.0367, "step": 80380 }, { "epoch": 0.00195, "grad_norm": 0.08057080209255219, "learning_rate": 3.48949226068925e-05, "loss": 0.0372, "step": 80390 }, { "epoch": 0.002, "grad_norm": 0.08435969054698944, "learning_rate": 3.489112636094569e-05, "loss": 0.0377, "step": 80400 }, { "epoch": 0.00205, "grad_norm": 0.08451063185930252, "learning_rate": 3.4887329844579194e-05, "loss": 0.038, "step": 80410 }, { "epoch": 0.0021, "grad_norm": 0.07690353691577911, "learning_rate": 3.4883533057896826e-05, "loss": 0.0355, "step": 80420 }, { "epoch": 0.00215, "grad_norm": 0.09554172307252884, "learning_rate": 3.4879736001002375e-05, "loss": 0.0375, "step": 80430 }, { "epoch": 0.0022, "grad_norm": 0.07848838716745377, "learning_rate": 3.4875938673999654e-05, "loss": 0.0371, "step": 80440 }, { "epoch": 0.00225, "grad_norm": 0.09010570496320724, "learning_rate": 3.4872141076992476e-05, "loss": 0.0355, "step": 80450 }, { "epoch": 0.0023, "grad_norm": 0.07799280434846878, "learning_rate": 3.486834321008467e-05, "loss": 0.0383, "step": 80460 }, { "epoch": 0.00235, "grad_norm": 0.10257743299007416, "learning_rate": 3.4864545073380065e-05, "loss": 0.0356, "step": 80470 }, { "epoch": 0.0024, "grad_norm": 0.07939010113477707, "learning_rate": 3.486074666698251e-05, "loss": 0.0355, "step": 80480 }, { "epoch": 0.00245, "grad_norm": 0.09207401424646378, "learning_rate": 3.485694799099585e-05, "loss": 0.0356, "step": 80490 }, { "epoch": 0.0025, "grad_norm": 0.08484499156475067, "learning_rate": 3.485314904552392e-05, "loss": 0.036, "step": 80500 }, { "epoch": 0.00255, "grad_norm": 0.1318163424730301, "learning_rate": 3.4849349830670615e-05, "loss": 0.0367, "step": 80510 }, { "epoch": 0.0026, "grad_norm": 0.1633630394935608, "learning_rate": 3.484555034653977e-05, "loss": 0.0361, "step": 80520 }, { "epoch": 0.00265, "grad_norm": 0.09611942619085312, "learning_rate": 3.4841750593235285e-05, "loss": 0.037, "step": 80530 }, { "epoch": 0.0027, "grad_norm": 0.08366899192333221, "learning_rate": 3.483795057086104e-05, "loss": 0.0356, "step": 80540 }, { "epoch": 0.00275, "grad_norm": 0.08930250257253647, "learning_rate": 3.4834150279520916e-05, "loss": 0.0359, "step": 80550 }, { "epoch": 0.0028, "grad_norm": 0.08847183734178543, "learning_rate": 3.4830349719318815e-05, "loss": 0.0367, "step": 80560 }, { "epoch": 0.00285, "grad_norm": 0.08116517961025238, "learning_rate": 3.4826548890358656e-05, "loss": 0.0367, "step": 80570 }, { "epoch": 0.0029, "grad_norm": 0.08152367919683456, "learning_rate": 3.482274779274433e-05, "loss": 0.0348, "step": 80580 }, { "epoch": 0.00295, "grad_norm": 0.08631303906440735, "learning_rate": 3.481894642657977e-05, "loss": 0.0366, "step": 80590 }, { "epoch": 0.003, "grad_norm": 0.09753107279539108, "learning_rate": 3.481514479196891e-05, "loss": 0.0377, "step": 80600 }, { "epoch": 0.00305, "grad_norm": 0.07813764363527298, "learning_rate": 3.4811342889015686e-05, "loss": 0.0379, "step": 80610 }, { "epoch": 0.0031, "grad_norm": 0.08773379027843475, "learning_rate": 3.4807540717824025e-05, "loss": 0.0362, "step": 80620 }, { "epoch": 0.00315, "grad_norm": 0.10971876233816147, "learning_rate": 3.480373827849789e-05, "loss": 0.0367, "step": 80630 }, { "epoch": 0.0032, "grad_norm": 0.08488216251134872, "learning_rate": 3.4799935571141226e-05, "loss": 0.0375, "step": 80640 }, { "epoch": 0.00325, "grad_norm": 0.08824058622121811, "learning_rate": 3.479613259585801e-05, "loss": 0.0355, "step": 80650 }, { "epoch": 0.0033, "grad_norm": 0.1005680039525032, "learning_rate": 3.47923293527522e-05, "loss": 0.0363, "step": 80660 }, { "epoch": 0.00335, "grad_norm": 0.0813353955745697, "learning_rate": 3.4788525841927794e-05, "loss": 0.0354, "step": 80670 }, { "epoch": 0.0034, "grad_norm": 0.08333186060190201, "learning_rate": 3.4784722063488764e-05, "loss": 0.0359, "step": 80680 }, { "epoch": 0.00345, "grad_norm": 0.11254861950874329, "learning_rate": 3.478091801753912e-05, "loss": 0.0354, "step": 80690 }, { "epoch": 0.0035, "grad_norm": 0.09645597636699677, "learning_rate": 3.477711370418284e-05, "loss": 0.0355, "step": 80700 }, { "epoch": 0.00355, "grad_norm": 0.08780481666326523, "learning_rate": 3.4773309123523946e-05, "loss": 0.0373, "step": 80710 }, { "epoch": 0.0036, "grad_norm": 0.08916894346475601, "learning_rate": 3.476950427566645e-05, "loss": 0.0356, "step": 80720 }, { "epoch": 0.00365, "grad_norm": 0.10412748157978058, "learning_rate": 3.476569916071438e-05, "loss": 0.0356, "step": 80730 }, { "epoch": 0.0037, "grad_norm": 0.10716842114925385, "learning_rate": 3.4761893778771766e-05, "loss": 0.0375, "step": 80740 }, { "epoch": 0.00375, "grad_norm": 0.10552069544792175, "learning_rate": 3.475808812994264e-05, "loss": 0.038, "step": 80750 }, { "epoch": 0.0038, "grad_norm": 0.10847306251525879, "learning_rate": 3.475428221433106e-05, "loss": 0.0364, "step": 80760 }, { "epoch": 0.00385, "grad_norm": 0.09877172857522964, "learning_rate": 3.4750476032041054e-05, "loss": 0.0377, "step": 80770 }, { "epoch": 0.0039, "grad_norm": 0.06513556838035583, "learning_rate": 3.474666958317671e-05, "loss": 0.0366, "step": 80780 }, { "epoch": 0.00395, "grad_norm": 0.07210590690374374, "learning_rate": 3.474286286784207e-05, "loss": 0.0359, "step": 80790 }, { "epoch": 0.004, "grad_norm": 0.07605480402708054, "learning_rate": 3.473905588614122e-05, "loss": 0.0365, "step": 80800 }, { "epoch": 0.00405, "grad_norm": 0.07498470693826675, "learning_rate": 3.4735248638178255e-05, "loss": 0.0371, "step": 80810 }, { "epoch": 0.0041, "grad_norm": 0.08058614283800125, "learning_rate": 3.473144112405724e-05, "loss": 0.0356, "step": 80820 }, { "epoch": 0.00415, "grad_norm": 0.0745462104678154, "learning_rate": 3.472763334388228e-05, "loss": 0.0359, "step": 80830 }, { "epoch": 0.0042, "grad_norm": 0.07761227339506149, "learning_rate": 3.472382529775748e-05, "loss": 0.0355, "step": 80840 }, { "epoch": 0.00425, "grad_norm": 0.07811295241117477, "learning_rate": 3.472001698578694e-05, "loss": 0.0343, "step": 80850 }, { "epoch": 0.0043, "grad_norm": 0.08239776641130447, "learning_rate": 3.47162084080748e-05, "loss": 0.0365, "step": 80860 }, { "epoch": 0.00435, "grad_norm": 0.07351667433977127, "learning_rate": 3.471239956472517e-05, "loss": 0.0348, "step": 80870 }, { "epoch": 0.0044, "grad_norm": 0.07278984785079956, "learning_rate": 3.470859045584218e-05, "loss": 0.036, "step": 80880 }, { "epoch": 0.00445, "grad_norm": 0.08412481844425201, "learning_rate": 3.470478108152998e-05, "loss": 0.0371, "step": 80890 }, { "epoch": 0.0045, "grad_norm": 0.10147181153297424, "learning_rate": 3.470097144189272e-05, "loss": 0.0368, "step": 80900 }, { "epoch": 0.00455, "grad_norm": 0.09770499169826508, "learning_rate": 3.469716153703452e-05, "loss": 0.0354, "step": 80910 }, { "epoch": 0.0046, "grad_norm": 0.10202199220657349, "learning_rate": 3.469335136705958e-05, "loss": 0.0372, "step": 80920 }, { "epoch": 0.00465, "grad_norm": 0.10253550112247467, "learning_rate": 3.4689540932072057e-05, "loss": 0.0377, "step": 80930 }, { "epoch": 0.0047, "grad_norm": 0.09305887669324875, "learning_rate": 3.468573023217613e-05, "loss": 0.0363, "step": 80940 }, { "epoch": 0.00475, "grad_norm": 0.11431948095560074, "learning_rate": 3.468191926747597e-05, "loss": 0.036, "step": 80950 }, { "epoch": 0.0048, "grad_norm": 0.08740732073783875, "learning_rate": 3.4678108038075775e-05, "loss": 0.0361, "step": 80960 }, { "epoch": 0.00485, "grad_norm": 0.07707460969686508, "learning_rate": 3.4674296544079745e-05, "loss": 0.0352, "step": 80970 }, { "epoch": 0.0049, "grad_norm": 0.06795619428157806, "learning_rate": 3.467048478559208e-05, "loss": 0.0357, "step": 80980 }, { "epoch": 0.00495, "grad_norm": 0.0804039016366005, "learning_rate": 3.466667276271699e-05, "loss": 0.0358, "step": 80990 }, { "epoch": 0.005, "grad_norm": 0.08920106291770935, "learning_rate": 3.46628604755587e-05, "loss": 0.0372, "step": 81000 }, { "epoch": 0.00505, "grad_norm": 0.09149488061666489, "learning_rate": 3.465904792422144e-05, "loss": 0.0382, "step": 81010 }, { "epoch": 0.0051, "grad_norm": 0.08531561493873596, "learning_rate": 3.465523510880943e-05, "loss": 0.0373, "step": 81020 }, { "epoch": 0.00515, "grad_norm": 0.09696276485919952, "learning_rate": 3.4651422029426926e-05, "loss": 0.036, "step": 81030 }, { "epoch": 0.0052, "grad_norm": 0.06882733106613159, "learning_rate": 3.464760868617817e-05, "loss": 0.0363, "step": 81040 }, { "epoch": 0.00525, "grad_norm": 0.07693909108638763, "learning_rate": 3.4643795079167414e-05, "loss": 0.0365, "step": 81050 }, { "epoch": 0.0053, "grad_norm": 0.0854329913854599, "learning_rate": 3.4639981208498924e-05, "loss": 0.0354, "step": 81060 }, { "epoch": 0.00535, "grad_norm": 0.09463875740766525, "learning_rate": 3.463616707427697e-05, "loss": 0.0382, "step": 81070 }, { "epoch": 0.0054, "grad_norm": 0.08794492483139038, "learning_rate": 3.463235267660583e-05, "loss": 0.0367, "step": 81080 }, { "epoch": 0.00545, "grad_norm": 0.08704029768705368, "learning_rate": 3.462853801558979e-05, "loss": 0.0351, "step": 81090 }, { "epoch": 0.0055, "grad_norm": 0.08887019008398056, "learning_rate": 3.462472309133312e-05, "loss": 0.0382, "step": 81100 }, { "epoch": 0.00555, "grad_norm": 0.08907011151313782, "learning_rate": 3.4620907903940156e-05, "loss": 0.0365, "step": 81110 }, { "epoch": 0.0056, "grad_norm": 0.11149367690086365, "learning_rate": 3.461709245351518e-05, "loss": 0.0345, "step": 81120 }, { "epoch": 0.00565, "grad_norm": 0.09168368577957153, "learning_rate": 3.4613276740162506e-05, "loss": 0.0363, "step": 81130 }, { "epoch": 0.0057, "grad_norm": 0.10839654505252838, "learning_rate": 3.4609460763986454e-05, "loss": 0.0356, "step": 81140 }, { "epoch": 0.00575, "grad_norm": 0.08748756349086761, "learning_rate": 3.460564452509137e-05, "loss": 0.0372, "step": 81150 }, { "epoch": 0.0058, "grad_norm": 0.11382316797971725, "learning_rate": 3.4601828023581554e-05, "loss": 0.0354, "step": 81160 }, { "epoch": 0.00585, "grad_norm": 0.10300473123788834, "learning_rate": 3.459801125956138e-05, "loss": 0.0359, "step": 81170 }, { "epoch": 0.0059, "grad_norm": 0.08995317667722702, "learning_rate": 3.459419423313517e-05, "loss": 0.0375, "step": 81180 }, { "epoch": 0.00595, "grad_norm": 0.07771133631467819, "learning_rate": 3.459037694440731e-05, "loss": 0.0351, "step": 81190 }, { "epoch": 0.006, "grad_norm": 0.08672454953193665, "learning_rate": 3.458655939348214e-05, "loss": 0.0359, "step": 81200 }, { "epoch": 0.00605, "grad_norm": 0.09423719346523285, "learning_rate": 3.4582741580464044e-05, "loss": 0.0367, "step": 81210 }, { "epoch": 0.0061, "grad_norm": 0.07885994762182236, "learning_rate": 3.457892350545738e-05, "loss": 0.0368, "step": 81220 }, { "epoch": 0.00615, "grad_norm": 0.07632511854171753, "learning_rate": 3.457510516856656e-05, "loss": 0.0359, "step": 81230 }, { "epoch": 0.0062, "grad_norm": 0.07641930133104324, "learning_rate": 3.457128656989596e-05, "loss": 0.0371, "step": 81240 }, { "epoch": 0.00625, "grad_norm": 0.09359002113342285, "learning_rate": 3.456746770954997e-05, "loss": 0.0367, "step": 81250 }, { "epoch": 0.0063, "grad_norm": 0.09460229426622391, "learning_rate": 3.456364858763301e-05, "loss": 0.0391, "step": 81260 }, { "epoch": 0.00635, "grad_norm": 0.09273259341716766, "learning_rate": 3.45598292042495e-05, "loss": 0.035, "step": 81270 }, { "epoch": 0.0064, "grad_norm": 0.0678936317563057, "learning_rate": 3.455600955950385e-05, "loss": 0.035, "step": 81280 }, { "epoch": 0.00645, "grad_norm": 0.0818251222372055, "learning_rate": 3.4552189653500475e-05, "loss": 0.0355, "step": 81290 }, { "epoch": 0.0065, "grad_norm": 0.07798295468091965, "learning_rate": 3.454836948634383e-05, "loss": 0.0348, "step": 81300 }, { "epoch": 0.00655, "grad_norm": 0.07539451122283936, "learning_rate": 3.454454905813835e-05, "loss": 0.0358, "step": 81310 }, { "epoch": 0.0066, "grad_norm": 0.07479606568813324, "learning_rate": 3.454072836898849e-05, "loss": 0.0356, "step": 81320 }, { "epoch": 0.00665, "grad_norm": 0.08967671543359756, "learning_rate": 3.45369074189987e-05, "loss": 0.0353, "step": 81330 }, { "epoch": 0.0067, "grad_norm": 0.07154927402734756, "learning_rate": 3.4533086208273447e-05, "loss": 0.0355, "step": 81340 }, { "epoch": 0.00675, "grad_norm": 0.08140010386705399, "learning_rate": 3.45292647369172e-05, "loss": 0.0361, "step": 81350 }, { "epoch": 0.0068, "grad_norm": 0.08675054460763931, "learning_rate": 3.452544300503442e-05, "loss": 0.0365, "step": 81360 }, { "epoch": 0.00685, "grad_norm": 0.08151241391897202, "learning_rate": 3.452162101272961e-05, "loss": 0.034, "step": 81370 }, { "epoch": 0.0069, "grad_norm": 0.07963025569915771, "learning_rate": 3.451779876010727e-05, "loss": 0.037, "step": 81380 }, { "epoch": 0.00695, "grad_norm": 0.08118466287851334, "learning_rate": 3.4513976247271885e-05, "loss": 0.0343, "step": 81390 }, { "epoch": 0.007, "grad_norm": 0.07270652055740356, "learning_rate": 3.451015347432796e-05, "loss": 0.036, "step": 81400 }, { "epoch": 0.00705, "grad_norm": 0.09753786772489548, "learning_rate": 3.450633044138001e-05, "loss": 0.037, "step": 81410 }, { "epoch": 0.0071, "grad_norm": 0.07925489544868469, "learning_rate": 3.4502507148532556e-05, "loss": 0.0366, "step": 81420 }, { "epoch": 0.00715, "grad_norm": 0.09283397346735, "learning_rate": 3.449868359589013e-05, "loss": 0.0363, "step": 81430 }, { "epoch": 0.0072, "grad_norm": 0.08941236138343811, "learning_rate": 3.449485978355726e-05, "loss": 0.0379, "step": 81440 }, { "epoch": 0.00725, "grad_norm": 0.09721534699201584, "learning_rate": 3.44910357116385e-05, "loss": 0.0376, "step": 81450 }, { "epoch": 0.0073, "grad_norm": 0.08483276516199112, "learning_rate": 3.448721138023838e-05, "loss": 0.0367, "step": 81460 }, { "epoch": 0.00735, "grad_norm": 0.09143737703561783, "learning_rate": 3.448338678946147e-05, "loss": 0.0388, "step": 81470 }, { "epoch": 0.0074, "grad_norm": 0.0824364721775055, "learning_rate": 3.447956193941233e-05, "loss": 0.0359, "step": 81480 }, { "epoch": 0.00745, "grad_norm": 0.07745369523763657, "learning_rate": 3.4475736830195516e-05, "loss": 0.0373, "step": 81490 }, { "epoch": 0.0075, "grad_norm": 0.07666676491498947, "learning_rate": 3.447191146191563e-05, "loss": 0.0362, "step": 81500 }, { "epoch": 0.00755, "grad_norm": 0.09171445667743683, "learning_rate": 3.446808583467723e-05, "loss": 0.0386, "step": 81510 }, { "epoch": 0.0076, "grad_norm": 0.09327006340026855, "learning_rate": 3.446425994858493e-05, "loss": 0.0387, "step": 81520 }, { "epoch": 0.00765, "grad_norm": 0.08528904616832733, "learning_rate": 3.446043380374332e-05, "loss": 0.0375, "step": 81530 }, { "epoch": 0.0077, "grad_norm": 0.08111163973808289, "learning_rate": 3.4456607400256994e-05, "loss": 0.0363, "step": 81540 }, { "epoch": 0.00775, "grad_norm": 0.07948674261569977, "learning_rate": 3.4452780738230584e-05, "loss": 0.0379, "step": 81550 }, { "epoch": 0.0078, "grad_norm": 0.1013888418674469, "learning_rate": 3.444895381776869e-05, "loss": 0.0381, "step": 81560 }, { "epoch": 0.00785, "grad_norm": 0.06877487152814865, "learning_rate": 3.444512663897596e-05, "loss": 0.0358, "step": 81570 }, { "epoch": 0.0079, "grad_norm": 0.10422961413860321, "learning_rate": 3.444129920195701e-05, "loss": 0.037, "step": 81580 }, { "epoch": 0.00795, "grad_norm": 0.07992278784513474, "learning_rate": 3.4437471506816497e-05, "loss": 0.037, "step": 81590 }, { "epoch": 0.008, "grad_norm": 0.06682839244604111, "learning_rate": 3.443364355365905e-05, "loss": 0.0365, "step": 81600 }, { "epoch": 0.00805, "grad_norm": 0.09197988361120224, "learning_rate": 3.442981534258932e-05, "loss": 0.0364, "step": 81610 }, { "epoch": 0.0081, "grad_norm": 0.08035583049058914, "learning_rate": 3.442598687371199e-05, "loss": 0.0375, "step": 81620 }, { "epoch": 0.00815, "grad_norm": 0.08478248864412308, "learning_rate": 3.4422158147131726e-05, "loss": 0.0372, "step": 81630 }, { "epoch": 0.0082, "grad_norm": 0.08741386979818344, "learning_rate": 3.4418329162953196e-05, "loss": 0.0377, "step": 81640 }, { "epoch": 0.00825, "grad_norm": 0.07935051620006561, "learning_rate": 3.441449992128108e-05, "loss": 0.0366, "step": 81650 }, { "epoch": 0.0083, "grad_norm": 0.08112239092588425, "learning_rate": 3.441067042222008e-05, "loss": 0.0366, "step": 81660 }, { "epoch": 0.00835, "grad_norm": 0.08402236551046371, "learning_rate": 3.440684066587489e-05, "loss": 0.0358, "step": 81670 }, { "epoch": 0.0084, "grad_norm": 0.07969480007886887, "learning_rate": 3.440301065235019e-05, "loss": 0.0377, "step": 81680 }, { "epoch": 0.00845, "grad_norm": 0.09947582334280014, "learning_rate": 3.439918038175073e-05, "loss": 0.0362, "step": 81690 }, { "epoch": 0.0085, "grad_norm": 0.08369084447622299, "learning_rate": 3.43953498541812e-05, "loss": 0.0366, "step": 81700 }, { "epoch": 0.00855, "grad_norm": 0.07206384837627411, "learning_rate": 3.439151906974635e-05, "loss": 0.0364, "step": 81710 }, { "epoch": 0.0086, "grad_norm": 0.10137312859296799, "learning_rate": 3.438768802855088e-05, "loss": 0.0365, "step": 81720 }, { "epoch": 0.00865, "grad_norm": 0.07239537686109543, "learning_rate": 3.4383856730699546e-05, "loss": 0.0392, "step": 81730 }, { "epoch": 0.0087, "grad_norm": 0.07529709488153458, "learning_rate": 3.4380025176297095e-05, "loss": 0.0352, "step": 81740 }, { "epoch": 0.00875, "grad_norm": 0.10035303235054016, "learning_rate": 3.43761933654483e-05, "loss": 0.0355, "step": 81750 }, { "epoch": 0.0088, "grad_norm": 0.09971463680267334, "learning_rate": 3.4372361298257875e-05, "loss": 0.0349, "step": 81760 }, { "epoch": 0.00885, "grad_norm": 0.09327927976846695, "learning_rate": 3.436852897483062e-05, "loss": 0.0392, "step": 81770 }, { "epoch": 0.0089, "grad_norm": 0.08059550821781158, "learning_rate": 3.4364696395271315e-05, "loss": 0.036, "step": 81780 }, { "epoch": 0.00895, "grad_norm": 0.07822158932685852, "learning_rate": 3.4360863559684715e-05, "loss": 0.0355, "step": 81790 }, { "epoch": 0.009, "grad_norm": 0.08221601694822311, "learning_rate": 3.435703046817562e-05, "loss": 0.0353, "step": 81800 }, { "epoch": 0.00905, "grad_norm": 0.08299103379249573, "learning_rate": 3.4353197120848833e-05, "loss": 0.0358, "step": 81810 }, { "epoch": 0.0091, "grad_norm": 0.08895883709192276, "learning_rate": 3.4349363517809156e-05, "loss": 0.0367, "step": 81820 }, { "epoch": 0.00915, "grad_norm": 0.09609355032444, "learning_rate": 3.434552965916138e-05, "loss": 0.0366, "step": 81830 }, { "epoch": 0.0092, "grad_norm": 0.08394278585910797, "learning_rate": 3.434169554501035e-05, "loss": 0.0369, "step": 81840 }, { "epoch": 0.00925, "grad_norm": 0.08705271780490875, "learning_rate": 3.4337861175460864e-05, "loss": 0.0373, "step": 81850 }, { "epoch": 0.0093, "grad_norm": 0.0820394977927208, "learning_rate": 3.433402655061777e-05, "loss": 0.0349, "step": 81860 }, { "epoch": 0.00935, "grad_norm": 0.08074338734149933, "learning_rate": 3.433019167058588e-05, "loss": 0.0355, "step": 81870 }, { "epoch": 0.0094, "grad_norm": 0.07878132909536362, "learning_rate": 3.432635653547007e-05, "loss": 0.035, "step": 81880 }, { "epoch": 0.00945, "grad_norm": 0.09017588943243027, "learning_rate": 3.4322521145375167e-05, "loss": 0.0357, "step": 81890 }, { "epoch": 0.0095, "grad_norm": 0.0714225172996521, "learning_rate": 3.4318685500406045e-05, "loss": 0.0354, "step": 81900 }, { "epoch": 0.00955, "grad_norm": 0.077426977455616, "learning_rate": 3.431484960066756e-05, "loss": 0.0366, "step": 81910 }, { "epoch": 0.0096, "grad_norm": 0.08480240404605865, "learning_rate": 3.4311013446264586e-05, "loss": 0.0359, "step": 81920 }, { "epoch": 0.00965, "grad_norm": 0.08634501695632935, "learning_rate": 3.4307177037301996e-05, "loss": 0.0348, "step": 81930 }, { "epoch": 0.0097, "grad_norm": 0.08254533261060715, "learning_rate": 3.430334037388469e-05, "loss": 0.0353, "step": 81940 }, { "epoch": 0.00975, "grad_norm": 0.08382163196802139, "learning_rate": 3.4299503456117546e-05, "loss": 0.0374, "step": 81950 }, { "epoch": 0.0098, "grad_norm": 0.09046601504087448, "learning_rate": 3.429566628410548e-05, "loss": 0.0361, "step": 81960 }, { "epoch": 0.00985, "grad_norm": 0.08730943500995636, "learning_rate": 3.429182885795339e-05, "loss": 0.0351, "step": 81970 }, { "epoch": 0.0099, "grad_norm": 0.09844104200601578, "learning_rate": 3.4287991177766184e-05, "loss": 0.0366, "step": 81980 }, { "epoch": 0.00995, "grad_norm": 0.07887846976518631, "learning_rate": 3.428415324364879e-05, "loss": 0.0367, "step": 81990 }, { "epoch": 0.01, "grad_norm": 0.09441222995519638, "learning_rate": 3.428031505570614e-05, "loss": 0.0361, "step": 82000 }, { "epoch": 0.01005, "grad_norm": 0.08640168607234955, "learning_rate": 3.427647661404315e-05, "loss": 0.0362, "step": 82010 }, { "epoch": 0.0101, "grad_norm": 0.07743245363235474, "learning_rate": 3.427263791876478e-05, "loss": 0.0362, "step": 82020 }, { "epoch": 0.01015, "grad_norm": 0.07744090259075165, "learning_rate": 3.426879896997598e-05, "loss": 0.035, "step": 82030 }, { "epoch": 0.0102, "grad_norm": 0.08554510772228241, "learning_rate": 3.42649597677817e-05, "loss": 0.0359, "step": 82040 }, { "epoch": 0.01025, "grad_norm": 0.08902224898338318, "learning_rate": 3.426112031228689e-05, "loss": 0.0355, "step": 82050 }, { "epoch": 0.0103, "grad_norm": 0.09727823734283447, "learning_rate": 3.425728060359653e-05, "loss": 0.0356, "step": 82060 }, { "epoch": 0.01035, "grad_norm": 0.0850490853190422, "learning_rate": 3.42534406418156e-05, "loss": 0.0354, "step": 82070 }, { "epoch": 0.0104, "grad_norm": 0.06924112141132355, "learning_rate": 3.424960042704908e-05, "loss": 0.0352, "step": 82080 }, { "epoch": 0.01045, "grad_norm": 0.07539879530668259, "learning_rate": 3.424575995940196e-05, "loss": 0.0344, "step": 82090 }, { "epoch": 0.0105, "grad_norm": 0.07704594731330872, "learning_rate": 3.424191923897923e-05, "loss": 0.0365, "step": 82100 }, { "epoch": 0.01055, "grad_norm": 0.07837717980146408, "learning_rate": 3.423807826588591e-05, "loss": 0.0354, "step": 82110 }, { "epoch": 0.0106, "grad_norm": 0.08570121973752975, "learning_rate": 3.423423704022699e-05, "loss": 0.0363, "step": 82120 }, { "epoch": 0.01065, "grad_norm": 0.07615642249584198, "learning_rate": 3.4230395562107506e-05, "loss": 0.0365, "step": 82130 }, { "epoch": 0.0107, "grad_norm": 0.09024906158447266, "learning_rate": 3.422655383163247e-05, "loss": 0.0367, "step": 82140 }, { "epoch": 0.01075, "grad_norm": 0.0853065475821495, "learning_rate": 3.4222711848906927e-05, "loss": 0.0368, "step": 82150 }, { "epoch": 0.0108, "grad_norm": 0.07461123168468475, "learning_rate": 3.42188696140359e-05, "loss": 0.0371, "step": 82160 }, { "epoch": 0.01085, "grad_norm": 0.08725110441446304, "learning_rate": 3.421502712712445e-05, "loss": 0.035, "step": 82170 }, { "epoch": 0.0109, "grad_norm": 0.09073107689619064, "learning_rate": 3.4211184388277604e-05, "loss": 0.0345, "step": 82180 }, { "epoch": 0.01095, "grad_norm": 0.08718758821487427, "learning_rate": 3.420734139760045e-05, "loss": 0.039, "step": 82190 }, { "epoch": 0.011, "grad_norm": 0.07554249465465546, "learning_rate": 3.420349815519803e-05, "loss": 0.0363, "step": 82200 }, { "epoch": 0.01105, "grad_norm": 0.0913548395037651, "learning_rate": 3.4199654661175445e-05, "loss": 0.0366, "step": 82210 }, { "epoch": 0.0111, "grad_norm": 0.07682806253433228, "learning_rate": 3.419581091563775e-05, "loss": 0.0352, "step": 82220 }, { "epoch": 0.01115, "grad_norm": 0.07206662744283676, "learning_rate": 3.419196691869003e-05, "loss": 0.0353, "step": 82230 }, { "epoch": 0.0112, "grad_norm": 0.07625097036361694, "learning_rate": 3.41881226704374e-05, "loss": 0.039, "step": 82240 }, { "epoch": 0.01125, "grad_norm": 0.09000785648822784, "learning_rate": 3.418427817098494e-05, "loss": 0.036, "step": 82250 }, { "epoch": 0.0113, "grad_norm": 0.07559432089328766, "learning_rate": 3.4180433420437766e-05, "loss": 0.0344, "step": 82260 }, { "epoch": 0.01135, "grad_norm": 0.11739441752433777, "learning_rate": 3.417658841890099e-05, "loss": 0.0352, "step": 82270 }, { "epoch": 0.0114, "grad_norm": 0.09553413838148117, "learning_rate": 3.417274316647974e-05, "loss": 0.0348, "step": 82280 }, { "epoch": 0.01145, "grad_norm": 0.09647861123085022, "learning_rate": 3.416889766327914e-05, "loss": 0.0366, "step": 82290 }, { "epoch": 0.0115, "grad_norm": 0.12469710409641266, "learning_rate": 3.416505190940432e-05, "loss": 0.0361, "step": 82300 }, { "epoch": 0.01155, "grad_norm": 0.10431114584207535, "learning_rate": 3.4161205904960414e-05, "loss": 0.0345, "step": 82310 }, { "epoch": 0.0116, "grad_norm": 0.08366325497627258, "learning_rate": 3.415735965005259e-05, "loss": 0.0342, "step": 82320 }, { "epoch": 0.01165, "grad_norm": 0.08749702572822571, "learning_rate": 3.415351314478599e-05, "loss": 0.0365, "step": 82330 }, { "epoch": 0.0117, "grad_norm": 0.07440687716007233, "learning_rate": 3.414966638926579e-05, "loss": 0.0361, "step": 82340 }, { "epoch": 0.01175, "grad_norm": 0.09167435765266418, "learning_rate": 3.414581938359713e-05, "loss": 0.0357, "step": 82350 }, { "epoch": 0.0118, "grad_norm": 0.08168090879917145, "learning_rate": 3.414197212788522e-05, "loss": 0.0366, "step": 82360 }, { "epoch": 0.01185, "grad_norm": 0.12418495118618011, "learning_rate": 3.413812462223522e-05, "loss": 0.0372, "step": 82370 }, { "epoch": 0.0119, "grad_norm": 0.10299114882946014, "learning_rate": 3.4134276866752325e-05, "loss": 0.0356, "step": 82380 }, { "epoch": 0.01195, "grad_norm": 0.08420941233634949, "learning_rate": 3.413042886154173e-05, "loss": 0.0377, "step": 82390 }, { "epoch": 0.012, "grad_norm": 0.0910545364022255, "learning_rate": 3.4126580606708644e-05, "loss": 0.0382, "step": 82400 }, { "epoch": 0.01205, "grad_norm": 0.08392889052629471, "learning_rate": 3.4122732102358265e-05, "loss": 0.0356, "step": 82410 }, { "epoch": 0.0121, "grad_norm": 0.0799657553434372, "learning_rate": 3.411888334859583e-05, "loss": 0.0368, "step": 82420 }, { "epoch": 0.01215, "grad_norm": 0.09799450635910034, "learning_rate": 3.411503434552654e-05, "loss": 0.0376, "step": 82430 }, { "epoch": 0.0122, "grad_norm": 0.09583955258131027, "learning_rate": 3.411118509325564e-05, "loss": 0.0371, "step": 82440 }, { "epoch": 0.01225, "grad_norm": 0.08460697531700134, "learning_rate": 3.410733559188836e-05, "loss": 0.0375, "step": 82450 }, { "epoch": 0.0123, "grad_norm": 0.0918983668088913, "learning_rate": 3.410348584152996e-05, "loss": 0.0366, "step": 82460 }, { "epoch": 0.01235, "grad_norm": 0.10554148256778717, "learning_rate": 3.4099635842285657e-05, "loss": 0.0398, "step": 82470 }, { "epoch": 0.0124, "grad_norm": 0.09713000804185867, "learning_rate": 3.409578559426074e-05, "loss": 0.0364, "step": 82480 }, { "epoch": 0.01245, "grad_norm": 0.10049746930599213, "learning_rate": 3.409193509756046e-05, "loss": 0.0364, "step": 82490 }, { "epoch": 0.0125, "grad_norm": 0.0884561762213707, "learning_rate": 3.408808435229009e-05, "loss": 0.0362, "step": 82500 }, { "epoch": 0.01255, "grad_norm": 0.0997234433889389, "learning_rate": 3.4084233358554906e-05, "loss": 0.0363, "step": 82510 }, { "epoch": 0.0126, "grad_norm": 0.0873696431517601, "learning_rate": 3.408038211646019e-05, "loss": 0.0362, "step": 82520 }, { "epoch": 0.01265, "grad_norm": 0.0935036689043045, "learning_rate": 3.4076530626111244e-05, "loss": 0.0375, "step": 82530 }, { "epoch": 0.0127, "grad_norm": 0.09640396386384964, "learning_rate": 3.4072678887613364e-05, "loss": 0.0364, "step": 82540 }, { "epoch": 0.01275, "grad_norm": 0.09741134196519852, "learning_rate": 3.406882690107185e-05, "loss": 0.038, "step": 82550 }, { "epoch": 0.0128, "grad_norm": 0.10588259249925613, "learning_rate": 3.4064974666592014e-05, "loss": 0.0367, "step": 82560 }, { "epoch": 0.01285, "grad_norm": 0.09171366691589355, "learning_rate": 3.406112218427918e-05, "loss": 0.0364, "step": 82570 }, { "epoch": 0.0129, "grad_norm": 0.09563100337982178, "learning_rate": 3.405726945423866e-05, "loss": 0.0376, "step": 82580 }, { "epoch": 0.01295, "grad_norm": 0.08102994412183762, "learning_rate": 3.405341647657581e-05, "loss": 0.0358, "step": 82590 }, { "epoch": 0.013, "grad_norm": 0.0844772458076477, "learning_rate": 3.404956325139594e-05, "loss": 0.0394, "step": 82600 }, { "epoch": 0.01305, "grad_norm": 0.09395796060562134, "learning_rate": 3.4045709778804426e-05, "loss": 0.037, "step": 82610 }, { "epoch": 0.0131, "grad_norm": 0.08102398365736008, "learning_rate": 3.404185605890659e-05, "loss": 0.0362, "step": 82620 }, { "epoch": 0.01315, "grad_norm": 0.0828714370727539, "learning_rate": 3.403800209180781e-05, "loss": 0.036, "step": 82630 }, { "epoch": 0.0132, "grad_norm": 0.08016139268875122, "learning_rate": 3.403414787761345e-05, "loss": 0.0366, "step": 82640 }, { "epoch": 0.01325, "grad_norm": 0.0881897360086441, "learning_rate": 3.403029341642888e-05, "loss": 0.0375, "step": 82650 }, { "epoch": 0.0133, "grad_norm": 0.09445564448833466, "learning_rate": 3.402643870835948e-05, "loss": 0.0371, "step": 82660 }, { "epoch": 0.01335, "grad_norm": 0.08610563725233078, "learning_rate": 3.4022583753510646e-05, "loss": 0.0366, "step": 82670 }, { "epoch": 0.0134, "grad_norm": 0.08738268911838531, "learning_rate": 3.4018728551987746e-05, "loss": 0.0376, "step": 82680 }, { "epoch": 0.01345, "grad_norm": 0.0908346176147461, "learning_rate": 3.4014873103896205e-05, "loss": 0.0384, "step": 82690 }, { "epoch": 0.0135, "grad_norm": 0.08275371044874191, "learning_rate": 3.4011017409341414e-05, "loss": 0.037, "step": 82700 }, { "epoch": 0.01355, "grad_norm": 0.08076354116201401, "learning_rate": 3.4007161468428805e-05, "loss": 0.0364, "step": 82710 }, { "epoch": 0.0136, "grad_norm": 0.09224753081798553, "learning_rate": 3.4003305281263776e-05, "loss": 0.0364, "step": 82720 }, { "epoch": 0.01365, "grad_norm": 0.08687998354434967, "learning_rate": 3.3999448847951764e-05, "loss": 0.0363, "step": 82730 }, { "epoch": 0.0137, "grad_norm": 0.08099762350320816, "learning_rate": 3.39955921685982e-05, "loss": 0.0364, "step": 82740 }, { "epoch": 0.01375, "grad_norm": 0.09829870611429214, "learning_rate": 3.399173524330853e-05, "loss": 0.0378, "step": 82750 }, { "epoch": 0.0138, "grad_norm": 0.10540544241666794, "learning_rate": 3.398787807218819e-05, "loss": 0.0377, "step": 82760 }, { "epoch": 0.01385, "grad_norm": 0.0992199257016182, "learning_rate": 3.398402065534265e-05, "loss": 0.0369, "step": 82770 }, { "epoch": 0.0139, "grad_norm": 0.1026124507188797, "learning_rate": 3.398016299287736e-05, "loss": 0.0371, "step": 82780 }, { "epoch": 0.01395, "grad_norm": 0.09222318977117538, "learning_rate": 3.3976305084897776e-05, "loss": 0.0363, "step": 82790 }, { "epoch": 0.014, "grad_norm": 0.08628864586353302, "learning_rate": 3.397244693150939e-05, "loss": 0.0375, "step": 82800 }, { "epoch": 0.01405, "grad_norm": 0.09229279309511185, "learning_rate": 3.396858853281767e-05, "loss": 0.0367, "step": 82810 }, { "epoch": 0.0141, "grad_norm": 0.10248692333698273, "learning_rate": 3.3964729888928115e-05, "loss": 0.0361, "step": 82820 }, { "epoch": 0.01415, "grad_norm": 0.09319385886192322, "learning_rate": 3.396087099994621e-05, "loss": 0.0357, "step": 82830 }, { "epoch": 0.0142, "grad_norm": 0.08986565470695496, "learning_rate": 3.3957011865977466e-05, "loss": 0.0369, "step": 82840 }, { "epoch": 0.01425, "grad_norm": 0.06687970459461212, "learning_rate": 3.3953152487127375e-05, "loss": 0.0357, "step": 82850 }, { "epoch": 0.0143, "grad_norm": 0.08596998453140259, "learning_rate": 3.3949292863501465e-05, "loss": 0.0373, "step": 82860 }, { "epoch": 0.01435, "grad_norm": 0.0879545584321022, "learning_rate": 3.394543299520524e-05, "loss": 0.0393, "step": 82870 }, { "epoch": 0.0144, "grad_norm": 0.08258802443742752, "learning_rate": 3.3941572882344244e-05, "loss": 0.0355, "step": 82880 }, { "epoch": 0.01445, "grad_norm": 0.08050991594791412, "learning_rate": 3.3937712525024e-05, "loss": 0.0361, "step": 82890 }, { "epoch": 0.0145, "grad_norm": 0.08885800838470459, "learning_rate": 3.393385192335006e-05, "loss": 0.0358, "step": 82900 }, { "epoch": 0.01455, "grad_norm": 0.07728556543588638, "learning_rate": 3.392999107742796e-05, "loss": 0.0377, "step": 82910 }, { "epoch": 0.0146, "grad_norm": 0.08573156595230103, "learning_rate": 3.392612998736327e-05, "loss": 0.0364, "step": 82920 }, { "epoch": 0.01465, "grad_norm": 0.07444633543491364, "learning_rate": 3.392226865326153e-05, "loss": 0.0357, "step": 82930 }, { "epoch": 0.0147, "grad_norm": 0.07533413171768188, "learning_rate": 3.3918407075228306e-05, "loss": 0.0348, "step": 82940 }, { "epoch": 0.01475, "grad_norm": 0.09161031991243362, "learning_rate": 3.3914545253369196e-05, "loss": 0.0356, "step": 82950 }, { "epoch": 0.0148, "grad_norm": 0.08216315507888794, "learning_rate": 3.3910683187789766e-05, "loss": 0.0347, "step": 82960 }, { "epoch": 0.01485, "grad_norm": 0.08883273601531982, "learning_rate": 3.3906820878595604e-05, "loss": 0.0364, "step": 82970 }, { "epoch": 0.0149, "grad_norm": 0.08381297439336777, "learning_rate": 3.3902958325892303e-05, "loss": 0.0353, "step": 82980 }, { "epoch": 0.01495, "grad_norm": 0.08417104184627533, "learning_rate": 3.389909552978547e-05, "loss": 0.0358, "step": 82990 }, { "epoch": 0.015, "grad_norm": 0.08534098416566849, "learning_rate": 3.3895232490380714e-05, "loss": 0.0348, "step": 83000 }, { "epoch": 0.01505, "grad_norm": 0.08981525152921677, "learning_rate": 3.389136920778363e-05, "loss": 0.0361, "step": 83010 }, { "epoch": 0.0151, "grad_norm": 0.08870881050825119, "learning_rate": 3.388750568209986e-05, "loss": 0.0346, "step": 83020 }, { "epoch": 0.01515, "grad_norm": 0.07469086349010468, "learning_rate": 3.3883641913435025e-05, "loss": 0.0359, "step": 83030 }, { "epoch": 0.0152, "grad_norm": 0.10804679989814758, "learning_rate": 3.3879777901894754e-05, "loss": 0.0363, "step": 83040 }, { "epoch": 0.01525, "grad_norm": 0.08663041144609451, "learning_rate": 3.3875913647584695e-05, "loss": 0.038, "step": 83050 }, { "epoch": 0.0153, "grad_norm": 0.07422086596488953, "learning_rate": 3.3872049150610486e-05, "loss": 0.0349, "step": 83060 }, { "epoch": 0.01535, "grad_norm": 0.09427531063556671, "learning_rate": 3.38681844110778e-05, "loss": 0.0372, "step": 83070 }, { "epoch": 0.0154, "grad_norm": 0.08496449887752533, "learning_rate": 3.386431942909226e-05, "loss": 0.0358, "step": 83080 }, { "epoch": 0.01545, "grad_norm": 0.09647180885076523, "learning_rate": 3.3860454204759576e-05, "loss": 0.0374, "step": 83090 }, { "epoch": 0.0155, "grad_norm": 0.09570591151714325, "learning_rate": 3.385658873818539e-05, "loss": 0.0342, "step": 83100 }, { "epoch": 0.01555, "grad_norm": 0.10772760957479477, "learning_rate": 3.385272302947541e-05, "loss": 0.037, "step": 83110 }, { "epoch": 0.0156, "grad_norm": 0.09298603236675262, "learning_rate": 3.384885707873529e-05, "loss": 0.0353, "step": 83120 }, { "epoch": 0.01565, "grad_norm": 0.0879300981760025, "learning_rate": 3.384499088607076e-05, "loss": 0.0352, "step": 83130 }, { "epoch": 0.0157, "grad_norm": 0.09325937926769257, "learning_rate": 3.3841124451587494e-05, "loss": 0.0358, "step": 83140 }, { "epoch": 0.01575, "grad_norm": 0.09831637144088745, "learning_rate": 3.383725777539121e-05, "loss": 0.0364, "step": 83150 }, { "epoch": 0.0158, "grad_norm": 0.09208468347787857, "learning_rate": 3.383339085758761e-05, "loss": 0.0362, "step": 83160 }, { "epoch": 0.01585, "grad_norm": 0.08796915411949158, "learning_rate": 3.382952369828243e-05, "loss": 0.0352, "step": 83170 }, { "epoch": 0.0159, "grad_norm": 0.07426141202449799, "learning_rate": 3.382565629758139e-05, "loss": 0.0351, "step": 83180 }, { "epoch": 0.01595, "grad_norm": 0.07396486401557922, "learning_rate": 3.3821788655590215e-05, "loss": 0.0349, "step": 83190 }, { "epoch": 0.016, "grad_norm": 0.07119892537593842, "learning_rate": 3.381792077241466e-05, "loss": 0.0352, "step": 83200 }, { "epoch": 0.01605, "grad_norm": 0.08340752124786377, "learning_rate": 3.381405264816046e-05, "loss": 0.0377, "step": 83210 }, { "epoch": 0.0161, "grad_norm": 0.08224661648273468, "learning_rate": 3.381018428293337e-05, "loss": 0.0359, "step": 83220 }, { "epoch": 0.01615, "grad_norm": 0.09233871102333069, "learning_rate": 3.380631567683915e-05, "loss": 0.0366, "step": 83230 }, { "epoch": 0.0162, "grad_norm": 0.0836290791630745, "learning_rate": 3.380244682998358e-05, "loss": 0.0352, "step": 83240 }, { "epoch": 0.01625, "grad_norm": 0.09171456098556519, "learning_rate": 3.379857774247241e-05, "loss": 0.0352, "step": 83250 }, { "epoch": 0.0163, "grad_norm": 0.08661381900310516, "learning_rate": 3.379470841441144e-05, "loss": 0.0355, "step": 83260 }, { "epoch": 0.01635, "grad_norm": 0.09041456878185272, "learning_rate": 3.3790838845906426e-05, "loss": 0.0357, "step": 83270 }, { "epoch": 0.0164, "grad_norm": 0.07483222335577011, "learning_rate": 3.3786969037063196e-05, "loss": 0.0353, "step": 83280 }, { "epoch": 0.01645, "grad_norm": 0.10447685420513153, "learning_rate": 3.378309898798753e-05, "loss": 0.0364, "step": 83290 }, { "epoch": 0.0165, "grad_norm": 0.089652419090271, "learning_rate": 3.377922869878524e-05, "loss": 0.0344, "step": 83300 }, { "epoch": 0.01655, "grad_norm": 0.11374127864837646, "learning_rate": 3.377535816956213e-05, "loss": 0.0368, "step": 83310 }, { "epoch": 0.0166, "grad_norm": 0.0918031632900238, "learning_rate": 3.3771487400424036e-05, "loss": 0.0352, "step": 83320 }, { "epoch": 0.01665, "grad_norm": 0.0891224667429924, "learning_rate": 3.376761639147675e-05, "loss": 0.0358, "step": 83330 }, { "epoch": 0.0167, "grad_norm": 0.08948148041963577, "learning_rate": 3.3763745142826146e-05, "loss": 0.0376, "step": 83340 }, { "epoch": 0.01675, "grad_norm": 0.09376305341720581, "learning_rate": 3.375987365457804e-05, "loss": 0.0351, "step": 83350 }, { "epoch": 0.0168, "grad_norm": 0.08524085581302643, "learning_rate": 3.3756001926838273e-05, "loss": 0.0365, "step": 83360 }, { "epoch": 0.01685, "grad_norm": 0.08106539398431778, "learning_rate": 3.37521299597127e-05, "loss": 0.0352, "step": 83370 }, { "epoch": 0.0169, "grad_norm": 0.06964035332202911, "learning_rate": 3.374825775330719e-05, "loss": 0.0359, "step": 83380 }, { "epoch": 0.01695, "grad_norm": 0.08054011315107346, "learning_rate": 3.37443853077276e-05, "loss": 0.0367, "step": 83390 }, { "epoch": 0.017, "grad_norm": 0.07989324629306793, "learning_rate": 3.3740512623079794e-05, "loss": 0.0351, "step": 83400 }, { "epoch": 0.01705, "grad_norm": 0.09449871629476547, "learning_rate": 3.3736639699469655e-05, "loss": 0.0367, "step": 83410 }, { "epoch": 0.0171, "grad_norm": 0.07510863244533539, "learning_rate": 3.373276653700308e-05, "loss": 0.0355, "step": 83420 }, { "epoch": 0.01715, "grad_norm": 0.08826517313718796, "learning_rate": 3.3728893135785937e-05, "loss": 0.0362, "step": 83430 }, { "epoch": 0.0172, "grad_norm": 0.09773914515972137, "learning_rate": 3.3725019495924135e-05, "loss": 0.0363, "step": 83440 }, { "epoch": 0.01725, "grad_norm": 0.08512328565120697, "learning_rate": 3.372114561752359e-05, "loss": 0.0366, "step": 83450 }, { "epoch": 0.0173, "grad_norm": 0.10157383233308792, "learning_rate": 3.37172715006902e-05, "loss": 0.0369, "step": 83460 }, { "epoch": 0.01735, "grad_norm": 0.09032903611660004, "learning_rate": 3.371339714552987e-05, "loss": 0.0353, "step": 83470 }, { "epoch": 0.0174, "grad_norm": 0.07454940676689148, "learning_rate": 3.370952255214853e-05, "loss": 0.0363, "step": 83480 }, { "epoch": 0.01745, "grad_norm": 0.07773549109697342, "learning_rate": 3.3705647720652135e-05, "loss": 0.0359, "step": 83490 }, { "epoch": 0.0175, "grad_norm": 0.07942163944244385, "learning_rate": 3.370177265114659e-05, "loss": 0.0352, "step": 83500 }, { "epoch": 0.01755, "grad_norm": 0.10010170936584473, "learning_rate": 3.3697897343737855e-05, "loss": 0.0358, "step": 83510 }, { "epoch": 0.0176, "grad_norm": 0.09081415086984634, "learning_rate": 3.3694021798531865e-05, "loss": 0.0352, "step": 83520 }, { "epoch": 0.01765, "grad_norm": 0.08686517924070358, "learning_rate": 3.369014601563459e-05, "loss": 0.0359, "step": 83530 }, { "epoch": 0.0177, "grad_norm": 0.07877647876739502, "learning_rate": 3.3686269995152e-05, "loss": 0.0354, "step": 83540 }, { "epoch": 0.01775, "grad_norm": 0.09665057063102722, "learning_rate": 3.3682393737190035e-05, "loss": 0.0372, "step": 83550 }, { "epoch": 0.0178, "grad_norm": 0.0790608748793602, "learning_rate": 3.367851724185469e-05, "loss": 0.034, "step": 83560 }, { "epoch": 0.01785, "grad_norm": 0.0802990272641182, "learning_rate": 3.3674640509251956e-05, "loss": 0.0364, "step": 83570 }, { "epoch": 0.0179, "grad_norm": 0.09536537528038025, "learning_rate": 3.36707635394878e-05, "loss": 0.0362, "step": 83580 }, { "epoch": 0.01795, "grad_norm": 0.089724600315094, "learning_rate": 3.366688633266822e-05, "loss": 0.0365, "step": 83590 }, { "epoch": 0.018, "grad_norm": 0.10376014560461044, "learning_rate": 3.366300888889923e-05, "loss": 0.036, "step": 83600 }, { "epoch": 0.01805, "grad_norm": 0.08771280199289322, "learning_rate": 3.365913120828684e-05, "loss": 0.0355, "step": 83610 }, { "epoch": 0.0181, "grad_norm": 0.09692392498254776, "learning_rate": 3.365525329093705e-05, "loss": 0.0365, "step": 83620 }, { "epoch": 0.01815, "grad_norm": 0.08540845662355423, "learning_rate": 3.365137513695589e-05, "loss": 0.0373, "step": 83630 }, { "epoch": 0.0182, "grad_norm": 0.08605222404003143, "learning_rate": 3.364749674644937e-05, "loss": 0.037, "step": 83640 }, { "epoch": 0.01825, "grad_norm": 0.10747594386339188, "learning_rate": 3.3643618119523545e-05, "loss": 0.0387, "step": 83650 }, { "epoch": 0.0183, "grad_norm": 0.09305700659751892, "learning_rate": 3.363973925628445e-05, "loss": 0.0371, "step": 83660 }, { "epoch": 0.01835, "grad_norm": 0.0908249244093895, "learning_rate": 3.3635860156838137e-05, "loss": 0.0356, "step": 83670 }, { "epoch": 0.0184, "grad_norm": 0.07326623052358627, "learning_rate": 3.363198082129064e-05, "loss": 0.038, "step": 83680 }, { "epoch": 0.01845, "grad_norm": 0.0957440733909607, "learning_rate": 3.362810124974803e-05, "loss": 0.0377, "step": 83690 }, { "epoch": 0.0185, "grad_norm": 0.09219150990247726, "learning_rate": 3.3624221442316376e-05, "loss": 0.039, "step": 83700 }, { "epoch": 0.01855, "grad_norm": 0.08684364706277847, "learning_rate": 3.362034139910175e-05, "loss": 0.0367, "step": 83710 }, { "epoch": 0.0186, "grad_norm": 0.07927060127258301, "learning_rate": 3.3616461120210224e-05, "loss": 0.0369, "step": 83720 }, { "epoch": 0.01865, "grad_norm": 0.08206473290920258, "learning_rate": 3.361258060574789e-05, "loss": 0.037, "step": 83730 }, { "epoch": 0.0187, "grad_norm": 0.09019777923822403, "learning_rate": 3.3608699855820846e-05, "loss": 0.0394, "step": 83740 }, { "epoch": 0.01875, "grad_norm": 0.08604075014591217, "learning_rate": 3.3604818870535174e-05, "loss": 0.0358, "step": 83750 }, { "epoch": 0.0188, "grad_norm": 0.07271555811166763, "learning_rate": 3.360093764999699e-05, "loss": 0.0357, "step": 83760 }, { "epoch": 0.01885, "grad_norm": 0.07016167789697647, "learning_rate": 3.35970561943124e-05, "loss": 0.0349, "step": 83770 }, { "epoch": 0.0189, "grad_norm": 0.08462268859148026, "learning_rate": 3.359317450358752e-05, "loss": 0.035, "step": 83780 }, { "epoch": 0.01895, "grad_norm": 0.08154400438070297, "learning_rate": 3.358929257792848e-05, "loss": 0.037, "step": 83790 }, { "epoch": 0.019, "grad_norm": 0.0706799328327179, "learning_rate": 3.358541041744141e-05, "loss": 0.036, "step": 83800 }, { "epoch": 0.01905, "grad_norm": 0.09226647764444351, "learning_rate": 3.358152802223244e-05, "loss": 0.037, "step": 83810 }, { "epoch": 0.0191, "grad_norm": 0.09303418546915054, "learning_rate": 3.357764539240772e-05, "loss": 0.0363, "step": 83820 }, { "epoch": 0.01915, "grad_norm": 0.0904071107506752, "learning_rate": 3.3573762528073404e-05, "loss": 0.0348, "step": 83830 }, { "epoch": 0.0192, "grad_norm": 0.0851183757185936, "learning_rate": 3.356987942933563e-05, "loss": 0.0357, "step": 83840 }, { "epoch": 0.01925, "grad_norm": 0.06624419242143631, "learning_rate": 3.356599609630058e-05, "loss": 0.0339, "step": 83850 }, { "epoch": 0.0193, "grad_norm": 0.08188678324222565, "learning_rate": 3.356211252907441e-05, "loss": 0.0366, "step": 83860 }, { "epoch": 0.01935, "grad_norm": 0.0802663192152977, "learning_rate": 3.3558228727763305e-05, "loss": 0.035, "step": 83870 }, { "epoch": 0.0194, "grad_norm": 0.08806353062391281, "learning_rate": 3.355434469247344e-05, "loss": 0.0356, "step": 83880 }, { "epoch": 0.01945, "grad_norm": 0.10073021054267883, "learning_rate": 3.3550460423311004e-05, "loss": 0.038, "step": 83890 }, { "epoch": 0.0195, "grad_norm": 0.08883056789636612, "learning_rate": 3.354657592038219e-05, "loss": 0.0352, "step": 83900 }, { "epoch": 0.01955, "grad_norm": 0.07281231135129929, "learning_rate": 3.354269118379321e-05, "loss": 0.0355, "step": 83910 }, { "epoch": 0.0196, "grad_norm": 0.07044170051813126, "learning_rate": 3.353880621365025e-05, "loss": 0.0348, "step": 83920 }, { "epoch": 0.01965, "grad_norm": 0.06740438938140869, "learning_rate": 3.353492101005955e-05, "loss": 0.0353, "step": 83930 }, { "epoch": 0.0197, "grad_norm": 0.08475062996149063, "learning_rate": 3.3531035573127304e-05, "loss": 0.035, "step": 83940 }, { "epoch": 0.01975, "grad_norm": 0.06419086456298828, "learning_rate": 3.3527149902959755e-05, "loss": 0.0367, "step": 83950 }, { "epoch": 0.0198, "grad_norm": 0.07238471508026123, "learning_rate": 3.3523263999663124e-05, "loss": 0.0352, "step": 83960 }, { "epoch": 0.01985, "grad_norm": 0.07790805399417877, "learning_rate": 3.3519377863343664e-05, "loss": 0.0366, "step": 83970 }, { "epoch": 0.0199, "grad_norm": 0.08158902823925018, "learning_rate": 3.351549149410761e-05, "loss": 0.0361, "step": 83980 }, { "epoch": 0.01995, "grad_norm": 0.07997786998748779, "learning_rate": 3.351160489206123e-05, "loss": 0.0353, "step": 83990 }, { "epoch": 0.02, "grad_norm": 0.07322206348180771, "learning_rate": 3.350771805731076e-05, "loss": 0.0353, "step": 84000 }, { "epoch": 0.02005, "grad_norm": 0.08143064379692078, "learning_rate": 3.350383098996248e-05, "loss": 0.0363, "step": 84010 }, { "epoch": 0.0201, "grad_norm": 0.08571472018957138, "learning_rate": 3.349994369012265e-05, "loss": 0.0367, "step": 84020 }, { "epoch": 0.02015, "grad_norm": 0.08176877349615097, "learning_rate": 3.3496056157897545e-05, "loss": 0.0364, "step": 84030 }, { "epoch": 0.0202, "grad_norm": 0.0745140090584755, "learning_rate": 3.3492168393393465e-05, "loss": 0.0363, "step": 84040 }, { "epoch": 0.02025, "grad_norm": 0.08554243296384811, "learning_rate": 3.34882803967167e-05, "loss": 0.035, "step": 84050 }, { "epoch": 0.0203, "grad_norm": 0.08013322949409485, "learning_rate": 3.348439216797353e-05, "loss": 0.0368, "step": 84060 }, { "epoch": 0.02035, "grad_norm": 0.07512438297271729, "learning_rate": 3.348050370727027e-05, "loss": 0.0349, "step": 84070 }, { "epoch": 0.0204, "grad_norm": 0.08043927699327469, "learning_rate": 3.347661501471321e-05, "loss": 0.0353, "step": 84080 }, { "epoch": 0.02045, "grad_norm": 0.09401516616344452, "learning_rate": 3.34727260904087e-05, "loss": 0.036, "step": 84090 }, { "epoch": 0.0205, "grad_norm": 0.07855821400880814, "learning_rate": 3.346883693446302e-05, "loss": 0.0362, "step": 84100 }, { "epoch": 0.02055, "grad_norm": 0.07647678256034851, "learning_rate": 3.346494754698254e-05, "loss": 0.0345, "step": 84110 }, { "epoch": 0.0206, "grad_norm": 0.07470206916332245, "learning_rate": 3.3461057928073556e-05, "loss": 0.035, "step": 84120 }, { "epoch": 0.02065, "grad_norm": 0.08800902217626572, "learning_rate": 3.3457168077842444e-05, "loss": 0.0372, "step": 84130 }, { "epoch": 0.0207, "grad_norm": 0.09285101294517517, "learning_rate": 3.345327799639553e-05, "loss": 0.0362, "step": 84140 }, { "epoch": 0.02075, "grad_norm": 0.09030649811029434, "learning_rate": 3.3449387683839165e-05, "loss": 0.0358, "step": 84150 }, { "epoch": 0.0208, "grad_norm": 0.08570466935634613, "learning_rate": 3.344549714027971e-05, "loss": 0.0356, "step": 84160 }, { "epoch": 0.02085, "grad_norm": 0.10109095275402069, "learning_rate": 3.3441606365823553e-05, "loss": 0.0365, "step": 84170 }, { "epoch": 0.0209, "grad_norm": 0.07555997371673584, "learning_rate": 3.343771536057704e-05, "loss": 0.0357, "step": 84180 }, { "epoch": 0.02095, "grad_norm": 0.09468481689691544, "learning_rate": 3.3433824124646554e-05, "loss": 0.0369, "step": 84190 }, { "epoch": 0.021, "grad_norm": 0.09233641624450684, "learning_rate": 3.34299326581385e-05, "loss": 0.0366, "step": 84200 }, { "epoch": 0.02105, "grad_norm": 0.08957118541002274, "learning_rate": 3.342604096115923e-05, "loss": 0.0367, "step": 84210 }, { "epoch": 0.0211, "grad_norm": 0.07740267366170883, "learning_rate": 3.342214903381519e-05, "loss": 0.0361, "step": 84220 }, { "epoch": 0.02115, "grad_norm": 0.0924314484000206, "learning_rate": 3.341825687621274e-05, "loss": 0.0367, "step": 84230 }, { "epoch": 0.0212, "grad_norm": 0.08203538507223129, "learning_rate": 3.3414364488458325e-05, "loss": 0.0367, "step": 84240 }, { "epoch": 0.02125, "grad_norm": 0.09372982382774353, "learning_rate": 3.341047187065834e-05, "loss": 0.0386, "step": 84250 }, { "epoch": 0.0213, "grad_norm": 0.09793376922607422, "learning_rate": 3.3406579022919216e-05, "loss": 0.0371, "step": 84260 }, { "epoch": 0.02135, "grad_norm": 0.08053075522184372, "learning_rate": 3.3402685945347374e-05, "loss": 0.0371, "step": 84270 }, { "epoch": 0.0214, "grad_norm": 0.09939832240343094, "learning_rate": 3.339879263804926e-05, "loss": 0.0363, "step": 84280 }, { "epoch": 0.02145, "grad_norm": 0.08278929442167282, "learning_rate": 3.339489910113131e-05, "loss": 0.0396, "step": 84290 }, { "epoch": 0.0215, "grad_norm": 0.1037469282746315, "learning_rate": 3.3391005334699966e-05, "loss": 0.0382, "step": 84300 }, { "epoch": 0.02155, "grad_norm": 0.07742772996425629, "learning_rate": 3.338711133886169e-05, "loss": 0.0391, "step": 84310 }, { "epoch": 0.0216, "grad_norm": 0.08141908049583435, "learning_rate": 3.338321711372295e-05, "loss": 0.0364, "step": 84320 }, { "epoch": 0.02165, "grad_norm": 0.08545833081007004, "learning_rate": 3.33793226593902e-05, "loss": 0.0376, "step": 84330 }, { "epoch": 0.0217, "grad_norm": 0.08423074334859848, "learning_rate": 3.337542797596992e-05, "loss": 0.0374, "step": 84340 }, { "epoch": 0.02175, "grad_norm": 0.09167063236236572, "learning_rate": 3.337153306356857e-05, "loss": 0.0374, "step": 84350 }, { "epoch": 0.0218, "grad_norm": 0.072408527135849, "learning_rate": 3.336763792229267e-05, "loss": 0.0362, "step": 84360 }, { "epoch": 0.02185, "grad_norm": 0.08044564723968506, "learning_rate": 3.336374255224868e-05, "loss": 0.0359, "step": 84370 }, { "epoch": 0.0219, "grad_norm": 0.07284487038850784, "learning_rate": 3.3359846953543117e-05, "loss": 0.0364, "step": 84380 }, { "epoch": 0.02195, "grad_norm": 0.06727509200572968, "learning_rate": 3.335595112628248e-05, "loss": 0.0354, "step": 84390 }, { "epoch": 0.022, "grad_norm": 0.07404442131519318, "learning_rate": 3.3352055070573266e-05, "loss": 0.0373, "step": 84400 }, { "epoch": 0.02205, "grad_norm": 0.07334660738706589, "learning_rate": 3.334815878652202e-05, "loss": 0.0374, "step": 84410 }, { "epoch": 0.0221, "grad_norm": 0.08427029848098755, "learning_rate": 3.334426227423524e-05, "loss": 0.0371, "step": 84420 }, { "epoch": 0.02215, "grad_norm": 0.08811043202877045, "learning_rate": 3.334036553381946e-05, "loss": 0.038, "step": 84430 }, { "epoch": 0.0222, "grad_norm": 0.07698401808738708, "learning_rate": 3.333646856538123e-05, "loss": 0.0358, "step": 84440 }, { "epoch": 0.02225, "grad_norm": 0.0758872851729393, "learning_rate": 3.333257136902708e-05, "loss": 0.0384, "step": 84450 }, { "epoch": 0.0223, "grad_norm": 0.13882263004779816, "learning_rate": 3.3328673944863556e-05, "loss": 0.0379, "step": 84460 }, { "epoch": 0.02235, "grad_norm": 0.14099398255348206, "learning_rate": 3.332477629299722e-05, "loss": 0.0353, "step": 84470 }, { "epoch": 0.0224, "grad_norm": 0.09753715991973877, "learning_rate": 3.332087841353462e-05, "loss": 0.0357, "step": 84480 }, { "epoch": 0.02245, "grad_norm": 0.10037767887115479, "learning_rate": 3.3316980306582333e-05, "loss": 0.0374, "step": 84490 }, { "epoch": 0.0225, "grad_norm": 0.08424913883209229, "learning_rate": 3.331308197224693e-05, "loss": 0.0362, "step": 84500 }, { "epoch": 0.02255, "grad_norm": 0.08143261820077896, "learning_rate": 3.330918341063499e-05, "loss": 0.0348, "step": 84510 }, { "epoch": 0.0226, "grad_norm": 0.1272321194410324, "learning_rate": 3.330528462185309e-05, "loss": 0.0358, "step": 84520 }, { "epoch": 0.02265, "grad_norm": 0.1025131568312645, "learning_rate": 3.3301385606007837e-05, "loss": 0.0363, "step": 84530 }, { "epoch": 0.0227, "grad_norm": 0.10091119259595871, "learning_rate": 3.3297486363205816e-05, "loss": 0.0352, "step": 84540 }, { "epoch": 0.02275, "grad_norm": 0.09058336168527603, "learning_rate": 3.329358689355364e-05, "loss": 0.0373, "step": 84550 }, { "epoch": 0.0228, "grad_norm": 0.08595266938209534, "learning_rate": 3.328968719715791e-05, "loss": 0.0347, "step": 84560 }, { "epoch": 0.02285, "grad_norm": 0.09240694344043732, "learning_rate": 3.328578727412525e-05, "loss": 0.036, "step": 84570 }, { "epoch": 0.0229, "grad_norm": 0.08921545743942261, "learning_rate": 3.3281887124562275e-05, "loss": 0.035, "step": 84580 }, { "epoch": 0.02295, "grad_norm": 0.08916866779327393, "learning_rate": 3.3277986748575624e-05, "loss": 0.0364, "step": 84590 }, { "epoch": 0.023, "grad_norm": 0.08710570633411407, "learning_rate": 3.327408614627191e-05, "loss": 0.0396, "step": 84600 }, { "epoch": 0.02305, "grad_norm": 0.08730561286211014, "learning_rate": 3.32701853177578e-05, "loss": 0.0362, "step": 84610 }, { "epoch": 0.0231, "grad_norm": 0.08374005556106567, "learning_rate": 3.326628426313993e-05, "loss": 0.0373, "step": 84620 }, { "epoch": 0.02315, "grad_norm": 0.10687735676765442, "learning_rate": 3.3262382982524953e-05, "loss": 0.0373, "step": 84630 }, { "epoch": 0.0232, "grad_norm": 0.07977034151554108, "learning_rate": 3.3258481476019535e-05, "loss": 0.0352, "step": 84640 }, { "epoch": 0.02325, "grad_norm": 0.08451544493436813, "learning_rate": 3.325457974373032e-05, "loss": 0.0374, "step": 84650 }, { "epoch": 0.0233, "grad_norm": 0.08229358494281769, "learning_rate": 3.325067778576401e-05, "loss": 0.0355, "step": 84660 }, { "epoch": 0.02335, "grad_norm": 0.08253395557403564, "learning_rate": 3.3246775602227266e-05, "loss": 0.035, "step": 84670 }, { "epoch": 0.0234, "grad_norm": 0.10140329599380493, "learning_rate": 3.3242873193226775e-05, "loss": 0.0359, "step": 84680 }, { "epoch": 0.02345, "grad_norm": 0.0771637111902237, "learning_rate": 3.323897055886922e-05, "loss": 0.0355, "step": 84690 }, { "epoch": 0.0235, "grad_norm": 0.08327921479940414, "learning_rate": 3.323506769926132e-05, "loss": 0.0374, "step": 84700 }, { "epoch": 0.02355, "grad_norm": 0.08944735676050186, "learning_rate": 3.3231164614509755e-05, "loss": 0.0369, "step": 84710 }, { "epoch": 0.0236, "grad_norm": 0.09130865335464478, "learning_rate": 3.322726130472124e-05, "loss": 0.0379, "step": 84720 }, { "epoch": 0.02365, "grad_norm": 0.08416770398616791, "learning_rate": 3.322335777000249e-05, "loss": 0.0362, "step": 84730 }, { "epoch": 0.0237, "grad_norm": 0.07621768862009048, "learning_rate": 3.321945401046023e-05, "loss": 0.036, "step": 84740 }, { "epoch": 0.02375, "grad_norm": 0.09680519998073578, "learning_rate": 3.3215550026201186e-05, "loss": 0.0377, "step": 84750 }, { "epoch": 0.0238, "grad_norm": 0.10799897462129593, "learning_rate": 3.321164581733209e-05, "loss": 0.0358, "step": 84760 }, { "epoch": 0.02385, "grad_norm": 0.0748513713479042, "learning_rate": 3.320774138395969e-05, "loss": 0.0358, "step": 84770 }, { "epoch": 0.0239, "grad_norm": 0.08447539806365967, "learning_rate": 3.3203836726190715e-05, "loss": 0.0363, "step": 84780 }, { "epoch": 0.02395, "grad_norm": 0.09034404903650284, "learning_rate": 3.319993184413193e-05, "loss": 0.0355, "step": 84790 }, { "epoch": 0.024, "grad_norm": 0.09197676926851273, "learning_rate": 3.3196026737890085e-05, "loss": 0.0363, "step": 84800 }, { "epoch": 0.02405, "grad_norm": 0.09282320737838745, "learning_rate": 3.3192121407571954e-05, "loss": 0.0357, "step": 84810 }, { "epoch": 0.0241, "grad_norm": 0.10092207789421082, "learning_rate": 3.31882158532843e-05, "loss": 0.0355, "step": 84820 }, { "epoch": 0.02415, "grad_norm": 0.06371040642261505, "learning_rate": 3.31843100751339e-05, "loss": 0.0357, "step": 84830 }, { "epoch": 0.0242, "grad_norm": 0.0727243646979332, "learning_rate": 3.318040407322753e-05, "loss": 0.0354, "step": 84840 }, { "epoch": 0.02425, "grad_norm": 0.08339813351631165, "learning_rate": 3.317649784767199e-05, "loss": 0.0377, "step": 84850 }, { "epoch": 0.0243, "grad_norm": 0.07810098677873611, "learning_rate": 3.3172591398574074e-05, "loss": 0.0372, "step": 84860 }, { "epoch": 0.02435, "grad_norm": 0.08262559771537781, "learning_rate": 3.3168684726040575e-05, "loss": 0.0353, "step": 84870 }, { "epoch": 0.0244, "grad_norm": 0.09877780079841614, "learning_rate": 3.3164777830178315e-05, "loss": 0.0349, "step": 84880 }, { "epoch": 0.02445, "grad_norm": 0.1113976314663887, "learning_rate": 3.316087071109408e-05, "loss": 0.0366, "step": 84890 }, { "epoch": 0.0245, "grad_norm": 0.10309489816427231, "learning_rate": 3.3156963368894714e-05, "loss": 0.0366, "step": 84900 }, { "epoch": 0.02455, "grad_norm": 0.09552957117557526, "learning_rate": 3.315305580368704e-05, "loss": 0.037, "step": 84910 }, { "epoch": 0.0246, "grad_norm": 0.10593295842409134, "learning_rate": 3.314914801557788e-05, "loss": 0.0369, "step": 84920 }, { "epoch": 0.02465, "grad_norm": 0.09463546425104141, "learning_rate": 3.314524000467407e-05, "loss": 0.0373, "step": 84930 }, { "epoch": 0.0247, "grad_norm": 0.07869092375040054, "learning_rate": 3.3141331771082456e-05, "loss": 0.0357, "step": 84940 }, { "epoch": 0.02475, "grad_norm": 0.07995082437992096, "learning_rate": 3.31374233149099e-05, "loss": 0.0359, "step": 84950 }, { "epoch": 0.0248, "grad_norm": 0.07646608352661133, "learning_rate": 3.313351463626324e-05, "loss": 0.0379, "step": 84960 }, { "epoch": 0.02485, "grad_norm": 0.09957541525363922, "learning_rate": 3.3129605735249354e-05, "loss": 0.0381, "step": 84970 }, { "epoch": 0.0249, "grad_norm": 0.08227454870939255, "learning_rate": 3.312569661197509e-05, "loss": 0.0373, "step": 84980 }, { "epoch": 0.02495, "grad_norm": 0.07015284895896912, "learning_rate": 3.312178726654734e-05, "loss": 0.0367, "step": 84990 }, { "epoch": 0.025, "grad_norm": 0.09884293377399445, "learning_rate": 3.3117877699072975e-05, "loss": 0.0368, "step": 85000 }, { "epoch": 0.02505, "grad_norm": 0.10437457263469696, "learning_rate": 3.311396790965888e-05, "loss": 0.0359, "step": 85010 }, { "epoch": 0.0251, "grad_norm": 0.10808396339416504, "learning_rate": 3.311005789841196e-05, "loss": 0.0375, "step": 85020 }, { "epoch": 0.02515, "grad_norm": 0.06980592757463455, "learning_rate": 3.3106147665439105e-05, "loss": 0.0344, "step": 85030 }, { "epoch": 0.0252, "grad_norm": 0.07518026977777481, "learning_rate": 3.3102237210847206e-05, "loss": 0.0368, "step": 85040 }, { "epoch": 0.02525, "grad_norm": 0.08038965612649918, "learning_rate": 3.309832653474319e-05, "loss": 0.0359, "step": 85050 }, { "epoch": 0.0253, "grad_norm": 0.06596371531486511, "learning_rate": 3.3094415637233966e-05, "loss": 0.0358, "step": 85060 }, { "epoch": 0.02535, "grad_norm": 0.06973963230848312, "learning_rate": 3.309050451842647e-05, "loss": 0.0352, "step": 85070 }, { "epoch": 0.0254, "grad_norm": 0.08272168040275574, "learning_rate": 3.308659317842761e-05, "loss": 0.0379, "step": 85080 }, { "epoch": 0.02545, "grad_norm": 0.0812903344631195, "learning_rate": 3.308268161734434e-05, "loss": 0.0357, "step": 85090 }, { "epoch": 0.0255, "grad_norm": 0.08610591292381287, "learning_rate": 3.3078769835283585e-05, "loss": 0.0352, "step": 85100 }, { "epoch": 0.02555, "grad_norm": 0.09931080788373947, "learning_rate": 3.3074857832352294e-05, "loss": 0.0363, "step": 85110 }, { "epoch": 0.0256, "grad_norm": 0.10501144081354141, "learning_rate": 3.307094560865743e-05, "loss": 0.0352, "step": 85120 }, { "epoch": 0.02565, "grad_norm": 0.10684654116630554, "learning_rate": 3.3067033164305944e-05, "loss": 0.0384, "step": 85130 }, { "epoch": 0.0257, "grad_norm": 0.08528265357017517, "learning_rate": 3.30631204994048e-05, "loss": 0.0367, "step": 85140 }, { "epoch": 0.02575, "grad_norm": 0.07739049941301346, "learning_rate": 3.305920761406097e-05, "loss": 0.0358, "step": 85150 }, { "epoch": 0.0258, "grad_norm": 0.0894133523106575, "learning_rate": 3.3055294508381435e-05, "loss": 0.0367, "step": 85160 }, { "epoch": 0.02585, "grad_norm": 0.07886653393507004, "learning_rate": 3.3051381182473165e-05, "loss": 0.0374, "step": 85170 }, { "epoch": 0.0259, "grad_norm": 0.08629165589809418, "learning_rate": 3.304746763644317e-05, "loss": 0.0361, "step": 85180 }, { "epoch": 0.02595, "grad_norm": 0.06818588823080063, "learning_rate": 3.304355387039843e-05, "loss": 0.0346, "step": 85190 }, { "epoch": 0.026, "grad_norm": 0.08322655409574509, "learning_rate": 3.3039639884445947e-05, "loss": 0.0349, "step": 85200 }, { "epoch": 0.02605, "grad_norm": 0.08271487057209015, "learning_rate": 3.303572567869273e-05, "loss": 0.0349, "step": 85210 }, { "epoch": 0.0261, "grad_norm": 0.08312942832708359, "learning_rate": 3.303181125324579e-05, "loss": 0.0342, "step": 85220 }, { "epoch": 0.02615, "grad_norm": 0.06256101280450821, "learning_rate": 3.302789660821215e-05, "loss": 0.0353, "step": 85230 }, { "epoch": 0.0262, "grad_norm": 0.08462122827768326, "learning_rate": 3.302398174369883e-05, "loss": 0.0362, "step": 85240 }, { "epoch": 0.02625, "grad_norm": 0.08205302804708481, "learning_rate": 3.302006665981287e-05, "loss": 0.0356, "step": 85250 }, { "epoch": 0.0263, "grad_norm": 0.08983128517866135, "learning_rate": 3.30161513566613e-05, "loss": 0.0377, "step": 85260 }, { "epoch": 0.02635, "grad_norm": 0.11347589641809464, "learning_rate": 3.3012235834351154e-05, "loss": 0.0361, "step": 85270 }, { "epoch": 0.0264, "grad_norm": 0.08793602138757706, "learning_rate": 3.30083200929895e-05, "loss": 0.0371, "step": 85280 }, { "epoch": 0.02645, "grad_norm": 0.09794158488512039, "learning_rate": 3.3004404132683384e-05, "loss": 0.0366, "step": 85290 }, { "epoch": 0.0265, "grad_norm": 0.08309174329042435, "learning_rate": 3.300048795353986e-05, "loss": 0.0363, "step": 85300 }, { "epoch": 0.02655, "grad_norm": 0.07576657831668854, "learning_rate": 3.2996571555666e-05, "loss": 0.0357, "step": 85310 }, { "epoch": 0.0266, "grad_norm": 0.08200152963399887, "learning_rate": 3.299265493916888e-05, "loss": 0.0374, "step": 85320 }, { "epoch": 0.02665, "grad_norm": 0.07354395091533661, "learning_rate": 3.298873810415558e-05, "loss": 0.0368, "step": 85330 }, { "epoch": 0.0267, "grad_norm": 0.09424217790365219, "learning_rate": 3.298482105073318e-05, "loss": 0.036, "step": 85340 }, { "epoch": 0.02675, "grad_norm": 0.09676692634820938, "learning_rate": 3.298090377900877e-05, "loss": 0.0382, "step": 85350 }, { "epoch": 0.0268, "grad_norm": 0.09819674491882324, "learning_rate": 3.297698628908945e-05, "loss": 0.0376, "step": 85360 }, { "epoch": 0.02685, "grad_norm": 0.11677680909633636, "learning_rate": 3.297306858108232e-05, "loss": 0.0384, "step": 85370 }, { "epoch": 0.0269, "grad_norm": 0.07928648591041565, "learning_rate": 3.296915065509449e-05, "loss": 0.0361, "step": 85380 }, { "epoch": 0.02695, "grad_norm": 0.11051487922668457, "learning_rate": 3.296523251123308e-05, "loss": 0.0397, "step": 85390 }, { "epoch": 0.027, "grad_norm": 0.08754760771989822, "learning_rate": 3.29613141496052e-05, "loss": 0.0364, "step": 85400 }, { "epoch": 0.02705, "grad_norm": 0.09275832772254944, "learning_rate": 3.295739557031799e-05, "loss": 0.037, "step": 85410 }, { "epoch": 0.0271, "grad_norm": 0.08891261368989944, "learning_rate": 3.295347677347857e-05, "loss": 0.039, "step": 85420 }, { "epoch": 0.02715, "grad_norm": 0.08942827582359314, "learning_rate": 3.2949557759194075e-05, "loss": 0.0369, "step": 85430 }, { "epoch": 0.0272, "grad_norm": 0.0818541869521141, "learning_rate": 3.294563852757167e-05, "loss": 0.0367, "step": 85440 }, { "epoch": 0.02725, "grad_norm": 0.08699609339237213, "learning_rate": 3.294171907871849e-05, "loss": 0.0374, "step": 85450 }, { "epoch": 0.0273, "grad_norm": 0.10503701120615005, "learning_rate": 3.2937799412741685e-05, "loss": 0.0367, "step": 85460 }, { "epoch": 0.02735, "grad_norm": 0.08131738752126694, "learning_rate": 3.2933879529748435e-05, "loss": 0.0356, "step": 85470 }, { "epoch": 0.0274, "grad_norm": 0.07324840873479843, "learning_rate": 3.2929959429845896e-05, "loss": 0.0355, "step": 85480 }, { "epoch": 0.02745, "grad_norm": 0.08623120933771133, "learning_rate": 3.292603911314125e-05, "loss": 0.0358, "step": 85490 }, { "epoch": 0.0275, "grad_norm": 0.13020314276218414, "learning_rate": 3.292211857974166e-05, "loss": 0.0406, "step": 85500 }, { "epoch": 0.02755, "grad_norm": 0.09622704982757568, "learning_rate": 3.291819782975434e-05, "loss": 0.0346, "step": 85510 }, { "epoch": 0.0276, "grad_norm": 0.09420084953308105, "learning_rate": 3.291427686328645e-05, "loss": 0.0365, "step": 85520 }, { "epoch": 0.02765, "grad_norm": 0.09245433658361435, "learning_rate": 3.291035568044522e-05, "loss": 0.0355, "step": 85530 }, { "epoch": 0.0277, "grad_norm": 0.08546995371580124, "learning_rate": 3.2906434281337826e-05, "loss": 0.0373, "step": 85540 }, { "epoch": 0.02775, "grad_norm": 0.07339838892221451, "learning_rate": 3.29025126660715e-05, "loss": 0.0364, "step": 85550 }, { "epoch": 0.0278, "grad_norm": 0.0915692150592804, "learning_rate": 3.289859083475343e-05, "loss": 0.0353, "step": 85560 }, { "epoch": 0.02785, "grad_norm": 0.10731060057878494, "learning_rate": 3.289466878749087e-05, "loss": 0.0368, "step": 85570 }, { "epoch": 0.0279, "grad_norm": 0.09024395048618317, "learning_rate": 3.289074652439102e-05, "loss": 0.0366, "step": 85580 }, { "epoch": 0.02795, "grad_norm": 0.11369603127241135, "learning_rate": 3.2886824045561134e-05, "loss": 0.0356, "step": 85590 }, { "epoch": 0.028, "grad_norm": 0.11600227653980255, "learning_rate": 3.288290135110844e-05, "loss": 0.0355, "step": 85600 }, { "epoch": 0.02805, "grad_norm": 0.10826078802347183, "learning_rate": 3.2878978441140174e-05, "loss": 0.0369, "step": 85610 }, { "epoch": 0.0281, "grad_norm": 0.09068696945905685, "learning_rate": 3.2875055315763606e-05, "loss": 0.0353, "step": 85620 }, { "epoch": 0.02815, "grad_norm": 0.08113870024681091, "learning_rate": 3.287113197508598e-05, "loss": 0.0351, "step": 85630 }, { "epoch": 0.0282, "grad_norm": 0.08663254231214523, "learning_rate": 3.286720841921457e-05, "loss": 0.0348, "step": 85640 }, { "epoch": 0.02825, "grad_norm": 0.07541149854660034, "learning_rate": 3.286328464825663e-05, "loss": 0.0353, "step": 85650 }, { "epoch": 0.0283, "grad_norm": 0.06735502183437347, "learning_rate": 3.285936066231945e-05, "loss": 0.035, "step": 85660 }, { "epoch": 0.02835, "grad_norm": 0.09373398870229721, "learning_rate": 3.2855436461510295e-05, "loss": 0.035, "step": 85670 }, { "epoch": 0.0284, "grad_norm": 0.0879872515797615, "learning_rate": 3.285151204593646e-05, "loss": 0.0386, "step": 85680 }, { "epoch": 0.02845, "grad_norm": 0.07386450469493866, "learning_rate": 3.2847587415705236e-05, "loss": 0.0356, "step": 85690 }, { "epoch": 0.0285, "grad_norm": 0.08008229732513428, "learning_rate": 3.284366257092392e-05, "loss": 0.0367, "step": 85700 }, { "epoch": 0.02855, "grad_norm": 0.08670288324356079, "learning_rate": 3.283973751169981e-05, "loss": 0.0342, "step": 85710 }, { "epoch": 0.0286, "grad_norm": 0.07803010940551758, "learning_rate": 3.283581223814024e-05, "loss": 0.0366, "step": 85720 }, { "epoch": 0.02865, "grad_norm": 0.0688256099820137, "learning_rate": 3.283188675035249e-05, "loss": 0.0347, "step": 85730 }, { "epoch": 0.0287, "grad_norm": 0.07695078104734421, "learning_rate": 3.2827961048443906e-05, "loss": 0.0343, "step": 85740 }, { "epoch": 0.02875, "grad_norm": 0.0665489062666893, "learning_rate": 3.28240351325218e-05, "loss": 0.0357, "step": 85750 }, { "epoch": 0.0288, "grad_norm": 0.06531370431184769, "learning_rate": 3.282010900269352e-05, "loss": 0.036, "step": 85760 }, { "epoch": 0.02885, "grad_norm": 0.08302236348390579, "learning_rate": 3.281618265906639e-05, "loss": 0.0353, "step": 85770 }, { "epoch": 0.0289, "grad_norm": 0.08833914250135422, "learning_rate": 3.281225610174778e-05, "loss": 0.0345, "step": 85780 }, { "epoch": 0.02895, "grad_norm": 0.08267134428024292, "learning_rate": 3.2808329330845006e-05, "loss": 0.0357, "step": 85790 }, { "epoch": 0.029, "grad_norm": 0.0864887535572052, "learning_rate": 3.280440234646544e-05, "loss": 0.0357, "step": 85800 }, { "epoch": 0.02905, "grad_norm": 0.0825200080871582, "learning_rate": 3.280047514871645e-05, "loss": 0.0354, "step": 85810 }, { "epoch": 0.0291, "grad_norm": 0.08538206666707993, "learning_rate": 3.2796547737705414e-05, "loss": 0.0362, "step": 85820 }, { "epoch": 0.02915, "grad_norm": 0.06769226491451263, "learning_rate": 3.2792620113539674e-05, "loss": 0.0351, "step": 85830 }, { "epoch": 0.0292, "grad_norm": 0.08536852896213531, "learning_rate": 3.2788692276326635e-05, "loss": 0.038, "step": 85840 }, { "epoch": 0.02925, "grad_norm": 0.08903807401657104, "learning_rate": 3.2784764226173673e-05, "loss": 0.0373, "step": 85850 }, { "epoch": 0.0293, "grad_norm": 0.07940450310707092, "learning_rate": 3.278083596318819e-05, "loss": 0.0359, "step": 85860 }, { "epoch": 0.02935, "grad_norm": 0.08667632192373276, "learning_rate": 3.277690748747757e-05, "loss": 0.0371, "step": 85870 }, { "epoch": 0.0294, "grad_norm": 0.0772915631532669, "learning_rate": 3.277297879914921e-05, "loss": 0.0369, "step": 85880 }, { "epoch": 0.02945, "grad_norm": 0.08793382346630096, "learning_rate": 3.2769049898310545e-05, "loss": 0.036, "step": 85890 }, { "epoch": 0.0295, "grad_norm": 0.07650678604841232, "learning_rate": 3.276512078506897e-05, "loss": 0.0369, "step": 85900 }, { "epoch": 0.02955, "grad_norm": 0.08649063855409622, "learning_rate": 3.2761191459531904e-05, "loss": 0.0386, "step": 85910 }, { "epoch": 0.0296, "grad_norm": 0.08880212903022766, "learning_rate": 3.275726192180678e-05, "loss": 0.0377, "step": 85920 }, { "epoch": 0.02965, "grad_norm": 0.08839485049247742, "learning_rate": 3.2753332172001036e-05, "loss": 0.0356, "step": 85930 }, { "epoch": 0.0297, "grad_norm": 0.08877583593130112, "learning_rate": 3.27494022102221e-05, "loss": 0.0374, "step": 85940 }, { "epoch": 0.02975, "grad_norm": 0.08329977095127106, "learning_rate": 3.274547203657742e-05, "loss": 0.0371, "step": 85950 }, { "epoch": 0.0298, "grad_norm": 0.08751018345355988, "learning_rate": 3.274154165117444e-05, "loss": 0.036, "step": 85960 }, { "epoch": 0.02985, "grad_norm": 0.07727228850126266, "learning_rate": 3.273761105412063e-05, "loss": 0.0375, "step": 85970 }, { "epoch": 0.0299, "grad_norm": 0.07666754722595215, "learning_rate": 3.273368024552343e-05, "loss": 0.0362, "step": 85980 }, { "epoch": 0.02995, "grad_norm": 0.09056229144334793, "learning_rate": 3.272974922549032e-05, "loss": 0.0368, "step": 85990 }, { "epoch": 0.03, "grad_norm": 0.09175769984722137, "learning_rate": 3.2725817994128774e-05, "loss": 0.0374, "step": 86000 }, { "epoch": 0.03005, "grad_norm": 0.08103155344724655, "learning_rate": 3.272188655154626e-05, "loss": 0.0366, "step": 86010 }, { "epoch": 0.0301, "grad_norm": 0.10952797532081604, "learning_rate": 3.2717954897850264e-05, "loss": 0.0378, "step": 86020 }, { "epoch": 0.03015, "grad_norm": 0.0774063915014267, "learning_rate": 3.27140230331483e-05, "loss": 0.0365, "step": 86030 }, { "epoch": 0.0302, "grad_norm": 0.08491382747888565, "learning_rate": 3.2710090957547826e-05, "loss": 0.0368, "step": 86040 }, { "epoch": 0.03025, "grad_norm": 0.08859048038721085, "learning_rate": 3.2706158671156375e-05, "loss": 0.0349, "step": 86050 }, { "epoch": 0.0303, "grad_norm": 0.06936676055192947, "learning_rate": 3.270222617408144e-05, "loss": 0.0356, "step": 86060 }, { "epoch": 0.03035, "grad_norm": 0.08609030395746231, "learning_rate": 3.269829346643052e-05, "loss": 0.0352, "step": 86070 }, { "epoch": 0.0304, "grad_norm": 0.09073681384325027, "learning_rate": 3.269436054831116e-05, "loss": 0.0381, "step": 86080 }, { "epoch": 0.03045, "grad_norm": 0.10690750181674957, "learning_rate": 3.269042741983087e-05, "loss": 0.0381, "step": 86090 }, { "epoch": 0.0305, "grad_norm": 0.08611635863780975, "learning_rate": 3.268649408109719e-05, "loss": 0.0371, "step": 86100 }, { "epoch": 0.03055, "grad_norm": 0.09616200625896454, "learning_rate": 3.268256053221764e-05, "loss": 0.0362, "step": 86110 }, { "epoch": 0.0306, "grad_norm": 0.09797549247741699, "learning_rate": 3.267862677329978e-05, "loss": 0.0364, "step": 86120 }, { "epoch": 0.03065, "grad_norm": 0.0829409658908844, "learning_rate": 3.267469280445114e-05, "loss": 0.0355, "step": 86130 }, { "epoch": 0.0307, "grad_norm": 0.08753553777933121, "learning_rate": 3.267075862577929e-05, "loss": 0.0353, "step": 86140 }, { "epoch": 0.03075, "grad_norm": 0.0914936363697052, "learning_rate": 3.2666824237391774e-05, "loss": 0.036, "step": 86150 }, { "epoch": 0.0308, "grad_norm": 0.09489694237709045, "learning_rate": 3.2662889639396175e-05, "loss": 0.0377, "step": 86160 }, { "epoch": 0.03085, "grad_norm": 0.08153481781482697, "learning_rate": 3.265895483190004e-05, "loss": 0.0364, "step": 86170 }, { "epoch": 0.0309, "grad_norm": 0.08516906201839447, "learning_rate": 3.2655019815010965e-05, "loss": 0.0356, "step": 86180 }, { "epoch": 0.03095, "grad_norm": 0.08353491872549057, "learning_rate": 3.265108458883652e-05, "loss": 0.0376, "step": 86190 }, { "epoch": 0.031, "grad_norm": 0.08986058086156845, "learning_rate": 3.2647149153484296e-05, "loss": 0.0367, "step": 86200 }, { "epoch": 0.03105, "grad_norm": 0.09755656123161316, "learning_rate": 3.264321350906189e-05, "loss": 0.037, "step": 86210 }, { "epoch": 0.0311, "grad_norm": 0.08106609433889389, "learning_rate": 3.2639277655676896e-05, "loss": 0.0382, "step": 86220 }, { "epoch": 0.03115, "grad_norm": 0.0735519528388977, "learning_rate": 3.263534159343692e-05, "loss": 0.0353, "step": 86230 }, { "epoch": 0.0312, "grad_norm": 0.08236845582723618, "learning_rate": 3.263140532244958e-05, "loss": 0.036, "step": 86240 }, { "epoch": 0.03125, "grad_norm": 0.09598668664693832, "learning_rate": 3.262746884282248e-05, "loss": 0.0378, "step": 86250 }, { "epoch": 0.0313, "grad_norm": 0.07468762993812561, "learning_rate": 3.262353215466325e-05, "loss": 0.037, "step": 86260 }, { "epoch": 0.03135, "grad_norm": 0.10694070905447006, "learning_rate": 3.26195952580795e-05, "loss": 0.0358, "step": 86270 }, { "epoch": 0.0314, "grad_norm": 0.0785185769200325, "learning_rate": 3.2615658153178894e-05, "loss": 0.0364, "step": 86280 }, { "epoch": 0.03145, "grad_norm": 0.07416651397943497, "learning_rate": 3.2611720840069055e-05, "loss": 0.0351, "step": 86290 }, { "epoch": 0.0315, "grad_norm": 0.07013098150491714, "learning_rate": 3.260778331885762e-05, "loss": 0.0354, "step": 86300 }, { "epoch": 0.03155, "grad_norm": 0.07015841454267502, "learning_rate": 3.260384558965226e-05, "loss": 0.0348, "step": 86310 }, { "epoch": 0.0316, "grad_norm": 0.07380936294794083, "learning_rate": 3.2599907652560605e-05, "loss": 0.0346, "step": 86320 }, { "epoch": 0.03165, "grad_norm": 0.06309569627046585, "learning_rate": 3.259596950769033e-05, "loss": 0.0355, "step": 86330 }, { "epoch": 0.0317, "grad_norm": 0.06355613470077515, "learning_rate": 3.259203115514911e-05, "loss": 0.0358, "step": 86340 }, { "epoch": 0.03175, "grad_norm": 0.08989034593105316, "learning_rate": 3.258809259504461e-05, "loss": 0.0357, "step": 86350 }, { "epoch": 0.0318, "grad_norm": 0.08825557678937912, "learning_rate": 3.258415382748451e-05, "loss": 0.0411, "step": 86360 }, { "epoch": 0.03185, "grad_norm": 0.08263460546731949, "learning_rate": 3.258021485257649e-05, "loss": 0.035, "step": 86370 }, { "epoch": 0.0319, "grad_norm": 0.08822799474000931, "learning_rate": 3.2576275670428245e-05, "loss": 0.037, "step": 86380 }, { "epoch": 0.03195, "grad_norm": 0.07840435951948166, "learning_rate": 3.2572336281147466e-05, "loss": 0.0353, "step": 86390 }, { "epoch": 0.032, "grad_norm": 0.08038538694381714, "learning_rate": 3.256839668484186e-05, "loss": 0.038, "step": 86400 }, { "epoch": 0.03205, "grad_norm": 0.08051291853189468, "learning_rate": 3.2564456881619135e-05, "loss": 0.0369, "step": 86410 }, { "epoch": 0.0321, "grad_norm": 0.08047017455101013, "learning_rate": 3.2560516871587e-05, "loss": 0.0376, "step": 86420 }, { "epoch": 0.03215, "grad_norm": 0.08687380701303482, "learning_rate": 3.255657665485317e-05, "loss": 0.0366, "step": 86430 }, { "epoch": 0.0322, "grad_norm": 0.09004174917936325, "learning_rate": 3.255263623152537e-05, "loss": 0.0359, "step": 86440 }, { "epoch": 0.03225, "grad_norm": 0.0855182409286499, "learning_rate": 3.2548695601711344e-05, "loss": 0.0374, "step": 86450 }, { "epoch": 0.0323, "grad_norm": 0.09575967490673065, "learning_rate": 3.25447547655188e-05, "loss": 0.0379, "step": 86460 }, { "epoch": 0.03235, "grad_norm": 0.09607132524251938, "learning_rate": 3.254081372305552e-05, "loss": 0.0371, "step": 86470 }, { "epoch": 0.0324, "grad_norm": 0.08806268125772476, "learning_rate": 3.2536872474429205e-05, "loss": 0.0376, "step": 86480 }, { "epoch": 0.03245, "grad_norm": 0.09159974753856659, "learning_rate": 3.253293101974764e-05, "loss": 0.0372, "step": 86490 }, { "epoch": 0.0325, "grad_norm": 0.10884981602430344, "learning_rate": 3.252898935911856e-05, "loss": 0.0376, "step": 86500 }, { "epoch": 0.03255, "grad_norm": 0.12202293425798416, "learning_rate": 3.2525047492649744e-05, "loss": 0.0363, "step": 86510 }, { "epoch": 0.0326, "grad_norm": 0.09730051457881927, "learning_rate": 3.252110542044896e-05, "loss": 0.0388, "step": 86520 }, { "epoch": 0.03265, "grad_norm": 0.0869157612323761, "learning_rate": 3.251716314262398e-05, "loss": 0.0352, "step": 86530 }, { "epoch": 0.0327, "grad_norm": 0.09806448221206665, "learning_rate": 3.251322065928257e-05, "loss": 0.0349, "step": 86540 }, { "epoch": 0.03275, "grad_norm": 0.07501520216464996, "learning_rate": 3.250927797053254e-05, "loss": 0.0348, "step": 86550 }, { "epoch": 0.0328, "grad_norm": 0.08077821880578995, "learning_rate": 3.250533507648168e-05, "loss": 0.0343, "step": 86560 }, { "epoch": 0.03285, "grad_norm": 0.08169900625944138, "learning_rate": 3.250139197723776e-05, "loss": 0.0348, "step": 86570 }, { "epoch": 0.0329, "grad_norm": 0.07048002630472183, "learning_rate": 3.249744867290862e-05, "loss": 0.036, "step": 86580 }, { "epoch": 0.03295, "grad_norm": 0.0722208097577095, "learning_rate": 3.249350516360203e-05, "loss": 0.0386, "step": 86590 }, { "epoch": 0.033, "grad_norm": 0.11111637204885483, "learning_rate": 3.2489561449425844e-05, "loss": 0.0345, "step": 86600 }, { "epoch": 0.03305, "grad_norm": 0.11488772928714752, "learning_rate": 3.248561753048786e-05, "loss": 0.0353, "step": 86610 }, { "epoch": 0.0331, "grad_norm": 0.07998430728912354, "learning_rate": 3.2481673406895895e-05, "loss": 0.0352, "step": 86620 }, { "epoch": 0.03315, "grad_norm": 0.07691726833581924, "learning_rate": 3.247772907875779e-05, "loss": 0.0353, "step": 86630 }, { "epoch": 0.0332, "grad_norm": 0.08369085937738419, "learning_rate": 3.247378454618138e-05, "loss": 0.0343, "step": 86640 }, { "epoch": 0.03325, "grad_norm": 0.09220634400844574, "learning_rate": 3.2469839809274514e-05, "loss": 0.0373, "step": 86650 }, { "epoch": 0.0333, "grad_norm": 0.09541697800159454, "learning_rate": 3.2465894868145034e-05, "loss": 0.0345, "step": 86660 }, { "epoch": 0.03335, "grad_norm": 0.09451382607221603, "learning_rate": 3.246194972290079e-05, "loss": 0.0351, "step": 86670 }, { "epoch": 0.0334, "grad_norm": 0.09029912203550339, "learning_rate": 3.2458004373649656e-05, "loss": 0.0345, "step": 86680 }, { "epoch": 0.03345, "grad_norm": 0.0942051112651825, "learning_rate": 3.245405882049947e-05, "loss": 0.0349, "step": 86690 }, { "epoch": 0.0335, "grad_norm": 0.07905034720897675, "learning_rate": 3.245011306355812e-05, "loss": 0.0341, "step": 86700 }, { "epoch": 0.03355, "grad_norm": 0.07603990286588669, "learning_rate": 3.2446167102933474e-05, "loss": 0.0356, "step": 86710 }, { "epoch": 0.0336, "grad_norm": 0.09684465080499649, "learning_rate": 3.244222093873342e-05, "loss": 0.0356, "step": 86720 }, { "epoch": 0.03365, "grad_norm": 0.08049037307500839, "learning_rate": 3.243827457106584e-05, "loss": 0.0362, "step": 86730 }, { "epoch": 0.0337, "grad_norm": 0.07895974814891815, "learning_rate": 3.243432800003863e-05, "loss": 0.0347, "step": 86740 }, { "epoch": 0.03375, "grad_norm": 0.06666752696037292, "learning_rate": 3.2430381225759686e-05, "loss": 0.0337, "step": 86750 }, { "epoch": 0.0338, "grad_norm": 0.0748344138264656, "learning_rate": 3.242643424833691e-05, "loss": 0.0354, "step": 86760 }, { "epoch": 0.03385, "grad_norm": 0.07513680309057236, "learning_rate": 3.242248706787821e-05, "loss": 0.0355, "step": 86770 }, { "epoch": 0.0339, "grad_norm": 0.07731368392705917, "learning_rate": 3.241853968449151e-05, "loss": 0.0353, "step": 86780 }, { "epoch": 0.03395, "grad_norm": 0.08503787219524384, "learning_rate": 3.241459209828471e-05, "loss": 0.0391, "step": 86790 }, { "epoch": 0.034, "grad_norm": 0.08694326132535934, "learning_rate": 3.241064430936575e-05, "loss": 0.0373, "step": 86800 }, { "epoch": 0.03405, "grad_norm": 0.07162105292081833, "learning_rate": 3.2406696317842566e-05, "loss": 0.035, "step": 86810 }, { "epoch": 0.0341, "grad_norm": 0.08332312852144241, "learning_rate": 3.2402748123823076e-05, "loss": 0.036, "step": 86820 }, { "epoch": 0.03415, "grad_norm": 0.08905179798603058, "learning_rate": 3.239879972741524e-05, "loss": 0.0381, "step": 86830 }, { "epoch": 0.0342, "grad_norm": 0.08517122268676758, "learning_rate": 3.2394851128727e-05, "loss": 0.0362, "step": 86840 }, { "epoch": 0.03425, "grad_norm": 0.07143902778625488, "learning_rate": 3.2390902327866315e-05, "loss": 0.0348, "step": 86850 }, { "epoch": 0.0343, "grad_norm": 0.08254444599151611, "learning_rate": 3.238695332494113e-05, "loss": 0.0351, "step": 86860 }, { "epoch": 0.03435, "grad_norm": 0.08892206102609634, "learning_rate": 3.2383004120059415e-05, "loss": 0.037, "step": 86870 }, { "epoch": 0.0344, "grad_norm": 0.08615243434906006, "learning_rate": 3.237905471332914e-05, "loss": 0.0355, "step": 86880 }, { "epoch": 0.03445, "grad_norm": 0.06337711960077286, "learning_rate": 3.237510510485828e-05, "loss": 0.0354, "step": 86890 }, { "epoch": 0.0345, "grad_norm": 0.08787225186824799, "learning_rate": 3.237115529475482e-05, "loss": 0.0366, "step": 86900 }, { "epoch": 0.03455, "grad_norm": 0.07330426573753357, "learning_rate": 3.2367205283126744e-05, "loss": 0.0344, "step": 86910 }, { "epoch": 0.0346, "grad_norm": 0.07716590911149979, "learning_rate": 3.236325507008204e-05, "loss": 0.0365, "step": 86920 }, { "epoch": 0.03465, "grad_norm": 0.0840856209397316, "learning_rate": 3.235930465572872e-05, "loss": 0.0349, "step": 86930 }, { "epoch": 0.0347, "grad_norm": 0.07107097655534744, "learning_rate": 3.2355354040174765e-05, "loss": 0.037, "step": 86940 }, { "epoch": 0.03475, "grad_norm": 0.07819947600364685, "learning_rate": 3.23514032235282e-05, "loss": 0.0347, "step": 86950 }, { "epoch": 0.0348, "grad_norm": 0.0675472691655159, "learning_rate": 3.234745220589702e-05, "loss": 0.0344, "step": 86960 }, { "epoch": 0.03485, "grad_norm": 0.08264216035604477, "learning_rate": 3.234350098738927e-05, "loss": 0.0339, "step": 86970 }, { "epoch": 0.0349, "grad_norm": 0.09070836007595062, "learning_rate": 3.233954956811295e-05, "loss": 0.0346, "step": 86980 }, { "epoch": 0.03495, "grad_norm": 0.07672609388828278, "learning_rate": 3.2335597948176116e-05, "loss": 0.0346, "step": 86990 }, { "epoch": 0.035, "grad_norm": 0.08686935156583786, "learning_rate": 3.233164612768678e-05, "loss": 0.0352, "step": 87000 }, { "epoch": 0.03505, "grad_norm": 0.07569900155067444, "learning_rate": 3.2327694106753e-05, "loss": 0.0353, "step": 87010 }, { "epoch": 0.0351, "grad_norm": 0.09087596088647842, "learning_rate": 3.232374188548281e-05, "loss": 0.0352, "step": 87020 }, { "epoch": 0.03515, "grad_norm": 0.09324821829795837, "learning_rate": 3.231978946398427e-05, "loss": 0.0368, "step": 87030 }, { "epoch": 0.0352, "grad_norm": 0.08531796187162399, "learning_rate": 3.2315836842365435e-05, "loss": 0.0354, "step": 87040 }, { "epoch": 0.03525, "grad_norm": 0.08075599372386932, "learning_rate": 3.231188402073437e-05, "loss": 0.0365, "step": 87050 }, { "epoch": 0.0353, "grad_norm": 0.08507949113845825, "learning_rate": 3.2307930999199155e-05, "loss": 0.0371, "step": 87060 }, { "epoch": 0.03535, "grad_norm": 0.07883370667695999, "learning_rate": 3.230397777786783e-05, "loss": 0.0348, "step": 87070 }, { "epoch": 0.0354, "grad_norm": 0.07218769192695618, "learning_rate": 3.2300024356848514e-05, "loss": 0.035, "step": 87080 }, { "epoch": 0.03545, "grad_norm": 0.09903611242771149, "learning_rate": 3.229607073624926e-05, "loss": 0.0353, "step": 87090 }, { "epoch": 0.0355, "grad_norm": 0.08573555201292038, "learning_rate": 3.229211691617819e-05, "loss": 0.0364, "step": 87100 }, { "epoch": 0.03555, "grad_norm": 0.06599839776754379, "learning_rate": 3.228816289674337e-05, "loss": 0.0362, "step": 87110 }, { "epoch": 0.0356, "grad_norm": 0.0798148587346077, "learning_rate": 3.2284208678052924e-05, "loss": 0.0357, "step": 87120 }, { "epoch": 0.03565, "grad_norm": 0.0782741829752922, "learning_rate": 3.2280254260214936e-05, "loss": 0.0343, "step": 87130 }, { "epoch": 0.0357, "grad_norm": 0.06847995519638062, "learning_rate": 3.227629964333755e-05, "loss": 0.0346, "step": 87140 }, { "epoch": 0.03575, "grad_norm": 0.07769029587507248, "learning_rate": 3.227234482752884e-05, "loss": 0.0344, "step": 87150 }, { "epoch": 0.0358, "grad_norm": 0.09033706784248352, "learning_rate": 3.226838981289698e-05, "loss": 0.0361, "step": 87160 }, { "epoch": 0.03585, "grad_norm": 0.0827813595533371, "learning_rate": 3.226443459955006e-05, "loss": 0.0345, "step": 87170 }, { "epoch": 0.0359, "grad_norm": 0.0844188928604126, "learning_rate": 3.226047918759623e-05, "loss": 0.0346, "step": 87180 }, { "epoch": 0.03595, "grad_norm": 0.073844313621521, "learning_rate": 3.225652357714363e-05, "loss": 0.036, "step": 87190 }, { "epoch": 0.036, "grad_norm": 0.07636504620313644, "learning_rate": 3.2252567768300394e-05, "loss": 0.0355, "step": 87200 }, { "epoch": 0.03605, "grad_norm": 0.08958642184734344, "learning_rate": 3.2248611761174684e-05, "loss": 0.0346, "step": 87210 }, { "epoch": 0.0361, "grad_norm": 0.06772074848413467, "learning_rate": 3.2244655555874645e-05, "loss": 0.0344, "step": 87220 }, { "epoch": 0.03615, "grad_norm": 0.11023818701505661, "learning_rate": 3.224069915250846e-05, "loss": 0.0362, "step": 87230 }, { "epoch": 0.0362, "grad_norm": 0.08262085169553757, "learning_rate": 3.2236742551184265e-05, "loss": 0.0357, "step": 87240 }, { "epoch": 0.03625, "grad_norm": 0.10467635840177536, "learning_rate": 3.223278575201026e-05, "loss": 0.0362, "step": 87250 }, { "epoch": 0.0363, "grad_norm": 0.09873107075691223, "learning_rate": 3.22288287550946e-05, "loss": 0.0374, "step": 87260 }, { "epoch": 0.03635, "grad_norm": 0.08017779141664505, "learning_rate": 3.2224871560545484e-05, "loss": 0.0358, "step": 87270 }, { "epoch": 0.0364, "grad_norm": 0.0882391408085823, "learning_rate": 3.222091416847109e-05, "loss": 0.0353, "step": 87280 }, { "epoch": 0.03645, "grad_norm": 0.09055408090353012, "learning_rate": 3.221695657897961e-05, "loss": 0.0355, "step": 87290 }, { "epoch": 0.0365, "grad_norm": 0.08563832938671112, "learning_rate": 3.2212998792179255e-05, "loss": 0.0354, "step": 87300 }, { "epoch": 0.03655, "grad_norm": 0.08165394514799118, "learning_rate": 3.2209040808178223e-05, "loss": 0.0353, "step": 87310 }, { "epoch": 0.0366, "grad_norm": 0.08915353566408157, "learning_rate": 3.220508262708473e-05, "loss": 0.0387, "step": 87320 }, { "epoch": 0.03665, "grad_norm": 0.11730699241161346, "learning_rate": 3.2201124249006976e-05, "loss": 0.0355, "step": 87330 }, { "epoch": 0.0367, "grad_norm": 0.1185787171125412, "learning_rate": 3.219716567405319e-05, "loss": 0.0375, "step": 87340 }, { "epoch": 0.03675, "grad_norm": 0.1295836716890335, "learning_rate": 3.21932069023316e-05, "loss": 0.0376, "step": 87350 }, { "epoch": 0.0368, "grad_norm": 0.12778916954994202, "learning_rate": 3.2189247933950436e-05, "loss": 0.0368, "step": 87360 }, { "epoch": 0.03685, "grad_norm": 0.09289523214101791, "learning_rate": 3.218528876901794e-05, "loss": 0.0353, "step": 87370 }, { "epoch": 0.0369, "grad_norm": 0.08766157180070877, "learning_rate": 3.218132940764234e-05, "loss": 0.0355, "step": 87380 }, { "epoch": 0.03695, "grad_norm": 0.08578447997570038, "learning_rate": 3.21773698499319e-05, "loss": 0.0362, "step": 87390 }, { "epoch": 0.037, "grad_norm": 0.07584724575281143, "learning_rate": 3.2173410095994854e-05, "loss": 0.0374, "step": 87400 }, { "epoch": 0.03705, "grad_norm": 0.07478147000074387, "learning_rate": 3.216945014593948e-05, "loss": 0.0402, "step": 87410 }, { "epoch": 0.0371, "grad_norm": 0.09229233115911484, "learning_rate": 3.2165489999874024e-05, "loss": 0.0364, "step": 87420 }, { "epoch": 0.03715, "grad_norm": 0.07323060184717178, "learning_rate": 3.216152965790677e-05, "loss": 0.0362, "step": 87430 }, { "epoch": 0.0372, "grad_norm": 0.08562704920768738, "learning_rate": 3.2157569120145986e-05, "loss": 0.0357, "step": 87440 }, { "epoch": 0.03725, "grad_norm": 0.07262744754552841, "learning_rate": 3.2153608386699955e-05, "loss": 0.0377, "step": 87450 }, { "epoch": 0.0373, "grad_norm": 0.0773356705904007, "learning_rate": 3.214964745767694e-05, "loss": 0.0359, "step": 87460 }, { "epoch": 0.03735, "grad_norm": 0.08478112518787384, "learning_rate": 3.214568633318526e-05, "loss": 0.0369, "step": 87470 }, { "epoch": 0.0374, "grad_norm": 0.08348061889410019, "learning_rate": 3.2141725013333206e-05, "loss": 0.036, "step": 87480 }, { "epoch": 0.03745, "grad_norm": 0.0778733417391777, "learning_rate": 3.213776349822907e-05, "loss": 0.0365, "step": 87490 }, { "epoch": 0.0375, "grad_norm": 0.0955268070101738, "learning_rate": 3.213380178798117e-05, "loss": 0.0356, "step": 87500 }, { "epoch": 0.03755, "grad_norm": 0.09940584748983383, "learning_rate": 3.212983988269779e-05, "loss": 0.0361, "step": 87510 }, { "epoch": 0.0376, "grad_norm": 0.08237015455961227, "learning_rate": 3.212587778248728e-05, "loss": 0.0372, "step": 87520 }, { "epoch": 0.03765, "grad_norm": 0.08107157051563263, "learning_rate": 3.212191548745794e-05, "loss": 0.0358, "step": 87530 }, { "epoch": 0.0377, "grad_norm": 0.09527316689491272, "learning_rate": 3.211795299771812e-05, "loss": 0.0369, "step": 87540 }, { "epoch": 0.03775, "grad_norm": 0.08648992329835892, "learning_rate": 3.211399031337612e-05, "loss": 0.0367, "step": 87550 }, { "epoch": 0.0378, "grad_norm": 0.0921984314918518, "learning_rate": 3.211002743454031e-05, "loss": 0.0359, "step": 87560 }, { "epoch": 0.03785, "grad_norm": 0.07162953913211823, "learning_rate": 3.210606436131902e-05, "loss": 0.0371, "step": 87570 }, { "epoch": 0.0379, "grad_norm": 0.0776718333363533, "learning_rate": 3.21021010938206e-05, "loss": 0.0349, "step": 87580 }, { "epoch": 0.03795, "grad_norm": 0.06633836776018143, "learning_rate": 3.20981376321534e-05, "loss": 0.0368, "step": 87590 }, { "epoch": 0.038, "grad_norm": 0.09633494913578033, "learning_rate": 3.209417397642579e-05, "loss": 0.0383, "step": 87600 }, { "epoch": 0.03805, "grad_norm": 0.08774057030677795, "learning_rate": 3.209021012674612e-05, "loss": 0.038, "step": 87610 }, { "epoch": 0.0381, "grad_norm": 0.08426745235919952, "learning_rate": 3.208624608322277e-05, "loss": 0.0359, "step": 87620 }, { "epoch": 0.03815, "grad_norm": 0.09102395176887512, "learning_rate": 3.2082281845964125e-05, "loss": 0.0345, "step": 87630 }, { "epoch": 0.0382, "grad_norm": 0.07798417657613754, "learning_rate": 3.207831741507855e-05, "loss": 0.0355, "step": 87640 }, { "epoch": 0.03825, "grad_norm": 0.07958195358514786, "learning_rate": 3.207435279067443e-05, "loss": 0.0356, "step": 87650 }, { "epoch": 0.0383, "grad_norm": 0.07889354974031448, "learning_rate": 3.207038797286017e-05, "loss": 0.0351, "step": 87660 }, { "epoch": 0.03835, "grad_norm": 0.08104643225669861, "learning_rate": 3.2066422961744155e-05, "loss": 0.0383, "step": 87670 }, { "epoch": 0.0384, "grad_norm": 0.07670079171657562, "learning_rate": 3.2062457757434794e-05, "loss": 0.0374, "step": 87680 }, { "epoch": 0.03845, "grad_norm": 0.08696895092725754, "learning_rate": 3.2058492360040485e-05, "loss": 0.0365, "step": 87690 }, { "epoch": 0.0385, "grad_norm": 0.07794123142957687, "learning_rate": 3.2054526769669654e-05, "loss": 0.0366, "step": 87700 }, { "epoch": 0.03855, "grad_norm": 0.0759347528219223, "learning_rate": 3.20505609864307e-05, "loss": 0.036, "step": 87710 }, { "epoch": 0.0386, "grad_norm": 0.07720106095075607, "learning_rate": 3.204659501043207e-05, "loss": 0.0349, "step": 87720 }, { "epoch": 0.03865, "grad_norm": 0.07206979393959045, "learning_rate": 3.204262884178218e-05, "loss": 0.0354, "step": 87730 }, { "epoch": 0.0387, "grad_norm": 0.07867118716239929, "learning_rate": 3.203866248058946e-05, "loss": 0.0355, "step": 87740 }, { "epoch": 0.03875, "grad_norm": 0.07198004424571991, "learning_rate": 3.2034695926962344e-05, "loss": 0.0344, "step": 87750 }, { "epoch": 0.0388, "grad_norm": 0.06714322417974472, "learning_rate": 3.203072918100929e-05, "loss": 0.035, "step": 87760 }, { "epoch": 0.03885, "grad_norm": 0.08932408690452576, "learning_rate": 3.202676224283874e-05, "loss": 0.035, "step": 87770 }, { "epoch": 0.0389, "grad_norm": 0.08211679756641388, "learning_rate": 3.202279511255915e-05, "loss": 0.0346, "step": 87780 }, { "epoch": 0.03895, "grad_norm": 0.07144937664270401, "learning_rate": 3.201882779027898e-05, "loss": 0.0366, "step": 87790 }, { "epoch": 0.039, "grad_norm": 0.08687329292297363, "learning_rate": 3.20148602761067e-05, "loss": 0.0336, "step": 87800 }, { "epoch": 0.03905, "grad_norm": 0.08850160986185074, "learning_rate": 3.201089257015077e-05, "loss": 0.0359, "step": 87810 }, { "epoch": 0.0391, "grad_norm": 0.09064152091741562, "learning_rate": 3.2006924672519677e-05, "loss": 0.0339, "step": 87820 }, { "epoch": 0.03915, "grad_norm": 0.09283946454524994, "learning_rate": 3.2002956583321895e-05, "loss": 0.0362, "step": 87830 }, { "epoch": 0.0392, "grad_norm": 0.09552500396966934, "learning_rate": 3.19989883026659e-05, "loss": 0.0353, "step": 87840 }, { "epoch": 0.03925, "grad_norm": 0.07619480788707733, "learning_rate": 3.1995019830660213e-05, "loss": 0.034, "step": 87850 }, { "epoch": 0.0393, "grad_norm": 0.07461630553007126, "learning_rate": 3.1991051167413296e-05, "loss": 0.0345, "step": 87860 }, { "epoch": 0.03935, "grad_norm": 0.07883042097091675, "learning_rate": 3.198708231303367e-05, "loss": 0.0353, "step": 87870 }, { "epoch": 0.0394, "grad_norm": 0.09476449340581894, "learning_rate": 3.1983113267629835e-05, "loss": 0.035, "step": 87880 }, { "epoch": 0.03945, "grad_norm": 0.08893871307373047, "learning_rate": 3.197914403131032e-05, "loss": 0.0353, "step": 87890 }, { "epoch": 0.0395, "grad_norm": 0.09010148793458939, "learning_rate": 3.197517460418362e-05, "loss": 0.0337, "step": 87900 }, { "epoch": 0.03955, "grad_norm": 0.08100104331970215, "learning_rate": 3.1971204986358274e-05, "loss": 0.0349, "step": 87910 }, { "epoch": 0.0396, "grad_norm": 0.07950063794851303, "learning_rate": 3.196723517794279e-05, "loss": 0.0365, "step": 87920 }, { "epoch": 0.03965, "grad_norm": 0.09475506097078323, "learning_rate": 3.196326517904572e-05, "loss": 0.0348, "step": 87930 }, { "epoch": 0.0397, "grad_norm": 0.07854641228914261, "learning_rate": 3.19592949897756e-05, "loss": 0.0363, "step": 87940 }, { "epoch": 0.03975, "grad_norm": 0.08651375770568848, "learning_rate": 3.1955324610240965e-05, "loss": 0.0362, "step": 87950 }, { "epoch": 0.0398, "grad_norm": 0.08224500715732574, "learning_rate": 3.195135404055037e-05, "loss": 0.0378, "step": 87960 }, { "epoch": 0.03985, "grad_norm": 0.07783657312393188, "learning_rate": 3.194738328081236e-05, "loss": 0.0362, "step": 87970 }, { "epoch": 0.0399, "grad_norm": 0.08973323553800583, "learning_rate": 3.1943412331135506e-05, "loss": 0.0363, "step": 87980 }, { "epoch": 0.03995, "grad_norm": 0.07899042963981628, "learning_rate": 3.193944119162837e-05, "loss": 0.036, "step": 87990 }, { "epoch": 0.04, "grad_norm": 0.0829811543226242, "learning_rate": 3.1935469862399515e-05, "loss": 0.0406, "step": 88000 }, { "epoch": 0.04005, "grad_norm": 0.08840011805295944, "learning_rate": 3.193149834355752e-05, "loss": 0.0369, "step": 88010 }, { "epoch": 0.0401, "grad_norm": 0.08670192956924438, "learning_rate": 3.1927526635210966e-05, "loss": 0.0356, "step": 88020 }, { "epoch": 0.04015, "grad_norm": 0.08463533222675323, "learning_rate": 3.1923554737468444e-05, "loss": 0.0361, "step": 88030 }, { "epoch": 0.0402, "grad_norm": 0.08968721330165863, "learning_rate": 3.191958265043852e-05, "loss": 0.0366, "step": 88040 }, { "epoch": 0.04025, "grad_norm": 0.08073660731315613, "learning_rate": 3.191561037422981e-05, "loss": 0.0361, "step": 88050 }, { "epoch": 0.0403, "grad_norm": 0.07392221689224243, "learning_rate": 3.191163790895092e-05, "loss": 0.0349, "step": 88060 }, { "epoch": 0.04035, "grad_norm": 0.0711127445101738, "learning_rate": 3.190766525471045e-05, "loss": 0.036, "step": 88070 }, { "epoch": 0.0404, "grad_norm": 0.07902902364730835, "learning_rate": 3.190369241161699e-05, "loss": 0.0374, "step": 88080 }, { "epoch": 0.04045, "grad_norm": 0.07067114859819412, "learning_rate": 3.189971937977918e-05, "loss": 0.0352, "step": 88090 }, { "epoch": 0.0405, "grad_norm": 0.07859675586223602, "learning_rate": 3.1895746159305646e-05, "loss": 0.0355, "step": 88100 }, { "epoch": 0.04055, "grad_norm": 0.06850699335336685, "learning_rate": 3.1891772750304985e-05, "loss": 0.037, "step": 88110 }, { "epoch": 0.0406, "grad_norm": 0.07936318963766098, "learning_rate": 3.1887799152885856e-05, "loss": 0.036, "step": 88120 }, { "epoch": 0.04065, "grad_norm": 0.11133728921413422, "learning_rate": 3.188382536715688e-05, "loss": 0.0359, "step": 88130 }, { "epoch": 0.0407, "grad_norm": 0.11879123747348785, "learning_rate": 3.187985139322671e-05, "loss": 0.0376, "step": 88140 }, { "epoch": 0.04075, "grad_norm": 0.06967378407716751, "learning_rate": 3.187587723120399e-05, "loss": 0.0364, "step": 88150 }, { "epoch": 0.0408, "grad_norm": 0.08400028944015503, "learning_rate": 3.1871902881197365e-05, "loss": 0.037, "step": 88160 }, { "epoch": 0.04085, "grad_norm": 0.08613050729036331, "learning_rate": 3.186792834331549e-05, "loss": 0.0354, "step": 88170 }, { "epoch": 0.0409, "grad_norm": 0.08761177211999893, "learning_rate": 3.186395361766704e-05, "loss": 0.0354, "step": 88180 }, { "epoch": 0.04095, "grad_norm": 0.07430850714445114, "learning_rate": 3.185997870436068e-05, "loss": 0.0353, "step": 88190 }, { "epoch": 0.041, "grad_norm": 0.0727161318063736, "learning_rate": 3.185600360350508e-05, "loss": 0.0355, "step": 88200 }, { "epoch": 0.04105, "grad_norm": 0.08402503281831741, "learning_rate": 3.1852028315208914e-05, "loss": 0.0361, "step": 88210 }, { "epoch": 0.0411, "grad_norm": 0.08046665042638779, "learning_rate": 3.1848052839580866e-05, "loss": 0.035, "step": 88220 }, { "epoch": 0.04115, "grad_norm": 0.07845325022935867, "learning_rate": 3.184407717672962e-05, "loss": 0.0366, "step": 88230 }, { "epoch": 0.0412, "grad_norm": 0.08222614973783493, "learning_rate": 3.1840101326763894e-05, "loss": 0.0362, "step": 88240 }, { "epoch": 0.04125, "grad_norm": 0.07744602859020233, "learning_rate": 3.183612528979235e-05, "loss": 0.0352, "step": 88250 }, { "epoch": 0.0413, "grad_norm": 0.07237857580184937, "learning_rate": 3.183214906592372e-05, "loss": 0.0342, "step": 88260 }, { "epoch": 0.04135, "grad_norm": 0.07495765388011932, "learning_rate": 3.18281726552667e-05, "loss": 0.0346, "step": 88270 }, { "epoch": 0.0414, "grad_norm": 0.08611702919006348, "learning_rate": 3.182419605793e-05, "loss": 0.035, "step": 88280 }, { "epoch": 0.04145, "grad_norm": 0.0782887414097786, "learning_rate": 3.182021927402235e-05, "loss": 0.0367, "step": 88290 }, { "epoch": 0.0415, "grad_norm": 0.07414274662733078, "learning_rate": 3.181624230365245e-05, "loss": 0.0342, "step": 88300 }, { "epoch": 0.04155, "grad_norm": 0.07089847326278687, "learning_rate": 3.1812265146929064e-05, "loss": 0.0349, "step": 88310 }, { "epoch": 0.0416, "grad_norm": 0.08510833233594894, "learning_rate": 3.18082878039609e-05, "loss": 0.0354, "step": 88320 }, { "epoch": 0.04165, "grad_norm": 0.0712892934679985, "learning_rate": 3.180431027485672e-05, "loss": 0.0343, "step": 88330 }, { "epoch": 0.0417, "grad_norm": 0.10102806985378265, "learning_rate": 3.1800332559725235e-05, "loss": 0.0368, "step": 88340 }, { "epoch": 0.04175, "grad_norm": 0.07337898015975952, "learning_rate": 3.179635465867522e-05, "loss": 0.035, "step": 88350 }, { "epoch": 0.0418, "grad_norm": 0.0787159651517868, "learning_rate": 3.179237657181542e-05, "loss": 0.0357, "step": 88360 }, { "epoch": 0.04185, "grad_norm": 0.07552842795848846, "learning_rate": 3.1788398299254596e-05, "loss": 0.0348, "step": 88370 }, { "epoch": 0.0419, "grad_norm": 0.06384848058223724, "learning_rate": 3.178441984110151e-05, "loss": 0.0343, "step": 88380 }, { "epoch": 0.04195, "grad_norm": 0.0709843784570694, "learning_rate": 3.178044119746495e-05, "loss": 0.0344, "step": 88390 }, { "epoch": 0.042, "grad_norm": 0.0840144157409668, "learning_rate": 3.177646236845366e-05, "loss": 0.0343, "step": 88400 }, { "epoch": 0.04205, "grad_norm": 0.0872381180524826, "learning_rate": 3.177248335417644e-05, "loss": 0.0372, "step": 88410 }, { "epoch": 0.0421, "grad_norm": 0.08309582620859146, "learning_rate": 3.176850415474206e-05, "loss": 0.0351, "step": 88420 }, { "epoch": 0.04215, "grad_norm": 0.07073325663805008, "learning_rate": 3.176452477025933e-05, "loss": 0.0359, "step": 88430 }, { "epoch": 0.0422, "grad_norm": 0.06448944658041, "learning_rate": 3.176054520083703e-05, "loss": 0.0363, "step": 88440 }, { "epoch": 0.04225, "grad_norm": 0.08094968646764755, "learning_rate": 3.175656544658397e-05, "loss": 0.0349, "step": 88450 }, { "epoch": 0.0423, "grad_norm": 0.08361298590898514, "learning_rate": 3.175258550760894e-05, "loss": 0.0391, "step": 88460 }, { "epoch": 0.04235, "grad_norm": 0.09900790452957153, "learning_rate": 3.174860538402076e-05, "loss": 0.0365, "step": 88470 }, { "epoch": 0.0424, "grad_norm": 0.12086690217256546, "learning_rate": 3.174462507592825e-05, "loss": 0.0369, "step": 88480 }, { "epoch": 0.04245, "grad_norm": 0.0957237109541893, "learning_rate": 3.1740644583440224e-05, "loss": 0.0377, "step": 88490 }, { "epoch": 0.0425, "grad_norm": 0.11632449179887772, "learning_rate": 3.17366639066655e-05, "loss": 0.0373, "step": 88500 }, { "epoch": 0.04255, "grad_norm": 0.10616426169872284, "learning_rate": 3.173268304571292e-05, "loss": 0.0375, "step": 88510 }, { "epoch": 0.0426, "grad_norm": 0.09581998735666275, "learning_rate": 3.172870200069132e-05, "loss": 0.0363, "step": 88520 }, { "epoch": 0.04265, "grad_norm": 0.07968276739120483, "learning_rate": 3.1724720771709525e-05, "loss": 0.0352, "step": 88530 }, { "epoch": 0.0427, "grad_norm": 0.07968457788228989, "learning_rate": 3.17207393588764e-05, "loss": 0.0361, "step": 88540 }, { "epoch": 0.04275, "grad_norm": 0.07705254852771759, "learning_rate": 3.1716757762300775e-05, "loss": 0.0362, "step": 88550 }, { "epoch": 0.0428, "grad_norm": 0.07352401316165924, "learning_rate": 3.171277598209153e-05, "loss": 0.0351, "step": 88560 }, { "epoch": 0.04285, "grad_norm": 0.07496006041765213, "learning_rate": 3.17087940183575e-05, "loss": 0.0362, "step": 88570 }, { "epoch": 0.0429, "grad_norm": 0.09419265389442444, "learning_rate": 3.170481187120757e-05, "loss": 0.0353, "step": 88580 }, { "epoch": 0.04295, "grad_norm": 0.0760672315955162, "learning_rate": 3.1700829540750596e-05, "loss": 0.0363, "step": 88590 }, { "epoch": 0.043, "grad_norm": 0.07824535667896271, "learning_rate": 3.1696847027095466e-05, "loss": 0.0357, "step": 88600 }, { "epoch": 0.04305, "grad_norm": 0.07357686758041382, "learning_rate": 3.1692864330351046e-05, "loss": 0.0356, "step": 88610 }, { "epoch": 0.0431, "grad_norm": 0.08512439578771591, "learning_rate": 3.168888145062623e-05, "loss": 0.0367, "step": 88620 }, { "epoch": 0.04315, "grad_norm": 0.08869446069002151, "learning_rate": 3.168489838802991e-05, "loss": 0.0353, "step": 88630 }, { "epoch": 0.0432, "grad_norm": 0.0901620015501976, "learning_rate": 3.168091514267099e-05, "loss": 0.0358, "step": 88640 }, { "epoch": 0.04325, "grad_norm": 0.08479337394237518, "learning_rate": 3.167693171465835e-05, "loss": 0.0361, "step": 88650 }, { "epoch": 0.0433, "grad_norm": 0.09710962325334549, "learning_rate": 3.167294810410091e-05, "loss": 0.0364, "step": 88660 }, { "epoch": 0.04335, "grad_norm": 0.14224158227443695, "learning_rate": 3.166896431110757e-05, "loss": 0.0361, "step": 88670 }, { "epoch": 0.0434, "grad_norm": 0.10792012512683868, "learning_rate": 3.166498033578725e-05, "loss": 0.0358, "step": 88680 }, { "epoch": 0.04345, "grad_norm": 0.09018728882074356, "learning_rate": 3.166099617824888e-05, "loss": 0.038, "step": 88690 }, { "epoch": 0.0435, "grad_norm": 0.09542609006166458, "learning_rate": 3.165701183860137e-05, "loss": 0.037, "step": 88700 }, { "epoch": 0.04355, "grad_norm": 0.08831940591335297, "learning_rate": 3.165302731695366e-05, "loss": 0.0362, "step": 88710 }, { "epoch": 0.0436, "grad_norm": 0.0845828652381897, "learning_rate": 3.1649042613414684e-05, "loss": 0.0361, "step": 88720 }, { "epoch": 0.04365, "grad_norm": 0.07160351425409317, "learning_rate": 3.164505772809338e-05, "loss": 0.0358, "step": 88730 }, { "epoch": 0.0437, "grad_norm": 0.07241127640008926, "learning_rate": 3.164107266109869e-05, "loss": 0.035, "step": 88740 }, { "epoch": 0.04375, "grad_norm": 0.06150011345744133, "learning_rate": 3.163708741253957e-05, "loss": 0.034, "step": 88750 }, { "epoch": 0.0438, "grad_norm": 0.0823177844285965, "learning_rate": 3.163310198252497e-05, "loss": 0.035, "step": 88760 }, { "epoch": 0.04385, "grad_norm": 0.07345187664031982, "learning_rate": 3.162911637116386e-05, "loss": 0.0364, "step": 88770 }, { "epoch": 0.0439, "grad_norm": 0.07987566292285919, "learning_rate": 3.1625130578565196e-05, "loss": 0.0355, "step": 88780 }, { "epoch": 0.04395, "grad_norm": 0.07271317392587662, "learning_rate": 3.162114460483796e-05, "loss": 0.0362, "step": 88790 }, { "epoch": 0.044, "grad_norm": 0.08566244691610336, "learning_rate": 3.1617158450091114e-05, "loss": 0.0349, "step": 88800 }, { "epoch": 0.04405, "grad_norm": 0.0695837214589119, "learning_rate": 3.161317211443363e-05, "loss": 0.0359, "step": 88810 }, { "epoch": 0.0441, "grad_norm": 0.07219763100147247, "learning_rate": 3.160918559797451e-05, "loss": 0.0358, "step": 88820 }, { "epoch": 0.04415, "grad_norm": 0.10157402604818344, "learning_rate": 3.160519890082275e-05, "loss": 0.0359, "step": 88830 }, { "epoch": 0.0442, "grad_norm": 0.0994025319814682, "learning_rate": 3.1601212023087324e-05, "loss": 0.0358, "step": 88840 }, { "epoch": 0.04425, "grad_norm": 0.09215868264436722, "learning_rate": 3.159722496487725e-05, "loss": 0.0359, "step": 88850 }, { "epoch": 0.0443, "grad_norm": 0.08054187148809433, "learning_rate": 3.159323772630151e-05, "loss": 0.0351, "step": 88860 }, { "epoch": 0.04435, "grad_norm": 0.0746442899107933, "learning_rate": 3.1589250307469134e-05, "loss": 0.0367, "step": 88870 }, { "epoch": 0.0444, "grad_norm": 0.09109540283679962, "learning_rate": 3.158526270848913e-05, "loss": 0.0352, "step": 88880 }, { "epoch": 0.04445, "grad_norm": 0.08202842622995377, "learning_rate": 3.1581274929470514e-05, "loss": 0.036, "step": 88890 }, { "epoch": 0.0445, "grad_norm": 0.07617323100566864, "learning_rate": 3.1577286970522316e-05, "loss": 0.0364, "step": 88900 }, { "epoch": 0.04455, "grad_norm": 0.08100539445877075, "learning_rate": 3.157329883175357e-05, "loss": 0.0353, "step": 88910 }, { "epoch": 0.0446, "grad_norm": 0.0730210468173027, "learning_rate": 3.15693105132733e-05, "loss": 0.036, "step": 88920 }, { "epoch": 0.04465, "grad_norm": 0.07562186568975449, "learning_rate": 3.156532201519055e-05, "loss": 0.0352, "step": 88930 }, { "epoch": 0.0447, "grad_norm": 0.09780492633581161, "learning_rate": 3.156133333761435e-05, "loss": 0.0356, "step": 88940 }, { "epoch": 0.04475, "grad_norm": 0.08263695985078812, "learning_rate": 3.1557344480653776e-05, "loss": 0.0358, "step": 88950 }, { "epoch": 0.0448, "grad_norm": 0.08889447152614594, "learning_rate": 3.155335544441786e-05, "loss": 0.0369, "step": 88960 }, { "epoch": 0.04485, "grad_norm": 0.07156159728765488, "learning_rate": 3.154936622901567e-05, "loss": 0.0358, "step": 88970 }, { "epoch": 0.0449, "grad_norm": 0.07655026763677597, "learning_rate": 3.154537683455627e-05, "loss": 0.0373, "step": 88980 }, { "epoch": 0.04495, "grad_norm": 0.07747284322977066, "learning_rate": 3.154138726114872e-05, "loss": 0.0365, "step": 88990 }, { "epoch": 0.045, "grad_norm": 0.0795535296201706, "learning_rate": 3.15373975089021e-05, "loss": 0.0366, "step": 89000 }, { "epoch": 0.04505, "grad_norm": 0.0787782073020935, "learning_rate": 3.15334075779255e-05, "loss": 0.0363, "step": 89010 }, { "epoch": 0.0451, "grad_norm": 0.07431310415267944, "learning_rate": 3.152941746832798e-05, "loss": 0.0365, "step": 89020 }, { "epoch": 0.04515, "grad_norm": 0.09488285332918167, "learning_rate": 3.152542718021865e-05, "loss": 0.0364, "step": 89030 }, { "epoch": 0.0452, "grad_norm": 0.08887805789709091, "learning_rate": 3.1521436713706585e-05, "loss": 0.0362, "step": 89040 }, { "epoch": 0.04525, "grad_norm": 0.0670175775885582, "learning_rate": 3.151744606890089e-05, "loss": 0.0374, "step": 89050 }, { "epoch": 0.0453, "grad_norm": 0.09125831723213196, "learning_rate": 3.1513455245910666e-05, "loss": 0.0363, "step": 89060 }, { "epoch": 0.04535, "grad_norm": 0.09820882230997086, "learning_rate": 3.150946424484502e-05, "loss": 0.0363, "step": 89070 }, { "epoch": 0.0454, "grad_norm": 0.09063718467950821, "learning_rate": 3.150547306581308e-05, "loss": 0.0353, "step": 89080 }, { "epoch": 0.04545, "grad_norm": 0.07643202692270279, "learning_rate": 3.150148170892394e-05, "loss": 0.0346, "step": 89090 }, { "epoch": 0.0455, "grad_norm": 0.07743243128061295, "learning_rate": 3.149749017428674e-05, "loss": 0.0341, "step": 89100 }, { "epoch": 0.04555, "grad_norm": 0.07924839109182358, "learning_rate": 3.149349846201059e-05, "loss": 0.0363, "step": 89110 }, { "epoch": 0.0456, "grad_norm": 0.0933968722820282, "learning_rate": 3.1489506572204644e-05, "loss": 0.0378, "step": 89120 }, { "epoch": 0.04565, "grad_norm": 0.08461254835128784, "learning_rate": 3.148551450497801e-05, "loss": 0.0385, "step": 89130 }, { "epoch": 0.0457, "grad_norm": 0.08032810688018799, "learning_rate": 3.1481522260439856e-05, "loss": 0.0353, "step": 89140 }, { "epoch": 0.04575, "grad_norm": 0.08250021189451218, "learning_rate": 3.147752983869931e-05, "loss": 0.0366, "step": 89150 }, { "epoch": 0.0458, "grad_norm": 0.09803785383701324, "learning_rate": 3.1473537239865545e-05, "loss": 0.0358, "step": 89160 }, { "epoch": 0.04585, "grad_norm": 0.09414341300725937, "learning_rate": 3.14695444640477e-05, "loss": 0.0363, "step": 89170 }, { "epoch": 0.0459, "grad_norm": 0.08637914806604385, "learning_rate": 3.1465551511354934e-05, "loss": 0.0375, "step": 89180 }, { "epoch": 0.04595, "grad_norm": 0.08976668864488602, "learning_rate": 3.146155838189642e-05, "loss": 0.037, "step": 89190 }, { "epoch": 0.046, "grad_norm": 0.07519635558128357, "learning_rate": 3.1457565075781333e-05, "loss": 0.0343, "step": 89200 }, { "epoch": 0.04605, "grad_norm": 0.09900743514299393, "learning_rate": 3.145357159311884e-05, "loss": 0.0381, "step": 89210 }, { "epoch": 0.0461, "grad_norm": 0.10613662749528885, "learning_rate": 3.144957793401812e-05, "loss": 0.0364, "step": 89220 }, { "epoch": 0.04615, "grad_norm": 0.09278790652751923, "learning_rate": 3.144558409858837e-05, "loss": 0.0348, "step": 89230 }, { "epoch": 0.0462, "grad_norm": 0.08505608141422272, "learning_rate": 3.1441590086938764e-05, "loss": 0.0354, "step": 89240 }, { "epoch": 0.04625, "grad_norm": 0.07010231912136078, "learning_rate": 3.143759589917851e-05, "loss": 0.0353, "step": 89250 }, { "epoch": 0.0463, "grad_norm": 0.0737927183508873, "learning_rate": 3.14336015354168e-05, "loss": 0.0364, "step": 89260 }, { "epoch": 0.04635, "grad_norm": 0.08519606292247772, "learning_rate": 3.1429606995762844e-05, "loss": 0.0364, "step": 89270 }, { "epoch": 0.0464, "grad_norm": 0.0799202173948288, "learning_rate": 3.1425612280325844e-05, "loss": 0.0348, "step": 89280 }, { "epoch": 0.04645, "grad_norm": 0.07240907847881317, "learning_rate": 3.1421617389215025e-05, "loss": 0.0366, "step": 89290 }, { "epoch": 0.0465, "grad_norm": 0.09216149151325226, "learning_rate": 3.14176223225396e-05, "loss": 0.038, "step": 89300 }, { "epoch": 0.04655, "grad_norm": 0.08080775290727615, "learning_rate": 3.1413627080408784e-05, "loss": 0.0351, "step": 89310 }, { "epoch": 0.0466, "grad_norm": 0.07827922701835632, "learning_rate": 3.140963166293181e-05, "loss": 0.0395, "step": 89320 }, { "epoch": 0.04665, "grad_norm": 0.0770445168018341, "learning_rate": 3.140563607021793e-05, "loss": 0.0349, "step": 89330 }, { "epoch": 0.0467, "grad_norm": 0.08079741895198822, "learning_rate": 3.1401640302376346e-05, "loss": 0.0346, "step": 89340 }, { "epoch": 0.04675, "grad_norm": 0.07338516414165497, "learning_rate": 3.139764435951634e-05, "loss": 0.0357, "step": 89350 }, { "epoch": 0.0468, "grad_norm": 0.06986889243125916, "learning_rate": 3.139364824174713e-05, "loss": 0.0337, "step": 89360 }, { "epoch": 0.04685, "grad_norm": 0.09464634954929352, "learning_rate": 3.1389651949177987e-05, "loss": 0.036, "step": 89370 }, { "epoch": 0.0469, "grad_norm": 0.09366074949502945, "learning_rate": 3.138565548191814e-05, "loss": 0.0372, "step": 89380 }, { "epoch": 0.04695, "grad_norm": 0.07965853065252304, "learning_rate": 3.138165884007689e-05, "loss": 0.0354, "step": 89390 }, { "epoch": 0.047, "grad_norm": 0.08097198605537415, "learning_rate": 3.137766202376348e-05, "loss": 0.0368, "step": 89400 }, { "epoch": 0.04705, "grad_norm": 0.07096298038959503, "learning_rate": 3.137366503308719e-05, "loss": 0.0372, "step": 89410 }, { "epoch": 0.0471, "grad_norm": 0.06730058044195175, "learning_rate": 3.136966786815729e-05, "loss": 0.0347, "step": 89420 }, { "epoch": 0.04715, "grad_norm": 0.07121149450540543, "learning_rate": 3.136567052908306e-05, "loss": 0.0361, "step": 89430 }, { "epoch": 0.0472, "grad_norm": 0.07624838501214981, "learning_rate": 3.136167301597379e-05, "loss": 0.0359, "step": 89440 }, { "epoch": 0.04725, "grad_norm": 0.0810113474726677, "learning_rate": 3.135767532893877e-05, "loss": 0.0366, "step": 89450 }, { "epoch": 0.0473, "grad_norm": 0.08344297111034393, "learning_rate": 3.13536774680873e-05, "loss": 0.0356, "step": 89460 }, { "epoch": 0.04735, "grad_norm": 0.07102300971746445, "learning_rate": 3.1349679433528666e-05, "loss": 0.0354, "step": 89470 }, { "epoch": 0.0474, "grad_norm": 0.07442963868379593, "learning_rate": 3.134568122537219e-05, "loss": 0.0356, "step": 89480 }, { "epoch": 0.04745, "grad_norm": 0.08615221083164215, "learning_rate": 3.134168284372717e-05, "loss": 0.0364, "step": 89490 }, { "epoch": 0.0475, "grad_norm": 0.06951931864023209, "learning_rate": 3.1337684288702926e-05, "loss": 0.0368, "step": 89500 }, { "epoch": 0.04755, "grad_norm": 0.07635512948036194, "learning_rate": 3.133368556040877e-05, "loss": 0.0348, "step": 89510 }, { "epoch": 0.0476, "grad_norm": 0.0834120437502861, "learning_rate": 3.132968665895404e-05, "loss": 0.0364, "step": 89520 }, { "epoch": 0.04765, "grad_norm": 0.10034742951393127, "learning_rate": 3.1325687584448046e-05, "loss": 0.0383, "step": 89530 }, { "epoch": 0.0477, "grad_norm": 0.09734785556793213, "learning_rate": 3.132168833700013e-05, "loss": 0.0361, "step": 89540 }, { "epoch": 0.04775, "grad_norm": 0.07262781262397766, "learning_rate": 3.1317688916719636e-05, "loss": 0.0361, "step": 89550 }, { "epoch": 0.0478, "grad_norm": 0.08736280351877213, "learning_rate": 3.1313689323715895e-05, "loss": 0.0361, "step": 89560 }, { "epoch": 0.04785, "grad_norm": 0.08496066927909851, "learning_rate": 3.130968955809825e-05, "loss": 0.0375, "step": 89570 }, { "epoch": 0.0479, "grad_norm": 0.07764924317598343, "learning_rate": 3.130568961997608e-05, "loss": 0.0346, "step": 89580 }, { "epoch": 0.04795, "grad_norm": 0.08531037718057632, "learning_rate": 3.1301689509458715e-05, "loss": 0.0355, "step": 89590 }, { "epoch": 0.048, "grad_norm": 0.10499384999275208, "learning_rate": 3.1297689226655534e-05, "loss": 0.0374, "step": 89600 }, { "epoch": 0.04805, "grad_norm": 0.09910687059164047, "learning_rate": 3.129368877167589e-05, "loss": 0.0368, "step": 89610 }, { "epoch": 0.0481, "grad_norm": 0.10891690850257874, "learning_rate": 3.128968814462916e-05, "loss": 0.0368, "step": 89620 }, { "epoch": 0.04815, "grad_norm": 0.08418095111846924, "learning_rate": 3.128568734562472e-05, "loss": 0.0366, "step": 89630 }, { "epoch": 0.0482, "grad_norm": 0.09086181968450546, "learning_rate": 3.128168637477195e-05, "loss": 0.037, "step": 89640 }, { "epoch": 0.04825, "grad_norm": 0.11261242628097534, "learning_rate": 3.1277685232180234e-05, "loss": 0.0377, "step": 89650 }, { "epoch": 0.0483, "grad_norm": 0.08868662267923355, "learning_rate": 3.1273683917958965e-05, "loss": 0.04, "step": 89660 }, { "epoch": 0.04835, "grad_norm": 0.0898551195859909, "learning_rate": 3.126968243221752e-05, "loss": 0.0366, "step": 89670 }, { "epoch": 0.0484, "grad_norm": 0.08138702809810638, "learning_rate": 3.126568077506533e-05, "loss": 0.0362, "step": 89680 }, { "epoch": 0.04845, "grad_norm": 0.09012820571660995, "learning_rate": 3.126167894661177e-05, "loss": 0.0355, "step": 89690 }, { "epoch": 0.0485, "grad_norm": 0.08225119113922119, "learning_rate": 3.125767694696627e-05, "loss": 0.035, "step": 89700 }, { "epoch": 0.04855, "grad_norm": 0.08970760554075241, "learning_rate": 3.125367477623822e-05, "loss": 0.0363, "step": 89710 }, { "epoch": 0.0486, "grad_norm": 0.08274450898170471, "learning_rate": 3.124967243453707e-05, "loss": 0.0351, "step": 89720 }, { "epoch": 0.04865, "grad_norm": 0.08255013078451157, "learning_rate": 3.124566992197221e-05, "loss": 0.0354, "step": 89730 }, { "epoch": 0.0487, "grad_norm": 0.06928831338882446, "learning_rate": 3.1241667238653084e-05, "loss": 0.0347, "step": 89740 }, { "epoch": 0.04875, "grad_norm": 0.08170973509550095, "learning_rate": 3.123766438468912e-05, "loss": 0.0367, "step": 89750 }, { "epoch": 0.0488, "grad_norm": 0.06689556688070297, "learning_rate": 3.123366136018975e-05, "loss": 0.0345, "step": 89760 }, { "epoch": 0.04885, "grad_norm": 0.07722654193639755, "learning_rate": 3.1229658165264424e-05, "loss": 0.0362, "step": 89770 }, { "epoch": 0.0489, "grad_norm": 0.08215050399303436, "learning_rate": 3.122565480002259e-05, "loss": 0.0339, "step": 89780 }, { "epoch": 0.04895, "grad_norm": 0.10155977308750153, "learning_rate": 3.122165126457369e-05, "loss": 0.0363, "step": 89790 }, { "epoch": 0.049, "grad_norm": 0.08723064512014389, "learning_rate": 3.1217647559027176e-05, "loss": 0.0344, "step": 89800 }, { "epoch": 0.04905, "grad_norm": 0.09421028941869736, "learning_rate": 3.121364368349252e-05, "loss": 0.0371, "step": 89810 }, { "epoch": 0.0491, "grad_norm": 0.08751319348812103, "learning_rate": 3.120963963807918e-05, "loss": 0.0356, "step": 89820 }, { "epoch": 0.04915, "grad_norm": 0.07526911795139313, "learning_rate": 3.1205635422896616e-05, "loss": 0.0345, "step": 89830 }, { "epoch": 0.0492, "grad_norm": 0.08347878605127335, "learning_rate": 3.120163103805432e-05, "loss": 0.0356, "step": 89840 }, { "epoch": 0.04925, "grad_norm": 0.07796986401081085, "learning_rate": 3.119762648366176e-05, "loss": 0.035, "step": 89850 }, { "epoch": 0.0493, "grad_norm": 0.08017401397228241, "learning_rate": 3.1193621759828415e-05, "loss": 0.037, "step": 89860 }, { "epoch": 0.04935, "grad_norm": 0.09265467524528503, "learning_rate": 3.118961686666379e-05, "loss": 0.0356, "step": 89870 }, { "epoch": 0.0494, "grad_norm": 0.11003389209508896, "learning_rate": 3.118561180427736e-05, "loss": 0.0362, "step": 89880 }, { "epoch": 0.04945, "grad_norm": 0.08867444843053818, "learning_rate": 3.1181606572778623e-05, "loss": 0.0364, "step": 89890 }, { "epoch": 0.0495, "grad_norm": 0.08906946331262589, "learning_rate": 3.117760117227708e-05, "loss": 0.0362, "step": 89900 }, { "epoch": 0.04955, "grad_norm": 0.08210599422454834, "learning_rate": 3.1173595602882255e-05, "loss": 0.0361, "step": 89910 }, { "epoch": 0.0496, "grad_norm": 0.08254941552877426, "learning_rate": 3.116958986470364e-05, "loss": 0.0346, "step": 89920 }, { "epoch": 0.04965, "grad_norm": 0.08105908334255219, "learning_rate": 3.116558395785075e-05, "loss": 0.0361, "step": 89930 }, { "epoch": 0.0497, "grad_norm": 0.08087185770273209, "learning_rate": 3.116157788243311e-05, "loss": 0.0354, "step": 89940 }, { "epoch": 0.04975, "grad_norm": 0.09427832067012787, "learning_rate": 3.115757163856026e-05, "loss": 0.0355, "step": 89950 }, { "epoch": 0.0498, "grad_norm": 0.09030015021562576, "learning_rate": 3.1153565226341695e-05, "loss": 0.0348, "step": 89960 }, { "epoch": 0.04985, "grad_norm": 0.08178424835205078, "learning_rate": 3.114955864588698e-05, "loss": 0.034, "step": 89970 }, { "epoch": 0.0499, "grad_norm": 0.07544025033712387, "learning_rate": 3.114555189730565e-05, "loss": 0.0357, "step": 89980 }, { "epoch": 0.04995, "grad_norm": 0.07285235822200775, "learning_rate": 3.114154498070723e-05, "loss": 0.0348, "step": 89990 }, { "epoch": 0.05, "grad_norm": 0.08107560873031616, "learning_rate": 3.113753789620127e-05, "loss": 0.0355, "step": 90000 }, { "epoch": 0.05005, "grad_norm": 0.06690838932991028, "learning_rate": 3.113353064389734e-05, "loss": 0.0339, "step": 90010 }, { "epoch": 0.0501, "grad_norm": 0.09131324291229248, "learning_rate": 3.1129523223904984e-05, "loss": 0.0355, "step": 90020 }, { "epoch": 0.05015, "grad_norm": 0.08263172954320908, "learning_rate": 3.1125515636333766e-05, "loss": 0.0353, "step": 90030 }, { "epoch": 0.0502, "grad_norm": 0.08971139043569565, "learning_rate": 3.112150788129326e-05, "loss": 0.0381, "step": 90040 }, { "epoch": 0.05025, "grad_norm": 0.08543914556503296, "learning_rate": 3.111749995889302e-05, "loss": 0.0386, "step": 90050 }, { "epoch": 0.0503, "grad_norm": 0.07980793714523315, "learning_rate": 3.111349186924263e-05, "loss": 0.0346, "step": 90060 }, { "epoch": 0.05035, "grad_norm": 0.08420425653457642, "learning_rate": 3.110948361245166e-05, "loss": 0.0339, "step": 90070 }, { "epoch": 0.0504, "grad_norm": 0.08622614294290543, "learning_rate": 3.110547518862971e-05, "loss": 0.0373, "step": 90080 }, { "epoch": 0.05045, "grad_norm": 0.06477276235818863, "learning_rate": 3.1101466597886365e-05, "loss": 0.0362, "step": 90090 }, { "epoch": 0.0505, "grad_norm": 0.08431649208068848, "learning_rate": 3.1097457840331217e-05, "loss": 0.037, "step": 90100 }, { "epoch": 0.05055, "grad_norm": 0.06587330996990204, "learning_rate": 3.109344891607386e-05, "loss": 0.0356, "step": 90110 }, { "epoch": 0.0506, "grad_norm": 0.10042694956064224, "learning_rate": 3.10894398252239e-05, "loss": 0.0385, "step": 90120 }, { "epoch": 0.05065, "grad_norm": 0.08202438056468964, "learning_rate": 3.1085430567890937e-05, "loss": 0.036, "step": 90130 }, { "epoch": 0.0507, "grad_norm": 0.07736998051404953, "learning_rate": 3.10814211441846e-05, "loss": 0.0359, "step": 90140 }, { "epoch": 0.05075, "grad_norm": 0.09101607650518417, "learning_rate": 3.107741155421448e-05, "loss": 0.0351, "step": 90150 }, { "epoch": 0.0508, "grad_norm": 0.07944773882627487, "learning_rate": 3.107340179809022e-05, "loss": 0.0352, "step": 90160 }, { "epoch": 0.05085, "grad_norm": 0.07260844856500626, "learning_rate": 3.106939187592143e-05, "loss": 0.0361, "step": 90170 }, { "epoch": 0.0509, "grad_norm": 0.07529579102993011, "learning_rate": 3.106538178781775e-05, "loss": 0.0352, "step": 90180 }, { "epoch": 0.05095, "grad_norm": 0.10861407220363617, "learning_rate": 3.106137153388882e-05, "loss": 0.0381, "step": 90190 }, { "epoch": 0.051, "grad_norm": 0.079187773168087, "learning_rate": 3.105736111424425e-05, "loss": 0.0344, "step": 90200 }, { "epoch": 0.05105, "grad_norm": 0.06789105385541916, "learning_rate": 3.105335052899372e-05, "loss": 0.0348, "step": 90210 }, { "epoch": 0.0511, "grad_norm": 0.07160353660583496, "learning_rate": 3.104933977824685e-05, "loss": 0.0357, "step": 90220 }, { "epoch": 0.05115, "grad_norm": 0.0799093246459961, "learning_rate": 3.104532886211331e-05, "loss": 0.0363, "step": 90230 }, { "epoch": 0.0512, "grad_norm": 0.08041369915008545, "learning_rate": 3.104131778070274e-05, "loss": 0.0346, "step": 90240 }, { "epoch": 0.05125, "grad_norm": 0.06926840543746948, "learning_rate": 3.1037306534124826e-05, "loss": 0.0355, "step": 90250 }, { "epoch": 0.0513, "grad_norm": 0.07907085865736008, "learning_rate": 3.103329512248922e-05, "loss": 0.0358, "step": 90260 }, { "epoch": 0.05135, "grad_norm": 0.08070918172597885, "learning_rate": 3.102928354590558e-05, "loss": 0.0365, "step": 90270 }, { "epoch": 0.0514, "grad_norm": 0.08280420303344727, "learning_rate": 3.102527180448359e-05, "loss": 0.0375, "step": 90280 }, { "epoch": 0.05145, "grad_norm": 0.08545567095279694, "learning_rate": 3.1021259898332944e-05, "loss": 0.0378, "step": 90290 }, { "epoch": 0.0515, "grad_norm": 0.10459499806165695, "learning_rate": 3.1017247827563306e-05, "loss": 0.0344, "step": 90300 }, { "epoch": 0.05155, "grad_norm": 0.08787772804498672, "learning_rate": 3.1013235592284386e-05, "loss": 0.0357, "step": 90310 }, { "epoch": 0.0516, "grad_norm": 0.08612469583749771, "learning_rate": 3.100922319260585e-05, "loss": 0.0365, "step": 90320 }, { "epoch": 0.05165, "grad_norm": 0.08855035156011581, "learning_rate": 3.1005210628637414e-05, "loss": 0.0367, "step": 90330 }, { "epoch": 0.0517, "grad_norm": 0.09178449958562851, "learning_rate": 3.100119790048877e-05, "loss": 0.0355, "step": 90340 }, { "epoch": 0.05175, "grad_norm": 0.08951833844184875, "learning_rate": 3.0997185008269645e-05, "loss": 0.0383, "step": 90350 }, { "epoch": 0.0518, "grad_norm": 0.07153066247701645, "learning_rate": 3.099317195208972e-05, "loss": 0.036, "step": 90360 }, { "epoch": 0.05185, "grad_norm": 0.08133627474308014, "learning_rate": 3.098915873205874e-05, "loss": 0.0354, "step": 90370 }, { "epoch": 0.0519, "grad_norm": 0.07581649720668793, "learning_rate": 3.0985145348286394e-05, "loss": 0.0362, "step": 90380 }, { "epoch": 0.05195, "grad_norm": 0.07978670299053192, "learning_rate": 3.098113180088243e-05, "loss": 0.0354, "step": 90390 }, { "epoch": 0.052, "grad_norm": 0.07078913599252701, "learning_rate": 3.097711808995657e-05, "loss": 0.0343, "step": 90400 }, { "epoch": 0.05205, "grad_norm": 0.07472700625658035, "learning_rate": 3.0973104215618546e-05, "loss": 0.0353, "step": 90410 }, { "epoch": 0.0521, "grad_norm": 0.10212170332670212, "learning_rate": 3.09690901779781e-05, "loss": 0.0377, "step": 90420 }, { "epoch": 0.05215, "grad_norm": 0.09118767082691193, "learning_rate": 3.0965075977144964e-05, "loss": 0.0366, "step": 90430 }, { "epoch": 0.0522, "grad_norm": 0.0685313269495964, "learning_rate": 3.09610616132289e-05, "loss": 0.0362, "step": 90440 }, { "epoch": 0.05225, "grad_norm": 0.07697869837284088, "learning_rate": 3.0957047086339644e-05, "loss": 0.0354, "step": 90450 }, { "epoch": 0.0523, "grad_norm": 0.07973739504814148, "learning_rate": 3.095303239658696e-05, "loss": 0.036, "step": 90460 }, { "epoch": 0.05235, "grad_norm": 0.08885246515274048, "learning_rate": 3.094901754408061e-05, "loss": 0.0348, "step": 90470 }, { "epoch": 0.0524, "grad_norm": 0.07255177944898605, "learning_rate": 3.0945002528930356e-05, "loss": 0.0355, "step": 90480 }, { "epoch": 0.05245, "grad_norm": 0.0941619724035263, "learning_rate": 3.094098735124596e-05, "loss": 0.0363, "step": 90490 }, { "epoch": 0.0525, "grad_norm": 0.11020806431770325, "learning_rate": 3.093697201113721e-05, "loss": 0.0369, "step": 90500 }, { "epoch": 0.05255, "grad_norm": 0.0922388806939125, "learning_rate": 3.093295650871387e-05, "loss": 0.0358, "step": 90510 }, { "epoch": 0.0526, "grad_norm": 0.09871993213891983, "learning_rate": 3.092894084408573e-05, "loss": 0.0378, "step": 90520 }, { "epoch": 0.05265, "grad_norm": 0.10309197753667831, "learning_rate": 3.0924925017362564e-05, "loss": 0.0359, "step": 90530 }, { "epoch": 0.0527, "grad_norm": 0.09653574228286743, "learning_rate": 3.092090902865419e-05, "loss": 0.0357, "step": 90540 }, { "epoch": 0.05275, "grad_norm": 0.08390119671821594, "learning_rate": 3.091689287807038e-05, "loss": 0.0352, "step": 90550 }, { "epoch": 0.0528, "grad_norm": 0.08636614680290222, "learning_rate": 3.091287656572095e-05, "loss": 0.0345, "step": 90560 }, { "epoch": 0.05285, "grad_norm": 0.07649514079093933, "learning_rate": 3.0908860091715686e-05, "loss": 0.0356, "step": 90570 }, { "epoch": 0.0529, "grad_norm": 0.07269894331693649, "learning_rate": 3.090484345616441e-05, "loss": 0.0343, "step": 90580 }, { "epoch": 0.05295, "grad_norm": 0.09663021564483643, "learning_rate": 3.090082665917693e-05, "loss": 0.0358, "step": 90590 }, { "epoch": 0.053, "grad_norm": 0.08514353632926941, "learning_rate": 3.089680970086307e-05, "loss": 0.0351, "step": 90600 }, { "epoch": 0.05305, "grad_norm": 0.08956708759069443, "learning_rate": 3.0892792581332645e-05, "loss": 0.0357, "step": 90610 }, { "epoch": 0.0531, "grad_norm": 0.08290702104568481, "learning_rate": 3.088877530069549e-05, "loss": 0.035, "step": 90620 }, { "epoch": 0.05315, "grad_norm": 0.0854518711566925, "learning_rate": 3.088475785906143e-05, "loss": 0.0365, "step": 90630 }, { "epoch": 0.0532, "grad_norm": 0.07518326491117477, "learning_rate": 3.088074025654029e-05, "loss": 0.0349, "step": 90640 }, { "epoch": 0.05325, "grad_norm": 0.0811287984251976, "learning_rate": 3.0876722493241924e-05, "loss": 0.0356, "step": 90650 }, { "epoch": 0.0533, "grad_norm": 0.08146429806947708, "learning_rate": 3.0872704569276184e-05, "loss": 0.0347, "step": 90660 }, { "epoch": 0.05335, "grad_norm": 0.08974656462669373, "learning_rate": 3.0868686484752897e-05, "loss": 0.0356, "step": 90670 }, { "epoch": 0.0534, "grad_norm": 0.07963674515485764, "learning_rate": 3.086466823978193e-05, "loss": 0.0354, "step": 90680 }, { "epoch": 0.05345, "grad_norm": 0.07970944792032242, "learning_rate": 3.086064983447314e-05, "loss": 0.0357, "step": 90690 }, { "epoch": 0.0535, "grad_norm": 0.09441101551055908, "learning_rate": 3.085663126893637e-05, "loss": 0.036, "step": 90700 }, { "epoch": 0.05355, "grad_norm": 0.08753246068954468, "learning_rate": 3.085261254328152e-05, "loss": 0.0361, "step": 90710 }, { "epoch": 0.0536, "grad_norm": 0.0795416384935379, "learning_rate": 3.084859365761843e-05, "loss": 0.0356, "step": 90720 }, { "epoch": 0.05365, "grad_norm": 0.07005535811185837, "learning_rate": 3.0844574612057e-05, "loss": 0.0358, "step": 90730 }, { "epoch": 0.0537, "grad_norm": 0.10531385987997055, "learning_rate": 3.0840555406707086e-05, "loss": 0.0361, "step": 90740 }, { "epoch": 0.05375, "grad_norm": 0.07994677871465683, "learning_rate": 3.083653604167858e-05, "loss": 0.0345, "step": 90750 }, { "epoch": 0.0538, "grad_norm": 0.09184665232896805, "learning_rate": 3.083251651708137e-05, "loss": 0.0349, "step": 90760 }, { "epoch": 0.05385, "grad_norm": 0.10555008798837662, "learning_rate": 3.082849683302536e-05, "loss": 0.034, "step": 90770 }, { "epoch": 0.0539, "grad_norm": 0.07258718460798264, "learning_rate": 3.0824476989620424e-05, "loss": 0.0366, "step": 90780 }, { "epoch": 0.05395, "grad_norm": 0.09504424780607224, "learning_rate": 3.082045698697648e-05, "loss": 0.0343, "step": 90790 }, { "epoch": 0.054, "grad_norm": 0.09034702926874161, "learning_rate": 3.0816436825203435e-05, "loss": 0.0367, "step": 90800 }, { "epoch": 0.05405, "grad_norm": 0.07364467531442642, "learning_rate": 3.081241650441118e-05, "loss": 0.0372, "step": 90810 }, { "epoch": 0.0541, "grad_norm": 0.06732908636331558, "learning_rate": 3.080839602470965e-05, "loss": 0.0346, "step": 90820 }, { "epoch": 0.05415, "grad_norm": 0.07207812368869781, "learning_rate": 3.080437538620876e-05, "loss": 0.0347, "step": 90830 }, { "epoch": 0.0542, "grad_norm": 0.08139686286449432, "learning_rate": 3.080035458901842e-05, "loss": 0.0353, "step": 90840 }, { "epoch": 0.05425, "grad_norm": 0.07417777180671692, "learning_rate": 3.0796333633248566e-05, "loss": 0.034, "step": 90850 }, { "epoch": 0.0543, "grad_norm": 0.07608595490455627, "learning_rate": 3.079231251900912e-05, "loss": 0.0354, "step": 90860 }, { "epoch": 0.05435, "grad_norm": 0.08691065013408661, "learning_rate": 3.0788291246410036e-05, "loss": 0.0341, "step": 90870 }, { "epoch": 0.0544, "grad_norm": 0.08706577867269516, "learning_rate": 3.078426981556124e-05, "loss": 0.0361, "step": 90880 }, { "epoch": 0.05445, "grad_norm": 0.09268978983163834, "learning_rate": 3.0780248226572686e-05, "loss": 0.0369, "step": 90890 }, { "epoch": 0.0545, "grad_norm": 0.10335977375507355, "learning_rate": 3.077622647955431e-05, "loss": 0.0356, "step": 90900 }, { "epoch": 0.05455, "grad_norm": 0.07753488421440125, "learning_rate": 3.077220457461607e-05, "loss": 0.0347, "step": 90910 }, { "epoch": 0.0546, "grad_norm": 0.07556873559951782, "learning_rate": 3.076818251186793e-05, "loss": 0.0377, "step": 90920 }, { "epoch": 0.05465, "grad_norm": 0.08519326895475388, "learning_rate": 3.0764160291419846e-05, "loss": 0.0358, "step": 90930 }, { "epoch": 0.0547, "grad_norm": 0.09992724657058716, "learning_rate": 3.0760137913381795e-05, "loss": 0.0359, "step": 90940 }, { "epoch": 0.05475, "grad_norm": 0.08147275447845459, "learning_rate": 3.075611537786372e-05, "loss": 0.0368, "step": 90950 }, { "epoch": 0.0548, "grad_norm": 0.09403491765260696, "learning_rate": 3.075209268497563e-05, "loss": 0.0364, "step": 90960 }, { "epoch": 0.05485, "grad_norm": 0.09434369951486588, "learning_rate": 3.0748069834827467e-05, "loss": 0.0346, "step": 90970 }, { "epoch": 0.0549, "grad_norm": 0.07880568504333496, "learning_rate": 3.074404682752925e-05, "loss": 0.035, "step": 90980 }, { "epoch": 0.05495, "grad_norm": 0.08967886865139008, "learning_rate": 3.074002366319094e-05, "loss": 0.0365, "step": 90990 }, { "epoch": 0.055, "grad_norm": 0.08391301333904266, "learning_rate": 3.0736000341922554e-05, "loss": 0.0363, "step": 91000 }, { "epoch": 0.05505, "grad_norm": 0.10305073857307434, "learning_rate": 3.073197686383406e-05, "loss": 0.0359, "step": 91010 }, { "epoch": 0.0551, "grad_norm": 0.09799043834209442, "learning_rate": 3.0727953229035486e-05, "loss": 0.0369, "step": 91020 }, { "epoch": 0.05515, "grad_norm": 0.09323476999998093, "learning_rate": 3.072392943763681e-05, "loss": 0.0354, "step": 91030 }, { "epoch": 0.0552, "grad_norm": 0.08354030549526215, "learning_rate": 3.071990548974806e-05, "loss": 0.0351, "step": 91040 }, { "epoch": 0.05525, "grad_norm": 0.07392622530460358, "learning_rate": 3.0715881385479236e-05, "loss": 0.0354, "step": 91050 }, { "epoch": 0.0553, "grad_norm": 0.0685829147696495, "learning_rate": 3.071185712494037e-05, "loss": 0.0363, "step": 91060 }, { "epoch": 0.05535, "grad_norm": 0.095224529504776, "learning_rate": 3.070783270824147e-05, "loss": 0.0363, "step": 91070 }, { "epoch": 0.0554, "grad_norm": 0.09711430221796036, "learning_rate": 3.0703808135492574e-05, "loss": 0.0399, "step": 91080 }, { "epoch": 0.05545, "grad_norm": 0.09328602254390717, "learning_rate": 3.06997834068037e-05, "loss": 0.0368, "step": 91090 }, { "epoch": 0.0555, "grad_norm": 0.0859946459531784, "learning_rate": 3.069575852228489e-05, "loss": 0.0343, "step": 91100 }, { "epoch": 0.05555, "grad_norm": 0.08314044773578644, "learning_rate": 3.069173348204618e-05, "loss": 0.0365, "step": 91110 }, { "epoch": 0.0556, "grad_norm": 0.08494190871715546, "learning_rate": 3.068770828619762e-05, "loss": 0.035, "step": 91120 }, { "epoch": 0.05565, "grad_norm": 0.06117646396160126, "learning_rate": 3.0683682934849254e-05, "loss": 0.0352, "step": 91130 }, { "epoch": 0.0557, "grad_norm": 0.07039974629878998, "learning_rate": 3.0679657428111125e-05, "loss": 0.0361, "step": 91140 }, { "epoch": 0.05575, "grad_norm": 0.06723567098379135, "learning_rate": 3.0675631766093304e-05, "loss": 0.0358, "step": 91150 }, { "epoch": 0.0558, "grad_norm": 0.06547726690769196, "learning_rate": 3.067160594890583e-05, "loss": 0.0368, "step": 91160 }, { "epoch": 0.05585, "grad_norm": 0.09160833805799484, "learning_rate": 3.06675799766588e-05, "loss": 0.0381, "step": 91170 }, { "epoch": 0.0559, "grad_norm": 0.08084623515605927, "learning_rate": 3.0663553849462245e-05, "loss": 0.0353, "step": 91180 }, { "epoch": 0.05595, "grad_norm": 0.07233183830976486, "learning_rate": 3.065952756742626e-05, "loss": 0.0371, "step": 91190 }, { "epoch": 0.056, "grad_norm": 0.0768621563911438, "learning_rate": 3.065550113066092e-05, "loss": 0.0351, "step": 91200 }, { "epoch": 0.05605, "grad_norm": 0.07012893259525299, "learning_rate": 3.0651474539276304e-05, "loss": 0.0353, "step": 91210 }, { "epoch": 0.0561, "grad_norm": 0.07694104313850403, "learning_rate": 3.064744779338249e-05, "loss": 0.0367, "step": 91220 }, { "epoch": 0.05615, "grad_norm": 0.0797204077243805, "learning_rate": 3.0643420893089585e-05, "loss": 0.0367, "step": 91230 }, { "epoch": 0.0562, "grad_norm": 0.06652817875146866, "learning_rate": 3.063939383850767e-05, "loss": 0.0345, "step": 91240 }, { "epoch": 0.05625, "grad_norm": 0.07184158265590668, "learning_rate": 3.063536662974684e-05, "loss": 0.035, "step": 91250 }, { "epoch": 0.0563, "grad_norm": 0.0723545253276825, "learning_rate": 3.063133926691721e-05, "loss": 0.037, "step": 91260 }, { "epoch": 0.05635, "grad_norm": 0.0889274924993515, "learning_rate": 3.062731175012888e-05, "loss": 0.0362, "step": 91270 }, { "epoch": 0.0564, "grad_norm": 0.08235359936952591, "learning_rate": 3.062328407949196e-05, "loss": 0.0353, "step": 91280 }, { "epoch": 0.05645, "grad_norm": 0.07834339141845703, "learning_rate": 3.0619256255116566e-05, "loss": 0.0337, "step": 91290 }, { "epoch": 0.0565, "grad_norm": 0.08131270110607147, "learning_rate": 3.061522827711281e-05, "loss": 0.0355, "step": 91300 }, { "epoch": 0.05655, "grad_norm": 0.0728047713637352, "learning_rate": 3.061120014559084e-05, "loss": 0.0347, "step": 91310 }, { "epoch": 0.0566, "grad_norm": 0.07191229611635208, "learning_rate": 3.0607171860660746e-05, "loss": 0.035, "step": 91320 }, { "epoch": 0.05665, "grad_norm": 0.06673692911863327, "learning_rate": 3.060314342243269e-05, "loss": 0.0336, "step": 91330 }, { "epoch": 0.0567, "grad_norm": 0.07257112115621567, "learning_rate": 3.0599114831016796e-05, "loss": 0.0348, "step": 91340 }, { "epoch": 0.05675, "grad_norm": 0.07849828898906708, "learning_rate": 3.0595086086523206e-05, "loss": 0.0351, "step": 91350 }, { "epoch": 0.0568, "grad_norm": 0.0824127122759819, "learning_rate": 3.059105718906206e-05, "loss": 0.0359, "step": 91360 }, { "epoch": 0.05685, "grad_norm": 0.0766623467206955, "learning_rate": 3.0587028138743516e-05, "loss": 0.0356, "step": 91370 }, { "epoch": 0.0569, "grad_norm": 0.07816511392593384, "learning_rate": 3.0582998935677726e-05, "loss": 0.0363, "step": 91380 }, { "epoch": 0.05695, "grad_norm": 0.09526880830526352, "learning_rate": 3.057896957997484e-05, "loss": 0.0356, "step": 91390 }, { "epoch": 0.057, "grad_norm": 0.08799227327108383, "learning_rate": 3.057494007174502e-05, "loss": 0.0343, "step": 91400 }, { "epoch": 0.05705, "grad_norm": 0.09132442623376846, "learning_rate": 3.057091041109843e-05, "loss": 0.036, "step": 91410 }, { "epoch": 0.0571, "grad_norm": 0.12402817606925964, "learning_rate": 3.0566880598145244e-05, "loss": 0.0367, "step": 91420 }, { "epoch": 0.05715, "grad_norm": 0.12232159078121185, "learning_rate": 3.0562850632995624e-05, "loss": 0.0368, "step": 91430 }, { "epoch": 0.0572, "grad_norm": 0.09972596168518066, "learning_rate": 3.0558820515759775e-05, "loss": 0.0374, "step": 91440 }, { "epoch": 0.05725, "grad_norm": 0.1160351112484932, "learning_rate": 3.055479024654785e-05, "loss": 0.0355, "step": 91450 }, { "epoch": 0.0573, "grad_norm": 0.07318606972694397, "learning_rate": 3.055075982547006e-05, "loss": 0.0365, "step": 91460 }, { "epoch": 0.05735, "grad_norm": 0.0797891616821289, "learning_rate": 3.054672925263657e-05, "loss": 0.0351, "step": 91470 }, { "epoch": 0.0574, "grad_norm": 0.0765325278043747, "learning_rate": 3.0542698528157585e-05, "loss": 0.0363, "step": 91480 }, { "epoch": 0.05745, "grad_norm": 0.09091998636722565, "learning_rate": 3.0538667652143306e-05, "loss": 0.035, "step": 91490 }, { "epoch": 0.0575, "grad_norm": 0.07104264199733734, "learning_rate": 3.053463662470394e-05, "loss": 0.0355, "step": 91500 }, { "epoch": 0.05755, "grad_norm": 0.06663914024829865, "learning_rate": 3.053060544594968e-05, "loss": 0.0354, "step": 91510 }, { "epoch": 0.0576, "grad_norm": 0.06117236614227295, "learning_rate": 3.052657411599075e-05, "loss": 0.0343, "step": 91520 }, { "epoch": 0.05765, "grad_norm": 0.08269451558589935, "learning_rate": 3.052254263493736e-05, "loss": 0.0341, "step": 91530 }, { "epoch": 0.0577, "grad_norm": 0.08021652698516846, "learning_rate": 3.051851100289973e-05, "loss": 0.0344, "step": 91540 }, { "epoch": 0.05775, "grad_norm": 0.08179452270269394, "learning_rate": 3.0514479219988073e-05, "loss": 0.035, "step": 91550 }, { "epoch": 0.0578, "grad_norm": 0.08777602016925812, "learning_rate": 3.0510447286312637e-05, "loss": 0.0346, "step": 91560 }, { "epoch": 0.05785, "grad_norm": 0.06952960789203644, "learning_rate": 3.050641520198364e-05, "loss": 0.0348, "step": 91570 }, { "epoch": 0.0579, "grad_norm": 0.08493976294994354, "learning_rate": 3.0502382967111315e-05, "loss": 0.0355, "step": 91580 }, { "epoch": 0.05795, "grad_norm": 0.08295717090368271, "learning_rate": 3.049835058180591e-05, "loss": 0.0339, "step": 91590 }, { "epoch": 0.058, "grad_norm": 0.09477686136960983, "learning_rate": 3.0494318046177668e-05, "loss": 0.0359, "step": 91600 }, { "epoch": 0.05805, "grad_norm": 0.07548397779464722, "learning_rate": 3.0490285360336836e-05, "loss": 0.0349, "step": 91610 }, { "epoch": 0.0581, "grad_norm": 0.07475226372480392, "learning_rate": 3.0486252524393654e-05, "loss": 0.0341, "step": 91620 }, { "epoch": 0.05815, "grad_norm": 0.0728631541132927, "learning_rate": 3.04822195384584e-05, "loss": 0.0336, "step": 91630 }, { "epoch": 0.0582, "grad_norm": 0.07909126579761505, "learning_rate": 3.0478186402641317e-05, "loss": 0.0355, "step": 91640 }, { "epoch": 0.05825, "grad_norm": 0.06893298774957657, "learning_rate": 3.0474153117052684e-05, "loss": 0.0346, "step": 91650 }, { "epoch": 0.0583, "grad_norm": 0.06769859045743942, "learning_rate": 3.0470119681802756e-05, "loss": 0.0346, "step": 91660 }, { "epoch": 0.05835, "grad_norm": 0.08800292015075684, "learning_rate": 3.046608609700181e-05, "loss": 0.0352, "step": 91670 }, { "epoch": 0.0584, "grad_norm": 0.07564336806535721, "learning_rate": 3.0462052362760125e-05, "loss": 0.0345, "step": 91680 }, { "epoch": 0.05845, "grad_norm": 0.06514771282672882, "learning_rate": 3.0458018479187983e-05, "loss": 0.035, "step": 91690 }, { "epoch": 0.0585, "grad_norm": 0.07323320209980011, "learning_rate": 3.045398444639566e-05, "loss": 0.0351, "step": 91700 }, { "epoch": 0.05855, "grad_norm": 0.06429063528776169, "learning_rate": 3.044995026449346e-05, "loss": 0.0339, "step": 91710 }, { "epoch": 0.0586, "grad_norm": 0.07759048789739609, "learning_rate": 3.0445915933591658e-05, "loss": 0.0352, "step": 91720 }, { "epoch": 0.05865, "grad_norm": 0.07607323676347733, "learning_rate": 3.044188145380057e-05, "loss": 0.0335, "step": 91730 }, { "epoch": 0.0587, "grad_norm": 0.07866210490465164, "learning_rate": 3.0437846825230476e-05, "loss": 0.0348, "step": 91740 }, { "epoch": 0.05875, "grad_norm": 0.08538687229156494, "learning_rate": 3.04338120479917e-05, "loss": 0.0351, "step": 91750 }, { "epoch": 0.0588, "grad_norm": 0.08252998441457748, "learning_rate": 3.042977712219454e-05, "loss": 0.0354, "step": 91760 }, { "epoch": 0.05885, "grad_norm": 0.08144179731607437, "learning_rate": 3.042574204794932e-05, "loss": 0.035, "step": 91770 }, { "epoch": 0.0589, "grad_norm": 0.07281932234764099, "learning_rate": 3.0421706825366343e-05, "loss": 0.0342, "step": 91780 }, { "epoch": 0.05895, "grad_norm": 0.09572433680295944, "learning_rate": 3.041767145455594e-05, "loss": 0.0368, "step": 91790 }, { "epoch": 0.059, "grad_norm": 0.08905182778835297, "learning_rate": 3.041363593562844e-05, "loss": 0.0376, "step": 91800 }, { "epoch": 0.05905, "grad_norm": 0.08899427205324173, "learning_rate": 3.040960026869416e-05, "loss": 0.0354, "step": 91810 }, { "epoch": 0.0591, "grad_norm": 0.09908939152956009, "learning_rate": 3.0405564453863445e-05, "loss": 0.0355, "step": 91820 }, { "epoch": 0.05915, "grad_norm": 0.0857677087187767, "learning_rate": 3.0401528491246628e-05, "loss": 0.0363, "step": 91830 }, { "epoch": 0.0592, "grad_norm": 0.08181668072938919, "learning_rate": 3.0397492380954057e-05, "loss": 0.0349, "step": 91840 }, { "epoch": 0.05925, "grad_norm": 0.08873151242733002, "learning_rate": 3.0393456123096065e-05, "loss": 0.0382, "step": 91850 }, { "epoch": 0.0593, "grad_norm": 0.0892871618270874, "learning_rate": 3.0389419717783007e-05, "loss": 0.0382, "step": 91860 }, { "epoch": 0.05935, "grad_norm": 0.07593189179897308, "learning_rate": 3.0385383165125243e-05, "loss": 0.0341, "step": 91870 }, { "epoch": 0.0594, "grad_norm": 0.09186351299285889, "learning_rate": 3.038134646523313e-05, "loss": 0.0357, "step": 91880 }, { "epoch": 0.05945, "grad_norm": 0.10620171576738358, "learning_rate": 3.0377309618217015e-05, "loss": 0.0358, "step": 91890 }, { "epoch": 0.0595, "grad_norm": 0.07334204763174057, "learning_rate": 3.037327262418729e-05, "loss": 0.0354, "step": 91900 }, { "epoch": 0.05955, "grad_norm": 0.08394617587327957, "learning_rate": 3.03692354832543e-05, "loss": 0.0367, "step": 91910 }, { "epoch": 0.0596, "grad_norm": 0.08659309893846512, "learning_rate": 3.036519819552843e-05, "loss": 0.0374, "step": 91920 }, { "epoch": 0.05965, "grad_norm": 0.07451268285512924, "learning_rate": 3.0361160761120056e-05, "loss": 0.0348, "step": 91930 }, { "epoch": 0.0597, "grad_norm": 0.0766073614358902, "learning_rate": 3.035712318013956e-05, "loss": 0.0344, "step": 91940 }, { "epoch": 0.05975, "grad_norm": 0.06007981672883034, "learning_rate": 3.0353085452697333e-05, "loss": 0.0333, "step": 91950 }, { "epoch": 0.0598, "grad_norm": 0.06408718973398209, "learning_rate": 3.0349047578903765e-05, "loss": 0.0344, "step": 91960 }, { "epoch": 0.05985, "grad_norm": 0.10001283884048462, "learning_rate": 3.0345009558869235e-05, "loss": 0.0345, "step": 91970 }, { "epoch": 0.0599, "grad_norm": 0.08132842183113098, "learning_rate": 3.0340971392704167e-05, "loss": 0.0349, "step": 91980 }, { "epoch": 0.05995, "grad_norm": 0.10986791551113129, "learning_rate": 3.0336933080518926e-05, "loss": 0.035, "step": 91990 }, { "epoch": 0.06, "grad_norm": 0.11718198657035828, "learning_rate": 3.0332894622423956e-05, "loss": 0.0342, "step": 92000 }, { "epoch": 0.06005, "grad_norm": 0.07450106739997864, "learning_rate": 3.0328856018529645e-05, "loss": 0.0358, "step": 92010 }, { "epoch": 0.0601, "grad_norm": 0.07318412512540817, "learning_rate": 3.0324817268946416e-05, "loss": 0.0371, "step": 92020 }, { "epoch": 0.06015, "grad_norm": 0.07544299960136414, "learning_rate": 3.032077837378468e-05, "loss": 0.0355, "step": 92030 }, { "epoch": 0.0602, "grad_norm": 0.08277773857116699, "learning_rate": 3.0316739333154857e-05, "loss": 0.0347, "step": 92040 }, { "epoch": 0.06025, "grad_norm": 0.08776474744081497, "learning_rate": 3.0312700147167382e-05, "loss": 0.0343, "step": 92050 }, { "epoch": 0.0603, "grad_norm": 0.07797855138778687, "learning_rate": 3.0308660815932686e-05, "loss": 0.0368, "step": 92060 }, { "epoch": 0.06035, "grad_norm": 0.11843080073595047, "learning_rate": 3.0304621339561196e-05, "loss": 0.0373, "step": 92070 }, { "epoch": 0.0604, "grad_norm": 0.0953860804438591, "learning_rate": 3.0300581718163346e-05, "loss": 0.036, "step": 92080 }, { "epoch": 0.06045, "grad_norm": 0.08200633525848389, "learning_rate": 3.0296541951849594e-05, "loss": 0.0352, "step": 92090 }, { "epoch": 0.0605, "grad_norm": 0.0750812292098999, "learning_rate": 3.0292502040730362e-05, "loss": 0.0375, "step": 92100 }, { "epoch": 0.06055, "grad_norm": 0.07450171560049057, "learning_rate": 3.028846198491612e-05, "loss": 0.0347, "step": 92110 }, { "epoch": 0.0606, "grad_norm": 0.07821846008300781, "learning_rate": 3.028442178451731e-05, "loss": 0.0349, "step": 92120 }, { "epoch": 0.06065, "grad_norm": 0.07577892392873764, "learning_rate": 3.02803814396444e-05, "loss": 0.0362, "step": 92130 }, { "epoch": 0.0607, "grad_norm": 0.09052744507789612, "learning_rate": 3.027634095040784e-05, "loss": 0.0365, "step": 92140 }, { "epoch": 0.06075, "grad_norm": 0.10810858756303787, "learning_rate": 3.0272300316918107e-05, "loss": 0.0372, "step": 92150 }, { "epoch": 0.0608, "grad_norm": 0.1269952356815338, "learning_rate": 3.0268259539285654e-05, "loss": 0.0371, "step": 92160 }, { "epoch": 0.06085, "grad_norm": 0.09791803359985352, "learning_rate": 3.0264218617620975e-05, "loss": 0.0365, "step": 92170 }, { "epoch": 0.0609, "grad_norm": 0.09965765476226807, "learning_rate": 3.0260177552034525e-05, "loss": 0.0364, "step": 92180 }, { "epoch": 0.06095, "grad_norm": 0.09902802109718323, "learning_rate": 3.0256136342636803e-05, "loss": 0.0356, "step": 92190 }, { "epoch": 0.061, "grad_norm": 0.08540318161249161, "learning_rate": 3.0252094989538287e-05, "loss": 0.0354, "step": 92200 }, { "epoch": 0.06105, "grad_norm": 0.08087259531021118, "learning_rate": 3.0248053492849472e-05, "loss": 0.0357, "step": 92210 }, { "epoch": 0.0611, "grad_norm": 0.07754070311784744, "learning_rate": 3.024401185268084e-05, "loss": 0.0355, "step": 92220 }, { "epoch": 0.06115, "grad_norm": 0.0839327871799469, "learning_rate": 3.02399700691429e-05, "loss": 0.0376, "step": 92230 }, { "epoch": 0.0612, "grad_norm": 0.09323536604642868, "learning_rate": 3.0235928142346143e-05, "loss": 0.0356, "step": 92240 }, { "epoch": 0.06125, "grad_norm": 0.07739691436290741, "learning_rate": 3.0231886072401072e-05, "loss": 0.0337, "step": 92250 }, { "epoch": 0.0613, "grad_norm": 0.08068235963582993, "learning_rate": 3.022784385941821e-05, "loss": 0.0364, "step": 92260 }, { "epoch": 0.06135, "grad_norm": 0.08458296209573746, "learning_rate": 3.022380150350806e-05, "loss": 0.0357, "step": 92270 }, { "epoch": 0.0614, "grad_norm": 0.07799829542636871, "learning_rate": 3.0219759004781134e-05, "loss": 0.035, "step": 92280 }, { "epoch": 0.06145, "grad_norm": 0.08659221231937408, "learning_rate": 3.0215716363347956e-05, "loss": 0.0364, "step": 92290 }, { "epoch": 0.0615, "grad_norm": 0.08793576806783676, "learning_rate": 3.0211673579319067e-05, "loss": 0.0353, "step": 92300 }, { "epoch": 0.06155, "grad_norm": 0.07005950063467026, "learning_rate": 3.0207630652804963e-05, "loss": 0.036, "step": 92310 }, { "epoch": 0.0616, "grad_norm": 0.07392650842666626, "learning_rate": 3.0203587583916204e-05, "loss": 0.0346, "step": 92320 }, { "epoch": 0.06165, "grad_norm": 0.0666443482041359, "learning_rate": 3.0199544372763304e-05, "loss": 0.034, "step": 92330 }, { "epoch": 0.0617, "grad_norm": 0.06908619403839111, "learning_rate": 3.019550101945683e-05, "loss": 0.0356, "step": 92340 }, { "epoch": 0.06175, "grad_norm": 0.07427336275577545, "learning_rate": 3.0191457524107304e-05, "loss": 0.0365, "step": 92350 }, { "epoch": 0.0618, "grad_norm": 0.07534376531839371, "learning_rate": 3.018741388682528e-05, "loss": 0.0381, "step": 92360 }, { "epoch": 0.06185, "grad_norm": 0.08381146192550659, "learning_rate": 3.0183370107721297e-05, "loss": 0.0359, "step": 92370 }, { "epoch": 0.0619, "grad_norm": 0.08099161833524704, "learning_rate": 3.0179326186905936e-05, "loss": 0.0348, "step": 92380 }, { "epoch": 0.06195, "grad_norm": 0.08048680424690247, "learning_rate": 3.017528212448974e-05, "loss": 0.0346, "step": 92390 }, { "epoch": 0.062, "grad_norm": 0.0848207175731659, "learning_rate": 3.017123792058328e-05, "loss": 0.0352, "step": 92400 }, { "epoch": 0.06205, "grad_norm": 0.07833609730005264, "learning_rate": 3.016719357529711e-05, "loss": 0.0351, "step": 92410 }, { "epoch": 0.0621, "grad_norm": 0.0627160593867302, "learning_rate": 3.0163149088741816e-05, "loss": 0.0345, "step": 92420 }, { "epoch": 0.06215, "grad_norm": 0.06662797927856445, "learning_rate": 3.0159104461027953e-05, "loss": 0.0366, "step": 92430 }, { "epoch": 0.0622, "grad_norm": 0.09291603416204453, "learning_rate": 3.015505969226612e-05, "loss": 0.0365, "step": 92440 }, { "epoch": 0.06225, "grad_norm": 0.09162125736474991, "learning_rate": 3.0151014782566887e-05, "loss": 0.0344, "step": 92450 }, { "epoch": 0.0623, "grad_norm": 0.10630109906196594, "learning_rate": 3.014696973204086e-05, "loss": 0.0365, "step": 92460 }, { "epoch": 0.06235, "grad_norm": 0.08084352314472198, "learning_rate": 3.01429245407986e-05, "loss": 0.0358, "step": 92470 }, { "epoch": 0.0624, "grad_norm": 0.062248535454273224, "learning_rate": 3.0138879208950722e-05, "loss": 0.0365, "step": 92480 }, { "epoch": 0.06245, "grad_norm": 0.08606372773647308, "learning_rate": 3.0134833736607815e-05, "loss": 0.0358, "step": 92490 }, { "epoch": 0.0625, "grad_norm": 0.07619146257638931, "learning_rate": 3.0130788123880476e-05, "loss": 0.0352, "step": 92500 }, { "epoch": 0.06255, "grad_norm": 0.07813157886266708, "learning_rate": 3.0126742370879324e-05, "loss": 0.0353, "step": 92510 }, { "epoch": 0.0626, "grad_norm": 0.07503335922956467, "learning_rate": 3.0122696477714962e-05, "loss": 0.0347, "step": 92520 }, { "epoch": 0.06265, "grad_norm": 0.06751769781112671, "learning_rate": 3.0118650444498005e-05, "loss": 0.0365, "step": 92530 }, { "epoch": 0.0627, "grad_norm": 0.07441738992929459, "learning_rate": 3.011460427133906e-05, "loss": 0.0354, "step": 92540 }, { "epoch": 0.06275, "grad_norm": 0.06860997527837753, "learning_rate": 3.0110557958348762e-05, "loss": 0.036, "step": 92550 }, { "epoch": 0.0628, "grad_norm": 0.06549837440252304, "learning_rate": 3.0106511505637725e-05, "loss": 0.0378, "step": 92560 }, { "epoch": 0.06285, "grad_norm": 0.06085606664419174, "learning_rate": 3.0102464913316586e-05, "loss": 0.0358, "step": 92570 }, { "epoch": 0.0629, "grad_norm": 0.07938232272863388, "learning_rate": 3.0098418181495968e-05, "loss": 0.0342, "step": 92580 }, { "epoch": 0.06295, "grad_norm": 0.06906241178512573, "learning_rate": 3.0094371310286517e-05, "loss": 0.0333, "step": 92590 }, { "epoch": 0.063, "grad_norm": 0.07146913558244705, "learning_rate": 3.0090324299798866e-05, "loss": 0.0348, "step": 92600 }, { "epoch": 0.06305, "grad_norm": 0.09440640360116959, "learning_rate": 3.0086277150143665e-05, "loss": 0.0347, "step": 92610 }, { "epoch": 0.0631, "grad_norm": 0.0820523127913475, "learning_rate": 3.0082229861431556e-05, "loss": 0.0354, "step": 92620 }, { "epoch": 0.06315, "grad_norm": 0.08139200508594513, "learning_rate": 3.007818243377319e-05, "loss": 0.0357, "step": 92630 }, { "epoch": 0.0632, "grad_norm": 0.07425463199615479, "learning_rate": 3.007413486727922e-05, "loss": 0.0349, "step": 92640 }, { "epoch": 0.06325, "grad_norm": 0.07356837391853333, "learning_rate": 3.0070087162060316e-05, "loss": 0.034, "step": 92650 }, { "epoch": 0.0633, "grad_norm": 0.0619027353823185, "learning_rate": 3.006603931822713e-05, "loss": 0.0344, "step": 92660 }, { "epoch": 0.06335, "grad_norm": 0.06232372298836708, "learning_rate": 3.006199133589034e-05, "loss": 0.0336, "step": 92670 }, { "epoch": 0.0634, "grad_norm": 0.06282227486371994, "learning_rate": 3.00579432151606e-05, "loss": 0.0331, "step": 92680 }, { "epoch": 0.06345, "grad_norm": 0.0717713013291359, "learning_rate": 3.0053894956148593e-05, "loss": 0.0347, "step": 92690 }, { "epoch": 0.0635, "grad_norm": 0.055683765560388565, "learning_rate": 3.0049846558964995e-05, "loss": 0.0335, "step": 92700 }, { "epoch": 0.06355, "grad_norm": 0.05818936973810196, "learning_rate": 3.0045798023720494e-05, "loss": 0.0346, "step": 92710 }, { "epoch": 0.0636, "grad_norm": 0.0628824457526207, "learning_rate": 3.0041749350525772e-05, "loss": 0.0346, "step": 92720 }, { "epoch": 0.06365, "grad_norm": 0.07777944952249527, "learning_rate": 3.0037700539491515e-05, "loss": 0.0341, "step": 92730 }, { "epoch": 0.0637, "grad_norm": 0.07659552246332169, "learning_rate": 3.0033651590728417e-05, "loss": 0.0346, "step": 92740 }, { "epoch": 0.06375, "grad_norm": 0.06526902318000793, "learning_rate": 3.002960250434717e-05, "loss": 0.0329, "step": 92750 }, { "epoch": 0.0638, "grad_norm": 0.07887840270996094, "learning_rate": 3.0025553280458485e-05, "loss": 0.0344, "step": 92760 }, { "epoch": 0.06385, "grad_norm": 0.067435123026371, "learning_rate": 3.0021503919173065e-05, "loss": 0.0357, "step": 92770 }, { "epoch": 0.0639, "grad_norm": 0.0651765689253807, "learning_rate": 3.001745442060161e-05, "loss": 0.0347, "step": 92780 }, { "epoch": 0.06395, "grad_norm": 0.07981258630752563, "learning_rate": 3.0013404784854838e-05, "loss": 0.0339, "step": 92790 }, { "epoch": 0.064, "grad_norm": 0.07509902119636536, "learning_rate": 3.000935501204346e-05, "loss": 0.0348, "step": 92800 }, { "epoch": 0.06405, "grad_norm": 0.06714314222335815, "learning_rate": 3.0005305102278204e-05, "loss": 0.0354, "step": 92810 }, { "epoch": 0.0641, "grad_norm": 0.07060767710208893, "learning_rate": 3.000125505566978e-05, "loss": 0.0349, "step": 92820 }, { "epoch": 0.06415, "grad_norm": 0.07698815315961838, "learning_rate": 2.999720487232892e-05, "loss": 0.0336, "step": 92830 }, { "epoch": 0.0642, "grad_norm": 0.08230160176753998, "learning_rate": 2.9993154552366363e-05, "loss": 0.036, "step": 92840 }, { "epoch": 0.06425, "grad_norm": 0.08095083385705948, "learning_rate": 2.9989104095892835e-05, "loss": 0.0361, "step": 92850 }, { "epoch": 0.0643, "grad_norm": 0.07354466617107391, "learning_rate": 2.9985053503019078e-05, "loss": 0.035, "step": 92860 }, { "epoch": 0.06435, "grad_norm": 0.07451581209897995, "learning_rate": 2.9981002773855825e-05, "loss": 0.0341, "step": 92870 }, { "epoch": 0.0644, "grad_norm": 0.08016319572925568, "learning_rate": 2.9976951908513828e-05, "loss": 0.0343, "step": 92880 }, { "epoch": 0.06445, "grad_norm": 0.07381189614534378, "learning_rate": 2.9972900907103835e-05, "loss": 0.0339, "step": 92890 }, { "epoch": 0.0645, "grad_norm": 0.06846525520086288, "learning_rate": 2.9968849769736608e-05, "loss": 0.0347, "step": 92900 }, { "epoch": 0.06455, "grad_norm": 0.07587923109531403, "learning_rate": 2.9964798496522883e-05, "loss": 0.0343, "step": 92910 }, { "epoch": 0.0646, "grad_norm": 0.07672179490327835, "learning_rate": 2.9960747087573443e-05, "loss": 0.0352, "step": 92920 }, { "epoch": 0.06465, "grad_norm": 0.0883919820189476, "learning_rate": 2.9956695542999036e-05, "loss": 0.0349, "step": 92930 }, { "epoch": 0.0647, "grad_norm": 0.07688490301370621, "learning_rate": 2.9952643862910434e-05, "loss": 0.035, "step": 92940 }, { "epoch": 0.06475, "grad_norm": 0.07831470668315887, "learning_rate": 2.9948592047418407e-05, "loss": 0.0343, "step": 92950 }, { "epoch": 0.0648, "grad_norm": 0.07938302308320999, "learning_rate": 2.9944540096633737e-05, "loss": 0.0354, "step": 92960 }, { "epoch": 0.06485, "grad_norm": 0.07706853747367859, "learning_rate": 2.99404880106672e-05, "loss": 0.0363, "step": 92970 }, { "epoch": 0.0649, "grad_norm": 0.08308210223913193, "learning_rate": 2.9936435789629575e-05, "loss": 0.0348, "step": 92980 }, { "epoch": 0.06495, "grad_norm": 0.06706178188323975, "learning_rate": 2.993238343363165e-05, "loss": 0.0352, "step": 92990 }, { "epoch": 0.065, "grad_norm": 0.06068303436040878, "learning_rate": 2.992833094278421e-05, "loss": 0.0355, "step": 93000 }, { "epoch": 0.06505, "grad_norm": 0.11114989221096039, "learning_rate": 2.9924278317198057e-05, "loss": 0.0375, "step": 93010 }, { "epoch": 0.0651, "grad_norm": 0.07859257608652115, "learning_rate": 2.9920225556983987e-05, "loss": 0.0354, "step": 93020 }, { "epoch": 0.06515, "grad_norm": 0.0722615048289299, "learning_rate": 2.99161726622528e-05, "loss": 0.035, "step": 93030 }, { "epoch": 0.0652, "grad_norm": 0.08284981548786163, "learning_rate": 2.9912119633115295e-05, "loss": 0.0358, "step": 93040 }, { "epoch": 0.06525, "grad_norm": 0.07400976121425629, "learning_rate": 2.990806646968229e-05, "loss": 0.0369, "step": 93050 }, { "epoch": 0.0653, "grad_norm": 0.07793234288692474, "learning_rate": 2.990401317206458e-05, "loss": 0.0363, "step": 93060 }, { "epoch": 0.06535, "grad_norm": 0.09183067083358765, "learning_rate": 2.9899959740373003e-05, "loss": 0.0351, "step": 93070 }, { "epoch": 0.0654, "grad_norm": 0.0798955112695694, "learning_rate": 2.9895906174718363e-05, "loss": 0.0356, "step": 93080 }, { "epoch": 0.06545, "grad_norm": 0.07224183529615402, "learning_rate": 2.989185247521149e-05, "loss": 0.0356, "step": 93090 }, { "epoch": 0.0655, "grad_norm": 0.07504131644964218, "learning_rate": 2.9887798641963204e-05, "loss": 0.0359, "step": 93100 }, { "epoch": 0.06555, "grad_norm": 0.0735727921128273, "learning_rate": 2.988374467508435e-05, "loss": 0.0347, "step": 93110 }, { "epoch": 0.0656, "grad_norm": 0.06349542737007141, "learning_rate": 2.9879690574685743e-05, "loss": 0.0345, "step": 93120 }, { "epoch": 0.06565, "grad_norm": 0.08468075096607208, "learning_rate": 2.9875636340878233e-05, "loss": 0.0352, "step": 93130 }, { "epoch": 0.0657, "grad_norm": 0.07736873626708984, "learning_rate": 2.9871581973772646e-05, "loss": 0.0354, "step": 93140 }, { "epoch": 0.06575, "grad_norm": 0.12461365759372711, "learning_rate": 2.986752747347985e-05, "loss": 0.0351, "step": 93150 }, { "epoch": 0.0658, "grad_norm": 0.10645054280757904, "learning_rate": 2.9863472840110672e-05, "loss": 0.035, "step": 93160 }, { "epoch": 0.06585, "grad_norm": 0.09245552867650986, "learning_rate": 2.9859418073775987e-05, "loss": 0.0364, "step": 93170 }, { "epoch": 0.0659, "grad_norm": 0.09452535957098007, "learning_rate": 2.9855363174586627e-05, "loss": 0.0355, "step": 93180 }, { "epoch": 0.06595, "grad_norm": 0.10148017853498459, "learning_rate": 2.9851308142653466e-05, "loss": 0.0374, "step": 93190 }, { "epoch": 0.066, "grad_norm": 0.08406191319227219, "learning_rate": 2.984725297808736e-05, "loss": 0.036, "step": 93200 }, { "epoch": 0.06605, "grad_norm": 0.0741802453994751, "learning_rate": 2.984319768099918e-05, "loss": 0.0349, "step": 93210 }, { "epoch": 0.0661, "grad_norm": 0.06948202103376389, "learning_rate": 2.9839142251499798e-05, "loss": 0.0352, "step": 93220 }, { "epoch": 0.06615, "grad_norm": 0.09487444907426834, "learning_rate": 2.983508668970008e-05, "loss": 0.0348, "step": 93230 }, { "epoch": 0.0662, "grad_norm": 0.07550612837076187, "learning_rate": 2.983103099571091e-05, "loss": 0.0351, "step": 93240 }, { "epoch": 0.06625, "grad_norm": 0.07216605544090271, "learning_rate": 2.9826975169643168e-05, "loss": 0.0349, "step": 93250 }, { "epoch": 0.0663, "grad_norm": 0.08113081008195877, "learning_rate": 2.9822919211607737e-05, "loss": 0.0381, "step": 93260 }, { "epoch": 0.06635, "grad_norm": 0.08273231238126755, "learning_rate": 2.9818863121715506e-05, "loss": 0.0354, "step": 93270 }, { "epoch": 0.0664, "grad_norm": 0.082130566239357, "learning_rate": 2.981480690007737e-05, "loss": 0.0372, "step": 93280 }, { "epoch": 0.06645, "grad_norm": 0.09722493588924408, "learning_rate": 2.9810750546804222e-05, "loss": 0.0356, "step": 93290 }, { "epoch": 0.0665, "grad_norm": 0.11571363359689713, "learning_rate": 2.9806694062006963e-05, "loss": 0.0393, "step": 93300 }, { "epoch": 0.06655, "grad_norm": 0.09508045017719269, "learning_rate": 2.980263744579649e-05, "loss": 0.035, "step": 93310 }, { "epoch": 0.0666, "grad_norm": 0.07783970981836319, "learning_rate": 2.9798580698283718e-05, "loss": 0.0375, "step": 93320 }, { "epoch": 0.06665, "grad_norm": 0.07939450442790985, "learning_rate": 2.9794523819579546e-05, "loss": 0.0362, "step": 93330 }, { "epoch": 0.0667, "grad_norm": 0.07583706825971603, "learning_rate": 2.9790466809794898e-05, "loss": 0.0353, "step": 93340 }, { "epoch": 0.06675, "grad_norm": 0.09335724264383316, "learning_rate": 2.9786409669040687e-05, "loss": 0.036, "step": 93350 }, { "epoch": 0.0668, "grad_norm": 0.08478909730911255, "learning_rate": 2.978235239742784e-05, "loss": 0.0368, "step": 93360 }, { "epoch": 0.06685, "grad_norm": 0.09881918877363205, "learning_rate": 2.9778294995067262e-05, "loss": 0.0356, "step": 93370 }, { "epoch": 0.0669, "grad_norm": 0.08156245946884155, "learning_rate": 2.9774237462069905e-05, "loss": 0.0354, "step": 93380 }, { "epoch": 0.06695, "grad_norm": 0.09070413559675217, "learning_rate": 2.9770179798546678e-05, "loss": 0.0389, "step": 93390 }, { "epoch": 0.067, "grad_norm": 0.08894050866365433, "learning_rate": 2.9766122004608537e-05, "loss": 0.0362, "step": 93400 }, { "epoch": 0.06705, "grad_norm": 0.08819899708032608, "learning_rate": 2.9762064080366407e-05, "loss": 0.0354, "step": 93410 }, { "epoch": 0.0671, "grad_norm": 0.11669158190488815, "learning_rate": 2.975800602593124e-05, "loss": 0.0399, "step": 93420 }, { "epoch": 0.06715, "grad_norm": 0.10961344093084335, "learning_rate": 2.975394784141397e-05, "loss": 0.0352, "step": 93430 }, { "epoch": 0.0672, "grad_norm": 0.09197092801332474, "learning_rate": 2.9749889526925557e-05, "loss": 0.0357, "step": 93440 }, { "epoch": 0.06725, "grad_norm": 0.10645048320293427, "learning_rate": 2.9745831082576948e-05, "loss": 0.0349, "step": 93450 }, { "epoch": 0.0673, "grad_norm": 0.10623519122600555, "learning_rate": 2.9741772508479093e-05, "loss": 0.0356, "step": 93460 }, { "epoch": 0.06735, "grad_norm": 0.07466422021389008, "learning_rate": 2.973771380474296e-05, "loss": 0.0347, "step": 93470 }, { "epoch": 0.0674, "grad_norm": 0.07325384020805359, "learning_rate": 2.973365497147952e-05, "loss": 0.0351, "step": 93480 }, { "epoch": 0.06745, "grad_norm": 0.08341530710458755, "learning_rate": 2.972959600879972e-05, "loss": 0.0368, "step": 93490 }, { "epoch": 0.0675, "grad_norm": 0.09049838036298752, "learning_rate": 2.972553691681455e-05, "loss": 0.0362, "step": 93500 }, { "epoch": 0.06755, "grad_norm": 0.07634222507476807, "learning_rate": 2.9721477695634977e-05, "loss": 0.0336, "step": 93510 }, { "epoch": 0.0676, "grad_norm": 0.07836073637008667, "learning_rate": 2.9717418345371972e-05, "loss": 0.0342, "step": 93520 }, { "epoch": 0.06765, "grad_norm": 0.09737568348646164, "learning_rate": 2.9713358866136526e-05, "loss": 0.0347, "step": 93530 }, { "epoch": 0.0677, "grad_norm": 0.07100161910057068, "learning_rate": 2.9709299258039613e-05, "loss": 0.0349, "step": 93540 }, { "epoch": 0.06775, "grad_norm": 0.07358980178833008, "learning_rate": 2.9705239521192235e-05, "loss": 0.0332, "step": 93550 }, { "epoch": 0.0678, "grad_norm": 0.07812168449163437, "learning_rate": 2.9701179655705374e-05, "loss": 0.0342, "step": 93560 }, { "epoch": 0.06785, "grad_norm": 0.09706774353981018, "learning_rate": 2.9697119661690032e-05, "loss": 0.0364, "step": 93570 }, { "epoch": 0.0679, "grad_norm": 0.08430415391921997, "learning_rate": 2.969305953925719e-05, "loss": 0.0342, "step": 93580 }, { "epoch": 0.06795, "grad_norm": 0.0800105482339859, "learning_rate": 2.968899928851787e-05, "loss": 0.0342, "step": 93590 }, { "epoch": 0.068, "grad_norm": 0.08123991638422012, "learning_rate": 2.9684938909583073e-05, "loss": 0.0337, "step": 93600 }, { "epoch": 0.06805, "grad_norm": 0.09043265879154205, "learning_rate": 2.968087840256381e-05, "loss": 0.0341, "step": 93610 }, { "epoch": 0.0681, "grad_norm": 0.08123215287923813, "learning_rate": 2.9676817767571086e-05, "loss": 0.0339, "step": 93620 }, { "epoch": 0.06815, "grad_norm": 0.07657421380281448, "learning_rate": 2.9672757004715924e-05, "loss": 0.0345, "step": 93630 }, { "epoch": 0.0682, "grad_norm": 0.07973584532737732, "learning_rate": 2.9668696114109333e-05, "loss": 0.0353, "step": 93640 }, { "epoch": 0.06825, "grad_norm": 0.07709518820047379, "learning_rate": 2.9664635095862347e-05, "loss": 0.0348, "step": 93650 }, { "epoch": 0.0683, "grad_norm": 0.08385272324085236, "learning_rate": 2.966057395008599e-05, "loss": 0.036, "step": 93660 }, { "epoch": 0.06835, "grad_norm": 0.07511667162179947, "learning_rate": 2.9656512676891295e-05, "loss": 0.0344, "step": 93670 }, { "epoch": 0.0684, "grad_norm": 0.07143174856901169, "learning_rate": 2.9652451276389294e-05, "loss": 0.0345, "step": 93680 }, { "epoch": 0.06845, "grad_norm": 0.062331750988960266, "learning_rate": 2.9648389748691025e-05, "loss": 0.0344, "step": 93690 }, { "epoch": 0.0685, "grad_norm": 0.06511224806308746, "learning_rate": 2.964432809390752e-05, "loss": 0.0338, "step": 93700 }, { "epoch": 0.06855, "grad_norm": 0.0728011503815651, "learning_rate": 2.9640266312149827e-05, "loss": 0.0347, "step": 93710 }, { "epoch": 0.0686, "grad_norm": 0.06844881922006607, "learning_rate": 2.9636204403529e-05, "loss": 0.0339, "step": 93720 }, { "epoch": 0.06865, "grad_norm": 0.0771973580121994, "learning_rate": 2.963214236815609e-05, "loss": 0.0352, "step": 93730 }, { "epoch": 0.0687, "grad_norm": 0.08221804350614548, "learning_rate": 2.9628080206142145e-05, "loss": 0.0363, "step": 93740 }, { "epoch": 0.06875, "grad_norm": 0.07745331525802612, "learning_rate": 2.9624017917598225e-05, "loss": 0.0368, "step": 93750 }, { "epoch": 0.0688, "grad_norm": 0.09254749119281769, "learning_rate": 2.96199555026354e-05, "loss": 0.0346, "step": 93760 }, { "epoch": 0.06885, "grad_norm": 0.08585920929908752, "learning_rate": 2.9615892961364716e-05, "loss": 0.0337, "step": 93770 }, { "epoch": 0.0689, "grad_norm": 0.06769262999296188, "learning_rate": 2.9611830293897253e-05, "loss": 0.034, "step": 93780 }, { "epoch": 0.06895, "grad_norm": 0.07950049638748169, "learning_rate": 2.9607767500344084e-05, "loss": 0.0347, "step": 93790 }, { "epoch": 0.069, "grad_norm": 0.07528354972600937, "learning_rate": 2.9603704580816293e-05, "loss": 0.0364, "step": 93800 }, { "epoch": 0.06905, "grad_norm": 0.07296212017536163, "learning_rate": 2.9599641535424938e-05, "loss": 0.0343, "step": 93810 }, { "epoch": 0.0691, "grad_norm": 0.07152281701564789, "learning_rate": 2.9595578364281117e-05, "loss": 0.0353, "step": 93820 }, { "epoch": 0.06915, "grad_norm": 0.062432125210762024, "learning_rate": 2.9591515067495906e-05, "loss": 0.0345, "step": 93830 }, { "epoch": 0.0692, "grad_norm": 0.09543795883655548, "learning_rate": 2.95874516451804e-05, "loss": 0.0356, "step": 93840 }, { "epoch": 0.06925, "grad_norm": 0.08136715739965439, "learning_rate": 2.958338809744568e-05, "loss": 0.0353, "step": 93850 }, { "epoch": 0.0693, "grad_norm": 0.09028245508670807, "learning_rate": 2.9579324424402865e-05, "loss": 0.0346, "step": 93860 }, { "epoch": 0.06935, "grad_norm": 0.07663322985172272, "learning_rate": 2.9575260626163037e-05, "loss": 0.0346, "step": 93870 }, { "epoch": 0.0694, "grad_norm": 0.07476460933685303, "learning_rate": 2.95711967028373e-05, "loss": 0.0359, "step": 93880 }, { "epoch": 0.06945, "grad_norm": 0.09127458930015564, "learning_rate": 2.9567132654536763e-05, "loss": 0.0377, "step": 93890 }, { "epoch": 0.0695, "grad_norm": 0.07893301546573639, "learning_rate": 2.9563068481372535e-05, "loss": 0.036, "step": 93900 }, { "epoch": 0.06955, "grad_norm": 0.08074024319648743, "learning_rate": 2.9559004183455726e-05, "loss": 0.0361, "step": 93910 }, { "epoch": 0.0696, "grad_norm": 0.07430429756641388, "learning_rate": 2.955493976089746e-05, "loss": 0.0353, "step": 93920 }, { "epoch": 0.06965, "grad_norm": 0.08006362617015839, "learning_rate": 2.955087521380885e-05, "loss": 0.0364, "step": 93930 }, { "epoch": 0.0697, "grad_norm": 0.07894095778465271, "learning_rate": 2.9546810542301024e-05, "loss": 0.0356, "step": 93940 }, { "epoch": 0.06975, "grad_norm": 0.0702204704284668, "learning_rate": 2.9542745746485102e-05, "loss": 0.0353, "step": 93950 }, { "epoch": 0.0698, "grad_norm": 0.07127805799245834, "learning_rate": 2.953868082647222e-05, "loss": 0.0352, "step": 93960 }, { "epoch": 0.06985, "grad_norm": 0.07629313319921494, "learning_rate": 2.953461578237351e-05, "loss": 0.0348, "step": 93970 }, { "epoch": 0.0699, "grad_norm": 0.08887936174869537, "learning_rate": 2.9530550614300106e-05, "loss": 0.036, "step": 93980 }, { "epoch": 0.06995, "grad_norm": 0.0876452699303627, "learning_rate": 2.952648532236315e-05, "loss": 0.036, "step": 93990 }, { "epoch": 0.07, "grad_norm": 0.07844098657369614, "learning_rate": 2.9522419906673786e-05, "loss": 0.0353, "step": 94000 }, { "epoch": 0.07005, "grad_norm": 0.09425082057714462, "learning_rate": 2.9518354367343166e-05, "loss": 0.0341, "step": 94010 }, { "epoch": 0.0701, "grad_norm": 0.09124142676591873, "learning_rate": 2.951428870448243e-05, "loss": 0.0352, "step": 94020 }, { "epoch": 0.07015, "grad_norm": 0.07855840772390366, "learning_rate": 2.9510222918202733e-05, "loss": 0.0352, "step": 94030 }, { "epoch": 0.0702, "grad_norm": 0.0941929966211319, "learning_rate": 2.9506157008615244e-05, "loss": 0.0348, "step": 94040 }, { "epoch": 0.07025, "grad_norm": 0.087624192237854, "learning_rate": 2.9502090975831116e-05, "loss": 0.0343, "step": 94050 }, { "epoch": 0.0703, "grad_norm": 0.06899592280387878, "learning_rate": 2.94980248199615e-05, "loss": 0.0335, "step": 94060 }, { "epoch": 0.07035, "grad_norm": 0.08209431916475296, "learning_rate": 2.949395854111759e-05, "loss": 0.0353, "step": 94070 }, { "epoch": 0.0704, "grad_norm": 0.08980611711740494, "learning_rate": 2.9489892139410536e-05, "loss": 0.0346, "step": 94080 }, { "epoch": 0.07045, "grad_norm": 0.07221923768520355, "learning_rate": 2.9485825614951513e-05, "loss": 0.0355, "step": 94090 }, { "epoch": 0.0705, "grad_norm": 0.08524159342050552, "learning_rate": 2.9481758967851702e-05, "loss": 0.0345, "step": 94100 }, { "epoch": 0.07055, "grad_norm": 0.0812133252620697, "learning_rate": 2.9477692198222297e-05, "loss": 0.0348, "step": 94110 }, { "epoch": 0.0706, "grad_norm": 0.093808114528656, "learning_rate": 2.947362530617446e-05, "loss": 0.0375, "step": 94120 }, { "epoch": 0.07065, "grad_norm": 0.08137372881174088, "learning_rate": 2.946955829181939e-05, "loss": 0.0352, "step": 94130 }, { "epoch": 0.0707, "grad_norm": 0.08073768019676208, "learning_rate": 2.9465491155268278e-05, "loss": 0.0368, "step": 94140 }, { "epoch": 0.07075, "grad_norm": 0.09060634672641754, "learning_rate": 2.9461423896632312e-05, "loss": 0.0348, "step": 94150 }, { "epoch": 0.0708, "grad_norm": 0.08813582360744476, "learning_rate": 2.9457356516022683e-05, "loss": 0.0352, "step": 94160 }, { "epoch": 0.07085, "grad_norm": 0.08457762002944946, "learning_rate": 2.9453289013550618e-05, "loss": 0.0381, "step": 94170 }, { "epoch": 0.0709, "grad_norm": 0.072766974568367, "learning_rate": 2.9449221389327297e-05, "loss": 0.0359, "step": 94180 }, { "epoch": 0.07095, "grad_norm": 0.0687379390001297, "learning_rate": 2.9445153643463942e-05, "loss": 0.0347, "step": 94190 }, { "epoch": 0.071, "grad_norm": 0.076286181807518, "learning_rate": 2.9441085776071743e-05, "loss": 0.0363, "step": 94200 }, { "epoch": 0.07105, "grad_norm": 0.07225333899259567, "learning_rate": 2.9437017787261935e-05, "loss": 0.0349, "step": 94210 }, { "epoch": 0.0711, "grad_norm": 0.06537607312202454, "learning_rate": 2.9432949677145722e-05, "loss": 0.0355, "step": 94220 }, { "epoch": 0.07115, "grad_norm": 0.06774396449327469, "learning_rate": 2.942888144583434e-05, "loss": 0.0341, "step": 94230 }, { "epoch": 0.0712, "grad_norm": 0.07002709805965424, "learning_rate": 2.9424813093438995e-05, "loss": 0.0349, "step": 94240 }, { "epoch": 0.07125, "grad_norm": 0.09582309424877167, "learning_rate": 2.9420744620070928e-05, "loss": 0.0372, "step": 94250 }, { "epoch": 0.0713, "grad_norm": 0.09352750331163406, "learning_rate": 2.9416676025841363e-05, "loss": 0.0375, "step": 94260 }, { "epoch": 0.07135, "grad_norm": 0.08576199412345886, "learning_rate": 2.9412607310861528e-05, "loss": 0.0347, "step": 94270 }, { "epoch": 0.0714, "grad_norm": 0.08172550797462463, "learning_rate": 2.9408538475242674e-05, "loss": 0.0378, "step": 94280 }, { "epoch": 0.07145, "grad_norm": 0.07877188175916672, "learning_rate": 2.940446951909603e-05, "loss": 0.036, "step": 94290 }, { "epoch": 0.0715, "grad_norm": 0.08867704123258591, "learning_rate": 2.940040044253285e-05, "loss": 0.0361, "step": 94300 }, { "epoch": 0.07155, "grad_norm": 0.064987413585186, "learning_rate": 2.9396331245664366e-05, "loss": 0.0356, "step": 94310 }, { "epoch": 0.0716, "grad_norm": 0.0645001232624054, "learning_rate": 2.939226192860185e-05, "loss": 0.0354, "step": 94320 }, { "epoch": 0.07165, "grad_norm": 0.06392812728881836, "learning_rate": 2.9388192491456535e-05, "loss": 0.0356, "step": 94330 }, { "epoch": 0.0717, "grad_norm": 0.07957509905099869, "learning_rate": 2.9384122934339685e-05, "loss": 0.035, "step": 94340 }, { "epoch": 0.07175, "grad_norm": 0.066752128303051, "learning_rate": 2.938005325736256e-05, "loss": 0.0356, "step": 94350 }, { "epoch": 0.0718, "grad_norm": 0.09556171298027039, "learning_rate": 2.937598346063643e-05, "loss": 0.0353, "step": 94360 }, { "epoch": 0.07185, "grad_norm": 0.06822418421506882, "learning_rate": 2.937191354427255e-05, "loss": 0.0355, "step": 94370 }, { "epoch": 0.0719, "grad_norm": 0.06926630437374115, "learning_rate": 2.9367843508382203e-05, "loss": 0.0355, "step": 94380 }, { "epoch": 0.07195, "grad_norm": 0.0884789526462555, "learning_rate": 2.9363773353076652e-05, "loss": 0.036, "step": 94390 }, { "epoch": 0.072, "grad_norm": 0.07868551462888718, "learning_rate": 2.9359703078467178e-05, "loss": 0.0386, "step": 94400 }, { "epoch": 0.07205, "grad_norm": 0.07347268611192703, "learning_rate": 2.935563268466505e-05, "loss": 0.0358, "step": 94410 }, { "epoch": 0.0721, "grad_norm": 0.09271713346242905, "learning_rate": 2.9351562171781576e-05, "loss": 0.0364, "step": 94420 }, { "epoch": 0.07215, "grad_norm": 0.08625374734401703, "learning_rate": 2.934749153992802e-05, "loss": 0.0349, "step": 94430 }, { "epoch": 0.0722, "grad_norm": 0.08623244613409042, "learning_rate": 2.934342078921568e-05, "loss": 0.035, "step": 94440 }, { "epoch": 0.07225, "grad_norm": 0.09188719093799591, "learning_rate": 2.933934991975584e-05, "loss": 0.0355, "step": 94450 }, { "epoch": 0.0723, "grad_norm": 0.0780775398015976, "learning_rate": 2.933527893165981e-05, "loss": 0.0366, "step": 94460 }, { "epoch": 0.07235, "grad_norm": 0.07540776580572128, "learning_rate": 2.933120782503888e-05, "loss": 0.0368, "step": 94470 }, { "epoch": 0.0724, "grad_norm": 0.0667237788438797, "learning_rate": 2.932713660000436e-05, "loss": 0.0373, "step": 94480 }, { "epoch": 0.07245, "grad_norm": 0.08438503742218018, "learning_rate": 2.932306525666755e-05, "loss": 0.0376, "step": 94490 }, { "epoch": 0.0725, "grad_norm": 0.06112914904952049, "learning_rate": 2.9318993795139754e-05, "loss": 0.036, "step": 94500 }, { "epoch": 0.07255, "grad_norm": 0.10258391499519348, "learning_rate": 2.9314922215532304e-05, "loss": 0.0371, "step": 94510 }, { "epoch": 0.0726, "grad_norm": 0.09032279998064041, "learning_rate": 2.931085051795649e-05, "loss": 0.0382, "step": 94520 }, { "epoch": 0.07265, "grad_norm": 0.09368538856506348, "learning_rate": 2.9306778702523646e-05, "loss": 0.0359, "step": 94530 }, { "epoch": 0.0727, "grad_norm": 0.09324487298727036, "learning_rate": 2.9302706769345077e-05, "loss": 0.0349, "step": 94540 }, { "epoch": 0.07275, "grad_norm": 0.08473429083824158, "learning_rate": 2.929863471853214e-05, "loss": 0.0342, "step": 94550 }, { "epoch": 0.0728, "grad_norm": 0.08495772629976273, "learning_rate": 2.9294562550196138e-05, "loss": 0.0363, "step": 94560 }, { "epoch": 0.07285, "grad_norm": 0.10111489146947861, "learning_rate": 2.9290490264448412e-05, "loss": 0.0355, "step": 94570 }, { "epoch": 0.0729, "grad_norm": 0.07820268720388412, "learning_rate": 2.928641786140029e-05, "loss": 0.0388, "step": 94580 }, { "epoch": 0.07295, "grad_norm": 0.08622400462627411, "learning_rate": 2.9282345341163118e-05, "loss": 0.0351, "step": 94590 }, { "epoch": 0.073, "grad_norm": 0.08051776885986328, "learning_rate": 2.9278272703848225e-05, "loss": 0.037, "step": 94600 }, { "epoch": 0.07305, "grad_norm": 0.08742921054363251, "learning_rate": 2.9274199949566972e-05, "loss": 0.0373, "step": 94610 }, { "epoch": 0.0731, "grad_norm": 0.09345852583646774, "learning_rate": 2.9270127078430694e-05, "loss": 0.0367, "step": 94620 }, { "epoch": 0.07315, "grad_norm": 0.0913676917552948, "learning_rate": 2.9266054090550748e-05, "loss": 0.0355, "step": 94630 }, { "epoch": 0.0732, "grad_norm": 0.08236590772867203, "learning_rate": 2.9261980986038485e-05, "loss": 0.0362, "step": 94640 }, { "epoch": 0.07325, "grad_norm": 0.07883656024932861, "learning_rate": 2.9257907765005266e-05, "loss": 0.0338, "step": 94650 }, { "epoch": 0.0733, "grad_norm": 0.08704619109630585, "learning_rate": 2.9253834427562442e-05, "loss": 0.0361, "step": 94660 }, { "epoch": 0.07335, "grad_norm": 0.10479659587144852, "learning_rate": 2.9249760973821382e-05, "loss": 0.0359, "step": 94670 }, { "epoch": 0.0734, "grad_norm": 0.08075715601444244, "learning_rate": 2.9245687403893456e-05, "loss": 0.0349, "step": 94680 }, { "epoch": 0.07345, "grad_norm": 0.07828488945960999, "learning_rate": 2.924161371789004e-05, "loss": 0.0357, "step": 94690 }, { "epoch": 0.0735, "grad_norm": 0.07711922377347946, "learning_rate": 2.9237539915922492e-05, "loss": 0.0352, "step": 94700 }, { "epoch": 0.07355, "grad_norm": 0.07504051923751831, "learning_rate": 2.923346599810219e-05, "loss": 0.0351, "step": 94710 }, { "epoch": 0.0736, "grad_norm": 0.08983557671308517, "learning_rate": 2.9229391964540524e-05, "loss": 0.0357, "step": 94720 }, { "epoch": 0.07365, "grad_norm": 0.07545730471611023, "learning_rate": 2.922531781534887e-05, "loss": 0.0347, "step": 94730 }, { "epoch": 0.0737, "grad_norm": 0.08545920997858047, "learning_rate": 2.922124355063861e-05, "loss": 0.0351, "step": 94740 }, { "epoch": 0.07375, "grad_norm": 0.07113443315029144, "learning_rate": 2.9217169170521136e-05, "loss": 0.034, "step": 94750 }, { "epoch": 0.0738, "grad_norm": 0.09056176990270615, "learning_rate": 2.9213094675107848e-05, "loss": 0.0353, "step": 94760 }, { "epoch": 0.07385, "grad_norm": 0.07598128914833069, "learning_rate": 2.9209020064510133e-05, "loss": 0.0354, "step": 94770 }, { "epoch": 0.0739, "grad_norm": 0.08315098285675049, "learning_rate": 2.920494533883939e-05, "loss": 0.0355, "step": 94780 }, { "epoch": 0.07395, "grad_norm": 0.0804397314786911, "learning_rate": 2.9200870498207017e-05, "loss": 0.0394, "step": 94790 }, { "epoch": 0.074, "grad_norm": 0.07506150007247925, "learning_rate": 2.9196795542724432e-05, "loss": 0.0356, "step": 94800 }, { "epoch": 0.07405, "grad_norm": 0.08508822321891785, "learning_rate": 2.9192720472503022e-05, "loss": 0.0388, "step": 94810 }, { "epoch": 0.0741, "grad_norm": 0.08116913586854935, "learning_rate": 2.9188645287654222e-05, "loss": 0.0367, "step": 94820 }, { "epoch": 0.07415, "grad_norm": 0.0697377398610115, "learning_rate": 2.9184569988289424e-05, "loss": 0.0362, "step": 94830 }, { "epoch": 0.0742, "grad_norm": 0.07976461946964264, "learning_rate": 2.9180494574520063e-05, "loss": 0.0365, "step": 94840 }, { "epoch": 0.07425, "grad_norm": 0.07997091114521027, "learning_rate": 2.9176419046457544e-05, "loss": 0.0373, "step": 94850 }, { "epoch": 0.0743, "grad_norm": 0.07684572786092758, "learning_rate": 2.9172343404213294e-05, "loss": 0.0352, "step": 94860 }, { "epoch": 0.07435, "grad_norm": 0.0832042247056961, "learning_rate": 2.9168267647898743e-05, "loss": 0.0345, "step": 94870 }, { "epoch": 0.0744, "grad_norm": 0.08436644077301025, "learning_rate": 2.9164191777625326e-05, "loss": 0.0348, "step": 94880 }, { "epoch": 0.07445, "grad_norm": 0.08339640498161316, "learning_rate": 2.9160115793504473e-05, "loss": 0.0376, "step": 94890 }, { "epoch": 0.0745, "grad_norm": 0.08925966173410416, "learning_rate": 2.9156039695647614e-05, "loss": 0.0372, "step": 94900 }, { "epoch": 0.07455, "grad_norm": 0.07819242775440216, "learning_rate": 2.9151963484166188e-05, "loss": 0.035, "step": 94910 }, { "epoch": 0.0746, "grad_norm": 0.08378436416387558, "learning_rate": 2.914788715917164e-05, "loss": 0.0352, "step": 94920 }, { "epoch": 0.07465, "grad_norm": 0.07194891571998596, "learning_rate": 2.9143810720775417e-05, "loss": 0.0347, "step": 94930 }, { "epoch": 0.0747, "grad_norm": 0.0768907442688942, "learning_rate": 2.9139734169088974e-05, "loss": 0.0354, "step": 94940 }, { "epoch": 0.07475, "grad_norm": 0.08474749326705933, "learning_rate": 2.913565750422374e-05, "loss": 0.0358, "step": 94950 }, { "epoch": 0.0748, "grad_norm": 0.07622528076171875, "learning_rate": 2.9131580726291192e-05, "loss": 0.0364, "step": 94960 }, { "epoch": 0.07485, "grad_norm": 0.06749384105205536, "learning_rate": 2.9127503835402782e-05, "loss": 0.0358, "step": 94970 }, { "epoch": 0.0749, "grad_norm": 0.07656152546405792, "learning_rate": 2.912342683166996e-05, "loss": 0.0345, "step": 94980 }, { "epoch": 0.07495, "grad_norm": 0.07424613833427429, "learning_rate": 2.9119349715204197e-05, "loss": 0.035, "step": 94990 }, { "epoch": 0.075, "grad_norm": 0.06224847212433815, "learning_rate": 2.911527248611696e-05, "loss": 0.0384, "step": 95000 }, { "epoch": 0.07505, "grad_norm": 0.08580512553453445, "learning_rate": 2.9111195144519728e-05, "loss": 0.0376, "step": 95010 }, { "epoch": 0.0751, "grad_norm": 0.08321086317300797, "learning_rate": 2.9107117690523957e-05, "loss": 0.0353, "step": 95020 }, { "epoch": 0.07515, "grad_norm": 0.08063076436519623, "learning_rate": 2.9103040124241138e-05, "loss": 0.0375, "step": 95030 }, { "epoch": 0.0752, "grad_norm": 0.08435116708278656, "learning_rate": 2.909896244578274e-05, "loss": 0.0363, "step": 95040 }, { "epoch": 0.07525, "grad_norm": 0.07457546889781952, "learning_rate": 2.9094884655260247e-05, "loss": 0.0357, "step": 95050 }, { "epoch": 0.0753, "grad_norm": 0.07458683103322983, "learning_rate": 2.9090806752785142e-05, "loss": 0.0358, "step": 95060 }, { "epoch": 0.07535, "grad_norm": 0.07121626287698746, "learning_rate": 2.9086728738468928e-05, "loss": 0.0338, "step": 95070 }, { "epoch": 0.0754, "grad_norm": 0.0680113285779953, "learning_rate": 2.9082650612423078e-05, "loss": 0.036, "step": 95080 }, { "epoch": 0.07545, "grad_norm": 0.07844933867454529, "learning_rate": 2.90785723747591e-05, "loss": 0.0351, "step": 95090 }, { "epoch": 0.0755, "grad_norm": 0.08018365502357483, "learning_rate": 2.9074494025588477e-05, "loss": 0.0342, "step": 95100 }, { "epoch": 0.07555, "grad_norm": 0.11203812807798386, "learning_rate": 2.9070415565022722e-05, "loss": 0.0397, "step": 95110 }, { "epoch": 0.0756, "grad_norm": 0.1107054129242897, "learning_rate": 2.906633699317334e-05, "loss": 0.0356, "step": 95120 }, { "epoch": 0.07565, "grad_norm": 0.11537740379571915, "learning_rate": 2.906225831015183e-05, "loss": 0.0365, "step": 95130 }, { "epoch": 0.0757, "grad_norm": 0.08692065626382828, "learning_rate": 2.9058179516069695e-05, "loss": 0.0357, "step": 95140 }, { "epoch": 0.07575, "grad_norm": 0.09226984530687332, "learning_rate": 2.9054100611038472e-05, "loss": 0.0355, "step": 95150 }, { "epoch": 0.0758, "grad_norm": 0.09094180911779404, "learning_rate": 2.9050021595169647e-05, "loss": 0.0361, "step": 95160 }, { "epoch": 0.07585, "grad_norm": 0.08938279002904892, "learning_rate": 2.904594246857476e-05, "loss": 0.0364, "step": 95170 }, { "epoch": 0.0759, "grad_norm": 0.08622892200946808, "learning_rate": 2.9041863231365318e-05, "loss": 0.0353, "step": 95180 }, { "epoch": 0.07595, "grad_norm": 0.08137310296297073, "learning_rate": 2.903778388365287e-05, "loss": 0.0361, "step": 95190 }, { "epoch": 0.076, "grad_norm": 0.07452932745218277, "learning_rate": 2.9033704425548913e-05, "loss": 0.0361, "step": 95200 }, { "epoch": 0.07605, "grad_norm": 0.0670681893825531, "learning_rate": 2.9029624857164996e-05, "loss": 0.0355, "step": 95210 }, { "epoch": 0.0761, "grad_norm": 0.09448492527008057, "learning_rate": 2.9025545178612657e-05, "loss": 0.0381, "step": 95220 }, { "epoch": 0.07615, "grad_norm": 0.060585349798202515, "learning_rate": 2.9021465390003416e-05, "loss": 0.0369, "step": 95230 }, { "epoch": 0.0762, "grad_norm": 0.07551992684602737, "learning_rate": 2.9017385491448824e-05, "loss": 0.0389, "step": 95240 }, { "epoch": 0.07625, "grad_norm": 0.07179474830627441, "learning_rate": 2.9013305483060416e-05, "loss": 0.036, "step": 95250 }, { "epoch": 0.0763, "grad_norm": 0.07472289353609085, "learning_rate": 2.9009225364949754e-05, "loss": 0.039, "step": 95260 }, { "epoch": 0.07635, "grad_norm": 0.07139992713928223, "learning_rate": 2.9005145137228375e-05, "loss": 0.0362, "step": 95270 }, { "epoch": 0.0764, "grad_norm": 0.07775798439979553, "learning_rate": 2.9001064800007834e-05, "loss": 0.0392, "step": 95280 }, { "epoch": 0.07645, "grad_norm": 0.08753141015768051, "learning_rate": 2.8996984353399682e-05, "loss": 0.0374, "step": 95290 }, { "epoch": 0.0765, "grad_norm": 0.09571100771427155, "learning_rate": 2.8992903797515475e-05, "loss": 0.0362, "step": 95300 }, { "epoch": 0.07655, "grad_norm": 0.061570536345243454, "learning_rate": 2.898882313246678e-05, "loss": 0.0372, "step": 95310 }, { "epoch": 0.0766, "grad_norm": 0.06046005338430405, "learning_rate": 2.8984742358365165e-05, "loss": 0.0351, "step": 95320 }, { "epoch": 0.07665, "grad_norm": 0.0736611932516098, "learning_rate": 2.8980661475322186e-05, "loss": 0.0375, "step": 95330 }, { "epoch": 0.0767, "grad_norm": 0.08278156071901321, "learning_rate": 2.8976580483449423e-05, "loss": 0.0366, "step": 95340 }, { "epoch": 0.07675, "grad_norm": 0.0938616693019867, "learning_rate": 2.8972499382858433e-05, "loss": 0.0361, "step": 95350 }, { "epoch": 0.0768, "grad_norm": 0.07551280409097672, "learning_rate": 2.8968418173660817e-05, "loss": 0.0366, "step": 95360 }, { "epoch": 0.07685, "grad_norm": 0.06807341426610947, "learning_rate": 2.8964336855968123e-05, "loss": 0.0379, "step": 95370 }, { "epoch": 0.0769, "grad_norm": 0.0739855021238327, "learning_rate": 2.8960255429891957e-05, "loss": 0.0379, "step": 95380 }, { "epoch": 0.07695, "grad_norm": 0.09074331820011139, "learning_rate": 2.895617389554389e-05, "loss": 0.0351, "step": 95390 }, { "epoch": 0.077, "grad_norm": 0.07602587342262268, "learning_rate": 2.8952092253035523e-05, "loss": 0.0368, "step": 95400 }, { "epoch": 0.07705, "grad_norm": 0.08063099533319473, "learning_rate": 2.8948010502478435e-05, "loss": 0.0365, "step": 95410 }, { "epoch": 0.0771, "grad_norm": 0.08302353322505951, "learning_rate": 2.894392864398422e-05, "loss": 0.0365, "step": 95420 }, { "epoch": 0.07715, "grad_norm": 0.08162595331668854, "learning_rate": 2.8939846677664477e-05, "loss": 0.0366, "step": 95430 }, { "epoch": 0.0772, "grad_norm": 0.08327838033437729, "learning_rate": 2.8935764603630816e-05, "loss": 0.0355, "step": 95440 }, { "epoch": 0.07725, "grad_norm": 0.12089723348617554, "learning_rate": 2.893168242199482e-05, "loss": 0.0362, "step": 95450 }, { "epoch": 0.0773, "grad_norm": 0.0806155577301979, "learning_rate": 2.8927600132868106e-05, "loss": 0.0361, "step": 95460 }, { "epoch": 0.07735, "grad_norm": 0.08697088062763214, "learning_rate": 2.8923517736362284e-05, "loss": 0.0365, "step": 95470 }, { "epoch": 0.0774, "grad_norm": 0.07858090102672577, "learning_rate": 2.8919435232588954e-05, "loss": 0.0365, "step": 95480 }, { "epoch": 0.07745, "grad_norm": 0.0615830160677433, "learning_rate": 2.8915352621659743e-05, "loss": 0.0345, "step": 95490 }, { "epoch": 0.0775, "grad_norm": 0.08191350847482681, "learning_rate": 2.8911269903686255e-05, "loss": 0.036, "step": 95500 }, { "epoch": 0.07755, "grad_norm": 0.06817498803138733, "learning_rate": 2.890718707878013e-05, "loss": 0.0351, "step": 95510 }, { "epoch": 0.0776, "grad_norm": 0.0775030255317688, "learning_rate": 2.890310414705297e-05, "loss": 0.0354, "step": 95520 }, { "epoch": 0.07765, "grad_norm": 0.10623601824045181, "learning_rate": 2.889902110861641e-05, "loss": 0.0369, "step": 95530 }, { "epoch": 0.0777, "grad_norm": 0.08662627637386322, "learning_rate": 2.8894937963582073e-05, "loss": 0.039, "step": 95540 }, { "epoch": 0.07775, "grad_norm": 0.08613138645887375, "learning_rate": 2.8890854712061604e-05, "loss": 0.036, "step": 95550 }, { "epoch": 0.0778, "grad_norm": 0.06717013567686081, "learning_rate": 2.888677135416662e-05, "loss": 0.0351, "step": 95560 }, { "epoch": 0.07785, "grad_norm": 0.09230372309684753, "learning_rate": 2.8882687890008775e-05, "loss": 0.0356, "step": 95570 }, { "epoch": 0.0779, "grad_norm": 0.11337987333536148, "learning_rate": 2.8878604319699693e-05, "loss": 0.0383, "step": 95580 }, { "epoch": 0.07795, "grad_norm": 0.10768648236989975, "learning_rate": 2.887452064335103e-05, "loss": 0.0371, "step": 95590 }, { "epoch": 0.078, "grad_norm": 0.0736958459019661, "learning_rate": 2.8870436861074435e-05, "loss": 0.0359, "step": 95600 }, { "epoch": 0.07805, "grad_norm": 0.07899592816829681, "learning_rate": 2.8866352972981546e-05, "loss": 0.0354, "step": 95610 }, { "epoch": 0.0781, "grad_norm": 0.06220125034451485, "learning_rate": 2.8862268979184015e-05, "loss": 0.0349, "step": 95620 }, { "epoch": 0.07815, "grad_norm": 0.10204660147428513, "learning_rate": 2.8858184879793506e-05, "loss": 0.0369, "step": 95630 }, { "epoch": 0.0782, "grad_norm": 0.08213819563388824, "learning_rate": 2.885410067492167e-05, "loss": 0.0346, "step": 95640 }, { "epoch": 0.07825, "grad_norm": 0.08925966173410416, "learning_rate": 2.8850016364680173e-05, "loss": 0.0365, "step": 95650 }, { "epoch": 0.0783, "grad_norm": 0.07410264760255814, "learning_rate": 2.8845931949180664e-05, "loss": 0.0348, "step": 95660 }, { "epoch": 0.07835, "grad_norm": 0.08311299234628677, "learning_rate": 2.884184742853483e-05, "loss": 0.036, "step": 95670 }, { "epoch": 0.0784, "grad_norm": 0.07730239629745483, "learning_rate": 2.8837762802854324e-05, "loss": 0.036, "step": 95680 }, { "epoch": 0.07845, "grad_norm": 0.07230732589960098, "learning_rate": 2.8833678072250838e-05, "loss": 0.0372, "step": 95690 }, { "epoch": 0.0785, "grad_norm": 0.06986280530691147, "learning_rate": 2.8829593236836016e-05, "loss": 0.0355, "step": 95700 }, { "epoch": 0.07855, "grad_norm": 0.08273430913686752, "learning_rate": 2.8825508296721566e-05, "loss": 0.0369, "step": 95710 }, { "epoch": 0.0786, "grad_norm": 0.0761200562119484, "learning_rate": 2.8821423252019154e-05, "loss": 0.0353, "step": 95720 }, { "epoch": 0.07865, "grad_norm": 0.07793602347373962, "learning_rate": 2.8817338102840462e-05, "loss": 0.035, "step": 95730 }, { "epoch": 0.0787, "grad_norm": 0.06937800347805023, "learning_rate": 2.881325284929719e-05, "loss": 0.0339, "step": 95740 }, { "epoch": 0.07875, "grad_norm": 0.07704199850559235, "learning_rate": 2.8809167491501003e-05, "loss": 0.0345, "step": 95750 }, { "epoch": 0.0788, "grad_norm": 0.09481144696474075, "learning_rate": 2.880508202956362e-05, "loss": 0.0352, "step": 95760 }, { "epoch": 0.07885, "grad_norm": 0.06698376685380936, "learning_rate": 2.8800996463596717e-05, "loss": 0.0359, "step": 95770 }, { "epoch": 0.0789, "grad_norm": 0.06863502413034439, "learning_rate": 2.8796910793712006e-05, "loss": 0.0343, "step": 95780 }, { "epoch": 0.07895, "grad_norm": 0.06124764680862427, "learning_rate": 2.8792825020021174e-05, "loss": 0.0341, "step": 95790 }, { "epoch": 0.079, "grad_norm": 0.06939634680747986, "learning_rate": 2.8788739142635935e-05, "loss": 0.037, "step": 95800 }, { "epoch": 0.07905, "grad_norm": 0.07363869994878769, "learning_rate": 2.878465316166798e-05, "loss": 0.0358, "step": 95810 }, { "epoch": 0.0791, "grad_norm": 0.06982981413602829, "learning_rate": 2.8780567077229042e-05, "loss": 0.0371, "step": 95820 }, { "epoch": 0.07915, "grad_norm": 0.07453575730323792, "learning_rate": 2.8776480889430812e-05, "loss": 0.0357, "step": 95830 }, { "epoch": 0.0792, "grad_norm": 0.0722508579492569, "learning_rate": 2.8772394598385022e-05, "loss": 0.0352, "step": 95840 }, { "epoch": 0.07925, "grad_norm": 0.06787462532520294, "learning_rate": 2.8768308204203376e-05, "loss": 0.0354, "step": 95850 }, { "epoch": 0.0793, "grad_norm": 0.06790974736213684, "learning_rate": 2.87642217069976e-05, "loss": 0.0351, "step": 95860 }, { "epoch": 0.07935, "grad_norm": 0.07508399337530136, "learning_rate": 2.8760135106879415e-05, "loss": 0.0358, "step": 95870 }, { "epoch": 0.0794, "grad_norm": 0.0850457027554512, "learning_rate": 2.875604840396055e-05, "loss": 0.0348, "step": 95880 }, { "epoch": 0.07945, "grad_norm": 0.07781939208507538, "learning_rate": 2.8751961598352732e-05, "loss": 0.035, "step": 95890 }, { "epoch": 0.0795, "grad_norm": 0.08043967932462692, "learning_rate": 2.87478746901677e-05, "loss": 0.0353, "step": 95900 }, { "epoch": 0.07955, "grad_norm": 0.07491166889667511, "learning_rate": 2.8743787679517174e-05, "loss": 0.0353, "step": 95910 }, { "epoch": 0.0796, "grad_norm": 0.08218996971845627, "learning_rate": 2.87397005665129e-05, "loss": 0.0356, "step": 95920 }, { "epoch": 0.07965, "grad_norm": 0.08948659151792526, "learning_rate": 2.8735613351266622e-05, "loss": 0.0343, "step": 95930 }, { "epoch": 0.0797, "grad_norm": 0.07800249010324478, "learning_rate": 2.873152603389008e-05, "loss": 0.0357, "step": 95940 }, { "epoch": 0.07975, "grad_norm": 0.07463128864765167, "learning_rate": 2.8727438614495006e-05, "loss": 0.035, "step": 95950 }, { "epoch": 0.0798, "grad_norm": 0.07149926573038101, "learning_rate": 2.872335109319317e-05, "loss": 0.035, "step": 95960 }, { "epoch": 0.07985, "grad_norm": 0.06871454417705536, "learning_rate": 2.8719263470096313e-05, "loss": 0.0347, "step": 95970 }, { "epoch": 0.0799, "grad_norm": 0.09457767009735107, "learning_rate": 2.8715175745316187e-05, "loss": 0.0368, "step": 95980 }, { "epoch": 0.07995, "grad_norm": 0.08058516681194305, "learning_rate": 2.871108791896456e-05, "loss": 0.0354, "step": 95990 }, { "epoch": 0.08, "grad_norm": 0.08109599351882935, "learning_rate": 2.8706999991153173e-05, "loss": 0.0363, "step": 96000 }, { "epoch": 0.08005, "grad_norm": 0.07671806216239929, "learning_rate": 2.8702911961993807e-05, "loss": 0.0356, "step": 96010 }, { "epoch": 0.0801, "grad_norm": 0.0780465304851532, "learning_rate": 2.8698823831598208e-05, "loss": 0.0372, "step": 96020 }, { "epoch": 0.08015, "grad_norm": 0.10541101545095444, "learning_rate": 2.869473560007817e-05, "loss": 0.0373, "step": 96030 }, { "epoch": 0.0802, "grad_norm": 0.10399451106786728, "learning_rate": 2.869064726754544e-05, "loss": 0.0356, "step": 96040 }, { "epoch": 0.08025, "grad_norm": 0.07450667768716812, "learning_rate": 2.8686558834111804e-05, "loss": 0.0363, "step": 96050 }, { "epoch": 0.0803, "grad_norm": 0.09118582308292389, "learning_rate": 2.868247029988903e-05, "loss": 0.0362, "step": 96060 }, { "epoch": 0.08035, "grad_norm": 0.09464675188064575, "learning_rate": 2.8678381664988902e-05, "loss": 0.0358, "step": 96070 }, { "epoch": 0.0804, "grad_norm": 0.07721760869026184, "learning_rate": 2.86742929295232e-05, "loss": 0.0372, "step": 96080 }, { "epoch": 0.08045, "grad_norm": 0.09013506770133972, "learning_rate": 2.8670204093603713e-05, "loss": 0.036, "step": 96090 }, { "epoch": 0.0805, "grad_norm": 0.07364276796579361, "learning_rate": 2.8666115157342226e-05, "loss": 0.0365, "step": 96100 }, { "epoch": 0.08055, "grad_norm": 0.08563251793384552, "learning_rate": 2.8662026120850526e-05, "loss": 0.0348, "step": 96110 }, { "epoch": 0.0806, "grad_norm": 0.09776796400547028, "learning_rate": 2.8657936984240407e-05, "loss": 0.036, "step": 96120 }, { "epoch": 0.08065, "grad_norm": 0.09072226285934448, "learning_rate": 2.8653847747623665e-05, "loss": 0.0345, "step": 96130 }, { "epoch": 0.0807, "grad_norm": 0.10321402549743652, "learning_rate": 2.86497584111121e-05, "loss": 0.035, "step": 96140 }, { "epoch": 0.08075, "grad_norm": 0.08187662065029144, "learning_rate": 2.864566897481751e-05, "loss": 0.0371, "step": 96150 }, { "epoch": 0.0808, "grad_norm": 0.0743849128484726, "learning_rate": 2.86415794388517e-05, "loss": 0.0374, "step": 96160 }, { "epoch": 0.08085, "grad_norm": 0.09088624268770218, "learning_rate": 2.8637489803326472e-05, "loss": 0.0373, "step": 96170 }, { "epoch": 0.0809, "grad_norm": 0.09648296236991882, "learning_rate": 2.863340006835365e-05, "loss": 0.0368, "step": 96180 }, { "epoch": 0.08095, "grad_norm": 0.11575803905725479, "learning_rate": 2.8629310234045027e-05, "loss": 0.0365, "step": 96190 }, { "epoch": 0.081, "grad_norm": 0.10136217623949051, "learning_rate": 2.8625220300512422e-05, "loss": 0.0358, "step": 96200 }, { "epoch": 0.08105, "grad_norm": 0.08919744193553925, "learning_rate": 2.8621130267867663e-05, "loss": 0.0353, "step": 96210 }, { "epoch": 0.0811, "grad_norm": 0.08605343848466873, "learning_rate": 2.8617040136222566e-05, "loss": 0.0364, "step": 96220 }, { "epoch": 0.08115, "grad_norm": 0.08273573219776154, "learning_rate": 2.861294990568894e-05, "loss": 0.0375, "step": 96230 }, { "epoch": 0.0812, "grad_norm": 0.08137491345405579, "learning_rate": 2.8608859576378634e-05, "loss": 0.0395, "step": 96240 }, { "epoch": 0.08125, "grad_norm": 0.09367059916257858, "learning_rate": 2.8604769148403455e-05, "loss": 0.0383, "step": 96250 }, { "epoch": 0.0813, "grad_norm": 0.08180638402700424, "learning_rate": 2.8600678621875237e-05, "loss": 0.0362, "step": 96260 }, { "epoch": 0.08135, "grad_norm": 0.07581344246864319, "learning_rate": 2.8596587996905823e-05, "loss": 0.0348, "step": 96270 }, { "epoch": 0.0814, "grad_norm": 0.07765164971351624, "learning_rate": 2.859249727360705e-05, "loss": 0.0344, "step": 96280 }, { "epoch": 0.08145, "grad_norm": 0.07539539784193039, "learning_rate": 2.8588406452090742e-05, "loss": 0.0344, "step": 96290 }, { "epoch": 0.0815, "grad_norm": 0.06976207345724106, "learning_rate": 2.8584315532468757e-05, "loss": 0.0356, "step": 96300 }, { "epoch": 0.08155, "grad_norm": 0.0770931988954544, "learning_rate": 2.8580224514852928e-05, "loss": 0.0351, "step": 96310 }, { "epoch": 0.0816, "grad_norm": 0.0735212191939354, "learning_rate": 2.8576133399355105e-05, "loss": 0.0351, "step": 96320 }, { "epoch": 0.08165, "grad_norm": 0.08054427802562714, "learning_rate": 2.857204218608714e-05, "loss": 0.0335, "step": 96330 }, { "epoch": 0.0817, "grad_norm": 0.07628805190324783, "learning_rate": 2.8567950875160887e-05, "loss": 0.0343, "step": 96340 }, { "epoch": 0.08175, "grad_norm": 0.0641375333070755, "learning_rate": 2.8563859466688192e-05, "loss": 0.0336, "step": 96350 }, { "epoch": 0.0818, "grad_norm": 0.08200865238904953, "learning_rate": 2.855976796078092e-05, "loss": 0.036, "step": 96360 }, { "epoch": 0.08185, "grad_norm": 0.09143612533807755, "learning_rate": 2.8555676357550933e-05, "loss": 0.0347, "step": 96370 }, { "epoch": 0.0819, "grad_norm": 0.07617480307817459, "learning_rate": 2.855158465711008e-05, "loss": 0.0342, "step": 96380 }, { "epoch": 0.08195, "grad_norm": 0.08174902945756912, "learning_rate": 2.854749285957024e-05, "loss": 0.0362, "step": 96390 }, { "epoch": 0.082, "grad_norm": 0.07885648310184479, "learning_rate": 2.8543400965043287e-05, "loss": 0.0366, "step": 96400 }, { "epoch": 0.08205, "grad_norm": 0.07714606821537018, "learning_rate": 2.8539308973641078e-05, "loss": 0.0351, "step": 96410 }, { "epoch": 0.0821, "grad_norm": 0.07244784384965897, "learning_rate": 2.8535216885475485e-05, "loss": 0.0347, "step": 96420 }, { "epoch": 0.08215, "grad_norm": 0.0732586458325386, "learning_rate": 2.85311247006584e-05, "loss": 0.0362, "step": 96430 }, { "epoch": 0.0822, "grad_norm": 0.05604038015007973, "learning_rate": 2.8527032419301686e-05, "loss": 0.0335, "step": 96440 }, { "epoch": 0.08225, "grad_norm": 0.06942863017320633, "learning_rate": 2.8522940041517232e-05, "loss": 0.0343, "step": 96450 }, { "epoch": 0.0823, "grad_norm": 0.06616475433111191, "learning_rate": 2.8518847567416916e-05, "loss": 0.034, "step": 96460 }, { "epoch": 0.08235, "grad_norm": 0.06547105312347412, "learning_rate": 2.851475499711264e-05, "loss": 0.0369, "step": 96470 }, { "epoch": 0.0824, "grad_norm": 0.0807575210928917, "learning_rate": 2.8510662330716276e-05, "loss": 0.0357, "step": 96480 }, { "epoch": 0.08245, "grad_norm": 0.08193361759185791, "learning_rate": 2.8506569568339732e-05, "loss": 0.035, "step": 96490 }, { "epoch": 0.0825, "grad_norm": 0.07796820998191833, "learning_rate": 2.8502476710094884e-05, "loss": 0.0347, "step": 96500 }, { "epoch": 0.08255, "grad_norm": 0.07326173037290573, "learning_rate": 2.849838375609364e-05, "loss": 0.0358, "step": 96510 }, { "epoch": 0.0826, "grad_norm": 0.08830005675554276, "learning_rate": 2.8494290706447896e-05, "loss": 0.0351, "step": 96520 }, { "epoch": 0.08265, "grad_norm": 0.09686273336410522, "learning_rate": 2.849019756126956e-05, "loss": 0.0358, "step": 96530 }, { "epoch": 0.0827, "grad_norm": 0.08772691339254379, "learning_rate": 2.8486104320670532e-05, "loss": 0.035, "step": 96540 }, { "epoch": 0.08275, "grad_norm": 0.07770222425460815, "learning_rate": 2.848201098476273e-05, "loss": 0.0358, "step": 96550 }, { "epoch": 0.0828, "grad_norm": 0.07158953696489334, "learning_rate": 2.8477917553658045e-05, "loss": 0.0361, "step": 96560 }, { "epoch": 0.08285, "grad_norm": 0.07134388387203217, "learning_rate": 2.847382402746841e-05, "loss": 0.0358, "step": 96570 }, { "epoch": 0.0829, "grad_norm": 0.06280628591775894, "learning_rate": 2.8469730406305718e-05, "loss": 0.0349, "step": 96580 }, { "epoch": 0.08295, "grad_norm": 0.08362621068954468, "learning_rate": 2.846563669028191e-05, "loss": 0.0347, "step": 96590 }, { "epoch": 0.083, "grad_norm": 0.07226254791021347, "learning_rate": 2.8461542879508895e-05, "loss": 0.0341, "step": 96600 }, { "epoch": 0.08305, "grad_norm": 0.07029656320810318, "learning_rate": 2.84574489740986e-05, "loss": 0.0347, "step": 96610 }, { "epoch": 0.0831, "grad_norm": 0.059351012110710144, "learning_rate": 2.8453354974162945e-05, "loss": 0.0354, "step": 96620 }, { "epoch": 0.08315, "grad_norm": 0.08634937554597855, "learning_rate": 2.844926087981386e-05, "loss": 0.0351, "step": 96630 }, { "epoch": 0.0832, "grad_norm": 0.07627951353788376, "learning_rate": 2.8445166691163283e-05, "loss": 0.0355, "step": 96640 }, { "epoch": 0.08325, "grad_norm": 0.07544269412755966, "learning_rate": 2.8441072408323143e-05, "loss": 0.0353, "step": 96650 }, { "epoch": 0.0833, "grad_norm": 0.08564888685941696, "learning_rate": 2.8436978031405375e-05, "loss": 0.0349, "step": 96660 }, { "epoch": 0.08335, "grad_norm": 0.064504474401474, "learning_rate": 2.8432883560521915e-05, "loss": 0.0333, "step": 96670 }, { "epoch": 0.0834, "grad_norm": 0.09171956032514572, "learning_rate": 2.842878899578472e-05, "loss": 0.0344, "step": 96680 }, { "epoch": 0.08345, "grad_norm": 0.08900267630815506, "learning_rate": 2.8424694337305714e-05, "loss": 0.0355, "step": 96690 }, { "epoch": 0.0835, "grad_norm": 0.14382585883140564, "learning_rate": 2.842059958519685e-05, "loss": 0.0348, "step": 96700 }, { "epoch": 0.08355, "grad_norm": 0.09673803299665451, "learning_rate": 2.8416504739570076e-05, "loss": 0.0389, "step": 96710 }, { "epoch": 0.0836, "grad_norm": 0.1290545016527176, "learning_rate": 2.8412409800537354e-05, "loss": 0.0362, "step": 96720 }, { "epoch": 0.08365, "grad_norm": 0.07535991072654724, "learning_rate": 2.8408314768210625e-05, "loss": 0.0352, "step": 96730 }, { "epoch": 0.0837, "grad_norm": 0.07553493976593018, "learning_rate": 2.8404219642701858e-05, "loss": 0.0339, "step": 96740 }, { "epoch": 0.08375, "grad_norm": 0.09884503483772278, "learning_rate": 2.8400124424123e-05, "loss": 0.0342, "step": 96750 }, { "epoch": 0.0838, "grad_norm": 0.07270557433366776, "learning_rate": 2.839602911258602e-05, "loss": 0.0328, "step": 96760 }, { "epoch": 0.08385, "grad_norm": 0.08007807284593582, "learning_rate": 2.8391933708202867e-05, "loss": 0.0351, "step": 96770 }, { "epoch": 0.0839, "grad_norm": 0.08742036670446396, "learning_rate": 2.8387838211085534e-05, "loss": 0.0349, "step": 96780 }, { "epoch": 0.08395, "grad_norm": 0.07782687991857529, "learning_rate": 2.838374262134597e-05, "loss": 0.0352, "step": 96790 }, { "epoch": 0.084, "grad_norm": 0.07308624684810638, "learning_rate": 2.837964693909616e-05, "loss": 0.0341, "step": 96800 }, { "epoch": 0.08405, "grad_norm": 0.06709211319684982, "learning_rate": 2.837555116444807e-05, "loss": 0.0346, "step": 96810 }, { "epoch": 0.0841, "grad_norm": 0.07162594050168991, "learning_rate": 2.8371455297513683e-05, "loss": 0.0346, "step": 96820 }, { "epoch": 0.08415, "grad_norm": 0.0674961507320404, "learning_rate": 2.8367359338404963e-05, "loss": 0.0329, "step": 96830 }, { "epoch": 0.0842, "grad_norm": 0.061539795249700546, "learning_rate": 2.8363263287233916e-05, "loss": 0.0339, "step": 96840 }, { "epoch": 0.08425, "grad_norm": 0.07633615285158157, "learning_rate": 2.835916714411251e-05, "loss": 0.0341, "step": 96850 }, { "epoch": 0.0843, "grad_norm": 0.07183895260095596, "learning_rate": 2.8355070909152738e-05, "loss": 0.0344, "step": 96860 }, { "epoch": 0.08435, "grad_norm": 0.07868233323097229, "learning_rate": 2.8350974582466583e-05, "loss": 0.0401, "step": 96870 }, { "epoch": 0.0844, "grad_norm": 0.10423652827739716, "learning_rate": 2.8346878164166042e-05, "loss": 0.0372, "step": 96880 }, { "epoch": 0.08445, "grad_norm": 0.09205686300992966, "learning_rate": 2.834278165436311e-05, "loss": 0.0349, "step": 96890 }, { "epoch": 0.0845, "grad_norm": 0.08706831187009811, "learning_rate": 2.833868505316979e-05, "loss": 0.0368, "step": 96900 }, { "epoch": 0.08455, "grad_norm": 0.06951024383306503, "learning_rate": 2.8334588360698066e-05, "loss": 0.0343, "step": 96910 }, { "epoch": 0.0846, "grad_norm": 0.09689069539308548, "learning_rate": 2.8330491577059953e-05, "loss": 0.0352, "step": 96920 }, { "epoch": 0.08465, "grad_norm": 0.07028918713331223, "learning_rate": 2.8326394702367452e-05, "loss": 0.0347, "step": 96930 }, { "epoch": 0.0847, "grad_norm": 0.0676390528678894, "learning_rate": 2.832229773673257e-05, "loss": 0.0346, "step": 96940 }, { "epoch": 0.08475, "grad_norm": 0.06127481535077095, "learning_rate": 2.831820068026732e-05, "loss": 0.0344, "step": 96950 }, { "epoch": 0.0848, "grad_norm": 0.07048854976892471, "learning_rate": 2.8314103533083698e-05, "loss": 0.0359, "step": 96960 }, { "epoch": 0.08485, "grad_norm": 0.08029826730489731, "learning_rate": 2.831000629529374e-05, "loss": 0.0371, "step": 96970 }, { "epoch": 0.0849, "grad_norm": 0.08408576250076294, "learning_rate": 2.8305908967009446e-05, "loss": 0.0358, "step": 96980 }, { "epoch": 0.08495, "grad_norm": 0.0695815235376358, "learning_rate": 2.8301811548342856e-05, "loss": 0.0371, "step": 96990 }, { "epoch": 0.085, "grad_norm": 0.07242295891046524, "learning_rate": 2.8297714039405965e-05, "loss": 0.0345, "step": 97000 }, { "epoch": 0.08505, "grad_norm": 0.06739848107099533, "learning_rate": 2.8293616440310823e-05, "loss": 0.0365, "step": 97010 }, { "epoch": 0.0851, "grad_norm": 0.0829663798213005, "learning_rate": 2.828951875116943e-05, "loss": 0.0365, "step": 97020 }, { "epoch": 0.08515, "grad_norm": 0.07622525840997696, "learning_rate": 2.828542097209384e-05, "loss": 0.0365, "step": 97030 }, { "epoch": 0.0852, "grad_norm": 0.09016449749469757, "learning_rate": 2.8281323103196073e-05, "loss": 0.0362, "step": 97040 }, { "epoch": 0.08525, "grad_norm": 0.09533847868442535, "learning_rate": 2.827722514458817e-05, "loss": 0.0361, "step": 97050 }, { "epoch": 0.0853, "grad_norm": 0.08968351781368256, "learning_rate": 2.8273127096382157e-05, "loss": 0.036, "step": 97060 }, { "epoch": 0.08535, "grad_norm": 0.0862366333603859, "learning_rate": 2.8269028958690087e-05, "loss": 0.035, "step": 97070 }, { "epoch": 0.0854, "grad_norm": 0.08487775176763535, "learning_rate": 2.8264930731623983e-05, "loss": 0.0371, "step": 97080 }, { "epoch": 0.08545, "grad_norm": 0.09566416591405869, "learning_rate": 2.82608324152959e-05, "loss": 0.0344, "step": 97090 }, { "epoch": 0.0855, "grad_norm": 0.07718129456043243, "learning_rate": 2.8256734009817887e-05, "loss": 0.0355, "step": 97100 }, { "epoch": 0.08555, "grad_norm": 0.0778222531080246, "learning_rate": 2.825263551530199e-05, "loss": 0.0343, "step": 97110 }, { "epoch": 0.0856, "grad_norm": 0.07489711046218872, "learning_rate": 2.824853693186026e-05, "loss": 0.0342, "step": 97120 }, { "epoch": 0.08565, "grad_norm": 0.07497640699148178, "learning_rate": 2.8244438259604744e-05, "loss": 0.0373, "step": 97130 }, { "epoch": 0.0857, "grad_norm": 0.07032839953899384, "learning_rate": 2.8240339498647518e-05, "loss": 0.0336, "step": 97140 }, { "epoch": 0.08575, "grad_norm": 0.0681866928935051, "learning_rate": 2.823624064910061e-05, "loss": 0.0351, "step": 97150 }, { "epoch": 0.0858, "grad_norm": 0.06538959592580795, "learning_rate": 2.8232141711076115e-05, "loss": 0.0333, "step": 97160 }, { "epoch": 0.08585, "grad_norm": 0.09290921688079834, "learning_rate": 2.8228042684686072e-05, "loss": 0.0348, "step": 97170 }, { "epoch": 0.0859, "grad_norm": 0.08239532262086868, "learning_rate": 2.822394357004256e-05, "loss": 0.0347, "step": 97180 }, { "epoch": 0.08595, "grad_norm": 0.08641859143972397, "learning_rate": 2.8219844367257637e-05, "loss": 0.0348, "step": 97190 }, { "epoch": 0.086, "grad_norm": 0.0688556358218193, "learning_rate": 2.8215745076443383e-05, "loss": 0.0341, "step": 97200 }, { "epoch": 0.08605, "grad_norm": 0.058573488146066666, "learning_rate": 2.821164569771186e-05, "loss": 0.0339, "step": 97210 }, { "epoch": 0.0861, "grad_norm": 0.05978013575077057, "learning_rate": 2.8207546231175152e-05, "loss": 0.0338, "step": 97220 }, { "epoch": 0.08615, "grad_norm": 0.0704859048128128, "learning_rate": 2.8203446676945337e-05, "loss": 0.0379, "step": 97230 }, { "epoch": 0.0862, "grad_norm": 0.07969992607831955, "learning_rate": 2.8199347035134498e-05, "loss": 0.0349, "step": 97240 }, { "epoch": 0.08625, "grad_norm": 0.07378263771533966, "learning_rate": 2.8195247305854706e-05, "loss": 0.0358, "step": 97250 }, { "epoch": 0.0863, "grad_norm": 0.0759364664554596, "learning_rate": 2.8191147489218062e-05, "loss": 0.0348, "step": 97260 }, { "epoch": 0.08635, "grad_norm": 0.06266133487224579, "learning_rate": 2.8187047585336634e-05, "loss": 0.0341, "step": 97270 }, { "epoch": 0.0864, "grad_norm": 0.06443309783935547, "learning_rate": 2.8182947594322524e-05, "loss": 0.0342, "step": 97280 }, { "epoch": 0.08645, "grad_norm": 0.09148667752742767, "learning_rate": 2.8178847516287822e-05, "loss": 0.0341, "step": 97290 }, { "epoch": 0.0865, "grad_norm": 0.07785467058420181, "learning_rate": 2.8174747351344633e-05, "loss": 0.035, "step": 97300 }, { "epoch": 0.08655, "grad_norm": 0.08465701341629028, "learning_rate": 2.817064709960503e-05, "loss": 0.0359, "step": 97310 }, { "epoch": 0.0866, "grad_norm": 0.07838905602693558, "learning_rate": 2.8166546761181138e-05, "loss": 0.0343, "step": 97320 }, { "epoch": 0.08665, "grad_norm": 0.08312281221151352, "learning_rate": 2.8162446336185045e-05, "loss": 0.0347, "step": 97330 }, { "epoch": 0.0867, "grad_norm": 0.06643358618021011, "learning_rate": 2.815834582472885e-05, "loss": 0.0343, "step": 97340 }, { "epoch": 0.08675, "grad_norm": 0.06893420219421387, "learning_rate": 2.815424522692467e-05, "loss": 0.0342, "step": 97350 }, { "epoch": 0.0868, "grad_norm": 0.07925647497177124, "learning_rate": 2.815014454288461e-05, "loss": 0.0344, "step": 97360 }, { "epoch": 0.08685, "grad_norm": 0.10124651342630386, "learning_rate": 2.8146043772720787e-05, "loss": 0.038, "step": 97370 }, { "epoch": 0.0869, "grad_norm": 0.10052763670682907, "learning_rate": 2.8141942916545306e-05, "loss": 0.034, "step": 97380 }, { "epoch": 0.08695, "grad_norm": 0.10913696885108948, "learning_rate": 2.8137841974470286e-05, "loss": 0.0359, "step": 97390 }, { "epoch": 0.087, "grad_norm": 0.11984412372112274, "learning_rate": 2.813374094660784e-05, "loss": 0.0402, "step": 97400 }, { "epoch": 0.08705, "grad_norm": 0.09089337289333344, "learning_rate": 2.8129639833070103e-05, "loss": 0.035, "step": 97410 }, { "epoch": 0.0871, "grad_norm": 0.07296092063188553, "learning_rate": 2.8125538633969183e-05, "loss": 0.0367, "step": 97420 }, { "epoch": 0.08715, "grad_norm": 0.07328640669584274, "learning_rate": 2.8121437349417218e-05, "loss": 0.0335, "step": 97430 }, { "epoch": 0.0872, "grad_norm": 0.07069529592990875, "learning_rate": 2.811733597952632e-05, "loss": 0.0342, "step": 97440 }, { "epoch": 0.08725, "grad_norm": 0.11309466511011124, "learning_rate": 2.811323452440863e-05, "loss": 0.0365, "step": 97450 }, { "epoch": 0.0873, "grad_norm": 0.07509417086839676, "learning_rate": 2.8109132984176278e-05, "loss": 0.0356, "step": 97460 }, { "epoch": 0.08735, "grad_norm": 0.08614693582057953, "learning_rate": 2.8105031358941397e-05, "loss": 0.0334, "step": 97470 }, { "epoch": 0.0874, "grad_norm": 0.08508718758821487, "learning_rate": 2.8100929648816128e-05, "loss": 0.0357, "step": 97480 }, { "epoch": 0.08745, "grad_norm": 0.07222725450992584, "learning_rate": 2.8096827853912612e-05, "loss": 0.0346, "step": 97490 }, { "epoch": 0.0875, "grad_norm": 0.08393757790327072, "learning_rate": 2.8092725974342976e-05, "loss": 0.0343, "step": 97500 }, { "epoch": 0.08755, "grad_norm": 0.07629408687353134, "learning_rate": 2.8088624010219378e-05, "loss": 0.0363, "step": 97510 }, { "epoch": 0.0876, "grad_norm": 0.0961424857378006, "learning_rate": 2.808452196165396e-05, "loss": 0.0354, "step": 97520 }, { "epoch": 0.08765, "grad_norm": 0.09951150417327881, "learning_rate": 2.808041982875887e-05, "loss": 0.0348, "step": 97530 }, { "epoch": 0.0877, "grad_norm": 0.11493387818336487, "learning_rate": 2.8076317611646253e-05, "loss": 0.0355, "step": 97540 }, { "epoch": 0.08775, "grad_norm": 0.09941182285547256, "learning_rate": 2.8072215310428278e-05, "loss": 0.0347, "step": 97550 }, { "epoch": 0.0878, "grad_norm": 0.11129660159349442, "learning_rate": 2.806811292521709e-05, "loss": 0.0386, "step": 97560 }, { "epoch": 0.08785, "grad_norm": 0.07355191558599472, "learning_rate": 2.8064010456124838e-05, "loss": 0.0346, "step": 97570 }, { "epoch": 0.0879, "grad_norm": 0.07286284118890762, "learning_rate": 2.8059907903263705e-05, "loss": 0.0342, "step": 97580 }, { "epoch": 0.08795, "grad_norm": 0.06354943662881851, "learning_rate": 2.8055805266745827e-05, "loss": 0.0346, "step": 97590 }, { "epoch": 0.088, "grad_norm": 0.05854567885398865, "learning_rate": 2.8051702546683385e-05, "loss": 0.0349, "step": 97600 }, { "epoch": 0.08805, "grad_norm": 0.07032527029514313, "learning_rate": 2.804759974318854e-05, "loss": 0.0346, "step": 97610 }, { "epoch": 0.0881, "grad_norm": 0.07550732046365738, "learning_rate": 2.804349685637347e-05, "loss": 0.035, "step": 97620 }, { "epoch": 0.08815, "grad_norm": 0.06810349971055984, "learning_rate": 2.8039393886350335e-05, "loss": 0.0345, "step": 97630 }, { "epoch": 0.0882, "grad_norm": 0.07226649671792984, "learning_rate": 2.8035290833231316e-05, "loss": 0.034, "step": 97640 }, { "epoch": 0.08825, "grad_norm": 0.07276780903339386, "learning_rate": 2.803118769712858e-05, "loss": 0.0354, "step": 97650 }, { "epoch": 0.0883, "grad_norm": 0.07253876328468323, "learning_rate": 2.8027084478154315e-05, "loss": 0.0339, "step": 97660 }, { "epoch": 0.08835, "grad_norm": 0.10958126932382584, "learning_rate": 2.8022981176420694e-05, "loss": 0.035, "step": 97670 }, { "epoch": 0.0884, "grad_norm": 0.09185132384300232, "learning_rate": 2.801887779203991e-05, "loss": 0.0354, "step": 97680 }, { "epoch": 0.08845, "grad_norm": 0.08397877961397171, "learning_rate": 2.801477432512413e-05, "loss": 0.0345, "step": 97690 }, { "epoch": 0.0885, "grad_norm": 0.07449831068515778, "learning_rate": 2.8010670775785568e-05, "loss": 0.0366, "step": 97700 }, { "epoch": 0.08855, "grad_norm": 0.0895819440484047, "learning_rate": 2.8006567144136385e-05, "loss": 0.0354, "step": 97710 }, { "epoch": 0.0886, "grad_norm": 0.10191086679697037, "learning_rate": 2.8002463430288794e-05, "loss": 0.0356, "step": 97720 }, { "epoch": 0.08865, "grad_norm": 0.07982117682695389, "learning_rate": 2.7998359634354976e-05, "loss": 0.0338, "step": 97730 }, { "epoch": 0.0887, "grad_norm": 0.07811693847179413, "learning_rate": 2.7994255756447135e-05, "loss": 0.0349, "step": 97740 }, { "epoch": 0.08875, "grad_norm": 0.07560224831104279, "learning_rate": 2.799015179667746e-05, "loss": 0.0347, "step": 97750 }, { "epoch": 0.0888, "grad_norm": 0.06786207854747772, "learning_rate": 2.7986047755158168e-05, "loss": 0.0339, "step": 97760 }, { "epoch": 0.08885, "grad_norm": 0.06226731091737747, "learning_rate": 2.798194363200145e-05, "loss": 0.0341, "step": 97770 }, { "epoch": 0.0889, "grad_norm": 0.06887473911046982, "learning_rate": 2.7977839427319508e-05, "loss": 0.035, "step": 97780 }, { "epoch": 0.08895, "grad_norm": 0.06426707655191422, "learning_rate": 2.7973735141224555e-05, "loss": 0.0346, "step": 97790 }, { "epoch": 0.089, "grad_norm": 0.06857171654701233, "learning_rate": 2.7969630773828802e-05, "loss": 0.0362, "step": 97800 }, { "epoch": 0.08905, "grad_norm": 0.07259707897901535, "learning_rate": 2.7965526325244463e-05, "loss": 0.0366, "step": 97810 }, { "epoch": 0.0891, "grad_norm": 0.0817384123802185, "learning_rate": 2.7961421795583743e-05, "loss": 0.0374, "step": 97820 }, { "epoch": 0.08915, "grad_norm": 0.0681479424238205, "learning_rate": 2.795731718495887e-05, "loss": 0.036, "step": 97830 }, { "epoch": 0.0892, "grad_norm": 0.05921397730708122, "learning_rate": 2.795321249348205e-05, "loss": 0.0339, "step": 97840 }, { "epoch": 0.08925, "grad_norm": 0.06496920436620712, "learning_rate": 2.794910772126551e-05, "loss": 0.0353, "step": 97850 }, { "epoch": 0.0893, "grad_norm": 0.07719899713993073, "learning_rate": 2.7945002868421478e-05, "loss": 0.0359, "step": 97860 }, { "epoch": 0.08935, "grad_norm": 0.08060154318809509, "learning_rate": 2.794089793506217e-05, "loss": 0.0356, "step": 97870 }, { "epoch": 0.0894, "grad_norm": 0.09261590242385864, "learning_rate": 2.793679292129982e-05, "loss": 0.0351, "step": 97880 }, { "epoch": 0.08945, "grad_norm": 0.10092183202505112, "learning_rate": 2.7932687827246656e-05, "loss": 0.0374, "step": 97890 }, { "epoch": 0.0895, "grad_norm": 0.0752311423420906, "learning_rate": 2.79285826530149e-05, "loss": 0.0341, "step": 97900 }, { "epoch": 0.08955, "grad_norm": 0.11677830666303635, "learning_rate": 2.7924477398716803e-05, "loss": 0.0354, "step": 97910 }, { "epoch": 0.0896, "grad_norm": 0.07338506728410721, "learning_rate": 2.792037206446459e-05, "loss": 0.0361, "step": 97920 }, { "epoch": 0.08965, "grad_norm": 0.06555341184139252, "learning_rate": 2.7916266650370504e-05, "loss": 0.0354, "step": 97930 }, { "epoch": 0.0897, "grad_norm": 0.0656813457608223, "learning_rate": 2.791216115654678e-05, "loss": 0.035, "step": 97940 }, { "epoch": 0.08975, "grad_norm": 0.07416542619466782, "learning_rate": 2.7908055583105668e-05, "loss": 0.0358, "step": 97950 }, { "epoch": 0.0898, "grad_norm": 0.0651547834277153, "learning_rate": 2.7903949930159402e-05, "loss": 0.035, "step": 97960 }, { "epoch": 0.08985, "grad_norm": 0.0780981034040451, "learning_rate": 2.7899844197820246e-05, "loss": 0.0353, "step": 97970 }, { "epoch": 0.0899, "grad_norm": 0.0633014664053917, "learning_rate": 2.7895738386200425e-05, "loss": 0.0356, "step": 97980 }, { "epoch": 0.08995, "grad_norm": 0.06496810168027878, "learning_rate": 2.7891632495412217e-05, "loss": 0.0347, "step": 97990 }, { "epoch": 0.09, "grad_norm": 0.08917754143476486, "learning_rate": 2.788752652556785e-05, "loss": 0.0379, "step": 98000 }, { "epoch": 0.09005, "grad_norm": 0.08220583200454712, "learning_rate": 2.788342047677961e-05, "loss": 0.037, "step": 98010 }, { "epoch": 0.0901, "grad_norm": 0.083226278424263, "learning_rate": 2.7879314349159724e-05, "loss": 0.0346, "step": 98020 }, { "epoch": 0.09015, "grad_norm": 0.07575386017560959, "learning_rate": 2.787520814282047e-05, "loss": 0.0368, "step": 98030 }, { "epoch": 0.0902, "grad_norm": 0.07754475623369217, "learning_rate": 2.7871101857874106e-05, "loss": 0.0373, "step": 98040 }, { "epoch": 0.09025, "grad_norm": 0.08634736388921738, "learning_rate": 2.7866995494432897e-05, "loss": 0.0363, "step": 98050 }, { "epoch": 0.0903, "grad_norm": 0.08915960788726807, "learning_rate": 2.7862889052609105e-05, "loss": 0.0379, "step": 98060 }, { "epoch": 0.09035, "grad_norm": 0.07953356206417084, "learning_rate": 2.7858782532515e-05, "loss": 0.0371, "step": 98070 }, { "epoch": 0.0904, "grad_norm": 0.06254126876592636, "learning_rate": 2.7854675934262864e-05, "loss": 0.0357, "step": 98080 }, { "epoch": 0.09045, "grad_norm": 0.06277334690093994, "learning_rate": 2.7850569257964954e-05, "loss": 0.0364, "step": 98090 }, { "epoch": 0.0905, "grad_norm": 0.08272846043109894, "learning_rate": 2.7846462503733544e-05, "loss": 0.0369, "step": 98100 }, { "epoch": 0.09055, "grad_norm": 0.0841066911816597, "learning_rate": 2.7842355671680925e-05, "loss": 0.0362, "step": 98110 }, { "epoch": 0.0906, "grad_norm": 0.07980770617723465, "learning_rate": 2.783824876191938e-05, "loss": 0.0358, "step": 98120 }, { "epoch": 0.09065, "grad_norm": 0.08716199547052383, "learning_rate": 2.7834141774561168e-05, "loss": 0.0347, "step": 98130 }, { "epoch": 0.0907, "grad_norm": 0.07199542969465256, "learning_rate": 2.783003470971859e-05, "loss": 0.0344, "step": 98140 }, { "epoch": 0.09075, "grad_norm": 0.058281343430280685, "learning_rate": 2.7825927567503924e-05, "loss": 0.0356, "step": 98150 }, { "epoch": 0.0908, "grad_norm": 0.07014793157577515, "learning_rate": 2.782182034802946e-05, "loss": 0.0359, "step": 98160 }, { "epoch": 0.09085, "grad_norm": 0.07957247644662857, "learning_rate": 2.781771305140748e-05, "loss": 0.0341, "step": 98170 }, { "epoch": 0.0909, "grad_norm": 0.07609190791845322, "learning_rate": 2.7813605677750297e-05, "loss": 0.035, "step": 98180 }, { "epoch": 0.09095, "grad_norm": 0.09158436208963394, "learning_rate": 2.7809498227170184e-05, "loss": 0.0345, "step": 98190 }, { "epoch": 0.091, "grad_norm": 0.07892835885286331, "learning_rate": 2.780539069977945e-05, "loss": 0.0353, "step": 98200 }, { "epoch": 0.09105, "grad_norm": 0.08445654064416885, "learning_rate": 2.7801283095690384e-05, "loss": 0.0335, "step": 98210 }, { "epoch": 0.0911, "grad_norm": 0.09106568247079849, "learning_rate": 2.77971754150153e-05, "loss": 0.0344, "step": 98220 }, { "epoch": 0.09115, "grad_norm": 0.0864868089556694, "learning_rate": 2.779306765786647e-05, "loss": 0.0352, "step": 98230 }, { "epoch": 0.0912, "grad_norm": 0.07761655747890472, "learning_rate": 2.7788959824356238e-05, "loss": 0.0338, "step": 98240 }, { "epoch": 0.09125, "grad_norm": 0.07849200069904327, "learning_rate": 2.778485191459688e-05, "loss": 0.0347, "step": 98250 }, { "epoch": 0.0913, "grad_norm": 0.07718189060688019, "learning_rate": 2.778074392870073e-05, "loss": 0.0348, "step": 98260 }, { "epoch": 0.09135, "grad_norm": 0.08885542303323746, "learning_rate": 2.7776635866780077e-05, "loss": 0.0337, "step": 98270 }, { "epoch": 0.0914, "grad_norm": 0.07728283107280731, "learning_rate": 2.7772527728947247e-05, "loss": 0.0349, "step": 98280 }, { "epoch": 0.09145, "grad_norm": 0.06739954650402069, "learning_rate": 2.7768419515314542e-05, "loss": 0.0345, "step": 98290 }, { "epoch": 0.0915, "grad_norm": 0.07996700704097748, "learning_rate": 2.77643112259943e-05, "loss": 0.037, "step": 98300 }, { "epoch": 0.09155, "grad_norm": 0.07308963686227798, "learning_rate": 2.7760202861098815e-05, "loss": 0.0334, "step": 98310 }, { "epoch": 0.0916, "grad_norm": 0.07305646687746048, "learning_rate": 2.7756094420740432e-05, "loss": 0.0352, "step": 98320 }, { "epoch": 0.09165, "grad_norm": 0.085787333548069, "learning_rate": 2.775198590503146e-05, "loss": 0.0351, "step": 98330 }, { "epoch": 0.0917, "grad_norm": 0.06632562726736069, "learning_rate": 2.774787731408422e-05, "loss": 0.035, "step": 98340 }, { "epoch": 0.09175, "grad_norm": 0.07978537678718567, "learning_rate": 2.7743768648011053e-05, "loss": 0.0341, "step": 98350 }, { "epoch": 0.0918, "grad_norm": 0.07276801019906998, "learning_rate": 2.7739659906924274e-05, "loss": 0.0357, "step": 98360 }, { "epoch": 0.09185, "grad_norm": 0.08967699110507965, "learning_rate": 2.7735551090936236e-05, "loss": 0.0361, "step": 98370 }, { "epoch": 0.0919, "grad_norm": 0.07864616811275482, "learning_rate": 2.7731442200159247e-05, "loss": 0.0346, "step": 98380 }, { "epoch": 0.09195, "grad_norm": 0.11499129235744476, "learning_rate": 2.7727333234705665e-05, "loss": 0.0399, "step": 98390 }, { "epoch": 0.092, "grad_norm": 0.10010307282209396, "learning_rate": 2.7723224194687807e-05, "loss": 0.035, "step": 98400 }, { "epoch": 0.09205, "grad_norm": 0.08330327272415161, "learning_rate": 2.7719115080218033e-05, "loss": 0.042, "step": 98410 }, { "epoch": 0.0921, "grad_norm": 0.09687450528144836, "learning_rate": 2.7715005891408663e-05, "loss": 0.0343, "step": 98420 }, { "epoch": 0.09215, "grad_norm": 0.07836932688951492, "learning_rate": 2.7710896628372058e-05, "loss": 0.0383, "step": 98430 }, { "epoch": 0.0922, "grad_norm": 0.090105801820755, "learning_rate": 2.7706787291220554e-05, "loss": 0.0366, "step": 98440 }, { "epoch": 0.09225, "grad_norm": 0.07002262026071548, "learning_rate": 2.770267788006651e-05, "loss": 0.0348, "step": 98450 }, { "epoch": 0.0923, "grad_norm": 0.09269098192453384, "learning_rate": 2.7698568395022263e-05, "loss": 0.0347, "step": 98460 }, { "epoch": 0.09235, "grad_norm": 0.07973234355449677, "learning_rate": 2.769445883620017e-05, "loss": 0.0354, "step": 98470 }, { "epoch": 0.0924, "grad_norm": 0.07400687038898468, "learning_rate": 2.7690349203712585e-05, "loss": 0.0368, "step": 98480 }, { "epoch": 0.09245, "grad_norm": 0.10803262144327164, "learning_rate": 2.7686239497671863e-05, "loss": 0.0363, "step": 98490 }, { "epoch": 0.0925, "grad_norm": 0.07510066777467728, "learning_rate": 2.768212971819036e-05, "loss": 0.0341, "step": 98500 }, { "epoch": 0.09255, "grad_norm": 0.07289294898509979, "learning_rate": 2.7678019865380443e-05, "loss": 0.0344, "step": 98510 }, { "epoch": 0.0926, "grad_norm": 0.06677382439374924, "learning_rate": 2.7673909939354464e-05, "loss": 0.0343, "step": 98520 }, { "epoch": 0.09265, "grad_norm": 0.057776324450969696, "learning_rate": 2.7669799940224794e-05, "loss": 0.0341, "step": 98530 }, { "epoch": 0.0927, "grad_norm": 0.07983548939228058, "learning_rate": 2.76656898681038e-05, "loss": 0.0368, "step": 98540 }, { "epoch": 0.09275, "grad_norm": 0.08397910743951797, "learning_rate": 2.7661579723103844e-05, "loss": 0.0349, "step": 98550 }, { "epoch": 0.0928, "grad_norm": 0.0787234827876091, "learning_rate": 2.765746950533729e-05, "loss": 0.0339, "step": 98560 }, { "epoch": 0.09285, "grad_norm": 0.0803915411233902, "learning_rate": 2.7653359214916524e-05, "loss": 0.0346, "step": 98570 }, { "epoch": 0.0929, "grad_norm": 0.10380898416042328, "learning_rate": 2.7649248851953925e-05, "loss": 0.0346, "step": 98580 }, { "epoch": 0.09295, "grad_norm": 0.08209935575723648, "learning_rate": 2.7645138416561843e-05, "loss": 0.0341, "step": 98590 }, { "epoch": 0.093, "grad_norm": 0.07948790490627289, "learning_rate": 2.764102790885268e-05, "loss": 0.0349, "step": 98600 }, { "epoch": 0.09305, "grad_norm": 0.0787932276725769, "learning_rate": 2.7636917328938794e-05, "loss": 0.0353, "step": 98610 }, { "epoch": 0.0931, "grad_norm": 0.0815957635641098, "learning_rate": 2.7632806676932594e-05, "loss": 0.0344, "step": 98620 }, { "epoch": 0.09315, "grad_norm": 0.061056435108184814, "learning_rate": 2.7628695952946436e-05, "loss": 0.0371, "step": 98630 }, { "epoch": 0.0932, "grad_norm": 0.0791785940527916, "learning_rate": 2.762458515709273e-05, "loss": 0.0341, "step": 98640 }, { "epoch": 0.09325, "grad_norm": 0.09801524877548218, "learning_rate": 2.7620474289483843e-05, "loss": 0.036, "step": 98650 }, { "epoch": 0.0933, "grad_norm": 0.07026061415672302, "learning_rate": 2.7616363350232177e-05, "loss": 0.0352, "step": 98660 }, { "epoch": 0.09335, "grad_norm": 0.07985706627368927, "learning_rate": 2.761225233945012e-05, "loss": 0.0353, "step": 98670 }, { "epoch": 0.0934, "grad_norm": 0.07724630832672119, "learning_rate": 2.760814125725006e-05, "loss": 0.0351, "step": 98680 }, { "epoch": 0.09345, "grad_norm": 0.09305231273174286, "learning_rate": 2.76040301037444e-05, "loss": 0.0349, "step": 98690 }, { "epoch": 0.0935, "grad_norm": 0.09154807776212692, "learning_rate": 2.759991887904554e-05, "loss": 0.0363, "step": 98700 }, { "epoch": 0.09355, "grad_norm": 0.08939708769321442, "learning_rate": 2.759580758326587e-05, "loss": 0.0374, "step": 98710 }, { "epoch": 0.0936, "grad_norm": 0.07384146004915237, "learning_rate": 2.7591696216517804e-05, "loss": 0.0381, "step": 98720 }, { "epoch": 0.09365, "grad_norm": 0.07931797206401825, "learning_rate": 2.7587584778913727e-05, "loss": 0.0349, "step": 98730 }, { "epoch": 0.0937, "grad_norm": 0.06376132369041443, "learning_rate": 2.7583473270566058e-05, "loss": 0.0344, "step": 98740 }, { "epoch": 0.09375, "grad_norm": 0.0623442605137825, "learning_rate": 2.7579361691587198e-05, "loss": 0.0367, "step": 98750 }, { "epoch": 0.0938, "grad_norm": 0.06312055140733719, "learning_rate": 2.7575250042089562e-05, "loss": 0.0365, "step": 98760 }, { "epoch": 0.09385, "grad_norm": 0.06792078167200089, "learning_rate": 2.7571138322185558e-05, "loss": 0.0358, "step": 98770 }, { "epoch": 0.0939, "grad_norm": 0.06359875947237015, "learning_rate": 2.7567026531987594e-05, "loss": 0.0342, "step": 98780 }, { "epoch": 0.09395, "grad_norm": 0.06873776018619537, "learning_rate": 2.7562914671608092e-05, "loss": 0.0342, "step": 98790 }, { "epoch": 0.094, "grad_norm": 0.07305468618869781, "learning_rate": 2.7558802741159463e-05, "loss": 0.0357, "step": 98800 }, { "epoch": 0.09405, "grad_norm": 0.07306445389986038, "learning_rate": 2.755469074075413e-05, "loss": 0.0355, "step": 98810 }, { "epoch": 0.0941, "grad_norm": 0.08633474260568619, "learning_rate": 2.755057867050451e-05, "loss": 0.0363, "step": 98820 }, { "epoch": 0.09415, "grad_norm": 0.08300592750310898, "learning_rate": 2.7546466530523035e-05, "loss": 0.0331, "step": 98830 }, { "epoch": 0.0942, "grad_norm": 0.08110862970352173, "learning_rate": 2.7542354320922115e-05, "loss": 0.0357, "step": 98840 }, { "epoch": 0.09425, "grad_norm": 0.07080795615911484, "learning_rate": 2.753824204181419e-05, "loss": 0.0344, "step": 98850 }, { "epoch": 0.0943, "grad_norm": 0.0670158788561821, "learning_rate": 2.7534129693311674e-05, "loss": 0.0342, "step": 98860 }, { "epoch": 0.09435, "grad_norm": 0.0654955580830574, "learning_rate": 2.7530017275527e-05, "loss": 0.0354, "step": 98870 }, { "epoch": 0.0944, "grad_norm": 0.08094431459903717, "learning_rate": 2.7525904788572608e-05, "loss": 0.0341, "step": 98880 }, { "epoch": 0.09445, "grad_norm": 0.09253891557455063, "learning_rate": 2.7521792232560932e-05, "loss": 0.0382, "step": 98890 }, { "epoch": 0.0945, "grad_norm": 0.10331499576568604, "learning_rate": 2.7517679607604402e-05, "loss": 0.0353, "step": 98900 }, { "epoch": 0.09455, "grad_norm": 0.07507817447185516, "learning_rate": 2.7513566913815458e-05, "loss": 0.0349, "step": 98910 }, { "epoch": 0.0946, "grad_norm": 0.08221866935491562, "learning_rate": 2.7509454151306534e-05, "loss": 0.0353, "step": 98920 }, { "epoch": 0.09465, "grad_norm": 0.08716592192649841, "learning_rate": 2.750534132019008e-05, "loss": 0.0347, "step": 98930 }, { "epoch": 0.0947, "grad_norm": 0.0685884952545166, "learning_rate": 2.7501228420578533e-05, "loss": 0.037, "step": 98940 }, { "epoch": 0.09475, "grad_norm": 0.08891511708498001, "learning_rate": 2.749711545258435e-05, "loss": 0.0363, "step": 98950 }, { "epoch": 0.0948, "grad_norm": 0.08393566310405731, "learning_rate": 2.7493002416319958e-05, "loss": 0.0364, "step": 98960 }, { "epoch": 0.09485, "grad_norm": 0.070647694170475, "learning_rate": 2.7488889311897826e-05, "loss": 0.0357, "step": 98970 }, { "epoch": 0.0949, "grad_norm": 0.10159338265657425, "learning_rate": 2.748477613943039e-05, "loss": 0.0366, "step": 98980 }, { "epoch": 0.09495, "grad_norm": 0.0961875468492508, "learning_rate": 2.7480662899030103e-05, "loss": 0.0354, "step": 98990 }, { "epoch": 0.095, "grad_norm": 0.07758517563343048, "learning_rate": 2.7476549590809425e-05, "loss": 0.0367, "step": 99000 }, { "epoch": 0.09505, "grad_norm": 0.07822514325380325, "learning_rate": 2.747243621488082e-05, "loss": 0.0361, "step": 99010 }, { "epoch": 0.0951, "grad_norm": 0.0853765457868576, "learning_rate": 2.7468322771356736e-05, "loss": 0.0357, "step": 99020 }, { "epoch": 0.09515, "grad_norm": 0.10141955316066742, "learning_rate": 2.746420926034963e-05, "loss": 0.0353, "step": 99030 }, { "epoch": 0.0952, "grad_norm": 0.0946832075715065, "learning_rate": 2.746009568197197e-05, "loss": 0.0354, "step": 99040 }, { "epoch": 0.09525, "grad_norm": 0.08281733095645905, "learning_rate": 2.745598203633622e-05, "loss": 0.0351, "step": 99050 }, { "epoch": 0.0953, "grad_norm": 0.09299663454294205, "learning_rate": 2.7451868323554842e-05, "loss": 0.0351, "step": 99060 }, { "epoch": 0.09535, "grad_norm": 0.11017463356256485, "learning_rate": 2.74477545437403e-05, "loss": 0.0364, "step": 99070 }, { "epoch": 0.0954, "grad_norm": 0.09078861027956009, "learning_rate": 2.744364069700508e-05, "loss": 0.0367, "step": 99080 }, { "epoch": 0.09545, "grad_norm": 0.10564927011728287, "learning_rate": 2.7439526783461632e-05, "loss": 0.0346, "step": 99090 }, { "epoch": 0.0955, "grad_norm": 0.08630510419607162, "learning_rate": 2.7435412803222443e-05, "loss": 0.0348, "step": 99100 }, { "epoch": 0.09555, "grad_norm": 0.08209118992090225, "learning_rate": 2.7431298756399982e-05, "loss": 0.0336, "step": 99110 }, { "epoch": 0.0956, "grad_norm": 0.07582660764455795, "learning_rate": 2.7427184643106723e-05, "loss": 0.0343, "step": 99120 }, { "epoch": 0.09565, "grad_norm": 0.07559537142515182, "learning_rate": 2.7423070463455147e-05, "loss": 0.0345, "step": 99130 }, { "epoch": 0.0957, "grad_norm": 0.07149489223957062, "learning_rate": 2.7418956217557745e-05, "loss": 0.0345, "step": 99140 }, { "epoch": 0.09575, "grad_norm": 0.08505064249038696, "learning_rate": 2.741484190552698e-05, "loss": 0.035, "step": 99150 }, { "epoch": 0.0958, "grad_norm": 0.06394463777542114, "learning_rate": 2.741072752747535e-05, "loss": 0.0339, "step": 99160 }, { "epoch": 0.09585, "grad_norm": 0.06664594262838364, "learning_rate": 2.7406613083515333e-05, "loss": 0.0342, "step": 99170 }, { "epoch": 0.0959, "grad_norm": 0.0785508006811142, "learning_rate": 2.7402498573759415e-05, "loss": 0.0347, "step": 99180 }, { "epoch": 0.09595, "grad_norm": 0.06014531850814819, "learning_rate": 2.7398383998320088e-05, "loss": 0.0376, "step": 99190 }, { "epoch": 0.096, "grad_norm": 0.07316498458385468, "learning_rate": 2.739426935730985e-05, "loss": 0.0345, "step": 99200 }, { "epoch": 0.09605, "grad_norm": 0.06742087006568909, "learning_rate": 2.7390154650841182e-05, "loss": 0.0345, "step": 99210 }, { "epoch": 0.0961, "grad_norm": 0.07355812937021255, "learning_rate": 2.7386039879026586e-05, "loss": 0.0348, "step": 99220 }, { "epoch": 0.09615, "grad_norm": 0.06187834590673447, "learning_rate": 2.7381925041978558e-05, "loss": 0.0346, "step": 99230 }, { "epoch": 0.0962, "grad_norm": 0.07339289784431458, "learning_rate": 2.737781013980959e-05, "loss": 0.0354, "step": 99240 }, { "epoch": 0.09625, "grad_norm": 0.06592538952827454, "learning_rate": 2.7373695172632184e-05, "loss": 0.0356, "step": 99250 }, { "epoch": 0.0963, "grad_norm": 0.0785292387008667, "learning_rate": 2.7369580140558855e-05, "loss": 0.0354, "step": 99260 }, { "epoch": 0.09635, "grad_norm": 0.06731037050485611, "learning_rate": 2.736546504370208e-05, "loss": 0.0348, "step": 99270 }, { "epoch": 0.0964, "grad_norm": 0.0630384087562561, "learning_rate": 2.7361349882174385e-05, "loss": 0.0356, "step": 99280 }, { "epoch": 0.09645, "grad_norm": 0.08435707539319992, "learning_rate": 2.735723465608828e-05, "loss": 0.0355, "step": 99290 }, { "epoch": 0.0965, "grad_norm": 0.1166800782084465, "learning_rate": 2.7353119365556258e-05, "loss": 0.0379, "step": 99300 }, { "epoch": 0.09655, "grad_norm": 0.0964205339550972, "learning_rate": 2.7349004010690833e-05, "loss": 0.0372, "step": 99310 }, { "epoch": 0.0966, "grad_norm": 0.10768742859363556, "learning_rate": 2.7344888591604524e-05, "loss": 0.0367, "step": 99320 }, { "epoch": 0.09665, "grad_norm": 0.08268402516841888, "learning_rate": 2.7340773108409847e-05, "loss": 0.0355, "step": 99330 }, { "epoch": 0.0967, "grad_norm": 0.08508584648370743, "learning_rate": 2.7336657561219302e-05, "loss": 0.0349, "step": 99340 }, { "epoch": 0.09675, "grad_norm": 0.07057398557662964, "learning_rate": 2.733254195014543e-05, "loss": 0.0338, "step": 99350 }, { "epoch": 0.0968, "grad_norm": 0.08485107123851776, "learning_rate": 2.732842627530073e-05, "loss": 0.0344, "step": 99360 }, { "epoch": 0.09685, "grad_norm": 0.09654036164283752, "learning_rate": 2.732431053679773e-05, "loss": 0.0353, "step": 99370 }, { "epoch": 0.0969, "grad_norm": 0.09786161035299301, "learning_rate": 2.732019473474895e-05, "loss": 0.0372, "step": 99380 }, { "epoch": 0.09695, "grad_norm": 0.10060153901576996, "learning_rate": 2.7316078869266926e-05, "loss": 0.0353, "step": 99390 }, { "epoch": 0.097, "grad_norm": 0.07460450381040573, "learning_rate": 2.731196294046417e-05, "loss": 0.0362, "step": 99400 }, { "epoch": 0.09705, "grad_norm": 0.07266578823328018, "learning_rate": 2.730784694845322e-05, "loss": 0.0349, "step": 99410 }, { "epoch": 0.0971, "grad_norm": 0.06460300087928772, "learning_rate": 2.7303730893346598e-05, "loss": 0.0349, "step": 99420 }, { "epoch": 0.09715, "grad_norm": 0.07580089569091797, "learning_rate": 2.7299614775256843e-05, "loss": 0.0341, "step": 99430 }, { "epoch": 0.0972, "grad_norm": 0.07017458230257034, "learning_rate": 2.7295498594296477e-05, "loss": 0.0345, "step": 99440 }, { "epoch": 0.09725, "grad_norm": 0.09122201800346375, "learning_rate": 2.7291382350578048e-05, "loss": 0.0372, "step": 99450 }, { "epoch": 0.0973, "grad_norm": 0.06226756423711777, "learning_rate": 2.7287266044214082e-05, "loss": 0.034, "step": 99460 }, { "epoch": 0.09735, "grad_norm": 0.07291603088378906, "learning_rate": 2.7283149675317126e-05, "loss": 0.0349, "step": 99470 }, { "epoch": 0.0974, "grad_norm": 0.06803841888904572, "learning_rate": 2.7279033243999714e-05, "loss": 0.0359, "step": 99480 }, { "epoch": 0.09745, "grad_norm": 0.0804368108510971, "learning_rate": 2.7274916750374385e-05, "loss": 0.0372, "step": 99490 }, { "epoch": 0.0975, "grad_norm": 0.06407159566879272, "learning_rate": 2.7270800194553686e-05, "loss": 0.0352, "step": 99500 }, { "epoch": 0.09755, "grad_norm": 0.07560861855745316, "learning_rate": 2.726668357665017e-05, "loss": 0.0361, "step": 99510 }, { "epoch": 0.0976, "grad_norm": 0.08951292186975479, "learning_rate": 2.7262566896776376e-05, "loss": 0.0357, "step": 99520 }, { "epoch": 0.09765, "grad_norm": 0.09539582580327988, "learning_rate": 2.7258450155044844e-05, "loss": 0.0356, "step": 99530 }, { "epoch": 0.0977, "grad_norm": 0.07857454568147659, "learning_rate": 2.7254333351568144e-05, "loss": 0.0351, "step": 99540 }, { "epoch": 0.09775, "grad_norm": 0.08996032178401947, "learning_rate": 2.7250216486458813e-05, "loss": 0.036, "step": 99550 }, { "epoch": 0.0978, "grad_norm": 0.08443517982959747, "learning_rate": 2.7246099559829412e-05, "loss": 0.0358, "step": 99560 }, { "epoch": 0.09785, "grad_norm": 0.07626023143529892, "learning_rate": 2.7241982571792486e-05, "loss": 0.0355, "step": 99570 }, { "epoch": 0.0979, "grad_norm": 0.10047277808189392, "learning_rate": 2.7237865522460604e-05, "loss": 0.035, "step": 99580 }, { "epoch": 0.09795, "grad_norm": 0.12941400706768036, "learning_rate": 2.7233748411946313e-05, "loss": 0.0361, "step": 99590 }, { "epoch": 0.098, "grad_norm": 0.11946405470371246, "learning_rate": 2.722963124036219e-05, "loss": 0.0361, "step": 99600 }, { "epoch": 0.09805, "grad_norm": 0.088363878428936, "learning_rate": 2.722551400782078e-05, "loss": 0.0353, "step": 99610 }, { "epoch": 0.0981, "grad_norm": 0.09197662025690079, "learning_rate": 2.7221396714434655e-05, "loss": 0.0344, "step": 99620 }, { "epoch": 0.09815, "grad_norm": 0.08126228302717209, "learning_rate": 2.721727936031637e-05, "loss": 0.0364, "step": 99630 }, { "epoch": 0.0982, "grad_norm": 0.10342596471309662, "learning_rate": 2.7213161945578514e-05, "loss": 0.0358, "step": 99640 }, { "epoch": 0.09825, "grad_norm": 0.0790046751499176, "learning_rate": 2.7209044470333635e-05, "loss": 0.0352, "step": 99650 }, { "epoch": 0.0983, "grad_norm": 0.09566021710634232, "learning_rate": 2.7204926934694307e-05, "loss": 0.0358, "step": 99660 }, { "epoch": 0.09835, "grad_norm": 0.07163716107606888, "learning_rate": 2.7200809338773108e-05, "loss": 0.0351, "step": 99670 }, { "epoch": 0.0984, "grad_norm": 0.07070885598659515, "learning_rate": 2.719669168268261e-05, "loss": 0.0363, "step": 99680 }, { "epoch": 0.09845, "grad_norm": 0.07439655065536499, "learning_rate": 2.7192573966535385e-05, "loss": 0.036, "step": 99690 }, { "epoch": 0.0985, "grad_norm": 0.07881123572587967, "learning_rate": 2.718845619044401e-05, "loss": 0.0344, "step": 99700 }, { "epoch": 0.09855, "grad_norm": 0.08434942364692688, "learning_rate": 2.7184338354521067e-05, "loss": 0.0356, "step": 99710 }, { "epoch": 0.0986, "grad_norm": 0.0887083187699318, "learning_rate": 2.7180220458879136e-05, "loss": 0.0351, "step": 99720 }, { "epoch": 0.09865, "grad_norm": 0.07735418528318405, "learning_rate": 2.7176102503630796e-05, "loss": 0.0344, "step": 99730 }, { "epoch": 0.0987, "grad_norm": 0.07166688144207001, "learning_rate": 2.7171984488888623e-05, "loss": 0.0368, "step": 99740 }, { "epoch": 0.09875, "grad_norm": 0.07416993379592896, "learning_rate": 2.7167866414765226e-05, "loss": 0.0346, "step": 99750 }, { "epoch": 0.0988, "grad_norm": 0.07947028428316116, "learning_rate": 2.7163748281373164e-05, "loss": 0.0331, "step": 99760 }, { "epoch": 0.09885, "grad_norm": 0.06419810652732849, "learning_rate": 2.7159630088825034e-05, "loss": 0.0353, "step": 99770 }, { "epoch": 0.0989, "grad_norm": 0.07942546904087067, "learning_rate": 2.715551183723343e-05, "loss": 0.0341, "step": 99780 }, { "epoch": 0.09895, "grad_norm": 0.08818191289901733, "learning_rate": 2.7151393526710955e-05, "loss": 0.0348, "step": 99790 }, { "epoch": 0.099, "grad_norm": 0.07038391381502151, "learning_rate": 2.714727515737018e-05, "loss": 0.0342, "step": 99800 }, { "epoch": 0.09905, "grad_norm": 0.06655339896678925, "learning_rate": 2.714315672932371e-05, "loss": 0.0346, "step": 99810 }, { "epoch": 0.0991, "grad_norm": 0.07684661448001862, "learning_rate": 2.7139038242684127e-05, "loss": 0.0356, "step": 99820 }, { "epoch": 0.09915, "grad_norm": 0.06352897733449936, "learning_rate": 2.713491969756406e-05, "loss": 0.0351, "step": 99830 }, { "epoch": 0.0992, "grad_norm": 0.0849921852350235, "learning_rate": 2.7130801094076088e-05, "loss": 0.035, "step": 99840 }, { "epoch": 0.09925, "grad_norm": 0.08481822907924652, "learning_rate": 2.7126682432332812e-05, "loss": 0.0338, "step": 99850 }, { "epoch": 0.0993, "grad_norm": 0.0884605348110199, "learning_rate": 2.7122563712446834e-05, "loss": 0.0362, "step": 99860 }, { "epoch": 0.09935, "grad_norm": 0.0767376720905304, "learning_rate": 2.7118444934530768e-05, "loss": 0.0345, "step": 99870 }, { "epoch": 0.0994, "grad_norm": 0.07019027322530746, "learning_rate": 2.7114326098697207e-05, "loss": 0.0357, "step": 99880 }, { "epoch": 0.09945, "grad_norm": 0.08582523465156555, "learning_rate": 2.7110207205058768e-05, "loss": 0.0348, "step": 99890 }, { "epoch": 0.0995, "grad_norm": 0.087223120033741, "learning_rate": 2.710608825372805e-05, "loss": 0.0356, "step": 99900 }, { "epoch": 0.09955, "grad_norm": 0.07381154596805573, "learning_rate": 2.7101969244817683e-05, "loss": 0.0354, "step": 99910 }, { "epoch": 0.0996, "grad_norm": 0.07284123450517654, "learning_rate": 2.709785017844026e-05, "loss": 0.0358, "step": 99920 }, { "epoch": 0.09965, "grad_norm": 0.0720820426940918, "learning_rate": 2.7093731054708404e-05, "loss": 0.0354, "step": 99930 }, { "epoch": 0.0997, "grad_norm": 0.08696381002664566, "learning_rate": 2.708961187373472e-05, "loss": 0.0393, "step": 99940 }, { "epoch": 0.09975, "grad_norm": 0.09174282103776932, "learning_rate": 2.7085492635631838e-05, "loss": 0.0354, "step": 99950 }, { "epoch": 0.0998, "grad_norm": 0.07005579024553299, "learning_rate": 2.708137334051237e-05, "loss": 0.0352, "step": 99960 }, { "epoch": 0.09985, "grad_norm": 0.05808929353952408, "learning_rate": 2.7077253988488937e-05, "loss": 0.0348, "step": 99970 }, { "epoch": 0.0999, "grad_norm": 0.05459505692124367, "learning_rate": 2.707313457967416e-05, "loss": 0.0362, "step": 99980 }, { "epoch": 0.09995, "grad_norm": 0.07056388258934021, "learning_rate": 2.7069015114180664e-05, "loss": 0.0358, "step": 99990 }, { "epoch": 0.1, "grad_norm": 0.08420798927545547, "learning_rate": 2.706489559212107e-05, "loss": 0.0364, "step": 100000 }, { "epoch": 0.10005, "grad_norm": 0.06554892659187317, "learning_rate": 2.706077601360801e-05, "loss": 0.0351, "step": 100010 }, { "epoch": 0.1001, "grad_norm": 0.07413670420646667, "learning_rate": 2.7056656378754097e-05, "loss": 0.035, "step": 100020 }, { "epoch": 0.10015, "grad_norm": 0.0762542188167572, "learning_rate": 2.705253668767198e-05, "loss": 0.0359, "step": 100030 }, { "epoch": 0.1002, "grad_norm": 0.0821673646569252, "learning_rate": 2.7048416940474285e-05, "loss": 0.0356, "step": 100040 }, { "epoch": 0.10025, "grad_norm": 0.0915229469537735, "learning_rate": 2.7044297137273632e-05, "loss": 0.0357, "step": 100050 }, { "epoch": 0.1003, "grad_norm": 0.07093614339828491, "learning_rate": 2.7040177278182672e-05, "loss": 0.0341, "step": 100060 }, { "epoch": 0.10035, "grad_norm": 0.0898672342300415, "learning_rate": 2.7036057363314026e-05, "loss": 0.0348, "step": 100070 }, { "epoch": 0.1004, "grad_norm": 0.07349665462970734, "learning_rate": 2.7031937392780334e-05, "loss": 0.0333, "step": 100080 }, { "epoch": 0.10045, "grad_norm": 0.06879694014787674, "learning_rate": 2.7027817366694236e-05, "loss": 0.0359, "step": 100090 }, { "epoch": 0.1005, "grad_norm": 0.06934312731027603, "learning_rate": 2.7023697285168382e-05, "loss": 0.0334, "step": 100100 }, { "epoch": 0.10055, "grad_norm": 0.06713636219501495, "learning_rate": 2.70195771483154e-05, "loss": 0.0328, "step": 100110 }, { "epoch": 0.1006, "grad_norm": 0.0665070191025734, "learning_rate": 2.701545695624794e-05, "loss": 0.0329, "step": 100120 }, { "epoch": 0.10065, "grad_norm": 0.07813754677772522, "learning_rate": 2.7011336709078638e-05, "loss": 0.0342, "step": 100130 }, { "epoch": 0.1007, "grad_norm": 0.06740997731685638, "learning_rate": 2.700721640692015e-05, "loss": 0.0331, "step": 100140 }, { "epoch": 0.10075, "grad_norm": 0.08873384445905685, "learning_rate": 2.7003096049885112e-05, "loss": 0.0393, "step": 100150 }, { "epoch": 0.1008, "grad_norm": 0.08748091757297516, "learning_rate": 2.6998975638086194e-05, "loss": 0.0328, "step": 100160 }, { "epoch": 0.10085, "grad_norm": 0.07819615304470062, "learning_rate": 2.6994855171636026e-05, "loss": 0.035, "step": 100170 }, { "epoch": 0.1009, "grad_norm": 0.06912656128406525, "learning_rate": 2.699073465064727e-05, "loss": 0.0342, "step": 100180 }, { "epoch": 0.10095, "grad_norm": 0.07246105372905731, "learning_rate": 2.6986614075232574e-05, "loss": 0.0359, "step": 100190 }, { "epoch": 0.101, "grad_norm": 0.08377740532159805, "learning_rate": 2.698249344550459e-05, "loss": 0.0362, "step": 100200 }, { "epoch": 0.10105, "grad_norm": 0.07372310757637024, "learning_rate": 2.697837276157599e-05, "loss": 0.0352, "step": 100210 }, { "epoch": 0.1011, "grad_norm": 0.0842214971780777, "learning_rate": 2.6974252023559414e-05, "loss": 0.0364, "step": 100220 }, { "epoch": 0.10115, "grad_norm": 0.07575521618127823, "learning_rate": 2.697013123156753e-05, "loss": 0.0356, "step": 100230 }, { "epoch": 0.1012, "grad_norm": 0.07804497331380844, "learning_rate": 2.6966010385713003e-05, "loss": 0.0356, "step": 100240 }, { "epoch": 0.10125, "grad_norm": 0.10025062412023544, "learning_rate": 2.6961889486108495e-05, "loss": 0.0376, "step": 100250 }, { "epoch": 0.1013, "grad_norm": 0.07742762565612793, "learning_rate": 2.6957768532866656e-05, "loss": 0.0349, "step": 100260 }, { "epoch": 0.10135, "grad_norm": 0.10000407695770264, "learning_rate": 2.695364752610016e-05, "loss": 0.0352, "step": 100270 }, { "epoch": 0.1014, "grad_norm": 0.08434217423200607, "learning_rate": 2.6949526465921675e-05, "loss": 0.0338, "step": 100280 }, { "epoch": 0.10145, "grad_norm": 0.10215528309345245, "learning_rate": 2.6945405352443875e-05, "loss": 0.0353, "step": 100290 }, { "epoch": 0.1015, "grad_norm": 0.08367814868688583, "learning_rate": 2.694128418577942e-05, "loss": 0.0352, "step": 100300 }, { "epoch": 0.10155, "grad_norm": 0.08544318377971649, "learning_rate": 2.693716296604099e-05, "loss": 0.0332, "step": 100310 }, { "epoch": 0.1016, "grad_norm": 0.08193808048963547, "learning_rate": 2.6933041693341248e-05, "loss": 0.034, "step": 100320 }, { "epoch": 0.10165, "grad_norm": 0.08198003470897675, "learning_rate": 2.692892036779287e-05, "loss": 0.0351, "step": 100330 }, { "epoch": 0.1017, "grad_norm": 0.07437553256750107, "learning_rate": 2.6924798989508532e-05, "loss": 0.0343, "step": 100340 }, { "epoch": 0.10175, "grad_norm": 0.07319196313619614, "learning_rate": 2.692067755860092e-05, "loss": 0.0386, "step": 100350 }, { "epoch": 0.1018, "grad_norm": 0.06065535545349121, "learning_rate": 2.6916556075182704e-05, "loss": 0.0349, "step": 100360 }, { "epoch": 0.10185, "grad_norm": 0.0815400555729866, "learning_rate": 2.6912434539366565e-05, "loss": 0.0347, "step": 100370 }, { "epoch": 0.1019, "grad_norm": 0.07775052636861801, "learning_rate": 2.690831295126518e-05, "loss": 0.0371, "step": 100380 }, { "epoch": 0.10195, "grad_norm": 0.07136549055576324, "learning_rate": 2.6904191310991238e-05, "loss": 0.0365, "step": 100390 }, { "epoch": 0.102, "grad_norm": 0.07335707545280457, "learning_rate": 2.6900069618657413e-05, "loss": 0.0366, "step": 100400 }, { "epoch": 0.10205, "grad_norm": 0.07020821422338486, "learning_rate": 2.689594787437641e-05, "loss": 0.0346, "step": 100410 }, { "epoch": 0.1021, "grad_norm": 0.07441741228103638, "learning_rate": 2.68918260782609e-05, "loss": 0.0364, "step": 100420 }, { "epoch": 0.10215, "grad_norm": 0.07880008220672607, "learning_rate": 2.688770423042358e-05, "loss": 0.0378, "step": 100430 }, { "epoch": 0.1022, "grad_norm": 0.07149319350719452, "learning_rate": 2.688358233097713e-05, "loss": 0.0356, "step": 100440 }, { "epoch": 0.10225, "grad_norm": 0.08383669704198837, "learning_rate": 2.687946038003425e-05, "loss": 0.0337, "step": 100450 }, { "epoch": 0.1023, "grad_norm": 0.07485045492649078, "learning_rate": 2.687533837770762e-05, "loss": 0.0368, "step": 100460 }, { "epoch": 0.10235, "grad_norm": 0.07422316074371338, "learning_rate": 2.6871216324109956e-05, "loss": 0.0338, "step": 100470 }, { "epoch": 0.1024, "grad_norm": 0.09465152025222778, "learning_rate": 2.6867094219353933e-05, "loss": 0.0352, "step": 100480 }, { "epoch": 0.10245, "grad_norm": 0.10185496509075165, "learning_rate": 2.6862972063552262e-05, "loss": 0.0352, "step": 100490 }, { "epoch": 0.1025, "grad_norm": 0.09141545742750168, "learning_rate": 2.685884985681763e-05, "loss": 0.0353, "step": 100500 }, { "epoch": 0.10255, "grad_norm": 0.08120530843734741, "learning_rate": 2.685472759926274e-05, "loss": 0.0346, "step": 100510 }, { "epoch": 0.1026, "grad_norm": 0.0668620839715004, "learning_rate": 2.6850605291000297e-05, "loss": 0.0346, "step": 100520 }, { "epoch": 0.10265, "grad_norm": 0.060053206980228424, "learning_rate": 2.6846482932142996e-05, "loss": 0.0366, "step": 100530 }, { "epoch": 0.1027, "grad_norm": 0.07062922418117523, "learning_rate": 2.6842360522803554e-05, "loss": 0.0347, "step": 100540 }, { "epoch": 0.10275, "grad_norm": 0.05922499671578407, "learning_rate": 2.683823806309466e-05, "loss": 0.0339, "step": 100550 }, { "epoch": 0.1028, "grad_norm": 0.059927552938461304, "learning_rate": 2.6834115553129034e-05, "loss": 0.0354, "step": 100560 }, { "epoch": 0.10285, "grad_norm": 0.06442233920097351, "learning_rate": 2.682999299301937e-05, "loss": 0.0351, "step": 100570 }, { "epoch": 0.1029, "grad_norm": 0.07545732706785202, "learning_rate": 2.68258703828784e-05, "loss": 0.0357, "step": 100580 }, { "epoch": 0.10295, "grad_norm": 0.06960097700357437, "learning_rate": 2.6821747722818797e-05, "loss": 0.0357, "step": 100590 }, { "epoch": 0.103, "grad_norm": 0.07541335374116898, "learning_rate": 2.6817625012953313e-05, "loss": 0.0358, "step": 100600 }, { "epoch": 0.10305, "grad_norm": 0.06176866218447685, "learning_rate": 2.6813502253394635e-05, "loss": 0.0376, "step": 100610 }, { "epoch": 0.1031, "grad_norm": 0.06761564314365387, "learning_rate": 2.6809379444255493e-05, "loss": 0.0343, "step": 100620 }, { "epoch": 0.10315, "grad_norm": 0.07260114699602127, "learning_rate": 2.6805256585648597e-05, "loss": 0.0369, "step": 100630 }, { "epoch": 0.1032, "grad_norm": 0.06497721374034882, "learning_rate": 2.6801133677686663e-05, "loss": 0.035, "step": 100640 }, { "epoch": 0.10325, "grad_norm": 0.07391471415758133, "learning_rate": 2.67970107204824e-05, "loss": 0.0358, "step": 100650 }, { "epoch": 0.1033, "grad_norm": 0.06881912797689438, "learning_rate": 2.679288771414855e-05, "loss": 0.0352, "step": 100660 }, { "epoch": 0.10335, "grad_norm": 0.06782495975494385, "learning_rate": 2.6788764658797827e-05, "loss": 0.0353, "step": 100670 }, { "epoch": 0.1034, "grad_norm": 0.061936330050230026, "learning_rate": 2.6784641554542943e-05, "loss": 0.0365, "step": 100680 }, { "epoch": 0.10345, "grad_norm": 0.09320276975631714, "learning_rate": 2.6780518401496634e-05, "loss": 0.0353, "step": 100690 }, { "epoch": 0.1035, "grad_norm": 0.0687861442565918, "learning_rate": 2.6776395199771616e-05, "loss": 0.0351, "step": 100700 }, { "epoch": 0.10355, "grad_norm": 0.06580278277397156, "learning_rate": 2.6772271949480622e-05, "loss": 0.0348, "step": 100710 }, { "epoch": 0.1036, "grad_norm": 0.08795535564422607, "learning_rate": 2.676814865073638e-05, "loss": 0.0367, "step": 100720 }, { "epoch": 0.10365, "grad_norm": 0.06928795576095581, "learning_rate": 2.676402530365162e-05, "loss": 0.0347, "step": 100730 }, { "epoch": 0.1037, "grad_norm": 0.0687493160367012, "learning_rate": 2.6759901908339065e-05, "loss": 0.0353, "step": 100740 }, { "epoch": 0.10375, "grad_norm": 0.09345578402280807, "learning_rate": 2.6755778464911457e-05, "loss": 0.0356, "step": 100750 }, { "epoch": 0.1038, "grad_norm": 0.07706046849489212, "learning_rate": 2.6751654973481526e-05, "loss": 0.036, "step": 100760 }, { "epoch": 0.10385, "grad_norm": 0.08092194050550461, "learning_rate": 2.674753143416201e-05, "loss": 0.036, "step": 100770 }, { "epoch": 0.1039, "grad_norm": 0.08409049361944199, "learning_rate": 2.6743407847065627e-05, "loss": 0.0376, "step": 100780 }, { "epoch": 0.10395, "grad_norm": 0.08491519838571548, "learning_rate": 2.673928421230514e-05, "loss": 0.0338, "step": 100790 }, { "epoch": 0.104, "grad_norm": 0.11122860014438629, "learning_rate": 2.673516052999327e-05, "loss": 0.0419, "step": 100800 }, { "epoch": 0.10405, "grad_norm": 0.088767871260643, "learning_rate": 2.673103680024277e-05, "loss": 0.0369, "step": 100810 }, { "epoch": 0.1041, "grad_norm": 0.08989717811346054, "learning_rate": 2.6726913023166374e-05, "loss": 0.038, "step": 100820 }, { "epoch": 0.10415, "grad_norm": 0.08206108957529068, "learning_rate": 2.6722789198876825e-05, "loss": 0.0353, "step": 100830 }, { "epoch": 0.1042, "grad_norm": 0.07449140399694443, "learning_rate": 2.6718665327486854e-05, "loss": 0.0371, "step": 100840 }, { "epoch": 0.10425, "grad_norm": 0.09066558629274368, "learning_rate": 2.6714541409109228e-05, "loss": 0.0348, "step": 100850 }, { "epoch": 0.1043, "grad_norm": 0.07385815680027008, "learning_rate": 2.6710417443856683e-05, "loss": 0.0349, "step": 100860 }, { "epoch": 0.10435, "grad_norm": 0.07193297892808914, "learning_rate": 2.6706293431841974e-05, "loss": 0.0341, "step": 100870 }, { "epoch": 0.1044, "grad_norm": 0.07011745125055313, "learning_rate": 2.670216937317784e-05, "loss": 0.034, "step": 100880 }, { "epoch": 0.10445, "grad_norm": 0.06763279438018799, "learning_rate": 2.6698045267977034e-05, "loss": 0.0334, "step": 100890 }, { "epoch": 0.1045, "grad_norm": 0.0637497678399086, "learning_rate": 2.6693921116352304e-05, "loss": 0.0344, "step": 100900 }, { "epoch": 0.10455, "grad_norm": 0.07138977944850922, "learning_rate": 2.668979691841641e-05, "loss": 0.035, "step": 100910 }, { "epoch": 0.1046, "grad_norm": 0.0984807088971138, "learning_rate": 2.6685672674282097e-05, "loss": 0.0362, "step": 100920 }, { "epoch": 0.10465, "grad_norm": 0.06861315667629242, "learning_rate": 2.668154838406214e-05, "loss": 0.0354, "step": 100930 }, { "epoch": 0.1047, "grad_norm": 0.09650956094264984, "learning_rate": 2.667742404786927e-05, "loss": 0.0346, "step": 100940 }, { "epoch": 0.10475, "grad_norm": 0.07507655769586563, "learning_rate": 2.667329966581626e-05, "loss": 0.0348, "step": 100950 }, { "epoch": 0.1048, "grad_norm": 0.0630788579583168, "learning_rate": 2.666917523801587e-05, "loss": 0.0368, "step": 100960 }, { "epoch": 0.10485, "grad_norm": 0.06309845298528671, "learning_rate": 2.6665050764580852e-05, "loss": 0.0362, "step": 100970 }, { "epoch": 0.1049, "grad_norm": 0.07771171629428864, "learning_rate": 2.6660926245623968e-05, "loss": 0.0348, "step": 100980 }, { "epoch": 0.10495, "grad_norm": 0.06262598931789398, "learning_rate": 2.6656801681257986e-05, "loss": 0.0348, "step": 100990 }, { "epoch": 0.105, "grad_norm": 0.06783615052700043, "learning_rate": 2.6652677071595677e-05, "loss": 0.0342, "step": 101000 }, { "epoch": 0.10505, "grad_norm": 0.06547072529792786, "learning_rate": 2.6648552416749795e-05, "loss": 0.0357, "step": 101010 }, { "epoch": 0.1051, "grad_norm": 0.058621667325496674, "learning_rate": 2.664442771683311e-05, "loss": 0.0348, "step": 101020 }, { "epoch": 0.10515, "grad_norm": 0.08480167388916016, "learning_rate": 2.6640302971958376e-05, "loss": 0.0382, "step": 101030 }, { "epoch": 0.1052, "grad_norm": 0.0638345256447792, "learning_rate": 2.6636178182238387e-05, "loss": 0.035, "step": 101040 }, { "epoch": 0.10525, "grad_norm": 0.07020757347345352, "learning_rate": 2.6632053347785897e-05, "loss": 0.035, "step": 101050 }, { "epoch": 0.1053, "grad_norm": 0.0668468028306961, "learning_rate": 2.6627928468713687e-05, "loss": 0.0352, "step": 101060 }, { "epoch": 0.10535, "grad_norm": 0.0771222710609436, "learning_rate": 2.6623803545134517e-05, "loss": 0.0348, "step": 101070 }, { "epoch": 0.1054, "grad_norm": 0.0790410116314888, "learning_rate": 2.6619678577161178e-05, "loss": 0.0347, "step": 101080 }, { "epoch": 0.10545, "grad_norm": 0.0950852632522583, "learning_rate": 2.6615553564906426e-05, "loss": 0.0346, "step": 101090 }, { "epoch": 0.1055, "grad_norm": 0.06712023168802261, "learning_rate": 2.661142850848305e-05, "loss": 0.0344, "step": 101100 }, { "epoch": 0.10555, "grad_norm": 0.0824480876326561, "learning_rate": 2.660730340800382e-05, "loss": 0.0357, "step": 101110 }, { "epoch": 0.1056, "grad_norm": 0.08160246163606644, "learning_rate": 2.6603178263581525e-05, "loss": 0.0357, "step": 101120 }, { "epoch": 0.10565, "grad_norm": 0.07077962905168533, "learning_rate": 2.6599053075328933e-05, "loss": 0.035, "step": 101130 }, { "epoch": 0.1057, "grad_norm": 0.1023697629570961, "learning_rate": 2.6594927843358836e-05, "loss": 0.0352, "step": 101140 }, { "epoch": 0.10575, "grad_norm": 0.0759579986333847, "learning_rate": 2.6590802567784008e-05, "loss": 0.0343, "step": 101150 }, { "epoch": 0.1058, "grad_norm": 0.07697603106498718, "learning_rate": 2.6586677248717233e-05, "loss": 0.0356, "step": 101160 }, { "epoch": 0.10585, "grad_norm": 0.07262375950813293, "learning_rate": 2.65825518862713e-05, "loss": 0.0352, "step": 101170 }, { "epoch": 0.1059, "grad_norm": 0.07879820466041565, "learning_rate": 2.6578426480558993e-05, "loss": 0.0363, "step": 101180 }, { "epoch": 0.10595, "grad_norm": 0.0667225569486618, "learning_rate": 2.65743010316931e-05, "loss": 0.0342, "step": 101190 }, { "epoch": 0.106, "grad_norm": 0.08262769877910614, "learning_rate": 2.6570175539786406e-05, "loss": 0.0353, "step": 101200 }, { "epoch": 0.10605, "grad_norm": 0.07676971703767776, "learning_rate": 2.656605000495171e-05, "loss": 0.0353, "step": 101210 }, { "epoch": 0.1061, "grad_norm": 0.07954535633325577, "learning_rate": 2.656192442730179e-05, "loss": 0.0347, "step": 101220 }, { "epoch": 0.10615, "grad_norm": 0.07525185495615005, "learning_rate": 2.6557798806949437e-05, "loss": 0.034, "step": 101230 }, { "epoch": 0.1062, "grad_norm": 0.06477613747119904, "learning_rate": 2.6553673144007452e-05, "loss": 0.0344, "step": 101240 }, { "epoch": 0.10625, "grad_norm": 0.07150783389806747, "learning_rate": 2.6549547438588635e-05, "loss": 0.0345, "step": 101250 }, { "epoch": 0.1063, "grad_norm": 0.07217303663492203, "learning_rate": 2.6545421690805766e-05, "loss": 0.0344, "step": 101260 }, { "epoch": 0.10635, "grad_norm": 0.07395917177200317, "learning_rate": 2.6541295900771657e-05, "loss": 0.0351, "step": 101270 }, { "epoch": 0.1064, "grad_norm": 0.09638627618551254, "learning_rate": 2.6537170068599086e-05, "loss": 0.0357, "step": 101280 }, { "epoch": 0.10645, "grad_norm": 0.08601228892803192, "learning_rate": 2.653304419440087e-05, "loss": 0.0347, "step": 101290 }, { "epoch": 0.1065, "grad_norm": 0.056638069450855255, "learning_rate": 2.65289182782898e-05, "loss": 0.0338, "step": 101300 }, { "epoch": 0.10655, "grad_norm": 0.07380425930023193, "learning_rate": 2.6524792320378678e-05, "loss": 0.0348, "step": 101310 }, { "epoch": 0.1066, "grad_norm": 0.07186898589134216, "learning_rate": 2.6520666320780307e-05, "loss": 0.0339, "step": 101320 }, { "epoch": 0.10665, "grad_norm": 0.10297045111656189, "learning_rate": 2.6516540279607492e-05, "loss": 0.0351, "step": 101330 }, { "epoch": 0.1067, "grad_norm": 0.07520277053117752, "learning_rate": 2.6512414196973035e-05, "loss": 0.0334, "step": 101340 }, { "epoch": 0.10675, "grad_norm": 0.0703219398856163, "learning_rate": 2.6508288072989736e-05, "loss": 0.0354, "step": 101350 }, { "epoch": 0.1068, "grad_norm": 0.06466661393642426, "learning_rate": 2.6504161907770413e-05, "loss": 0.0334, "step": 101360 }, { "epoch": 0.10685, "grad_norm": 0.0602884441614151, "learning_rate": 2.650003570142787e-05, "loss": 0.0333, "step": 101370 }, { "epoch": 0.1069, "grad_norm": 0.05375294014811516, "learning_rate": 2.6495909454074915e-05, "loss": 0.0334, "step": 101380 }, { "epoch": 0.10695, "grad_norm": 0.06414522975683212, "learning_rate": 2.649178316582435e-05, "loss": 0.0333, "step": 101390 }, { "epoch": 0.107, "grad_norm": 0.06261350959539413, "learning_rate": 2.6487656836789e-05, "loss": 0.0323, "step": 101400 }, { "epoch": 0.10705, "grad_norm": 0.0568736270070076, "learning_rate": 2.648353046708167e-05, "loss": 0.0324, "step": 101410 }, { "epoch": 0.1071, "grad_norm": 0.06497329473495483, "learning_rate": 2.6479404056815172e-05, "loss": 0.033, "step": 101420 }, { "epoch": 0.10715, "grad_norm": 0.06477135419845581, "learning_rate": 2.6475277606102327e-05, "loss": 0.0342, "step": 101430 }, { "epoch": 0.1072, "grad_norm": 0.06530822813510895, "learning_rate": 2.6471151115055942e-05, "loss": 0.0338, "step": 101440 }, { "epoch": 0.10725, "grad_norm": 0.06552419811487198, "learning_rate": 2.646702458378884e-05, "loss": 0.035, "step": 101450 }, { "epoch": 0.1073, "grad_norm": 0.07599837332963943, "learning_rate": 2.646289801241384e-05, "loss": 0.0372, "step": 101460 }, { "epoch": 0.10735, "grad_norm": 0.07614503055810928, "learning_rate": 2.6458771401043753e-05, "loss": 0.0343, "step": 101470 }, { "epoch": 0.1074, "grad_norm": 0.07338558882474899, "learning_rate": 2.6454644749791406e-05, "loss": 0.0338, "step": 101480 }, { "epoch": 0.10745, "grad_norm": 0.06588335335254669, "learning_rate": 2.645051805876962e-05, "loss": 0.0343, "step": 101490 }, { "epoch": 0.1075, "grad_norm": 0.07893166691064835, "learning_rate": 2.6446391328091212e-05, "loss": 0.0347, "step": 101500 }, { "epoch": 0.10755, "grad_norm": 0.07759903371334076, "learning_rate": 2.6442264557869012e-05, "loss": 0.0342, "step": 101510 }, { "epoch": 0.1076, "grad_norm": 0.08482649177312851, "learning_rate": 2.6438137748215842e-05, "loss": 0.0354, "step": 101520 }, { "epoch": 0.10765, "grad_norm": 0.07509204745292664, "learning_rate": 2.643401089924452e-05, "loss": 0.0355, "step": 101530 }, { "epoch": 0.1077, "grad_norm": 0.09492380917072296, "learning_rate": 2.642988401106788e-05, "loss": 0.0359, "step": 101540 }, { "epoch": 0.10775, "grad_norm": 0.08142454922199249, "learning_rate": 2.642575708379875e-05, "loss": 0.0346, "step": 101550 }, { "epoch": 0.1078, "grad_norm": 0.07955773919820786, "learning_rate": 2.6421630117549962e-05, "loss": 0.0345, "step": 101560 }, { "epoch": 0.10785, "grad_norm": 0.0736236572265625, "learning_rate": 2.6417503112434334e-05, "loss": 0.034, "step": 101570 }, { "epoch": 0.1079, "grad_norm": 0.0750240609049797, "learning_rate": 2.641337606856471e-05, "loss": 0.0348, "step": 101580 }, { "epoch": 0.10795, "grad_norm": 0.06797299534082413, "learning_rate": 2.640924898605391e-05, "loss": 0.035, "step": 101590 }, { "epoch": 0.108, "grad_norm": 0.07420060783624649, "learning_rate": 2.640512186501477e-05, "loss": 0.0338, "step": 101600 }, { "epoch": 0.10805, "grad_norm": 0.06877487897872925, "learning_rate": 2.6400994705560122e-05, "loss": 0.0341, "step": 101610 }, { "epoch": 0.1081, "grad_norm": 0.080462247133255, "learning_rate": 2.639686750780282e-05, "loss": 0.0354, "step": 101620 }, { "epoch": 0.10815, "grad_norm": 0.10589322447776794, "learning_rate": 2.6392740271855677e-05, "loss": 0.0336, "step": 101630 }, { "epoch": 0.1082, "grad_norm": 0.08398301899433136, "learning_rate": 2.6388612997831537e-05, "loss": 0.0342, "step": 101640 }, { "epoch": 0.10825, "grad_norm": 0.0867784395813942, "learning_rate": 2.638448568584324e-05, "loss": 0.038, "step": 101650 }, { "epoch": 0.1083, "grad_norm": 0.07799968123435974, "learning_rate": 2.6380358336003626e-05, "loss": 0.0338, "step": 101660 }, { "epoch": 0.10835, "grad_norm": 0.0743381604552269, "learning_rate": 2.6376230948425527e-05, "loss": 0.0344, "step": 101670 }, { "epoch": 0.1084, "grad_norm": 0.07239367812871933, "learning_rate": 2.6372103523221802e-05, "loss": 0.0348, "step": 101680 }, { "epoch": 0.10845, "grad_norm": 0.0794605016708374, "learning_rate": 2.6367976060505274e-05, "loss": 0.0345, "step": 101690 }, { "epoch": 0.1085, "grad_norm": 0.11208935081958771, "learning_rate": 2.6363848560388793e-05, "loss": 0.0347, "step": 101700 }, { "epoch": 0.10855, "grad_norm": 0.0862400084733963, "learning_rate": 2.6359721022985217e-05, "loss": 0.035, "step": 101710 }, { "epoch": 0.1086, "grad_norm": 0.08822376281023026, "learning_rate": 2.6355593448407367e-05, "loss": 0.0361, "step": 101720 }, { "epoch": 0.10865, "grad_norm": 0.07624153047800064, "learning_rate": 2.63514658367681e-05, "loss": 0.0347, "step": 101730 }, { "epoch": 0.1087, "grad_norm": 0.07173407822847366, "learning_rate": 2.634733818818027e-05, "loss": 0.0341, "step": 101740 }, { "epoch": 0.10875, "grad_norm": 0.09655409306287766, "learning_rate": 2.6343210502756727e-05, "loss": 0.0398, "step": 101750 }, { "epoch": 0.1088, "grad_norm": 0.08558224141597748, "learning_rate": 2.63390827806103e-05, "loss": 0.0344, "step": 101760 }, { "epoch": 0.10885, "grad_norm": 0.07107986509799957, "learning_rate": 2.6334955021853868e-05, "loss": 0.0347, "step": 101770 }, { "epoch": 0.1089, "grad_norm": 0.08177818357944489, "learning_rate": 2.633082722660026e-05, "loss": 0.0351, "step": 101780 }, { "epoch": 0.10895, "grad_norm": 0.07822753489017487, "learning_rate": 2.6326699394962333e-05, "loss": 0.034, "step": 101790 }, { "epoch": 0.109, "grad_norm": 0.07757072150707245, "learning_rate": 2.6322571527052934e-05, "loss": 0.0351, "step": 101800 }, { "epoch": 0.10905, "grad_norm": 0.08093541860580444, "learning_rate": 2.6318443622984946e-05, "loss": 0.0349, "step": 101810 }, { "epoch": 0.1091, "grad_norm": 0.08605824410915375, "learning_rate": 2.6314315682871193e-05, "loss": 0.0371, "step": 101820 }, { "epoch": 0.10915, "grad_norm": 0.07828135788440704, "learning_rate": 2.6310187706824548e-05, "loss": 0.0345, "step": 101830 }, { "epoch": 0.1092, "grad_norm": 0.09517630934715271, "learning_rate": 2.6306059694957858e-05, "loss": 0.0346, "step": 101840 }, { "epoch": 0.10925, "grad_norm": 0.11697806417942047, "learning_rate": 2.6301931647383993e-05, "loss": 0.0349, "step": 101850 }, { "epoch": 0.1093, "grad_norm": 0.0877174437046051, "learning_rate": 2.6297803564215794e-05, "loss": 0.0342, "step": 101860 }, { "epoch": 0.10935, "grad_norm": 0.07876308262348175, "learning_rate": 2.6293675445566148e-05, "loss": 0.0337, "step": 101870 }, { "epoch": 0.1094, "grad_norm": 0.0758930966258049, "learning_rate": 2.6289547291547888e-05, "loss": 0.0341, "step": 101880 }, { "epoch": 0.10945, "grad_norm": 0.08109204471111298, "learning_rate": 2.62854191022739e-05, "loss": 0.0348, "step": 101890 }, { "epoch": 0.1095, "grad_norm": 0.07649962604045868, "learning_rate": 2.6281290877857033e-05, "loss": 0.0372, "step": 101900 }, { "epoch": 0.10955, "grad_norm": 0.08296927809715271, "learning_rate": 2.627716261841015e-05, "loss": 0.0336, "step": 101910 }, { "epoch": 0.1096, "grad_norm": 0.08457915484905243, "learning_rate": 2.6273034324046125e-05, "loss": 0.0338, "step": 101920 }, { "epoch": 0.10965, "grad_norm": 0.06991255283355713, "learning_rate": 2.6268905994877824e-05, "loss": 0.0344, "step": 101930 }, { "epoch": 0.1097, "grad_norm": 0.07757412642240524, "learning_rate": 2.6264777631018106e-05, "loss": 0.0338, "step": 101940 }, { "epoch": 0.10975, "grad_norm": 0.06705652922391891, "learning_rate": 2.6260649232579836e-05, "loss": 0.0334, "step": 101950 }, { "epoch": 0.1098, "grad_norm": 0.07722798734903336, "learning_rate": 2.6256520799675904e-05, "loss": 0.0334, "step": 101960 }, { "epoch": 0.10985, "grad_norm": 0.06761211901903152, "learning_rate": 2.6252392332419155e-05, "loss": 0.0344, "step": 101970 }, { "epoch": 0.1099, "grad_norm": 0.07083146274089813, "learning_rate": 2.6248263830922475e-05, "loss": 0.0356, "step": 101980 }, { "epoch": 0.10995, "grad_norm": 0.06778588891029358, "learning_rate": 2.6244135295298722e-05, "loss": 0.0361, "step": 101990 }, { "epoch": 0.11, "grad_norm": 0.07052619755268097, "learning_rate": 2.6240006725660786e-05, "loss": 0.0332, "step": 102000 }, { "epoch": 0.11005, "grad_norm": 0.08210963755846024, "learning_rate": 2.623587812212153e-05, "loss": 0.0332, "step": 102010 }, { "epoch": 0.1101, "grad_norm": 0.07508064806461334, "learning_rate": 2.623174948479383e-05, "loss": 0.0347, "step": 102020 }, { "epoch": 0.11015, "grad_norm": 0.0741884708404541, "learning_rate": 2.6227620813790564e-05, "loss": 0.0348, "step": 102030 }, { "epoch": 0.1102, "grad_norm": 0.06254855543375015, "learning_rate": 2.6223492109224613e-05, "loss": 0.0369, "step": 102040 }, { "epoch": 0.11025, "grad_norm": 0.08497337996959686, "learning_rate": 2.621936337120883e-05, "loss": 0.0357, "step": 102050 }, { "epoch": 0.1103, "grad_norm": 0.07627248018980026, "learning_rate": 2.621523459985612e-05, "loss": 0.0338, "step": 102060 }, { "epoch": 0.11035, "grad_norm": 0.07429230958223343, "learning_rate": 2.621110579527935e-05, "loss": 0.0346, "step": 102070 }, { "epoch": 0.1104, "grad_norm": 0.0647251307964325, "learning_rate": 2.620697695759141e-05, "loss": 0.0366, "step": 102080 }, { "epoch": 0.11045, "grad_norm": 0.10145459324121475, "learning_rate": 2.6202848086905164e-05, "loss": 0.0386, "step": 102090 }, { "epoch": 0.1105, "grad_norm": 0.07797590643167496, "learning_rate": 2.6198719183333508e-05, "loss": 0.0338, "step": 102100 }, { "epoch": 0.11055, "grad_norm": 0.07635772973299026, "learning_rate": 2.619459024698932e-05, "loss": 0.0371, "step": 102110 }, { "epoch": 0.1106, "grad_norm": 0.0660516545176506, "learning_rate": 2.619046127798548e-05, "loss": 0.035, "step": 102120 }, { "epoch": 0.11065, "grad_norm": 0.05850354954600334, "learning_rate": 2.618633227643488e-05, "loss": 0.0356, "step": 102130 }, { "epoch": 0.1107, "grad_norm": 0.06701915711164474, "learning_rate": 2.6182203242450397e-05, "loss": 0.0349, "step": 102140 }, { "epoch": 0.11075, "grad_norm": 0.09488559514284134, "learning_rate": 2.6178074176144924e-05, "loss": 0.0341, "step": 102150 }, { "epoch": 0.1108, "grad_norm": 0.06919229030609131, "learning_rate": 2.6173945077631345e-05, "loss": 0.0346, "step": 102160 }, { "epoch": 0.11085, "grad_norm": 0.0965726301074028, "learning_rate": 2.6169815947022553e-05, "loss": 0.0343, "step": 102170 }, { "epoch": 0.1109, "grad_norm": 0.08869483321905136, "learning_rate": 2.6165686784431426e-05, "loss": 0.0346, "step": 102180 }, { "epoch": 0.11095, "grad_norm": 0.07211389392614365, "learning_rate": 2.6161557589970865e-05, "loss": 0.0339, "step": 102190 }, { "epoch": 0.111, "grad_norm": 0.07438324391841888, "learning_rate": 2.615742836375375e-05, "loss": 0.0343, "step": 102200 }, { "epoch": 0.11105, "grad_norm": 0.07350186258554459, "learning_rate": 2.6153299105892986e-05, "loss": 0.0334, "step": 102210 }, { "epoch": 0.1111, "grad_norm": 0.08213651180267334, "learning_rate": 2.614916981650145e-05, "loss": 0.0356, "step": 102220 }, { "epoch": 0.11115, "grad_norm": 0.05933769419789314, "learning_rate": 2.6145040495692053e-05, "loss": 0.0365, "step": 102230 }, { "epoch": 0.1112, "grad_norm": 0.061010610312223434, "learning_rate": 2.614091114357766e-05, "loss": 0.0335, "step": 102240 }, { "epoch": 0.11125, "grad_norm": 0.06389537453651428, "learning_rate": 2.6136781760271205e-05, "loss": 0.0337, "step": 102250 }, { "epoch": 0.1113, "grad_norm": 0.06789553910493851, "learning_rate": 2.6132652345885555e-05, "loss": 0.0346, "step": 102260 }, { "epoch": 0.11135, "grad_norm": 0.06623226404190063, "learning_rate": 2.612852290053362e-05, "loss": 0.0357, "step": 102270 }, { "epoch": 0.1114, "grad_norm": 0.07182657718658447, "learning_rate": 2.6124393424328285e-05, "loss": 0.0357, "step": 102280 }, { "epoch": 0.11145, "grad_norm": 0.0692882090806961, "learning_rate": 2.612026391738247e-05, "loss": 0.0349, "step": 102290 }, { "epoch": 0.1115, "grad_norm": 0.08696920424699783, "learning_rate": 2.6116134379809047e-05, "loss": 0.0362, "step": 102300 }, { "epoch": 0.11155, "grad_norm": 0.08429858833551407, "learning_rate": 2.611200481172093e-05, "loss": 0.0353, "step": 102310 }, { "epoch": 0.1116, "grad_norm": 0.10219788551330566, "learning_rate": 2.6107875213231027e-05, "loss": 0.0356, "step": 102320 }, { "epoch": 0.11165, "grad_norm": 0.09618718922138214, "learning_rate": 2.6103745584452227e-05, "loss": 0.0336, "step": 102330 }, { "epoch": 0.1117, "grad_norm": 0.07313793152570724, "learning_rate": 2.609961592549744e-05, "loss": 0.0334, "step": 102340 }, { "epoch": 0.11175, "grad_norm": 0.07702039182186127, "learning_rate": 2.6095486236479567e-05, "loss": 0.0371, "step": 102350 }, { "epoch": 0.1118, "grad_norm": 0.09838972240686417, "learning_rate": 2.6091356517511505e-05, "loss": 0.0364, "step": 102360 }, { "epoch": 0.11185, "grad_norm": 0.08678235858678818, "learning_rate": 2.608722676870617e-05, "loss": 0.0355, "step": 102370 }, { "epoch": 0.1119, "grad_norm": 0.08802623301744461, "learning_rate": 2.6083096990176464e-05, "loss": 0.0349, "step": 102380 }, { "epoch": 0.11195, "grad_norm": 0.11088254302740097, "learning_rate": 2.6078967182035297e-05, "loss": 0.0363, "step": 102390 }, { "epoch": 0.112, "grad_norm": 0.0822426900267601, "learning_rate": 2.6074837344395564e-05, "loss": 0.0348, "step": 102400 }, { "epoch": 0.11205, "grad_norm": 0.07988717406988144, "learning_rate": 2.6070707477370188e-05, "loss": 0.0351, "step": 102410 }, { "epoch": 0.1121, "grad_norm": 0.08314771950244904, "learning_rate": 2.6066577581072072e-05, "loss": 0.0345, "step": 102420 }, { "epoch": 0.11215, "grad_norm": 0.07573059946298599, "learning_rate": 2.6062447655614125e-05, "loss": 0.0349, "step": 102430 }, { "epoch": 0.1122, "grad_norm": 0.07272529602050781, "learning_rate": 2.6058317701109253e-05, "loss": 0.0348, "step": 102440 }, { "epoch": 0.11225, "grad_norm": 0.08205297589302063, "learning_rate": 2.6054187717670375e-05, "loss": 0.0393, "step": 102450 }, { "epoch": 0.1123, "grad_norm": 0.08683769404888153, "learning_rate": 2.6050057705410406e-05, "loss": 0.0367, "step": 102460 }, { "epoch": 0.11235, "grad_norm": 0.082497738301754, "learning_rate": 2.604592766444225e-05, "loss": 0.0362, "step": 102470 }, { "epoch": 0.1124, "grad_norm": 0.07083296775817871, "learning_rate": 2.6041797594878832e-05, "loss": 0.0349, "step": 102480 }, { "epoch": 0.11245, "grad_norm": 0.0852193683385849, "learning_rate": 2.6037667496833046e-05, "loss": 0.0357, "step": 102490 }, { "epoch": 0.1125, "grad_norm": 0.08967921882867813, "learning_rate": 2.6033537370417827e-05, "loss": 0.0352, "step": 102500 }, { "epoch": 0.11255, "grad_norm": 0.09028714150190353, "learning_rate": 2.6029407215746082e-05, "loss": 0.0353, "step": 102510 }, { "epoch": 0.1126, "grad_norm": 0.08273370563983917, "learning_rate": 2.6025277032930734e-05, "loss": 0.0346, "step": 102520 }, { "epoch": 0.11265, "grad_norm": 0.07858886569738388, "learning_rate": 2.6021146822084696e-05, "loss": 0.0344, "step": 102530 }, { "epoch": 0.1127, "grad_norm": 0.07406245172023773, "learning_rate": 2.601701658332089e-05, "loss": 0.0353, "step": 102540 }, { "epoch": 0.11275, "grad_norm": 0.0656527429819107, "learning_rate": 2.6012886316752227e-05, "loss": 0.0362, "step": 102550 }, { "epoch": 0.1128, "grad_norm": 0.0866030603647232, "learning_rate": 2.6008756022491636e-05, "loss": 0.0358, "step": 102560 }, { "epoch": 0.11285, "grad_norm": 0.0703306645154953, "learning_rate": 2.6004625700652037e-05, "loss": 0.0361, "step": 102570 }, { "epoch": 0.1129, "grad_norm": 0.06990797817707062, "learning_rate": 2.6000495351346342e-05, "loss": 0.035, "step": 102580 }, { "epoch": 0.11295, "grad_norm": 0.06496477872133255, "learning_rate": 2.5996364974687486e-05, "loss": 0.0354, "step": 102590 }, { "epoch": 0.113, "grad_norm": 0.07002412527799606, "learning_rate": 2.5992234570788386e-05, "loss": 0.0348, "step": 102600 }, { "epoch": 0.11305, "grad_norm": 0.07898403704166412, "learning_rate": 2.5988104139761965e-05, "loss": 0.0366, "step": 102610 }, { "epoch": 0.1131, "grad_norm": 0.08444248139858246, "learning_rate": 2.5983973681721142e-05, "loss": 0.0352, "step": 102620 }, { "epoch": 0.11315, "grad_norm": 0.08479222655296326, "learning_rate": 2.597984319677885e-05, "loss": 0.0349, "step": 102630 }, { "epoch": 0.1132, "grad_norm": 0.09469138085842133, "learning_rate": 2.5975712685048022e-05, "loss": 0.0348, "step": 102640 }, { "epoch": 0.11325, "grad_norm": 0.08109046518802643, "learning_rate": 2.5971582146641564e-05, "loss": 0.0361, "step": 102650 }, { "epoch": 0.1133, "grad_norm": 0.07394376397132874, "learning_rate": 2.596745158167242e-05, "loss": 0.0351, "step": 102660 }, { "epoch": 0.11335, "grad_norm": 0.09283407777547836, "learning_rate": 2.596332099025352e-05, "loss": 0.0376, "step": 102670 }, { "epoch": 0.1134, "grad_norm": 0.08426985889673233, "learning_rate": 2.5959190372497778e-05, "loss": 0.0349, "step": 102680 }, { "epoch": 0.11345, "grad_norm": 0.082867331802845, "learning_rate": 2.5955059728518126e-05, "loss": 0.0361, "step": 102690 }, { "epoch": 0.1135, "grad_norm": 0.07347674667835236, "learning_rate": 2.5950929058427508e-05, "loss": 0.0348, "step": 102700 }, { "epoch": 0.11355, "grad_norm": 0.06538400053977966, "learning_rate": 2.5946798362338853e-05, "loss": 0.035, "step": 102710 }, { "epoch": 0.1136, "grad_norm": 0.07394007593393326, "learning_rate": 2.5942667640365075e-05, "loss": 0.0347, "step": 102720 }, { "epoch": 0.11365, "grad_norm": 0.07163910567760468, "learning_rate": 2.5938536892619126e-05, "loss": 0.0362, "step": 102730 }, { "epoch": 0.1137, "grad_norm": 0.0803307518362999, "learning_rate": 2.5934406119213928e-05, "loss": 0.0367, "step": 102740 }, { "epoch": 0.11375, "grad_norm": 0.09824536740779877, "learning_rate": 2.5930275320262415e-05, "loss": 0.0354, "step": 102750 }, { "epoch": 0.1138, "grad_norm": 0.07299476116895676, "learning_rate": 2.5926144495877525e-05, "loss": 0.0355, "step": 102760 }, { "epoch": 0.11385, "grad_norm": 0.06663201004266739, "learning_rate": 2.5922013646172195e-05, "loss": 0.035, "step": 102770 }, { "epoch": 0.1139, "grad_norm": 0.06026874855160713, "learning_rate": 2.5917882771259354e-05, "loss": 0.0347, "step": 102780 }, { "epoch": 0.11395, "grad_norm": 0.07106168568134308, "learning_rate": 2.5913751871251952e-05, "loss": 0.0348, "step": 102790 }, { "epoch": 0.114, "grad_norm": 0.06715033948421478, "learning_rate": 2.590962094626291e-05, "loss": 0.0346, "step": 102800 }, { "epoch": 0.11405, "grad_norm": 0.059849146753549576, "learning_rate": 2.5905489996405176e-05, "loss": 0.0352, "step": 102810 }, { "epoch": 0.1141, "grad_norm": 0.05644575506448746, "learning_rate": 2.5901359021791678e-05, "loss": 0.0356, "step": 102820 }, { "epoch": 0.11415, "grad_norm": 0.07059363275766373, "learning_rate": 2.589722802253537e-05, "loss": 0.0355, "step": 102830 }, { "epoch": 0.1142, "grad_norm": 0.07363907992839813, "learning_rate": 2.5893096998749183e-05, "loss": 0.0372, "step": 102840 }, { "epoch": 0.11425, "grad_norm": 0.07004018127918243, "learning_rate": 2.5888965950546062e-05, "loss": 0.0345, "step": 102850 }, { "epoch": 0.1143, "grad_norm": 0.07058753073215485, "learning_rate": 2.5884834878038944e-05, "loss": 0.0353, "step": 102860 }, { "epoch": 0.11435, "grad_norm": 0.07271018624305725, "learning_rate": 2.588070378134077e-05, "loss": 0.0344, "step": 102870 }, { "epoch": 0.1144, "grad_norm": 0.07091826945543289, "learning_rate": 2.5876572660564484e-05, "loss": 0.0392, "step": 102880 }, { "epoch": 0.11445, "grad_norm": 0.06540842354297638, "learning_rate": 2.5872441515823043e-05, "loss": 0.035, "step": 102890 }, { "epoch": 0.1145, "grad_norm": 0.07548410445451736, "learning_rate": 2.5868310347229368e-05, "loss": 0.0351, "step": 102900 }, { "epoch": 0.11455, "grad_norm": 0.07375458627939224, "learning_rate": 2.586417915489642e-05, "loss": 0.0354, "step": 102910 }, { "epoch": 0.1146, "grad_norm": 0.07141046226024628, "learning_rate": 2.586004793893713e-05, "loss": 0.0344, "step": 102920 }, { "epoch": 0.11465, "grad_norm": 0.07697925716638565, "learning_rate": 2.585591669946446e-05, "loss": 0.036, "step": 102930 }, { "epoch": 0.1147, "grad_norm": 0.08221516013145447, "learning_rate": 2.5851785436591346e-05, "loss": 0.0363, "step": 102940 }, { "epoch": 0.11475, "grad_norm": 0.07305683940649033, "learning_rate": 2.5847654150430738e-05, "loss": 0.0376, "step": 102950 }, { "epoch": 0.1148, "grad_norm": 0.07690191268920898, "learning_rate": 2.584352284109559e-05, "loss": 0.035, "step": 102960 }, { "epoch": 0.11485, "grad_norm": 0.0880003422498703, "learning_rate": 2.5839391508698834e-05, "loss": 0.0342, "step": 102970 }, { "epoch": 0.1149, "grad_norm": 0.06833967566490173, "learning_rate": 2.5835260153353442e-05, "loss": 0.0351, "step": 102980 }, { "epoch": 0.11495, "grad_norm": 0.08681900799274445, "learning_rate": 2.5831128775172343e-05, "loss": 0.0359, "step": 102990 }, { "epoch": 0.115, "grad_norm": 0.06930039823055267, "learning_rate": 2.5826997374268498e-05, "loss": 0.0345, "step": 103000 }, { "epoch": 0.11505, "grad_norm": 0.07829985022544861, "learning_rate": 2.582286595075485e-05, "loss": 0.0359, "step": 103010 }, { "epoch": 0.1151, "grad_norm": 0.072901152074337, "learning_rate": 2.5818734504744362e-05, "loss": 0.0347, "step": 103020 }, { "epoch": 0.11515, "grad_norm": 0.06215919554233551, "learning_rate": 2.581460303634998e-05, "loss": 0.0342, "step": 103030 }, { "epoch": 0.1152, "grad_norm": 0.07041651010513306, "learning_rate": 2.5810471545684656e-05, "loss": 0.0354, "step": 103040 }, { "epoch": 0.11525, "grad_norm": 0.0670313835144043, "learning_rate": 2.580634003286134e-05, "loss": 0.0356, "step": 103050 }, { "epoch": 0.1153, "grad_norm": 0.09243426471948624, "learning_rate": 2.5802208497993e-05, "loss": 0.0353, "step": 103060 }, { "epoch": 0.11535, "grad_norm": 0.11291596293449402, "learning_rate": 2.5798076941192573e-05, "loss": 0.0356, "step": 103070 }, { "epoch": 0.1154, "grad_norm": 0.08283979445695877, "learning_rate": 2.5793945362573026e-05, "loss": 0.0366, "step": 103080 }, { "epoch": 0.11545, "grad_norm": 0.0757361352443695, "learning_rate": 2.5789813762247305e-05, "loss": 0.0353, "step": 103090 }, { "epoch": 0.1155, "grad_norm": 0.07541199773550034, "learning_rate": 2.5785682140328382e-05, "loss": 0.0344, "step": 103100 }, { "epoch": 0.11555, "grad_norm": 0.07456483691930771, "learning_rate": 2.5781550496929203e-05, "loss": 0.0352, "step": 103110 }, { "epoch": 0.1156, "grad_norm": 0.0701032429933548, "learning_rate": 2.577741883216272e-05, "loss": 0.0356, "step": 103120 }, { "epoch": 0.11565, "grad_norm": 0.07182919979095459, "learning_rate": 2.5773287146141902e-05, "loss": 0.034, "step": 103130 }, { "epoch": 0.1157, "grad_norm": 0.06604748964309692, "learning_rate": 2.5769155438979698e-05, "loss": 0.034, "step": 103140 }, { "epoch": 0.11575, "grad_norm": 0.07194375991821289, "learning_rate": 2.576502371078908e-05, "loss": 0.0346, "step": 103150 }, { "epoch": 0.1158, "grad_norm": 0.07760827988386154, "learning_rate": 2.5760891961683005e-05, "loss": 0.0354, "step": 103160 }, { "epoch": 0.11585, "grad_norm": 0.07550985366106033, "learning_rate": 2.5756760191774427e-05, "loss": 0.0343, "step": 103170 }, { "epoch": 0.1159, "grad_norm": 0.07745255529880524, "learning_rate": 2.5752628401176303e-05, "loss": 0.037, "step": 103180 }, { "epoch": 0.11595, "grad_norm": 0.06956346333026886, "learning_rate": 2.5748496590001614e-05, "loss": 0.0348, "step": 103190 }, { "epoch": 0.116, "grad_norm": 0.06828167289495468, "learning_rate": 2.5744364758363294e-05, "loss": 0.0341, "step": 103200 }, { "epoch": 0.11605, "grad_norm": 0.06441890448331833, "learning_rate": 2.574023290637433e-05, "loss": 0.0347, "step": 103210 }, { "epoch": 0.1161, "grad_norm": 0.06448233872652054, "learning_rate": 2.5736101034147674e-05, "loss": 0.0339, "step": 103220 }, { "epoch": 0.11615, "grad_norm": 0.08343735337257385, "learning_rate": 2.5731969141796296e-05, "loss": 0.0347, "step": 103230 }, { "epoch": 0.1162, "grad_norm": 0.08863923698663712, "learning_rate": 2.572783722943315e-05, "loss": 0.0372, "step": 103240 }, { "epoch": 0.11625, "grad_norm": 0.08323957026004791, "learning_rate": 2.572370529717122e-05, "loss": 0.0338, "step": 103250 }, { "epoch": 0.1163, "grad_norm": 0.07746366411447525, "learning_rate": 2.571957334512344e-05, "loss": 0.0339, "step": 103260 }, { "epoch": 0.11635, "grad_norm": 0.07018949091434479, "learning_rate": 2.571544137340281e-05, "loss": 0.0334, "step": 103270 }, { "epoch": 0.1164, "grad_norm": 0.06720632314682007, "learning_rate": 2.5711309382122272e-05, "loss": 0.0343, "step": 103280 }, { "epoch": 0.11645, "grad_norm": 0.07699345797300339, "learning_rate": 2.5707177371394813e-05, "loss": 0.0343, "step": 103290 }, { "epoch": 0.1165, "grad_norm": 0.07368099689483643, "learning_rate": 2.5703045341333387e-05, "loss": 0.0347, "step": 103300 }, { "epoch": 0.11655, "grad_norm": 0.08487499505281448, "learning_rate": 2.5698913292050964e-05, "loss": 0.0368, "step": 103310 }, { "epoch": 0.1166, "grad_norm": 0.07324866950511932, "learning_rate": 2.5694781223660515e-05, "loss": 0.035, "step": 103320 }, { "epoch": 0.11665, "grad_norm": 0.06309445202350616, "learning_rate": 2.5690649136275002e-05, "loss": 0.0346, "step": 103330 }, { "epoch": 0.1167, "grad_norm": 0.05569295585155487, "learning_rate": 2.5686517030007408e-05, "loss": 0.0336, "step": 103340 }, { "epoch": 0.11675, "grad_norm": 0.06275855004787445, "learning_rate": 2.568238490497069e-05, "loss": 0.0375, "step": 103350 }, { "epoch": 0.1168, "grad_norm": 0.07405540347099304, "learning_rate": 2.5678252761277834e-05, "loss": 0.0352, "step": 103360 }, { "epoch": 0.11685, "grad_norm": 0.0736590251326561, "learning_rate": 2.5674120599041795e-05, "loss": 0.0358, "step": 103370 }, { "epoch": 0.1169, "grad_norm": 0.09769752621650696, "learning_rate": 2.5669988418375563e-05, "loss": 0.036, "step": 103380 }, { "epoch": 0.11695, "grad_norm": 0.09497690200805664, "learning_rate": 2.566585621939208e-05, "loss": 0.036, "step": 103390 }, { "epoch": 0.117, "grad_norm": 0.08184391260147095, "learning_rate": 2.5661724002204357e-05, "loss": 0.0365, "step": 103400 }, { "epoch": 0.11705, "grad_norm": 0.06988545507192612, "learning_rate": 2.5657591766925337e-05, "loss": 0.0356, "step": 103410 }, { "epoch": 0.1171, "grad_norm": 0.07233225554227829, "learning_rate": 2.5653459513668015e-05, "loss": 0.0346, "step": 103420 }, { "epoch": 0.11715, "grad_norm": 0.09395267814397812, "learning_rate": 2.5649327242545346e-05, "loss": 0.0329, "step": 103430 }, { "epoch": 0.1172, "grad_norm": 0.07595948129892349, "learning_rate": 2.564519495367032e-05, "loss": 0.0348, "step": 103440 }, { "epoch": 0.11725, "grad_norm": 0.08757159113883972, "learning_rate": 2.56410626471559e-05, "loss": 0.0351, "step": 103450 }, { "epoch": 0.1173, "grad_norm": 0.08131173998117447, "learning_rate": 2.563693032311507e-05, "loss": 0.0338, "step": 103460 }, { "epoch": 0.11735, "grad_norm": 0.07512043416500092, "learning_rate": 2.5632797981660813e-05, "loss": 0.036, "step": 103470 }, { "epoch": 0.1174, "grad_norm": 0.06512127071619034, "learning_rate": 2.562866562290609e-05, "loss": 0.034, "step": 103480 }, { "epoch": 0.11745, "grad_norm": 0.0664801150560379, "learning_rate": 2.5624533246963883e-05, "loss": 0.0341, "step": 103490 }, { "epoch": 0.1175, "grad_norm": 0.07180184870958328, "learning_rate": 2.562040085394718e-05, "loss": 0.0352, "step": 103500 }, { "epoch": 0.11755, "grad_norm": 0.07850921154022217, "learning_rate": 2.5616268443968938e-05, "loss": 0.0343, "step": 103510 }, { "epoch": 0.1176, "grad_norm": 0.07511237263679504, "learning_rate": 2.5612136017142158e-05, "loss": 0.0335, "step": 103520 }, { "epoch": 0.11765, "grad_norm": 0.0689835473895073, "learning_rate": 2.5608003573579803e-05, "loss": 0.0338, "step": 103530 }, { "epoch": 0.1177, "grad_norm": 0.06506875157356262, "learning_rate": 2.560387111339486e-05, "loss": 0.0358, "step": 103540 }, { "epoch": 0.11775, "grad_norm": 0.06760787963867188, "learning_rate": 2.559973863670031e-05, "loss": 0.0381, "step": 103550 }, { "epoch": 0.1178, "grad_norm": 0.0856146365404129, "learning_rate": 2.559560614360913e-05, "loss": 0.0366, "step": 103560 }, { "epoch": 0.11785, "grad_norm": 0.07530411332845688, "learning_rate": 2.559147363423431e-05, "loss": 0.034, "step": 103570 }, { "epoch": 0.1179, "grad_norm": 0.07682434469461441, "learning_rate": 2.558734110868881e-05, "loss": 0.0371, "step": 103580 }, { "epoch": 0.11795, "grad_norm": 0.07229023426771164, "learning_rate": 2.5583208567085625e-05, "loss": 0.034, "step": 103590 }, { "epoch": 0.118, "grad_norm": 0.07774133235216141, "learning_rate": 2.5579076009537745e-05, "loss": 0.034, "step": 103600 }, { "epoch": 0.11805, "grad_norm": 0.11243210732936859, "learning_rate": 2.5574943436158145e-05, "loss": 0.0343, "step": 103610 }, { "epoch": 0.1181, "grad_norm": 0.08603610843420029, "learning_rate": 2.55708108470598e-05, "loss": 0.0357, "step": 103620 }, { "epoch": 0.11815, "grad_norm": 0.08804672956466675, "learning_rate": 2.5566678242355706e-05, "loss": 0.0345, "step": 103630 }, { "epoch": 0.1182, "grad_norm": 0.07011974602937698, "learning_rate": 2.5562545622158833e-05, "loss": 0.0347, "step": 103640 }, { "epoch": 0.11825, "grad_norm": 0.08410502970218658, "learning_rate": 2.5558412986582186e-05, "loss": 0.0372, "step": 103650 }, { "epoch": 0.1183, "grad_norm": 0.0645311176776886, "learning_rate": 2.5554280335738733e-05, "loss": 0.035, "step": 103660 }, { "epoch": 0.11835, "grad_norm": 0.06442949175834656, "learning_rate": 2.555014766974147e-05, "loss": 0.0344, "step": 103670 }, { "epoch": 0.1184, "grad_norm": 0.07331974059343338, "learning_rate": 2.5546014988703366e-05, "loss": 0.037, "step": 103680 }, { "epoch": 0.11845, "grad_norm": 0.06093655526638031, "learning_rate": 2.554188229273743e-05, "loss": 0.0353, "step": 103690 }, { "epoch": 0.1185, "grad_norm": 0.07783251255750656, "learning_rate": 2.553774958195662e-05, "loss": 0.036, "step": 103700 }, { "epoch": 0.11855, "grad_norm": 0.0731554701924324, "learning_rate": 2.5533616856473945e-05, "loss": 0.0374, "step": 103710 }, { "epoch": 0.1186, "grad_norm": 0.06992392987012863, "learning_rate": 2.5529484116402384e-05, "loss": 0.0366, "step": 103720 }, { "epoch": 0.11865, "grad_norm": 0.07954194396734238, "learning_rate": 2.5525351361854932e-05, "loss": 0.0357, "step": 103730 }, { "epoch": 0.1187, "grad_norm": 0.07312312722206116, "learning_rate": 2.552121859294457e-05, "loss": 0.0356, "step": 103740 }, { "epoch": 0.11875, "grad_norm": 0.0759984701871872, "learning_rate": 2.5517085809784286e-05, "loss": 0.0367, "step": 103750 }, { "epoch": 0.1188, "grad_norm": 0.07431000471115112, "learning_rate": 2.5512953012487067e-05, "loss": 0.0354, "step": 103760 }, { "epoch": 0.11885, "grad_norm": 0.08185004442930222, "learning_rate": 2.5508820201165907e-05, "loss": 0.0366, "step": 103770 }, { "epoch": 0.1189, "grad_norm": 0.0904598981142044, "learning_rate": 2.5504687375933796e-05, "loss": 0.036, "step": 103780 }, { "epoch": 0.11895, "grad_norm": 0.07650790363550186, "learning_rate": 2.550055453690372e-05, "loss": 0.0349, "step": 103790 }, { "epoch": 0.119, "grad_norm": 0.06640942394733429, "learning_rate": 2.549642168418867e-05, "loss": 0.0344, "step": 103800 }, { "epoch": 0.11905, "grad_norm": 0.07499713450670242, "learning_rate": 2.5492288817901638e-05, "loss": 0.0348, "step": 103810 }, { "epoch": 0.1191, "grad_norm": 0.08475736528635025, "learning_rate": 2.548815593815562e-05, "loss": 0.0358, "step": 103820 }, { "epoch": 0.11915, "grad_norm": 0.07885356992483139, "learning_rate": 2.5484023045063598e-05, "loss": 0.0377, "step": 103830 }, { "epoch": 0.1192, "grad_norm": 0.07656852900981903, "learning_rate": 2.5479890138738565e-05, "loss": 0.0334, "step": 103840 }, { "epoch": 0.11925, "grad_norm": 0.06738829612731934, "learning_rate": 2.5475757219293516e-05, "loss": 0.0358, "step": 103850 }, { "epoch": 0.1193, "grad_norm": 0.07586340606212616, "learning_rate": 2.547162428684145e-05, "loss": 0.0363, "step": 103860 }, { "epoch": 0.11935, "grad_norm": 0.06756033003330231, "learning_rate": 2.5467491341495348e-05, "loss": 0.0342, "step": 103870 }, { "epoch": 0.1194, "grad_norm": 0.07476552575826645, "learning_rate": 2.5463358383368212e-05, "loss": 0.0337, "step": 103880 }, { "epoch": 0.11945, "grad_norm": 0.07246913760900497, "learning_rate": 2.545922541257303e-05, "loss": 0.0349, "step": 103890 }, { "epoch": 0.1195, "grad_norm": 0.06302965432405472, "learning_rate": 2.5455092429222793e-05, "loss": 0.0342, "step": 103900 }, { "epoch": 0.11955, "grad_norm": 0.07466553896665573, "learning_rate": 2.5450959433430505e-05, "loss": 0.0348, "step": 103910 }, { "epoch": 0.1196, "grad_norm": 0.09401459246873856, "learning_rate": 2.5446826425309157e-05, "loss": 0.0348, "step": 103920 }, { "epoch": 0.11965, "grad_norm": 0.08614931255578995, "learning_rate": 2.5442693404971735e-05, "loss": 0.0351, "step": 103930 }, { "epoch": 0.1197, "grad_norm": 0.11952122300863266, "learning_rate": 2.543856037253125e-05, "loss": 0.0359, "step": 103940 }, { "epoch": 0.11975, "grad_norm": 0.08765014261007309, "learning_rate": 2.5434427328100684e-05, "loss": 0.0356, "step": 103950 }, { "epoch": 0.1198, "grad_norm": 0.06619185209274292, "learning_rate": 2.5430294271793042e-05, "loss": 0.0352, "step": 103960 }, { "epoch": 0.11985, "grad_norm": 0.0630049780011177, "learning_rate": 2.542616120372131e-05, "loss": 0.0355, "step": 103970 }, { "epoch": 0.1199, "grad_norm": 0.06189363822340965, "learning_rate": 2.54220281239985e-05, "loss": 0.0344, "step": 103980 }, { "epoch": 0.11995, "grad_norm": 0.0778106078505516, "learning_rate": 2.5417895032737592e-05, "loss": 0.036, "step": 103990 }, { "epoch": 0.12, "grad_norm": 0.06497669219970703, "learning_rate": 2.54137619300516e-05, "loss": 0.0342, "step": 104000 }, { "epoch": 0.12005, "grad_norm": 0.07563336193561554, "learning_rate": 2.5409628816053498e-05, "loss": 0.0347, "step": 104010 }, { "epoch": 0.1201, "grad_norm": 0.07418262213468552, "learning_rate": 2.5405495690856307e-05, "loss": 0.0341, "step": 104020 }, { "epoch": 0.12015, "grad_norm": 0.06178402155637741, "learning_rate": 2.540136255457301e-05, "loss": 0.0341, "step": 104030 }, { "epoch": 0.1202, "grad_norm": 0.05717574805021286, "learning_rate": 2.5397229407316624e-05, "loss": 0.0338, "step": 104040 }, { "epoch": 0.12025, "grad_norm": 0.08341573178768158, "learning_rate": 2.5393096249200127e-05, "loss": 0.0355, "step": 104050 }, { "epoch": 0.1203, "grad_norm": 0.06537025421857834, "learning_rate": 2.538896308033652e-05, "loss": 0.0347, "step": 104060 }, { "epoch": 0.12035, "grad_norm": 0.06877518445253372, "learning_rate": 2.538482990083882e-05, "loss": 0.0363, "step": 104070 }, { "epoch": 0.1204, "grad_norm": 0.0737723708152771, "learning_rate": 2.5380696710820012e-05, "loss": 0.0337, "step": 104080 }, { "epoch": 0.12045, "grad_norm": 0.08432099223136902, "learning_rate": 2.537656351039309e-05, "loss": 0.0331, "step": 104090 }, { "epoch": 0.1205, "grad_norm": 0.07213162630796432, "learning_rate": 2.5372430299671075e-05, "loss": 0.0341, "step": 104100 }, { "epoch": 0.12055, "grad_norm": 0.08251155912876129, "learning_rate": 2.536829707876695e-05, "loss": 0.0351, "step": 104110 }, { "epoch": 0.1206, "grad_norm": 0.07315324246883392, "learning_rate": 2.536416384779372e-05, "loss": 0.0336, "step": 104120 }, { "epoch": 0.12065, "grad_norm": 0.0627518743276596, "learning_rate": 2.5360030606864392e-05, "loss": 0.0324, "step": 104130 }, { "epoch": 0.1207, "grad_norm": 0.07793736457824707, "learning_rate": 2.535589735609196e-05, "loss": 0.035, "step": 104140 }, { "epoch": 0.12075, "grad_norm": 0.092694953083992, "learning_rate": 2.5351764095589425e-05, "loss": 0.033, "step": 104150 }, { "epoch": 0.1208, "grad_norm": 0.07844278216362, "learning_rate": 2.5347630825469792e-05, "loss": 0.0353, "step": 104160 }, { "epoch": 0.12085, "grad_norm": 0.08317578583955765, "learning_rate": 2.5343497545846074e-05, "loss": 0.0349, "step": 104170 }, { "epoch": 0.1209, "grad_norm": 0.08206788450479507, "learning_rate": 2.533936425683125e-05, "loss": 0.0342, "step": 104180 }, { "epoch": 0.12095, "grad_norm": 0.07000011950731277, "learning_rate": 2.5335230958538343e-05, "loss": 0.0348, "step": 104190 }, { "epoch": 0.121, "grad_norm": 0.08563758432865143, "learning_rate": 2.533109765108034e-05, "loss": 0.0331, "step": 104200 }, { "epoch": 0.12105, "grad_norm": 0.09002260118722916, "learning_rate": 2.5326964334570257e-05, "loss": 0.0349, "step": 104210 }, { "epoch": 0.1211, "grad_norm": 0.0715671107172966, "learning_rate": 2.5322831009121084e-05, "loss": 0.0345, "step": 104220 }, { "epoch": 0.12115, "grad_norm": 0.07427511364221573, "learning_rate": 2.5318697674845844e-05, "loss": 0.0351, "step": 104230 }, { "epoch": 0.1212, "grad_norm": 0.06503819674253464, "learning_rate": 2.5314564331857515e-05, "loss": 0.0348, "step": 104240 }, { "epoch": 0.12125, "grad_norm": 0.07333590090274811, "learning_rate": 2.531043098026913e-05, "loss": 0.034, "step": 104250 }, { "epoch": 0.1213, "grad_norm": 0.06548633426427841, "learning_rate": 2.530629762019367e-05, "loss": 0.0333, "step": 104260 }, { "epoch": 0.12135, "grad_norm": 0.058450475335121155, "learning_rate": 2.530216425174415e-05, "loss": 0.0336, "step": 104270 }, { "epoch": 0.1214, "grad_norm": 0.07402554154396057, "learning_rate": 2.529803087503357e-05, "loss": 0.0353, "step": 104280 }, { "epoch": 0.12145, "grad_norm": 0.06446702778339386, "learning_rate": 2.5293897490174945e-05, "loss": 0.0335, "step": 104290 }, { "epoch": 0.1215, "grad_norm": 0.06754470616579056, "learning_rate": 2.5289764097281264e-05, "loss": 0.0344, "step": 104300 }, { "epoch": 0.12155, "grad_norm": 0.05972554534673691, "learning_rate": 2.5285630696465546e-05, "loss": 0.034, "step": 104310 }, { "epoch": 0.1216, "grad_norm": 0.06952910125255585, "learning_rate": 2.52814972878408e-05, "loss": 0.0354, "step": 104320 }, { "epoch": 0.12165, "grad_norm": 0.07992678135633469, "learning_rate": 2.527736387152001e-05, "loss": 0.0366, "step": 104330 }, { "epoch": 0.1217, "grad_norm": 0.09057517349720001, "learning_rate": 2.5273230447616203e-05, "loss": 0.0362, "step": 104340 }, { "epoch": 0.12175, "grad_norm": 0.0814778134226799, "learning_rate": 2.5269097016242375e-05, "loss": 0.0362, "step": 104350 }, { "epoch": 0.1218, "grad_norm": 0.08909647166728973, "learning_rate": 2.526496357751154e-05, "loss": 0.0352, "step": 104360 }, { "epoch": 0.12185, "grad_norm": 0.08012577146291733, "learning_rate": 2.52608301315367e-05, "loss": 0.0346, "step": 104370 }, { "epoch": 0.1219, "grad_norm": 0.08825207501649857, "learning_rate": 2.5256696678430864e-05, "loss": 0.0342, "step": 104380 }, { "epoch": 0.12195, "grad_norm": 0.07060939818620682, "learning_rate": 2.525256321830703e-05, "loss": 0.035, "step": 104390 }, { "epoch": 0.122, "grad_norm": 0.08157352358102798, "learning_rate": 2.5248429751278224e-05, "loss": 0.0349, "step": 104400 }, { "epoch": 0.12205, "grad_norm": 0.07402607053518295, "learning_rate": 2.5244296277457423e-05, "loss": 0.0336, "step": 104410 }, { "epoch": 0.1221, "grad_norm": 0.07185854017734528, "learning_rate": 2.5240162796957673e-05, "loss": 0.0345, "step": 104420 }, { "epoch": 0.12215, "grad_norm": 0.08592725545167923, "learning_rate": 2.523602930989195e-05, "loss": 0.0345, "step": 104430 }, { "epoch": 0.1222, "grad_norm": 0.0761452242732048, "learning_rate": 2.523189581637328e-05, "loss": 0.034, "step": 104440 }, { "epoch": 0.12225, "grad_norm": 0.07353299856185913, "learning_rate": 2.5227762316514662e-05, "loss": 0.034, "step": 104450 }, { "epoch": 0.1223, "grad_norm": 0.07914377748966217, "learning_rate": 2.5223628810429117e-05, "loss": 0.036, "step": 104460 }, { "epoch": 0.12235, "grad_norm": 0.0832958072423935, "learning_rate": 2.521949529822963e-05, "loss": 0.0361, "step": 104470 }, { "epoch": 0.1224, "grad_norm": 0.10569994151592255, "learning_rate": 2.5215361780029235e-05, "loss": 0.0355, "step": 104480 }, { "epoch": 0.12245, "grad_norm": 0.09755893796682358, "learning_rate": 2.5211228255940922e-05, "loss": 0.0353, "step": 104490 }, { "epoch": 0.1225, "grad_norm": 0.08682700991630554, "learning_rate": 2.5207094726077718e-05, "loss": 0.037, "step": 104500 }, { "epoch": 0.12255, "grad_norm": 0.09625197947025299, "learning_rate": 2.5202961190552617e-05, "loss": 0.0355, "step": 104510 }, { "epoch": 0.1226, "grad_norm": 0.0831693559885025, "learning_rate": 2.519882764947863e-05, "loss": 0.0348, "step": 104520 }, { "epoch": 0.12265, "grad_norm": 0.07173638045787811, "learning_rate": 2.519469410296877e-05, "loss": 0.0343, "step": 104530 }, { "epoch": 0.1227, "grad_norm": 0.08513516187667847, "learning_rate": 2.519056055113605e-05, "loss": 0.0353, "step": 104540 }, { "epoch": 0.12275, "grad_norm": 0.08899994939565659, "learning_rate": 2.518642699409347e-05, "loss": 0.0356, "step": 104550 }, { "epoch": 0.1228, "grad_norm": 0.0777038037776947, "learning_rate": 2.5182293431954052e-05, "loss": 0.0355, "step": 104560 }, { "epoch": 0.12285, "grad_norm": 0.09456444531679153, "learning_rate": 2.5178159864830804e-05, "loss": 0.0348, "step": 104570 }, { "epoch": 0.1229, "grad_norm": 0.08388503640890121, "learning_rate": 2.5174026292836723e-05, "loss": 0.034, "step": 104580 }, { "epoch": 0.12295, "grad_norm": 0.08418978005647659, "learning_rate": 2.5169892716084838e-05, "loss": 0.0355, "step": 104590 }, { "epoch": 0.123, "grad_norm": 0.07934024184942245, "learning_rate": 2.5165759134688132e-05, "loss": 0.0349, "step": 104600 }, { "epoch": 0.12305, "grad_norm": 0.07490506768226624, "learning_rate": 2.5161625548759654e-05, "loss": 0.0348, "step": 104610 }, { "epoch": 0.1231, "grad_norm": 0.08375281095504761, "learning_rate": 2.5157491958412382e-05, "loss": 0.0355, "step": 104620 }, { "epoch": 0.12315, "grad_norm": 0.08525901287794113, "learning_rate": 2.515335836375935e-05, "loss": 0.0364, "step": 104630 }, { "epoch": 0.1232, "grad_norm": 0.0929773822426796, "learning_rate": 2.514922476491355e-05, "loss": 0.0354, "step": 104640 }, { "epoch": 0.12325, "grad_norm": 0.08704689145088196, "learning_rate": 2.5145091161988003e-05, "loss": 0.0353, "step": 104650 }, { "epoch": 0.1233, "grad_norm": 0.06518177688121796, "learning_rate": 2.514095755509571e-05, "loss": 0.0347, "step": 104660 }, { "epoch": 0.12335, "grad_norm": 0.07102639973163605, "learning_rate": 2.5136823944349704e-05, "loss": 0.0336, "step": 104670 }, { "epoch": 0.1234, "grad_norm": 0.07148510962724686, "learning_rate": 2.5132690329862975e-05, "loss": 0.0344, "step": 104680 }, { "epoch": 0.12345, "grad_norm": 0.06739726662635803, "learning_rate": 2.5128556711748546e-05, "loss": 0.0362, "step": 104690 }, { "epoch": 0.1235, "grad_norm": 0.0750051736831665, "learning_rate": 2.512442309011942e-05, "loss": 0.0346, "step": 104700 }, { "epoch": 0.12355, "grad_norm": 0.07953881472349167, "learning_rate": 2.512028946508862e-05, "loss": 0.0365, "step": 104710 }, { "epoch": 0.1236, "grad_norm": 0.07243786752223969, "learning_rate": 2.5116155836769146e-05, "loss": 0.0345, "step": 104720 }, { "epoch": 0.12365, "grad_norm": 0.07352124899625778, "learning_rate": 2.511202220527401e-05, "loss": 0.0358, "step": 104730 }, { "epoch": 0.1237, "grad_norm": 0.073238305747509, "learning_rate": 2.5107888570716235e-05, "loss": 0.0349, "step": 104740 }, { "epoch": 0.12375, "grad_norm": 0.07899244874715805, "learning_rate": 2.510375493320883e-05, "loss": 0.036, "step": 104750 }, { "epoch": 0.1238, "grad_norm": 0.09344808012247086, "learning_rate": 2.50996212928648e-05, "loss": 0.0359, "step": 104760 }, { "epoch": 0.12385, "grad_norm": 0.09741408377885818, "learning_rate": 2.509548764979716e-05, "loss": 0.0359, "step": 104770 }, { "epoch": 0.1239, "grad_norm": 0.07594095170497894, "learning_rate": 2.5091354004118934e-05, "loss": 0.0348, "step": 104780 }, { "epoch": 0.12395, "grad_norm": 0.06269504874944687, "learning_rate": 2.508722035594311e-05, "loss": 0.0351, "step": 104790 }, { "epoch": 0.124, "grad_norm": 0.07669692486524582, "learning_rate": 2.5083086705382718e-05, "loss": 0.0365, "step": 104800 }, { "epoch": 0.12405, "grad_norm": 0.06946543604135513, "learning_rate": 2.5078953052550767e-05, "loss": 0.0358, "step": 104810 }, { "epoch": 0.1241, "grad_norm": 0.13907350599765778, "learning_rate": 2.507481939756028e-05, "loss": 0.0369, "step": 104820 }, { "epoch": 0.12415, "grad_norm": 0.0800219401717186, "learning_rate": 2.5070685740524246e-05, "loss": 0.0371, "step": 104830 }, { "epoch": 0.1242, "grad_norm": 0.07465817779302597, "learning_rate": 2.5066552081555693e-05, "loss": 0.0343, "step": 104840 }, { "epoch": 0.12425, "grad_norm": 0.06579568982124329, "learning_rate": 2.506241842076763e-05, "loss": 0.0341, "step": 104850 }, { "epoch": 0.1243, "grad_norm": 0.06052146852016449, "learning_rate": 2.5058284758273076e-05, "loss": 0.0335, "step": 104860 }, { "epoch": 0.12435, "grad_norm": 0.06922436505556107, "learning_rate": 2.5054151094185036e-05, "loss": 0.0328, "step": 104870 }, { "epoch": 0.1244, "grad_norm": 0.06804629415273666, "learning_rate": 2.5050017428616522e-05, "loss": 0.0348, "step": 104880 }, { "epoch": 0.12445, "grad_norm": 0.06646064668893814, "learning_rate": 2.5045883761680555e-05, "loss": 0.0346, "step": 104890 }, { "epoch": 0.1245, "grad_norm": 0.07009711861610413, "learning_rate": 2.5041750093490145e-05, "loss": 0.0331, "step": 104900 }, { "epoch": 0.12455, "grad_norm": 0.06541627645492554, "learning_rate": 2.50376164241583e-05, "loss": 0.0333, "step": 104910 }, { "epoch": 0.1246, "grad_norm": 0.06215963512659073, "learning_rate": 2.5033482753798033e-05, "loss": 0.0339, "step": 104920 }, { "epoch": 0.12465, "grad_norm": 0.06351916491985321, "learning_rate": 2.5029349082522363e-05, "loss": 0.034, "step": 104930 }, { "epoch": 0.1247, "grad_norm": 0.06076859310269356, "learning_rate": 2.5025215410444302e-05, "loss": 0.0341, "step": 104940 }, { "epoch": 0.12475, "grad_norm": 0.07077761739492416, "learning_rate": 2.5021081737676855e-05, "loss": 0.0336, "step": 104950 }, { "epoch": 0.1248, "grad_norm": 0.0637740045785904, "learning_rate": 2.5016948064333053e-05, "loss": 0.0381, "step": 104960 }, { "epoch": 0.12485, "grad_norm": 0.093325674533844, "learning_rate": 2.5012814390525886e-05, "loss": 0.0357, "step": 104970 }, { "epoch": 0.1249, "grad_norm": 0.07910604029893875, "learning_rate": 2.5008680716368383e-05, "loss": 0.0341, "step": 104980 }, { "epoch": 0.12495, "grad_norm": 0.07720960676670074, "learning_rate": 2.5004547041973548e-05, "loss": 0.0338, "step": 104990 }, { "epoch": 0.125, "grad_norm": 0.06617166846990585, "learning_rate": 2.5000413367454406e-05, "loss": 0.0344, "step": 105000 }, { "epoch": 0.12505, "grad_norm": 0.07431846857070923, "learning_rate": 2.499627969292395e-05, "loss": 0.0334, "step": 105010 }, { "epoch": 0.1251, "grad_norm": 0.0698188915848732, "learning_rate": 2.499214601849522e-05, "loss": 0.0343, "step": 105020 }, { "epoch": 0.12515, "grad_norm": 0.0604122169315815, "learning_rate": 2.4988012344281205e-05, "loss": 0.0354, "step": 105030 }, { "epoch": 0.1252, "grad_norm": 0.06649839878082275, "learning_rate": 2.498387867039494e-05, "loss": 0.0347, "step": 105040 }, { "epoch": 0.12525, "grad_norm": 0.06715084612369537, "learning_rate": 2.4979744996949415e-05, "loss": 0.0341, "step": 105050 }, { "epoch": 0.1253, "grad_norm": 0.07764974236488342, "learning_rate": 2.4975611324057664e-05, "loss": 0.0341, "step": 105060 }, { "epoch": 0.12535, "grad_norm": 0.061199646443128586, "learning_rate": 2.497147765183268e-05, "loss": 0.0349, "step": 105070 }, { "epoch": 0.1254, "grad_norm": 0.06322883814573288, "learning_rate": 2.496734398038749e-05, "loss": 0.0337, "step": 105080 }, { "epoch": 0.12545, "grad_norm": 0.06549254059791565, "learning_rate": 2.4963210309835107e-05, "loss": 0.0333, "step": 105090 }, { "epoch": 0.1255, "grad_norm": 0.06285182386636734, "learning_rate": 2.495907664028853e-05, "loss": 0.0338, "step": 105100 }, { "epoch": 0.12555, "grad_norm": 0.059575922787189484, "learning_rate": 2.4954942971860798e-05, "loss": 0.0328, "step": 105110 }, { "epoch": 0.1256, "grad_norm": 0.07021772861480713, "learning_rate": 2.495080930466489e-05, "loss": 0.0341, "step": 105120 }, { "epoch": 0.12565, "grad_norm": 0.07550730556249619, "learning_rate": 2.4946675638813856e-05, "loss": 0.0335, "step": 105130 }, { "epoch": 0.1257, "grad_norm": 0.08097176253795624, "learning_rate": 2.4942541974420674e-05, "loss": 0.0351, "step": 105140 }, { "epoch": 0.12575, "grad_norm": 0.07972034811973572, "learning_rate": 2.4938408311598376e-05, "loss": 0.0342, "step": 105150 }, { "epoch": 0.1258, "grad_norm": 0.07270655035972595, "learning_rate": 2.493427465045998e-05, "loss": 0.0332, "step": 105160 }, { "epoch": 0.12585, "grad_norm": 0.07965975254774094, "learning_rate": 2.4930140991118483e-05, "loss": 0.034, "step": 105170 }, { "epoch": 0.1259, "grad_norm": 0.07292885333299637, "learning_rate": 2.4926007333686912e-05, "loss": 0.035, "step": 105180 }, { "epoch": 0.12595, "grad_norm": 0.06530830264091492, "learning_rate": 2.4921873678278267e-05, "loss": 0.0355, "step": 105190 }, { "epoch": 0.126, "grad_norm": 0.07178696990013123, "learning_rate": 2.491774002500556e-05, "loss": 0.0372, "step": 105200 }, { "epoch": 0.12605, "grad_norm": 0.0647277906537056, "learning_rate": 2.4913606373981825e-05, "loss": 0.034, "step": 105210 }, { "epoch": 0.1261, "grad_norm": 0.06109518185257912, "learning_rate": 2.4909472725320045e-05, "loss": 0.0358, "step": 105220 }, { "epoch": 0.12615, "grad_norm": 0.08750329911708832, "learning_rate": 2.4905339079133257e-05, "loss": 0.0363, "step": 105230 }, { "epoch": 0.1262, "grad_norm": 0.11697772890329361, "learning_rate": 2.4901205435534457e-05, "loss": 0.0351, "step": 105240 }, { "epoch": 0.12625, "grad_norm": 0.08934105932712555, "learning_rate": 2.489707179463667e-05, "loss": 0.0361, "step": 105250 }, { "epoch": 0.1263, "grad_norm": 0.08917882293462753, "learning_rate": 2.4892938156552896e-05, "loss": 0.0362, "step": 105260 }, { "epoch": 0.12635, "grad_norm": 0.06422018259763718, "learning_rate": 2.488880452139615e-05, "loss": 0.036, "step": 105270 }, { "epoch": 0.1264, "grad_norm": 0.07000827044248581, "learning_rate": 2.4884670889279455e-05, "loss": 0.0366, "step": 105280 }, { "epoch": 0.12645, "grad_norm": 0.08107402920722961, "learning_rate": 2.4880537260315808e-05, "loss": 0.0354, "step": 105290 }, { "epoch": 0.1265, "grad_norm": 0.07664652168750763, "learning_rate": 2.4876403634618232e-05, "loss": 0.0351, "step": 105300 }, { "epoch": 0.12655, "grad_norm": 0.06906727701425552, "learning_rate": 2.4872270012299725e-05, "loss": 0.0354, "step": 105310 }, { "epoch": 0.1266, "grad_norm": 0.07458998262882233, "learning_rate": 2.4868136393473325e-05, "loss": 0.0341, "step": 105320 }, { "epoch": 0.12665, "grad_norm": 0.08547483384609222, "learning_rate": 2.486400277825201e-05, "loss": 0.0366, "step": 105330 }, { "epoch": 0.1267, "grad_norm": 0.08772281557321548, "learning_rate": 2.4859869166748808e-05, "loss": 0.0359, "step": 105340 }, { "epoch": 0.12675, "grad_norm": 0.06818553060293198, "learning_rate": 2.485573555907674e-05, "loss": 0.0394, "step": 105350 }, { "epoch": 0.1268, "grad_norm": 0.08064989745616913, "learning_rate": 2.4851601955348804e-05, "loss": 0.0358, "step": 105360 }, { "epoch": 0.12685, "grad_norm": 0.07183156907558441, "learning_rate": 2.4847468355678016e-05, "loss": 0.0344, "step": 105370 }, { "epoch": 0.1269, "grad_norm": 0.08261267840862274, "learning_rate": 2.4843334760177382e-05, "loss": 0.0359, "step": 105380 }, { "epoch": 0.12695, "grad_norm": 0.07098808884620667, "learning_rate": 2.4839201168959912e-05, "loss": 0.0355, "step": 105390 }, { "epoch": 0.127, "grad_norm": 0.07491979748010635, "learning_rate": 2.4835067582138638e-05, "loss": 0.0357, "step": 105400 }, { "epoch": 0.12705, "grad_norm": 0.07213471829891205, "learning_rate": 2.483093399982654e-05, "loss": 0.0345, "step": 105410 }, { "epoch": 0.1271, "grad_norm": 0.06732694804668427, "learning_rate": 2.4826800422136658e-05, "loss": 0.0346, "step": 105420 }, { "epoch": 0.12715, "grad_norm": 0.07045652717351913, "learning_rate": 2.4822666849181967e-05, "loss": 0.035, "step": 105430 }, { "epoch": 0.1272, "grad_norm": 0.06555169820785522, "learning_rate": 2.4818533281075513e-05, "loss": 0.0339, "step": 105440 }, { "epoch": 0.12725, "grad_norm": 0.06159425154328346, "learning_rate": 2.4814399717930284e-05, "loss": 0.0356, "step": 105450 }, { "epoch": 0.1273, "grad_norm": 0.06212705001235008, "learning_rate": 2.4810266159859297e-05, "loss": 0.0397, "step": 105460 }, { "epoch": 0.12735, "grad_norm": 0.07616399973630905, "learning_rate": 2.4806132606975567e-05, "loss": 0.0357, "step": 105470 }, { "epoch": 0.1274, "grad_norm": 0.07089634239673615, "learning_rate": 2.4801999059392095e-05, "loss": 0.0354, "step": 105480 }, { "epoch": 0.12745, "grad_norm": 0.0662955790758133, "learning_rate": 2.4797865517221895e-05, "loss": 0.0357, "step": 105490 }, { "epoch": 0.1275, "grad_norm": 0.07061684131622314, "learning_rate": 2.479373198057798e-05, "loss": 0.0346, "step": 105500 }, { "epoch": 0.12755, "grad_norm": 0.07761121541261673, "learning_rate": 2.478959844957335e-05, "loss": 0.0351, "step": 105510 }, { "epoch": 0.1276, "grad_norm": 0.07609353214502335, "learning_rate": 2.4785464924321014e-05, "loss": 0.0361, "step": 105520 }, { "epoch": 0.12765, "grad_norm": 0.09143120050430298, "learning_rate": 2.478133140493399e-05, "loss": 0.0346, "step": 105530 }, { "epoch": 0.1277, "grad_norm": 0.06405118852853775, "learning_rate": 2.4777197891525293e-05, "loss": 0.0346, "step": 105540 }, { "epoch": 0.12775, "grad_norm": 0.07845189422369003, "learning_rate": 2.477306438420791e-05, "loss": 0.0358, "step": 105550 }, { "epoch": 0.1278, "grad_norm": 0.10201088339090347, "learning_rate": 2.476893088309487e-05, "loss": 0.0354, "step": 105560 }, { "epoch": 0.12785, "grad_norm": 0.07596845179796219, "learning_rate": 2.4764797388299167e-05, "loss": 0.0337, "step": 105570 }, { "epoch": 0.1279, "grad_norm": 0.09855609387159348, "learning_rate": 2.476066389993382e-05, "loss": 0.035, "step": 105580 }, { "epoch": 0.12795, "grad_norm": 0.08643155544996262, "learning_rate": 2.475653041811183e-05, "loss": 0.0353, "step": 105590 }, { "epoch": 0.128, "grad_norm": 0.07075414806604385, "learning_rate": 2.4752396942946197e-05, "loss": 0.0343, "step": 105600 }, { "epoch": 0.12805, "grad_norm": 0.08212490379810333, "learning_rate": 2.4748263474549958e-05, "loss": 0.0353, "step": 105610 }, { "epoch": 0.1281, "grad_norm": 0.05706174671649933, "learning_rate": 2.4744130013036082e-05, "loss": 0.0336, "step": 105620 }, { "epoch": 0.12815, "grad_norm": 0.0789404883980751, "learning_rate": 2.4739996558517614e-05, "loss": 0.034, "step": 105630 }, { "epoch": 0.1282, "grad_norm": 0.08181510120630264, "learning_rate": 2.4735863111107528e-05, "loss": 0.0352, "step": 105640 }, { "epoch": 0.12825, "grad_norm": 0.0744834840297699, "learning_rate": 2.4731729670918845e-05, "loss": 0.034, "step": 105650 }, { "epoch": 0.1283, "grad_norm": 0.07355952262878418, "learning_rate": 2.4727596238064582e-05, "loss": 0.0345, "step": 105660 }, { "epoch": 0.12835, "grad_norm": 0.07162206619977951, "learning_rate": 2.472346281265773e-05, "loss": 0.0343, "step": 105670 }, { "epoch": 0.1284, "grad_norm": 0.07498904317617416, "learning_rate": 2.47193293948113e-05, "loss": 0.0348, "step": 105680 }, { "epoch": 0.12845, "grad_norm": 0.07180491834878922, "learning_rate": 2.47151959846383e-05, "loss": 0.0358, "step": 105690 }, { "epoch": 0.1285, "grad_norm": 0.06147145479917526, "learning_rate": 2.471106258225174e-05, "loss": 0.0341, "step": 105700 }, { "epoch": 0.12855, "grad_norm": 0.07467851787805557, "learning_rate": 2.4706929187764614e-05, "loss": 0.0352, "step": 105710 }, { "epoch": 0.1286, "grad_norm": 0.08296094834804535, "learning_rate": 2.4702795801289925e-05, "loss": 0.0367, "step": 105720 }, { "epoch": 0.12865, "grad_norm": 0.07414154708385468, "learning_rate": 2.4698662422940702e-05, "loss": 0.0359, "step": 105730 }, { "epoch": 0.1287, "grad_norm": 0.08976276218891144, "learning_rate": 2.4694529052829928e-05, "loss": 0.0358, "step": 105740 }, { "epoch": 0.12875, "grad_norm": 0.07030557096004486, "learning_rate": 2.4690395691070624e-05, "loss": 0.0345, "step": 105750 }, { "epoch": 0.1288, "grad_norm": 0.08556876331567764, "learning_rate": 2.4686262337775774e-05, "loss": 0.0349, "step": 105760 }, { "epoch": 0.12885, "grad_norm": 0.06981374323368073, "learning_rate": 2.4682128993058404e-05, "loss": 0.034, "step": 105770 }, { "epoch": 0.1289, "grad_norm": 0.07156495004892349, "learning_rate": 2.46779956570315e-05, "loss": 0.0383, "step": 105780 }, { "epoch": 0.12895, "grad_norm": 0.07469389587640762, "learning_rate": 2.4673862329808077e-05, "loss": 0.0342, "step": 105790 }, { "epoch": 0.129, "grad_norm": 0.07833770662546158, "learning_rate": 2.4669729011501137e-05, "loss": 0.0358, "step": 105800 }, { "epoch": 0.12905, "grad_norm": 0.07273399084806442, "learning_rate": 2.466559570222367e-05, "loss": 0.034, "step": 105810 }, { "epoch": 0.1291, "grad_norm": 0.06519411504268646, "learning_rate": 2.4661462402088712e-05, "loss": 0.0343, "step": 105820 }, { "epoch": 0.12915, "grad_norm": 0.07139147073030472, "learning_rate": 2.4657329111209224e-05, "loss": 0.0348, "step": 105830 }, { "epoch": 0.1292, "grad_norm": 0.07328125089406967, "learning_rate": 2.4653195829698238e-05, "loss": 0.0354, "step": 105840 }, { "epoch": 0.12925, "grad_norm": 0.0851602628827095, "learning_rate": 2.464906255766875e-05, "loss": 0.0348, "step": 105850 }, { "epoch": 0.1293, "grad_norm": 0.07176851481199265, "learning_rate": 2.464492929523376e-05, "loss": 0.0345, "step": 105860 }, { "epoch": 0.12935, "grad_norm": 0.07033860683441162, "learning_rate": 2.464079604250627e-05, "loss": 0.0346, "step": 105870 }, { "epoch": 0.1294, "grad_norm": 0.07072217017412186, "learning_rate": 2.4636662799599275e-05, "loss": 0.0357, "step": 105880 }, { "epoch": 0.12945, "grad_norm": 0.08237253874540329, "learning_rate": 2.463252956662579e-05, "loss": 0.0365, "step": 105890 }, { "epoch": 0.1295, "grad_norm": 0.07988428324460983, "learning_rate": 2.4628396343698803e-05, "loss": 0.0361, "step": 105900 }, { "epoch": 0.12955, "grad_norm": 0.072598896920681, "learning_rate": 2.4624263130931317e-05, "loss": 0.0355, "step": 105910 }, { "epoch": 0.1296, "grad_norm": 0.07299260050058365, "learning_rate": 2.462012992843635e-05, "loss": 0.0356, "step": 105920 }, { "epoch": 0.12965, "grad_norm": 0.0583757609128952, "learning_rate": 2.461599673632687e-05, "loss": 0.0346, "step": 105930 }, { "epoch": 0.1297, "grad_norm": 0.06796620786190033, "learning_rate": 2.461186355471591e-05, "loss": 0.0345, "step": 105940 }, { "epoch": 0.12975, "grad_norm": 0.06865128129720688, "learning_rate": 2.460773038371645e-05, "loss": 0.0354, "step": 105950 }, { "epoch": 0.1298, "grad_norm": 0.06750306487083435, "learning_rate": 2.4603597223441492e-05, "loss": 0.0362, "step": 105960 }, { "epoch": 0.12985, "grad_norm": 0.06639142334461212, "learning_rate": 2.4599464074004037e-05, "loss": 0.0344, "step": 105970 }, { "epoch": 0.1299, "grad_norm": 0.08696747571229935, "learning_rate": 2.4595330935517082e-05, "loss": 0.0351, "step": 105980 }, { "epoch": 0.12995, "grad_norm": 0.10079227387905121, "learning_rate": 2.4591197808093634e-05, "loss": 0.0341, "step": 105990 }, { "epoch": 0.13, "grad_norm": 0.06885421276092529, "learning_rate": 2.4587064691846678e-05, "loss": 0.0355, "step": 106000 }, { "epoch": 0.13005, "grad_norm": 0.10788242518901825, "learning_rate": 2.4582931586889223e-05, "loss": 0.0354, "step": 106010 }, { "epoch": 0.1301, "grad_norm": 0.07853377610445023, "learning_rate": 2.4578798493334256e-05, "loss": 0.0357, "step": 106020 }, { "epoch": 0.13015, "grad_norm": 0.07444460690021515, "learning_rate": 2.457466541129478e-05, "loss": 0.0347, "step": 106030 }, { "epoch": 0.1302, "grad_norm": 0.07578030973672867, "learning_rate": 2.4570532340883797e-05, "loss": 0.0361, "step": 106040 }, { "epoch": 0.13025, "grad_norm": 0.06895167380571365, "learning_rate": 2.4566399282214295e-05, "loss": 0.0346, "step": 106050 }, { "epoch": 0.1303, "grad_norm": 0.06211972236633301, "learning_rate": 2.456226623539928e-05, "loss": 0.0359, "step": 106060 }, { "epoch": 0.13035, "grad_norm": 0.0613601990044117, "learning_rate": 2.4558133200551735e-05, "loss": 0.0341, "step": 106070 }, { "epoch": 0.1304, "grad_norm": 0.06245096027851105, "learning_rate": 2.4554000177784666e-05, "loss": 0.0347, "step": 106080 }, { "epoch": 0.13045, "grad_norm": 0.059820763766765594, "learning_rate": 2.454986716721106e-05, "loss": 0.038, "step": 106090 }, { "epoch": 0.1305, "grad_norm": 0.06739622354507446, "learning_rate": 2.4545734168943914e-05, "loss": 0.0354, "step": 106100 }, { "epoch": 0.13055, "grad_norm": 0.06598929315805435, "learning_rate": 2.454160118309624e-05, "loss": 0.0336, "step": 106110 }, { "epoch": 0.1306, "grad_norm": 0.08986051380634308, "learning_rate": 2.4537468209781e-05, "loss": 0.0359, "step": 106120 }, { "epoch": 0.13065, "grad_norm": 0.07956880331039429, "learning_rate": 2.453333524911122e-05, "loss": 0.0345, "step": 106130 }, { "epoch": 0.1307, "grad_norm": 0.06792153418064117, "learning_rate": 2.4529202301199865e-05, "loss": 0.0333, "step": 106140 }, { "epoch": 0.13075, "grad_norm": 0.08238748461008072, "learning_rate": 2.4525069366159955e-05, "loss": 0.0334, "step": 106150 }, { "epoch": 0.1308, "grad_norm": 0.07251271605491638, "learning_rate": 2.4520936444104463e-05, "loss": 0.0325, "step": 106160 }, { "epoch": 0.13085, "grad_norm": 0.06597399711608887, "learning_rate": 2.4516803535146387e-05, "loss": 0.033, "step": 106170 }, { "epoch": 0.1309, "grad_norm": 0.057592928409576416, "learning_rate": 2.451267063939873e-05, "loss": 0.0342, "step": 106180 }, { "epoch": 0.13095, "grad_norm": 0.07524098455905914, "learning_rate": 2.4508537756974465e-05, "loss": 0.0346, "step": 106190 }, { "epoch": 0.131, "grad_norm": 0.07812030613422394, "learning_rate": 2.45044048879866e-05, "loss": 0.0366, "step": 106200 }, { "epoch": 0.13105, "grad_norm": 0.10690612345933914, "learning_rate": 2.450027203254811e-05, "loss": 0.0377, "step": 106210 }, { "epoch": 0.1311, "grad_norm": 0.1581622064113617, "learning_rate": 2.4496139190771997e-05, "loss": 0.036, "step": 106220 }, { "epoch": 0.13115, "grad_norm": 0.09735545516014099, "learning_rate": 2.4492006362771257e-05, "loss": 0.0357, "step": 106230 }, { "epoch": 0.1312, "grad_norm": 0.07339781522750854, "learning_rate": 2.4487873548658865e-05, "loss": 0.0358, "step": 106240 }, { "epoch": 0.13125, "grad_norm": 0.08246419578790665, "learning_rate": 2.4483740748547827e-05, "loss": 0.0369, "step": 106250 }, { "epoch": 0.1313, "grad_norm": 0.08868840336799622, "learning_rate": 2.4479607962551115e-05, "loss": 0.0365, "step": 106260 }, { "epoch": 0.13135, "grad_norm": 0.07506828755140305, "learning_rate": 2.4475475190781728e-05, "loss": 0.0386, "step": 106270 }, { "epoch": 0.1314, "grad_norm": 0.07542876154184341, "learning_rate": 2.4471342433352653e-05, "loss": 0.0358, "step": 106280 }, { "epoch": 0.13145, "grad_norm": 0.07146627455949783, "learning_rate": 2.4467209690376873e-05, "loss": 0.0361, "step": 106290 }, { "epoch": 0.1315, "grad_norm": 0.08321649581193924, "learning_rate": 2.4463076961967387e-05, "loss": 0.0343, "step": 106300 }, { "epoch": 0.13155, "grad_norm": 0.08326517790555954, "learning_rate": 2.4458944248237165e-05, "loss": 0.0368, "step": 106310 }, { "epoch": 0.1316, "grad_norm": 0.06860451400279999, "learning_rate": 2.4454811549299218e-05, "loss": 0.034, "step": 106320 }, { "epoch": 0.13165, "grad_norm": 0.0711694061756134, "learning_rate": 2.4450678865266503e-05, "loss": 0.0346, "step": 106330 }, { "epoch": 0.1317, "grad_norm": 0.06888732314109802, "learning_rate": 2.444654619625204e-05, "loss": 0.0335, "step": 106340 }, { "epoch": 0.13175, "grad_norm": 0.08941102027893066, "learning_rate": 2.4442413542368776e-05, "loss": 0.0369, "step": 106350 }, { "epoch": 0.1318, "grad_norm": 0.07423651218414307, "learning_rate": 2.4438280903729722e-05, "loss": 0.0346, "step": 106360 }, { "epoch": 0.13185, "grad_norm": 0.0659366324543953, "learning_rate": 2.4434148280447867e-05, "loss": 0.0345, "step": 106370 }, { "epoch": 0.1319, "grad_norm": 0.08936917036771774, "learning_rate": 2.4430015672636178e-05, "loss": 0.0354, "step": 106380 }, { "epoch": 0.13195, "grad_norm": 0.07357953488826752, "learning_rate": 2.4425883080407648e-05, "loss": 0.0354, "step": 106390 }, { "epoch": 0.132, "grad_norm": 0.06603465229272842, "learning_rate": 2.442175050387526e-05, "loss": 0.0344, "step": 106400 }, { "epoch": 0.13205, "grad_norm": 0.07264716923236847, "learning_rate": 2.4417617943151984e-05, "loss": 0.0359, "step": 106410 }, { "epoch": 0.1321, "grad_norm": 0.07183219492435455, "learning_rate": 2.4413485398350835e-05, "loss": 0.0352, "step": 106420 }, { "epoch": 0.13215, "grad_norm": 0.06507682055234909, "learning_rate": 2.4409352869584758e-05, "loss": 0.0351, "step": 106430 }, { "epoch": 0.1322, "grad_norm": 0.08611390739679337, "learning_rate": 2.440522035696676e-05, "loss": 0.0348, "step": 106440 }, { "epoch": 0.13225, "grad_norm": 0.07270409166812897, "learning_rate": 2.440108786060981e-05, "loss": 0.0337, "step": 106450 }, { "epoch": 0.1323, "grad_norm": 0.0700516477227211, "learning_rate": 2.43969553806269e-05, "loss": 0.0344, "step": 106460 }, { "epoch": 0.13235, "grad_norm": 0.08884414285421371, "learning_rate": 2.4392822917130997e-05, "loss": 0.0338, "step": 106470 }, { "epoch": 0.1324, "grad_norm": 0.0819149985909462, "learning_rate": 2.438869047023509e-05, "loss": 0.0338, "step": 106480 }, { "epoch": 0.13245, "grad_norm": 0.08014719188213348, "learning_rate": 2.4384558040052158e-05, "loss": 0.0335, "step": 106490 }, { "epoch": 0.1325, "grad_norm": 0.07839412987232208, "learning_rate": 2.438042562669517e-05, "loss": 0.0334, "step": 106500 }, { "epoch": 0.13255, "grad_norm": 0.06804198771715164, "learning_rate": 2.437629323027712e-05, "loss": 0.0338, "step": 106510 }, { "epoch": 0.1326, "grad_norm": 0.07780171930789948, "learning_rate": 2.4372160850910973e-05, "loss": 0.0339, "step": 106520 }, { "epoch": 0.13265, "grad_norm": 0.08501968532800674, "learning_rate": 2.4368028488709724e-05, "loss": 0.0344, "step": 106530 }, { "epoch": 0.1327, "grad_norm": 0.08473043888807297, "learning_rate": 2.436389614378632e-05, "loss": 0.0331, "step": 106540 }, { "epoch": 0.13275, "grad_norm": 0.08621985465288162, "learning_rate": 2.4359763816253768e-05, "loss": 0.0334, "step": 106550 }, { "epoch": 0.1328, "grad_norm": 0.06827308982610703, "learning_rate": 2.4355631506225035e-05, "loss": 0.0324, "step": 106560 }, { "epoch": 0.13285, "grad_norm": 0.06201021745800972, "learning_rate": 2.435149921381309e-05, "loss": 0.0331, "step": 106570 }, { "epoch": 0.1329, "grad_norm": 0.07178918272256851, "learning_rate": 2.4347366939130918e-05, "loss": 0.0343, "step": 106580 }, { "epoch": 0.13295, "grad_norm": 0.07109556347131729, "learning_rate": 2.4343234682291484e-05, "loss": 0.0334, "step": 106590 }, { "epoch": 0.133, "grad_norm": 0.08138655871152878, "learning_rate": 2.4339102443407756e-05, "loss": 0.0349, "step": 106600 }, { "epoch": 0.13305, "grad_norm": 0.05992733687162399, "learning_rate": 2.433497022259274e-05, "loss": 0.0347, "step": 106610 }, { "epoch": 0.1331, "grad_norm": 0.0833921730518341, "learning_rate": 2.4330838019959374e-05, "loss": 0.0363, "step": 106620 }, { "epoch": 0.13315, "grad_norm": 0.07941515743732452, "learning_rate": 2.4326705835620658e-05, "loss": 0.0351, "step": 106630 }, { "epoch": 0.1332, "grad_norm": 0.06688013672828674, "learning_rate": 2.4322573669689536e-05, "loss": 0.0335, "step": 106640 }, { "epoch": 0.13325, "grad_norm": 0.08466209471225739, "learning_rate": 2.4318441522279007e-05, "loss": 0.035, "step": 106650 }, { "epoch": 0.1333, "grad_norm": 0.08238755911588669, "learning_rate": 2.4314309393502024e-05, "loss": 0.0346, "step": 106660 }, { "epoch": 0.13335, "grad_norm": 0.06920625269412994, "learning_rate": 2.4310177283471567e-05, "loss": 0.0373, "step": 106670 }, { "epoch": 0.1334, "grad_norm": 0.07812193781137466, "learning_rate": 2.430604519230061e-05, "loss": 0.0339, "step": 106680 }, { "epoch": 0.13345, "grad_norm": 0.06610584259033203, "learning_rate": 2.4301913120102107e-05, "loss": 0.0355, "step": 106690 }, { "epoch": 0.1335, "grad_norm": 0.06607034802436829, "learning_rate": 2.4297781066989045e-05, "loss": 0.0369, "step": 106700 }, { "epoch": 0.13355, "grad_norm": 0.06498633325099945, "learning_rate": 2.4293649033074378e-05, "loss": 0.0348, "step": 106710 }, { "epoch": 0.1336, "grad_norm": 0.07355812937021255, "learning_rate": 2.4289517018471087e-05, "loss": 0.0354, "step": 106720 }, { "epoch": 0.13365, "grad_norm": 0.06755322217941284, "learning_rate": 2.4285385023292124e-05, "loss": 0.0357, "step": 106730 }, { "epoch": 0.1337, "grad_norm": 0.07552862912416458, "learning_rate": 2.428125304765047e-05, "loss": 0.0347, "step": 106740 }, { "epoch": 0.13375, "grad_norm": 0.06588675826787949, "learning_rate": 2.4277121091659095e-05, "loss": 0.0356, "step": 106750 }, { "epoch": 0.1338, "grad_norm": 0.06582847237586975, "learning_rate": 2.4272989155430952e-05, "loss": 0.0342, "step": 106760 }, { "epoch": 0.13385, "grad_norm": 0.06949807703495026, "learning_rate": 2.4268857239079017e-05, "loss": 0.0356, "step": 106770 }, { "epoch": 0.1339, "grad_norm": 0.06784377247095108, "learning_rate": 2.4264725342716242e-05, "loss": 0.0348, "step": 106780 }, { "epoch": 0.13395, "grad_norm": 0.09562524408102036, "learning_rate": 2.426059346645561e-05, "loss": 0.0353, "step": 106790 }, { "epoch": 0.134, "grad_norm": 0.07481709867715836, "learning_rate": 2.4256461610410066e-05, "loss": 0.0339, "step": 106800 }, { "epoch": 0.13405, "grad_norm": 0.07583610713481903, "learning_rate": 2.425232977469258e-05, "loss": 0.0338, "step": 106810 }, { "epoch": 0.1341, "grad_norm": 0.08295128494501114, "learning_rate": 2.424819795941613e-05, "loss": 0.0338, "step": 106820 }, { "epoch": 0.13415, "grad_norm": 0.07903057336807251, "learning_rate": 2.424406616469365e-05, "loss": 0.0351, "step": 106830 }, { "epoch": 0.1342, "grad_norm": 0.07755926251411438, "learning_rate": 2.4239934390638135e-05, "loss": 0.033, "step": 106840 }, { "epoch": 0.13425, "grad_norm": 0.06308278441429138, "learning_rate": 2.4235802637362516e-05, "loss": 0.0328, "step": 106850 }, { "epoch": 0.1343, "grad_norm": 0.09073317795991898, "learning_rate": 2.4231670904979764e-05, "loss": 0.0349, "step": 106860 }, { "epoch": 0.13435, "grad_norm": 0.08459851890802383, "learning_rate": 2.422753919360285e-05, "loss": 0.0336, "step": 106870 }, { "epoch": 0.1344, "grad_norm": 0.07097344845533371, "learning_rate": 2.4223407503344716e-05, "loss": 0.0335, "step": 106880 }, { "epoch": 0.13445, "grad_norm": 0.07137028127908707, "learning_rate": 2.4219275834318338e-05, "loss": 0.0343, "step": 106890 }, { "epoch": 0.1345, "grad_norm": 0.07252298295497894, "learning_rate": 2.4215144186636658e-05, "loss": 0.0334, "step": 106900 }, { "epoch": 0.13455, "grad_norm": 0.09245292842388153, "learning_rate": 2.4211012560412643e-05, "loss": 0.0349, "step": 106910 }, { "epoch": 0.1346, "grad_norm": 0.07488303631544113, "learning_rate": 2.4206880955759247e-05, "loss": 0.0338, "step": 106920 }, { "epoch": 0.13465, "grad_norm": 0.06381545215845108, "learning_rate": 2.4202749372789424e-05, "loss": 0.0362, "step": 106930 }, { "epoch": 0.1347, "grad_norm": 0.07639806717634201, "learning_rate": 2.419861781161614e-05, "loss": 0.0341, "step": 106940 }, { "epoch": 0.13475, "grad_norm": 0.10640095174312592, "learning_rate": 2.419448627235234e-05, "loss": 0.035, "step": 106950 }, { "epoch": 0.1348, "grad_norm": 0.09375517815351486, "learning_rate": 2.419035475511099e-05, "loss": 0.0348, "step": 106960 }, { "epoch": 0.13485, "grad_norm": 0.05909932032227516, "learning_rate": 2.4186223260005032e-05, "loss": 0.0339, "step": 106970 }, { "epoch": 0.1349, "grad_norm": 0.0712037906050682, "learning_rate": 2.4182091787147425e-05, "loss": 0.0348, "step": 106980 }, { "epoch": 0.13495, "grad_norm": 0.05575518310070038, "learning_rate": 2.417796033665112e-05, "loss": 0.0341, "step": 106990 }, { "epoch": 0.135, "grad_norm": 0.07256089895963669, "learning_rate": 2.417382890862907e-05, "loss": 0.0341, "step": 107000 }, { "epoch": 0.13505, "grad_norm": 0.07149246335029602, "learning_rate": 2.416969750319423e-05, "loss": 0.0355, "step": 107010 }, { "epoch": 0.1351, "grad_norm": 0.06963690370321274, "learning_rate": 2.416556612045954e-05, "loss": 0.0335, "step": 107020 }, { "epoch": 0.13515, "grad_norm": 0.06591261178255081, "learning_rate": 2.4161434760537976e-05, "loss": 0.0349, "step": 107030 }, { "epoch": 0.1352, "grad_norm": 0.09097351133823395, "learning_rate": 2.4157303423542452e-05, "loss": 0.0359, "step": 107040 }, { "epoch": 0.13525, "grad_norm": 0.06748231500387192, "learning_rate": 2.4153172109585942e-05, "loss": 0.0333, "step": 107050 }, { "epoch": 0.1353, "grad_norm": 0.0742628425359726, "learning_rate": 2.4149040818781395e-05, "loss": 0.0352, "step": 107060 }, { "epoch": 0.13535, "grad_norm": 0.09796442836523056, "learning_rate": 2.4144909551241745e-05, "loss": 0.0365, "step": 107070 }, { "epoch": 0.1354, "grad_norm": 0.08070221543312073, "learning_rate": 2.4140778307079954e-05, "loss": 0.0349, "step": 107080 }, { "epoch": 0.13545, "grad_norm": 0.08046849071979523, "learning_rate": 2.4136647086408952e-05, "loss": 0.0347, "step": 107090 }, { "epoch": 0.1355, "grad_norm": 0.06781762838363647, "learning_rate": 2.41325158893417e-05, "loss": 0.0357, "step": 107100 }, { "epoch": 0.13555, "grad_norm": 0.08092023432254791, "learning_rate": 2.412838471599114e-05, "loss": 0.0357, "step": 107110 }, { "epoch": 0.1356, "grad_norm": 0.07321129739284515, "learning_rate": 2.4124253566470204e-05, "loss": 0.0336, "step": 107120 }, { "epoch": 0.13565, "grad_norm": 0.07404232025146484, "learning_rate": 2.412012244089186e-05, "loss": 0.0368, "step": 107130 }, { "epoch": 0.1357, "grad_norm": 0.07883698493242264, "learning_rate": 2.4115991339369025e-05, "loss": 0.0345, "step": 107140 }, { "epoch": 0.13575, "grad_norm": 0.07799618691205978, "learning_rate": 2.4111860262014666e-05, "loss": 0.0344, "step": 107150 }, { "epoch": 0.1358, "grad_norm": 0.06702414900064468, "learning_rate": 2.4107729208941705e-05, "loss": 0.0334, "step": 107160 }, { "epoch": 0.13585, "grad_norm": 0.0739661380648613, "learning_rate": 2.41035981802631e-05, "loss": 0.0334, "step": 107170 }, { "epoch": 0.1359, "grad_norm": 0.07123866677284241, "learning_rate": 2.409946717609178e-05, "loss": 0.0332, "step": 107180 }, { "epoch": 0.13595, "grad_norm": 0.07094275206327438, "learning_rate": 2.4095336196540685e-05, "loss": 0.0348, "step": 107190 }, { "epoch": 0.136, "grad_norm": 0.06380019336938858, "learning_rate": 2.4091205241722767e-05, "loss": 0.0331, "step": 107200 }, { "epoch": 0.13605, "grad_norm": 0.066954106092453, "learning_rate": 2.408707431175095e-05, "loss": 0.0342, "step": 107210 }, { "epoch": 0.1361, "grad_norm": 0.06480351090431213, "learning_rate": 2.4082943406738185e-05, "loss": 0.0332, "step": 107220 }, { "epoch": 0.13615, "grad_norm": 0.07648269832134247, "learning_rate": 2.407881252679739e-05, "loss": 0.0369, "step": 107230 }, { "epoch": 0.1362, "grad_norm": 0.10895515233278275, "learning_rate": 2.407468167204152e-05, "loss": 0.0338, "step": 107240 }, { "epoch": 0.13625, "grad_norm": 0.062433335930109024, "learning_rate": 2.407055084258351e-05, "loss": 0.0339, "step": 107250 }, { "epoch": 0.1363, "grad_norm": 0.08131091296672821, "learning_rate": 2.4066420038536288e-05, "loss": 0.0346, "step": 107260 }, { "epoch": 0.13635, "grad_norm": 0.07425548881292343, "learning_rate": 2.4062289260012797e-05, "loss": 0.0343, "step": 107270 }, { "epoch": 0.1364, "grad_norm": 0.0837615355849266, "learning_rate": 2.405815850712596e-05, "loss": 0.0336, "step": 107280 }, { "epoch": 0.13645, "grad_norm": 0.06968902796506882, "learning_rate": 2.405402777998872e-05, "loss": 0.0329, "step": 107290 }, { "epoch": 0.1365, "grad_norm": 0.06833193451166153, "learning_rate": 2.4049897078714e-05, "loss": 0.0323, "step": 107300 }, { "epoch": 0.13655, "grad_norm": 0.06843312829732895, "learning_rate": 2.4045766403414728e-05, "loss": 0.0336, "step": 107310 }, { "epoch": 0.1366, "grad_norm": 0.06134680286049843, "learning_rate": 2.404163575420386e-05, "loss": 0.0367, "step": 107320 }, { "epoch": 0.13665, "grad_norm": 0.07994901388883591, "learning_rate": 2.40375051311943e-05, "loss": 0.0356, "step": 107330 }, { "epoch": 0.1367, "grad_norm": 0.07322155684232712, "learning_rate": 2.4033374534499004e-05, "loss": 0.0344, "step": 107340 }, { "epoch": 0.13675, "grad_norm": 0.07866465300321579, "learning_rate": 2.4029243964230867e-05, "loss": 0.0348, "step": 107350 }, { "epoch": 0.1368, "grad_norm": 0.06857824325561523, "learning_rate": 2.4025113420502843e-05, "loss": 0.0338, "step": 107360 }, { "epoch": 0.13685, "grad_norm": 0.05879867821931839, "learning_rate": 2.402098290342785e-05, "loss": 0.0346, "step": 107370 }, { "epoch": 0.1369, "grad_norm": 0.0671166256070137, "learning_rate": 2.4016852413118815e-05, "loss": 0.0374, "step": 107380 }, { "epoch": 0.13695, "grad_norm": 0.07220599055290222, "learning_rate": 2.401272194968867e-05, "loss": 0.0331, "step": 107390 }, { "epoch": 0.137, "grad_norm": 0.06513206660747528, "learning_rate": 2.4008591513250332e-05, "loss": 0.0358, "step": 107400 }, { "epoch": 0.13705, "grad_norm": 0.058335594832897186, "learning_rate": 2.4004461103916736e-05, "loss": 0.0379, "step": 107410 }, { "epoch": 0.1371, "grad_norm": 0.06773054599761963, "learning_rate": 2.4000330721800796e-05, "loss": 0.0377, "step": 107420 }, { "epoch": 0.13715, "grad_norm": 0.07019204646348953, "learning_rate": 2.3996200367015428e-05, "loss": 0.0358, "step": 107430 }, { "epoch": 0.1372, "grad_norm": 0.07152324169874191, "learning_rate": 2.399207003967358e-05, "loss": 0.0362, "step": 107440 }, { "epoch": 0.13725, "grad_norm": 0.06750397384166718, "learning_rate": 2.3987939739888153e-05, "loss": 0.0377, "step": 107450 }, { "epoch": 0.1373, "grad_norm": 0.07339220494031906, "learning_rate": 2.3983809467772075e-05, "loss": 0.0348, "step": 107460 }, { "epoch": 0.13735, "grad_norm": 0.06790155917406082, "learning_rate": 2.397967922343826e-05, "loss": 0.0351, "step": 107470 }, { "epoch": 0.1374, "grad_norm": 0.06944229453802109, "learning_rate": 2.3975549006999638e-05, "loss": 0.0341, "step": 107480 }, { "epoch": 0.13745, "grad_norm": 0.06574300676584244, "learning_rate": 2.3971418818569115e-05, "loss": 0.0356, "step": 107490 }, { "epoch": 0.1375, "grad_norm": 0.07261010259389877, "learning_rate": 2.3967288658259617e-05, "loss": 0.0351, "step": 107500 }, { "epoch": 0.13755, "grad_norm": 0.06218640133738518, "learning_rate": 2.3963158526184066e-05, "loss": 0.0369, "step": 107510 }, { "epoch": 0.1376, "grad_norm": 0.06601933389902115, "learning_rate": 2.3959028422455357e-05, "loss": 0.0349, "step": 107520 }, { "epoch": 0.13765, "grad_norm": 0.07100588083267212, "learning_rate": 2.3954898347186436e-05, "loss": 0.0345, "step": 107530 }, { "epoch": 0.1377, "grad_norm": 0.0939004197716713, "learning_rate": 2.3950768300490187e-05, "loss": 0.0363, "step": 107540 }, { "epoch": 0.13775, "grad_norm": 0.06147817522287369, "learning_rate": 2.394663828247955e-05, "loss": 0.0364, "step": 107550 }, { "epoch": 0.1378, "grad_norm": 0.0856638178229332, "learning_rate": 2.394250829326742e-05, "loss": 0.0352, "step": 107560 }, { "epoch": 0.13785, "grad_norm": 0.07360093295574188, "learning_rate": 2.3938378332966714e-05, "loss": 0.0371, "step": 107570 }, { "epoch": 0.1379, "grad_norm": 0.07323596626520157, "learning_rate": 2.3934248401690356e-05, "loss": 0.0358, "step": 107580 }, { "epoch": 0.13795, "grad_norm": 0.07091102004051208, "learning_rate": 2.3930118499551236e-05, "loss": 0.0387, "step": 107590 }, { "epoch": 0.138, "grad_norm": 0.06781253218650818, "learning_rate": 2.392598862666228e-05, "loss": 0.035, "step": 107600 }, { "epoch": 0.13805, "grad_norm": 0.0743185356259346, "learning_rate": 2.3921858783136387e-05, "loss": 0.0354, "step": 107610 }, { "epoch": 0.1381, "grad_norm": 0.11321161687374115, "learning_rate": 2.3917728969086468e-05, "loss": 0.0359, "step": 107620 }, { "epoch": 0.13815, "grad_norm": 0.11684630066156387, "learning_rate": 2.3913599184625442e-05, "loss": 0.0356, "step": 107630 }, { "epoch": 0.1382, "grad_norm": 0.12119896709918976, "learning_rate": 2.3909469429866192e-05, "loss": 0.036, "step": 107640 }, { "epoch": 0.13825, "grad_norm": 0.0739327073097229, "learning_rate": 2.3905339704921652e-05, "loss": 0.0354, "step": 107650 }, { "epoch": 0.1383, "grad_norm": 0.07066735625267029, "learning_rate": 2.3901210009904707e-05, "loss": 0.0348, "step": 107660 }, { "epoch": 0.13835, "grad_norm": 0.08110303431749344, "learning_rate": 2.3897080344928273e-05, "loss": 0.0358, "step": 107670 }, { "epoch": 0.1384, "grad_norm": 0.08074736595153809, "learning_rate": 2.3892950710105243e-05, "loss": 0.0351, "step": 107680 }, { "epoch": 0.13845, "grad_norm": 0.0737789049744606, "learning_rate": 2.3888821105548523e-05, "loss": 0.0348, "step": 107690 }, { "epoch": 0.1385, "grad_norm": 0.06845972687005997, "learning_rate": 2.3884691531371023e-05, "loss": 0.0346, "step": 107700 }, { "epoch": 0.13855, "grad_norm": 0.07794822752475739, "learning_rate": 2.3880561987685627e-05, "loss": 0.0337, "step": 107710 }, { "epoch": 0.1386, "grad_norm": 0.07079590857028961, "learning_rate": 2.387643247460526e-05, "loss": 0.0337, "step": 107720 }, { "epoch": 0.13865, "grad_norm": 0.07472711056470871, "learning_rate": 2.387230299224279e-05, "loss": 0.0365, "step": 107730 }, { "epoch": 0.1387, "grad_norm": 0.0704125314950943, "learning_rate": 2.386817354071115e-05, "loss": 0.0336, "step": 107740 }, { "epoch": 0.13875, "grad_norm": 0.06483296304941177, "learning_rate": 2.3864044120123205e-05, "loss": 0.0346, "step": 107750 }, { "epoch": 0.1388, "grad_norm": 0.061506740748882294, "learning_rate": 2.3859914730591873e-05, "loss": 0.0339, "step": 107760 }, { "epoch": 0.13885, "grad_norm": 0.08290746808052063, "learning_rate": 2.385578537223005e-05, "loss": 0.0332, "step": 107770 }, { "epoch": 0.1389, "grad_norm": 0.08351372927427292, "learning_rate": 2.3851656045150617e-05, "loss": 0.0342, "step": 107780 }, { "epoch": 0.13895, "grad_norm": 0.06477633863687515, "learning_rate": 2.384752674946648e-05, "loss": 0.0341, "step": 107790 }, { "epoch": 0.139, "grad_norm": 0.07235559821128845, "learning_rate": 2.3843397485290527e-05, "loss": 0.0335, "step": 107800 }, { "epoch": 0.13905, "grad_norm": 0.06576807051897049, "learning_rate": 2.3839268252735647e-05, "loss": 0.0339, "step": 107810 }, { "epoch": 0.1391, "grad_norm": 0.0647667944431305, "learning_rate": 2.3835139051914753e-05, "loss": 0.0357, "step": 107820 }, { "epoch": 0.13915, "grad_norm": 0.09135240316390991, "learning_rate": 2.3831009882940704e-05, "loss": 0.0353, "step": 107830 }, { "epoch": 0.1392, "grad_norm": 0.07542411983013153, "learning_rate": 2.3826880745926418e-05, "loss": 0.0341, "step": 107840 }, { "epoch": 0.13925, "grad_norm": 0.0774625837802887, "learning_rate": 2.3822751640984757e-05, "loss": 0.0339, "step": 107850 }, { "epoch": 0.1393, "grad_norm": 0.07527747005224228, "learning_rate": 2.3818622568228633e-05, "loss": 0.0335, "step": 107860 }, { "epoch": 0.13935, "grad_norm": 0.0802069753408432, "learning_rate": 2.3814493527770923e-05, "loss": 0.034, "step": 107870 }, { "epoch": 0.1394, "grad_norm": 0.07678884267807007, "learning_rate": 2.3810364519724515e-05, "loss": 0.0349, "step": 107880 }, { "epoch": 0.13945, "grad_norm": 0.09053342044353485, "learning_rate": 2.3806235544202295e-05, "loss": 0.0341, "step": 107890 }, { "epoch": 0.1395, "grad_norm": 0.07176068425178528, "learning_rate": 2.3802106601317146e-05, "loss": 0.0368, "step": 107900 }, { "epoch": 0.13955, "grad_norm": 0.09751512855291367, "learning_rate": 2.3797977691181957e-05, "loss": 0.0366, "step": 107910 }, { "epoch": 0.1396, "grad_norm": 0.06735493242740631, "learning_rate": 2.3793848813909596e-05, "loss": 0.0348, "step": 107920 }, { "epoch": 0.13965, "grad_norm": 0.06756073981523514, "learning_rate": 2.3789719969612966e-05, "loss": 0.0352, "step": 107930 }, { "epoch": 0.1397, "grad_norm": 0.07313332706689835, "learning_rate": 2.3785591158404922e-05, "loss": 0.0353, "step": 107940 }, { "epoch": 0.13975, "grad_norm": 0.0708153247833252, "learning_rate": 2.378146238039837e-05, "loss": 0.0356, "step": 107950 }, { "epoch": 0.1398, "grad_norm": 0.0632934644818306, "learning_rate": 2.377733363570618e-05, "loss": 0.0342, "step": 107960 }, { "epoch": 0.13985, "grad_norm": 0.06964541226625443, "learning_rate": 2.3773204924441227e-05, "loss": 0.0332, "step": 107970 }, { "epoch": 0.1399, "grad_norm": 0.08246681839227676, "learning_rate": 2.3769076246716395e-05, "loss": 0.0349, "step": 107980 }, { "epoch": 0.13995, "grad_norm": 0.08017262071371078, "learning_rate": 2.376494760264455e-05, "loss": 0.0342, "step": 107990 }, { "epoch": 0.14, "grad_norm": 0.07744333148002625, "learning_rate": 2.3760818992338573e-05, "loss": 0.0338, "step": 108000 }, { "epoch": 0.14005, "grad_norm": 0.08109390735626221, "learning_rate": 2.3756690415911346e-05, "loss": 0.0349, "step": 108010 }, { "epoch": 0.1401, "grad_norm": 0.07111233472824097, "learning_rate": 2.3752561873475724e-05, "loss": 0.0332, "step": 108020 }, { "epoch": 0.14015, "grad_norm": 0.08744429051876068, "learning_rate": 2.3748433365144606e-05, "loss": 0.0332, "step": 108030 }, { "epoch": 0.1402, "grad_norm": 0.08111266046762466, "learning_rate": 2.3744304891030837e-05, "loss": 0.0334, "step": 108040 }, { "epoch": 0.14025, "grad_norm": 0.0656583309173584, "learning_rate": 2.3740176451247314e-05, "loss": 0.0333, "step": 108050 }, { "epoch": 0.1403, "grad_norm": 0.08800430595874786, "learning_rate": 2.3736048045906877e-05, "loss": 0.0372, "step": 108060 }, { "epoch": 0.14035, "grad_norm": 0.07889630645513535, "learning_rate": 2.373191967512242e-05, "loss": 0.033, "step": 108070 }, { "epoch": 0.1404, "grad_norm": 0.0784083753824234, "learning_rate": 2.372779133900681e-05, "loss": 0.0347, "step": 108080 }, { "epoch": 0.14045, "grad_norm": 0.06280580163002014, "learning_rate": 2.3723663037672898e-05, "loss": 0.0343, "step": 108090 }, { "epoch": 0.1405, "grad_norm": 0.08633331209421158, "learning_rate": 2.3719534771233563e-05, "loss": 0.0373, "step": 108100 }, { "epoch": 0.14055, "grad_norm": 0.07899964600801468, "learning_rate": 2.3715406539801663e-05, "loss": 0.0347, "step": 108110 }, { "epoch": 0.1406, "grad_norm": 0.08867514878511429, "learning_rate": 2.371127834349007e-05, "loss": 0.035, "step": 108120 }, { "epoch": 0.14065, "grad_norm": 0.06929611414670944, "learning_rate": 2.3707150182411637e-05, "loss": 0.0343, "step": 108130 }, { "epoch": 0.1407, "grad_norm": 0.07136829197406769, "learning_rate": 2.3703022056679227e-05, "loss": 0.0376, "step": 108140 }, { "epoch": 0.14075, "grad_norm": 0.06537345796823502, "learning_rate": 2.369889396640572e-05, "loss": 0.037, "step": 108150 }, { "epoch": 0.1408, "grad_norm": 0.06904958933591843, "learning_rate": 2.3694765911703957e-05, "loss": 0.0349, "step": 108160 }, { "epoch": 0.14085, "grad_norm": 0.07893108576536179, "learning_rate": 2.3690637892686808e-05, "loss": 0.0357, "step": 108170 }, { "epoch": 0.1409, "grad_norm": 0.08251333981752396, "learning_rate": 2.368650990946712e-05, "loss": 0.0353, "step": 108180 }, { "epoch": 0.14095, "grad_norm": 0.0642128735780716, "learning_rate": 2.3682381962157766e-05, "loss": 0.0335, "step": 108190 }, { "epoch": 0.141, "grad_norm": 0.059158291667699814, "learning_rate": 2.3678254050871587e-05, "loss": 0.0351, "step": 108200 }, { "epoch": 0.14105, "grad_norm": 0.07299425452947617, "learning_rate": 2.367412617572145e-05, "loss": 0.0354, "step": 108210 }, { "epoch": 0.1411, "grad_norm": 0.06661834567785263, "learning_rate": 2.3669998336820205e-05, "loss": 0.0349, "step": 108220 }, { "epoch": 0.14115, "grad_norm": 0.07236451655626297, "learning_rate": 2.3665870534280696e-05, "loss": 0.0351, "step": 108230 }, { "epoch": 0.1412, "grad_norm": 0.07977017015218735, "learning_rate": 2.3661742768215802e-05, "loss": 0.0339, "step": 108240 }, { "epoch": 0.14125, "grad_norm": 0.0703221783041954, "learning_rate": 2.3657615038738343e-05, "loss": 0.0342, "step": 108250 }, { "epoch": 0.1413, "grad_norm": 0.0639897808432579, "learning_rate": 2.365348734596119e-05, "loss": 0.035, "step": 108260 }, { "epoch": 0.14135, "grad_norm": 0.08083923906087875, "learning_rate": 2.364935968999719e-05, "loss": 0.0371, "step": 108270 }, { "epoch": 0.1414, "grad_norm": 0.06952771544456482, "learning_rate": 2.3645232070959185e-05, "loss": 0.0355, "step": 108280 }, { "epoch": 0.14145, "grad_norm": 0.06466667354106903, "learning_rate": 2.3641104488960032e-05, "loss": 0.0383, "step": 108290 }, { "epoch": 0.1415, "grad_norm": 0.07003355026245117, "learning_rate": 2.3636976944112568e-05, "loss": 0.0353, "step": 108300 }, { "epoch": 0.14155, "grad_norm": 0.06676148623228073, "learning_rate": 2.3632849436529643e-05, "loss": 0.0356, "step": 108310 }, { "epoch": 0.1416, "grad_norm": 0.09525799751281738, "learning_rate": 2.36287219663241e-05, "loss": 0.0376, "step": 108320 }, { "epoch": 0.14165, "grad_norm": 0.07232654094696045, "learning_rate": 2.3624594533608776e-05, "loss": 0.0359, "step": 108330 }, { "epoch": 0.1417, "grad_norm": 0.06810037791728973, "learning_rate": 2.362046713849654e-05, "loss": 0.0357, "step": 108340 }, { "epoch": 0.14175, "grad_norm": 0.06049606204032898, "learning_rate": 2.361633978110019e-05, "loss": 0.0352, "step": 108350 }, { "epoch": 0.1418, "grad_norm": 0.06264954805374146, "learning_rate": 2.361221246153261e-05, "loss": 0.036, "step": 108360 }, { "epoch": 0.14185, "grad_norm": 0.08460656553506851, "learning_rate": 2.3608085179906607e-05, "loss": 0.039, "step": 108370 }, { "epoch": 0.1419, "grad_norm": 0.07102148234844208, "learning_rate": 2.3603957936335043e-05, "loss": 0.0375, "step": 108380 }, { "epoch": 0.14195, "grad_norm": 0.07672837376594543, "learning_rate": 2.359983073093074e-05, "loss": 0.0358, "step": 108390 }, { "epoch": 0.142, "grad_norm": 0.05874524638056755, "learning_rate": 2.3595703563806536e-05, "loss": 0.0349, "step": 108400 }, { "epoch": 0.14205, "grad_norm": 0.07055836915969849, "learning_rate": 2.3591576435075276e-05, "loss": 0.0349, "step": 108410 }, { "epoch": 0.1421, "grad_norm": 0.055572111159563065, "learning_rate": 2.358744934484978e-05, "loss": 0.0343, "step": 108420 }, { "epoch": 0.14215, "grad_norm": 0.06013401970267296, "learning_rate": 2.3583322293242893e-05, "loss": 0.0346, "step": 108430 }, { "epoch": 0.1422, "grad_norm": 0.07167872786521912, "learning_rate": 2.3579195280367434e-05, "loss": 0.0342, "step": 108440 }, { "epoch": 0.14225, "grad_norm": 0.07439404726028442, "learning_rate": 2.3575068306336245e-05, "loss": 0.0391, "step": 108450 }, { "epoch": 0.1423, "grad_norm": 0.08786989748477936, "learning_rate": 2.3570941371262158e-05, "loss": 0.0361, "step": 108460 }, { "epoch": 0.14235, "grad_norm": 0.0756273940205574, "learning_rate": 2.3566814475257994e-05, "loss": 0.0346, "step": 108470 }, { "epoch": 0.1424, "grad_norm": 0.07808423787355423, "learning_rate": 2.356268761843659e-05, "loss": 0.0333, "step": 108480 }, { "epoch": 0.14245, "grad_norm": 0.07155963033437729, "learning_rate": 2.355856080091076e-05, "loss": 0.0335, "step": 108490 }, { "epoch": 0.1425, "grad_norm": 0.08156801760196686, "learning_rate": 2.3554434022793344e-05, "loss": 0.0361, "step": 108500 }, { "epoch": 0.14255, "grad_norm": 0.06574303656816483, "learning_rate": 2.3550307284197148e-05, "loss": 0.0346, "step": 108510 }, { "epoch": 0.1426, "grad_norm": 0.07332491129636765, "learning_rate": 2.3546180585235003e-05, "loss": 0.0381, "step": 108520 }, { "epoch": 0.14265, "grad_norm": 0.07091228663921356, "learning_rate": 2.3542053926019753e-05, "loss": 0.034, "step": 108530 }, { "epoch": 0.1427, "grad_norm": 0.06543724983930588, "learning_rate": 2.353792730666418e-05, "loss": 0.037, "step": 108540 }, { "epoch": 0.14275, "grad_norm": 0.07887712121009827, "learning_rate": 2.3533800727281145e-05, "loss": 0.0356, "step": 108550 }, { "epoch": 0.1428, "grad_norm": 0.08726233243942261, "learning_rate": 2.3529674187983433e-05, "loss": 0.0362, "step": 108560 }, { "epoch": 0.14285, "grad_norm": 0.07006168365478516, "learning_rate": 2.3525547688883885e-05, "loss": 0.0351, "step": 108570 }, { "epoch": 0.1429, "grad_norm": 0.07025811076164246, "learning_rate": 2.3521421230095303e-05, "loss": 0.0352, "step": 108580 }, { "epoch": 0.14295, "grad_norm": 0.0629371628165245, "learning_rate": 2.351729481173051e-05, "loss": 0.0356, "step": 108590 }, { "epoch": 0.143, "grad_norm": 0.0688789114356041, "learning_rate": 2.3513168433902324e-05, "loss": 0.0349, "step": 108600 }, { "epoch": 0.14305, "grad_norm": 0.07087711989879608, "learning_rate": 2.3509042096723552e-05, "loss": 0.0352, "step": 108610 }, { "epoch": 0.1431, "grad_norm": 0.07730893045663834, "learning_rate": 2.3504915800307012e-05, "loss": 0.0348, "step": 108620 }, { "epoch": 0.14315, "grad_norm": 0.06419000029563904, "learning_rate": 2.350078954476551e-05, "loss": 0.0368, "step": 108630 }, { "epoch": 0.1432, "grad_norm": 0.09525299072265625, "learning_rate": 2.3496663330211848e-05, "loss": 0.0405, "step": 108640 }, { "epoch": 0.14325, "grad_norm": 0.07910957932472229, "learning_rate": 2.349253715675886e-05, "loss": 0.0345, "step": 108650 }, { "epoch": 0.1433, "grad_norm": 0.07841334491968155, "learning_rate": 2.3488411024519334e-05, "loss": 0.0353, "step": 108660 }, { "epoch": 0.14335, "grad_norm": 0.06821152567863464, "learning_rate": 2.3484284933606085e-05, "loss": 0.0353, "step": 108670 }, { "epoch": 0.1434, "grad_norm": 0.07697838544845581, "learning_rate": 2.3480158884131914e-05, "loss": 0.0349, "step": 108680 }, { "epoch": 0.14345, "grad_norm": 0.0828322321176529, "learning_rate": 2.3476032876209632e-05, "loss": 0.0363, "step": 108690 }, { "epoch": 0.1435, "grad_norm": 0.07963103801012039, "learning_rate": 2.3471906909952036e-05, "loss": 0.0348, "step": 108700 }, { "epoch": 0.14355, "grad_norm": 0.07140455394983292, "learning_rate": 2.346778098547193e-05, "loss": 0.0347, "step": 108710 }, { "epoch": 0.1436, "grad_norm": 0.0864250510931015, "learning_rate": 2.346365510288212e-05, "loss": 0.0361, "step": 108720 }, { "epoch": 0.14365, "grad_norm": 0.08561394363641739, "learning_rate": 2.3459529262295394e-05, "loss": 0.0343, "step": 108730 }, { "epoch": 0.1437, "grad_norm": 0.09716397523880005, "learning_rate": 2.3455403463824573e-05, "loss": 0.0347, "step": 108740 }, { "epoch": 0.14375, "grad_norm": 0.07353556901216507, "learning_rate": 2.3451277707582427e-05, "loss": 0.0362, "step": 108750 }, { "epoch": 0.1438, "grad_norm": 0.06440164148807526, "learning_rate": 2.344715199368178e-05, "loss": 0.0341, "step": 108760 }, { "epoch": 0.14385, "grad_norm": 0.07270939648151398, "learning_rate": 2.34430263222354e-05, "loss": 0.0351, "step": 108770 }, { "epoch": 0.1439, "grad_norm": 0.06414466351270676, "learning_rate": 2.3438900693356102e-05, "loss": 0.035, "step": 108780 }, { "epoch": 0.14395, "grad_norm": 0.06215673312544823, "learning_rate": 2.3434775107156674e-05, "loss": 0.0347, "step": 108790 }, { "epoch": 0.144, "grad_norm": 0.059777650982141495, "learning_rate": 2.3430649563749906e-05, "loss": 0.0382, "step": 108800 }, { "epoch": 0.14405, "grad_norm": 0.06394518911838531, "learning_rate": 2.3426524063248593e-05, "loss": 0.0384, "step": 108810 }, { "epoch": 0.1441, "grad_norm": 0.053943321108818054, "learning_rate": 2.3422398605765515e-05, "loss": 0.0333, "step": 108820 }, { "epoch": 0.14415, "grad_norm": 0.06608572602272034, "learning_rate": 2.341827319141346e-05, "loss": 0.0349, "step": 108830 }, { "epoch": 0.1442, "grad_norm": 0.07171956449747086, "learning_rate": 2.3414147820305238e-05, "loss": 0.0348, "step": 108840 }, { "epoch": 0.14425, "grad_norm": 0.06831841915845871, "learning_rate": 2.3410022492553604e-05, "loss": 0.0365, "step": 108850 }, { "epoch": 0.1443, "grad_norm": 0.06574524939060211, "learning_rate": 2.340589720827137e-05, "loss": 0.0345, "step": 108860 }, { "epoch": 0.14435, "grad_norm": 0.07829328626394272, "learning_rate": 2.34017719675713e-05, "loss": 0.0351, "step": 108870 }, { "epoch": 0.1444, "grad_norm": 0.08291122317314148, "learning_rate": 2.339764677056619e-05, "loss": 0.0358, "step": 108880 }, { "epoch": 0.14445, "grad_norm": 0.06285149604082108, "learning_rate": 2.3393521617368806e-05, "loss": 0.0333, "step": 108890 }, { "epoch": 0.1445, "grad_norm": 0.0621388703584671, "learning_rate": 2.3389396508091943e-05, "loss": 0.0346, "step": 108900 }, { "epoch": 0.14455, "grad_norm": 0.0737023651599884, "learning_rate": 2.3385271442848376e-05, "loss": 0.034, "step": 108910 }, { "epoch": 0.1446, "grad_norm": 0.06401995569467545, "learning_rate": 2.338114642175088e-05, "loss": 0.0349, "step": 108920 }, { "epoch": 0.14465, "grad_norm": 0.06761576980352402, "learning_rate": 2.3377021444912235e-05, "loss": 0.0342, "step": 108930 }, { "epoch": 0.1447, "grad_norm": 0.07421578466892242, "learning_rate": 2.3372896512445203e-05, "loss": 0.0354, "step": 108940 }, { "epoch": 0.14475, "grad_norm": 0.060112569481134415, "learning_rate": 2.3368771624462585e-05, "loss": 0.0337, "step": 108950 }, { "epoch": 0.1448, "grad_norm": 0.06732338666915894, "learning_rate": 2.3364646781077123e-05, "loss": 0.0345, "step": 108960 }, { "epoch": 0.14485, "grad_norm": 0.07164648175239563, "learning_rate": 2.3360521982401608e-05, "loss": 0.0342, "step": 108970 }, { "epoch": 0.1449, "grad_norm": 0.0799083486199379, "learning_rate": 2.3356397228548812e-05, "loss": 0.0341, "step": 108980 }, { "epoch": 0.14495, "grad_norm": 0.07315156608819962, "learning_rate": 2.3352272519631494e-05, "loss": 0.0347, "step": 108990 }, { "epoch": 0.145, "grad_norm": 0.07438869029283524, "learning_rate": 2.334814785576243e-05, "loss": 0.035, "step": 109000 }, { "epoch": 0.14505, "grad_norm": 0.0785011276602745, "learning_rate": 2.334402323705438e-05, "loss": 0.0342, "step": 109010 }, { "epoch": 0.1451, "grad_norm": 0.06625154614448547, "learning_rate": 2.3339898663620103e-05, "loss": 0.0348, "step": 109020 }, { "epoch": 0.14515, "grad_norm": 0.06950251013040543, "learning_rate": 2.3335774135572394e-05, "loss": 0.0357, "step": 109030 }, { "epoch": 0.1452, "grad_norm": 0.0790690928697586, "learning_rate": 2.3331649653023977e-05, "loss": 0.0366, "step": 109040 }, { "epoch": 0.14525, "grad_norm": 0.0788382738828659, "learning_rate": 2.3327525216087644e-05, "loss": 0.0369, "step": 109050 }, { "epoch": 0.1453, "grad_norm": 0.07070305198431015, "learning_rate": 2.332340082487613e-05, "loss": 0.0351, "step": 109060 }, { "epoch": 0.14535, "grad_norm": 0.06878872215747833, "learning_rate": 2.331927647950222e-05, "loss": 0.0356, "step": 109070 }, { "epoch": 0.1454, "grad_norm": 0.06110698729753494, "learning_rate": 2.3315152180078654e-05, "loss": 0.035, "step": 109080 }, { "epoch": 0.14545, "grad_norm": 0.09910532087087631, "learning_rate": 2.3311027926718193e-05, "loss": 0.0365, "step": 109090 }, { "epoch": 0.1455, "grad_norm": 0.07843577861785889, "learning_rate": 2.3306903719533598e-05, "loss": 0.0379, "step": 109100 }, { "epoch": 0.14555, "grad_norm": 0.06486725062131882, "learning_rate": 2.3302779558637615e-05, "loss": 0.0356, "step": 109110 }, { "epoch": 0.1456, "grad_norm": 0.1011667549610138, "learning_rate": 2.3298655444143008e-05, "loss": 0.0364, "step": 109120 }, { "epoch": 0.14565, "grad_norm": 0.0909094363451004, "learning_rate": 2.329453137616251e-05, "loss": 0.0394, "step": 109130 }, { "epoch": 0.1457, "grad_norm": 0.07500338554382324, "learning_rate": 2.32904073548089e-05, "loss": 0.0349, "step": 109140 }, { "epoch": 0.14575, "grad_norm": 0.07224991917610168, "learning_rate": 2.3286283380194897e-05, "loss": 0.0363, "step": 109150 }, { "epoch": 0.1458, "grad_norm": 0.06772609055042267, "learning_rate": 2.3282159452433267e-05, "loss": 0.0343, "step": 109160 }, { "epoch": 0.14585, "grad_norm": 0.06794916838407516, "learning_rate": 2.3278035571636755e-05, "loss": 0.0354, "step": 109170 }, { "epoch": 0.1459, "grad_norm": 0.061266761273145676, "learning_rate": 2.3273911737918096e-05, "loss": 0.0359, "step": 109180 }, { "epoch": 0.14595, "grad_norm": 0.07602766156196594, "learning_rate": 2.3269787951390056e-05, "loss": 0.0351, "step": 109190 }, { "epoch": 0.146, "grad_norm": 0.07578182220458984, "learning_rate": 2.326566421216535e-05, "loss": 0.035, "step": 109200 }, { "epoch": 0.14605, "grad_norm": 0.06298764050006866, "learning_rate": 2.3261540520356727e-05, "loss": 0.0342, "step": 109210 }, { "epoch": 0.1461, "grad_norm": 0.057033587247133255, "learning_rate": 2.3257416876076954e-05, "loss": 0.0336, "step": 109220 }, { "epoch": 0.14615, "grad_norm": 0.07515522837638855, "learning_rate": 2.325329327943873e-05, "loss": 0.0344, "step": 109230 }, { "epoch": 0.1462, "grad_norm": 0.06341442465782166, "learning_rate": 2.324916973055483e-05, "loss": 0.0345, "step": 109240 }, { "epoch": 0.14625, "grad_norm": 0.08179578930139542, "learning_rate": 2.3245046229537954e-05, "loss": 0.035, "step": 109250 }, { "epoch": 0.1463, "grad_norm": 0.06408336013555527, "learning_rate": 2.324092277650087e-05, "loss": 0.0356, "step": 109260 }, { "epoch": 0.14635, "grad_norm": 0.08868936449289322, "learning_rate": 2.3236799371556282e-05, "loss": 0.0353, "step": 109270 }, { "epoch": 0.1464, "grad_norm": 0.07364244759082794, "learning_rate": 2.323267601481694e-05, "loss": 0.0358, "step": 109280 }, { "epoch": 0.14645, "grad_norm": 0.07817887514829636, "learning_rate": 2.322855270639558e-05, "loss": 0.0361, "step": 109290 }, { "epoch": 0.1465, "grad_norm": 0.06925657391548157, "learning_rate": 2.3224429446404917e-05, "loss": 0.0357, "step": 109300 }, { "epoch": 0.14655, "grad_norm": 0.06357885152101517, "learning_rate": 2.322030623495769e-05, "loss": 0.0373, "step": 109310 }, { "epoch": 0.1466, "grad_norm": 0.08716066926717758, "learning_rate": 2.321618307216662e-05, "loss": 0.0357, "step": 109320 }, { "epoch": 0.14665, "grad_norm": 0.08204200118780136, "learning_rate": 2.3212059958144434e-05, "loss": 0.0357, "step": 109330 }, { "epoch": 0.1467, "grad_norm": 0.07030654698610306, "learning_rate": 2.320793689300385e-05, "loss": 0.0356, "step": 109340 }, { "epoch": 0.14675, "grad_norm": 0.07193808257579803, "learning_rate": 2.3203813876857606e-05, "loss": 0.0351, "step": 109350 }, { "epoch": 0.1468, "grad_norm": 0.08544500917196274, "learning_rate": 2.319969090981842e-05, "loss": 0.037, "step": 109360 }, { "epoch": 0.14685, "grad_norm": 0.09021718055009842, "learning_rate": 2.3195567991999002e-05, "loss": 0.0353, "step": 109370 }, { "epoch": 0.1469, "grad_norm": 0.07073055952787399, "learning_rate": 2.319144512351208e-05, "loss": 0.0353, "step": 109380 }, { "epoch": 0.14695, "grad_norm": 0.07055257260799408, "learning_rate": 2.3187322304470365e-05, "loss": 0.0358, "step": 109390 }, { "epoch": 0.147, "grad_norm": 0.07493556290864944, "learning_rate": 2.318319953498658e-05, "loss": 0.036, "step": 109400 }, { "epoch": 0.14705, "grad_norm": 0.0694839134812355, "learning_rate": 2.317907681517344e-05, "loss": 0.0357, "step": 109410 }, { "epoch": 0.1471, "grad_norm": 0.07084786146879196, "learning_rate": 2.3174954145143644e-05, "loss": 0.0367, "step": 109420 }, { "epoch": 0.14715, "grad_norm": 0.09428457170724869, "learning_rate": 2.3170831525009933e-05, "loss": 0.0361, "step": 109430 }, { "epoch": 0.1472, "grad_norm": 0.06578890979290009, "learning_rate": 2.3166708954884987e-05, "loss": 0.0359, "step": 109440 }, { "epoch": 0.14725, "grad_norm": 0.07545687258243561, "learning_rate": 2.316258643488154e-05, "loss": 0.0354, "step": 109450 }, { "epoch": 0.1473, "grad_norm": 0.07363058626651764, "learning_rate": 2.315846396511228e-05, "loss": 0.0358, "step": 109460 }, { "epoch": 0.14735, "grad_norm": 0.07134675234556198, "learning_rate": 2.3154341545689926e-05, "loss": 0.0332, "step": 109470 }, { "epoch": 0.1474, "grad_norm": 0.07839695364236832, "learning_rate": 2.3150219176727186e-05, "loss": 0.0353, "step": 109480 }, { "epoch": 0.14745, "grad_norm": 0.07358036935329437, "learning_rate": 2.3146096858336752e-05, "loss": 0.0334, "step": 109490 }, { "epoch": 0.1475, "grad_norm": 0.0781262069940567, "learning_rate": 2.314197459063134e-05, "loss": 0.0344, "step": 109500 }, { "epoch": 0.14755, "grad_norm": 0.07102091610431671, "learning_rate": 2.313785237372364e-05, "loss": 0.0327, "step": 109510 }, { "epoch": 0.1476, "grad_norm": 0.08903536200523376, "learning_rate": 2.313373020772636e-05, "loss": 0.0351, "step": 109520 }, { "epoch": 0.14765, "grad_norm": 0.08091005682945251, "learning_rate": 2.3129608092752196e-05, "loss": 0.0347, "step": 109530 }, { "epoch": 0.1477, "grad_norm": 0.08431022614240646, "learning_rate": 2.312548602891383e-05, "loss": 0.0352, "step": 109540 }, { "epoch": 0.14775, "grad_norm": 0.08247928321361542, "learning_rate": 2.312136401632399e-05, "loss": 0.035, "step": 109550 }, { "epoch": 0.1478, "grad_norm": 0.08493194729089737, "learning_rate": 2.3117242055095344e-05, "loss": 0.0365, "step": 109560 }, { "epoch": 0.14785, "grad_norm": 0.061399515718221664, "learning_rate": 2.3113120145340593e-05, "loss": 0.0339, "step": 109570 }, { "epoch": 0.1479, "grad_norm": 0.06967747211456299, "learning_rate": 2.310899828717243e-05, "loss": 0.034, "step": 109580 }, { "epoch": 0.14795, "grad_norm": 0.07984711974859238, "learning_rate": 2.3104876480703545e-05, "loss": 0.0347, "step": 109590 }, { "epoch": 0.148, "grad_norm": 0.08287534862756729, "learning_rate": 2.310075472604662e-05, "loss": 0.0336, "step": 109600 }, { "epoch": 0.14805, "grad_norm": 0.06081758812069893, "learning_rate": 2.309663302331435e-05, "loss": 0.033, "step": 109610 }, { "epoch": 0.1481, "grad_norm": 0.07051486521959305, "learning_rate": 2.3092511372619422e-05, "loss": 0.0342, "step": 109620 }, { "epoch": 0.14815, "grad_norm": 0.07883277535438538, "learning_rate": 2.3088389774074504e-05, "loss": 0.0336, "step": 109630 }, { "epoch": 0.1482, "grad_norm": 0.06091853231191635, "learning_rate": 2.3084268227792307e-05, "loss": 0.035, "step": 109640 }, { "epoch": 0.14825, "grad_norm": 0.07201207429170609, "learning_rate": 2.3080146733885482e-05, "loss": 0.0341, "step": 109650 }, { "epoch": 0.1483, "grad_norm": 0.0676903948187828, "learning_rate": 2.3076025292466733e-05, "loss": 0.033, "step": 109660 }, { "epoch": 0.14835, "grad_norm": 0.07095537334680557, "learning_rate": 2.307190390364873e-05, "loss": 0.0352, "step": 109670 }, { "epoch": 0.1484, "grad_norm": 0.06302513927221298, "learning_rate": 2.3067782567544147e-05, "loss": 0.0364, "step": 109680 }, { "epoch": 0.14845, "grad_norm": 0.06290469318628311, "learning_rate": 2.3063661284265667e-05, "loss": 0.0346, "step": 109690 }, { "epoch": 0.1485, "grad_norm": 0.0665312334895134, "learning_rate": 2.3059540053925957e-05, "loss": 0.0348, "step": 109700 }, { "epoch": 0.14855, "grad_norm": 0.11684829741716385, "learning_rate": 2.3055418876637696e-05, "loss": 0.0365, "step": 109710 }, { "epoch": 0.1486, "grad_norm": 0.09129486232995987, "learning_rate": 2.305129775251355e-05, "loss": 0.035, "step": 109720 }, { "epoch": 0.14865, "grad_norm": 0.08456933498382568, "learning_rate": 2.304717668166618e-05, "loss": 0.0337, "step": 109730 }, { "epoch": 0.1487, "grad_norm": 0.07299253344535828, "learning_rate": 2.304305566420829e-05, "loss": 0.0341, "step": 109740 }, { "epoch": 0.14875, "grad_norm": 0.05998293682932854, "learning_rate": 2.30389347002525e-05, "loss": 0.0338, "step": 109750 }, { "epoch": 0.1488, "grad_norm": 0.06796760112047195, "learning_rate": 2.303481378991151e-05, "loss": 0.0343, "step": 109760 }, { "epoch": 0.14885, "grad_norm": 0.10017074644565582, "learning_rate": 2.3030692933297972e-05, "loss": 0.0362, "step": 109770 }, { "epoch": 0.1489, "grad_norm": 0.08416993916034698, "learning_rate": 2.302657213052455e-05, "loss": 0.0338, "step": 109780 }, { "epoch": 0.14895, "grad_norm": 0.07039602100849152, "learning_rate": 2.3022451381703903e-05, "loss": 0.0335, "step": 109790 }, { "epoch": 0.149, "grad_norm": 0.08664463460445404, "learning_rate": 2.301833068694869e-05, "loss": 0.0354, "step": 109800 }, { "epoch": 0.14905, "grad_norm": 0.09198068827390671, "learning_rate": 2.3014210046371576e-05, "loss": 0.0357, "step": 109810 }, { "epoch": 0.1491, "grad_norm": 0.06888342648744583, "learning_rate": 2.301008946008521e-05, "loss": 0.0338, "step": 109820 }, { "epoch": 0.14915, "grad_norm": 0.0643400326371193, "learning_rate": 2.3005968928202253e-05, "loss": 0.035, "step": 109830 }, { "epoch": 0.1492, "grad_norm": 0.06421161442995071, "learning_rate": 2.3001848450835348e-05, "loss": 0.0347, "step": 109840 }, { "epoch": 0.14925, "grad_norm": 0.06878542900085449, "learning_rate": 2.299772802809716e-05, "loss": 0.0337, "step": 109850 }, { "epoch": 0.1493, "grad_norm": 0.06569793075323105, "learning_rate": 2.299360766010034e-05, "loss": 0.0358, "step": 109860 }, { "epoch": 0.14935, "grad_norm": 0.0654708594083786, "learning_rate": 2.298948734695753e-05, "loss": 0.0339, "step": 109870 }, { "epoch": 0.1494, "grad_norm": 0.057217568159103394, "learning_rate": 2.2985367088781387e-05, "loss": 0.0342, "step": 109880 }, { "epoch": 0.14945, "grad_norm": 0.07576543837785721, "learning_rate": 2.2981246885684543e-05, "loss": 0.0345, "step": 109890 }, { "epoch": 0.1495, "grad_norm": 0.08619561791419983, "learning_rate": 2.2977126737779658e-05, "loss": 0.0375, "step": 109900 }, { "epoch": 0.14955, "grad_norm": 0.09843472391366959, "learning_rate": 2.297300664517936e-05, "loss": 0.037, "step": 109910 }, { "epoch": 0.1496, "grad_norm": 0.08414553105831146, "learning_rate": 2.2968886607996297e-05, "loss": 0.0354, "step": 109920 }, { "epoch": 0.14965, "grad_norm": 0.0866297110915184, "learning_rate": 2.2964766626343122e-05, "loss": 0.0358, "step": 109930 }, { "epoch": 0.1497, "grad_norm": 0.08381444960832596, "learning_rate": 2.296064670033245e-05, "loss": 0.035, "step": 109940 }, { "epoch": 0.14975, "grad_norm": 0.07397166639566422, "learning_rate": 2.295652683007695e-05, "loss": 0.0338, "step": 109950 }, { "epoch": 0.1498, "grad_norm": 0.07220173627138138, "learning_rate": 2.295240701568922e-05, "loss": 0.0349, "step": 109960 }, { "epoch": 0.14985, "grad_norm": 0.09002058953046799, "learning_rate": 2.294828725728192e-05, "loss": 0.0363, "step": 109970 }, { "epoch": 0.1499, "grad_norm": 0.06266094744205475, "learning_rate": 2.2944167554967675e-05, "loss": 0.0341, "step": 109980 }, { "epoch": 0.14995, "grad_norm": 0.06673440337181091, "learning_rate": 2.2940047908859114e-05, "loss": 0.0334, "step": 109990 }, { "epoch": 0.15, "grad_norm": 0.07416488975286484, "learning_rate": 2.2935928319068876e-05, "loss": 0.0364, "step": 110000 }, { "epoch": 0.15005, "grad_norm": 0.06353218108415604, "learning_rate": 2.2931808785709576e-05, "loss": 0.0338, "step": 110010 }, { "epoch": 0.1501, "grad_norm": 0.060413651168346405, "learning_rate": 2.2927689308893855e-05, "loss": 0.0335, "step": 110020 }, { "epoch": 0.15015, "grad_norm": 0.07217101007699966, "learning_rate": 2.292356988873432e-05, "loss": 0.0339, "step": 110030 }, { "epoch": 0.1502, "grad_norm": 0.06962756812572479, "learning_rate": 2.2919450525343603e-05, "loss": 0.0358, "step": 110040 }, { "epoch": 0.15025, "grad_norm": 0.06547567248344421, "learning_rate": 2.2915331218834335e-05, "loss": 0.0334, "step": 110050 }, { "epoch": 0.1503, "grad_norm": 0.06108114868402481, "learning_rate": 2.2911211969319123e-05, "loss": 0.0335, "step": 110060 }, { "epoch": 0.15035, "grad_norm": 0.06455700099468231, "learning_rate": 2.29070927769106e-05, "loss": 0.0345, "step": 110070 }, { "epoch": 0.1504, "grad_norm": 0.07985526323318481, "learning_rate": 2.2902973641721363e-05, "loss": 0.0338, "step": 110080 }, { "epoch": 0.15045, "grad_norm": 0.06909330934286118, "learning_rate": 2.289885456386405e-05, "loss": 0.0333, "step": 110090 }, { "epoch": 0.1505, "grad_norm": 0.07609910517930984, "learning_rate": 2.2894735543451255e-05, "loss": 0.0366, "step": 110100 }, { "epoch": 0.15055, "grad_norm": 0.0777604803442955, "learning_rate": 2.28906165805956e-05, "loss": 0.0344, "step": 110110 }, { "epoch": 0.1506, "grad_norm": 0.07123269885778427, "learning_rate": 2.28864976754097e-05, "loss": 0.036, "step": 110120 }, { "epoch": 0.15065, "grad_norm": 0.09285328537225723, "learning_rate": 2.288237882800615e-05, "loss": 0.0346, "step": 110130 }, { "epoch": 0.1507, "grad_norm": 0.07575418800115585, "learning_rate": 2.2878260038497584e-05, "loss": 0.0342, "step": 110140 }, { "epoch": 0.15075, "grad_norm": 0.07250441610813141, "learning_rate": 2.2874141306996576e-05, "loss": 0.0355, "step": 110150 }, { "epoch": 0.1508, "grad_norm": 0.06579921394586563, "learning_rate": 2.287002263361576e-05, "loss": 0.033, "step": 110160 }, { "epoch": 0.15085, "grad_norm": 0.10029726475477219, "learning_rate": 2.286590401846771e-05, "loss": 0.0351, "step": 110170 }, { "epoch": 0.1509, "grad_norm": 0.05899001285433769, "learning_rate": 2.2861785461665046e-05, "loss": 0.0337, "step": 110180 }, { "epoch": 0.15095, "grad_norm": 0.0677531361579895, "learning_rate": 2.285766696332037e-05, "loss": 0.0336, "step": 110190 }, { "epoch": 0.151, "grad_norm": 0.07853901386260986, "learning_rate": 2.285354852354627e-05, "loss": 0.0333, "step": 110200 }, { "epoch": 0.15105, "grad_norm": 0.07537727802991867, "learning_rate": 2.2849430142455353e-05, "loss": 0.0347, "step": 110210 }, { "epoch": 0.1511, "grad_norm": 0.09130075573921204, "learning_rate": 2.28453118201602e-05, "loss": 0.0366, "step": 110220 }, { "epoch": 0.15115, "grad_norm": 0.07668808102607727, "learning_rate": 2.2841193556773406e-05, "loss": 0.0351, "step": 110230 }, { "epoch": 0.1512, "grad_norm": 0.08927728235721588, "learning_rate": 2.2837075352407587e-05, "loss": 0.0372, "step": 110240 }, { "epoch": 0.15125, "grad_norm": 0.09870357811450958, "learning_rate": 2.2832957207175303e-05, "loss": 0.0354, "step": 110250 }, { "epoch": 0.1513, "grad_norm": 0.12028441578149796, "learning_rate": 2.2828839121189162e-05, "loss": 0.0345, "step": 110260 }, { "epoch": 0.15135, "grad_norm": 0.11618024855852127, "learning_rate": 2.282472109456174e-05, "loss": 0.0358, "step": 110270 }, { "epoch": 0.1514, "grad_norm": 0.10039499402046204, "learning_rate": 2.282060312740563e-05, "loss": 0.0356, "step": 110280 }, { "epoch": 0.15145, "grad_norm": 0.06601977348327637, "learning_rate": 2.2816485219833405e-05, "loss": 0.0334, "step": 110290 }, { "epoch": 0.1515, "grad_norm": 0.07388697564601898, "learning_rate": 2.2812367371957658e-05, "loss": 0.0383, "step": 110300 }, { "epoch": 0.15155, "grad_norm": 0.06000300496816635, "learning_rate": 2.2808249583890968e-05, "loss": 0.0347, "step": 110310 }, { "epoch": 0.1516, "grad_norm": 0.06344135105609894, "learning_rate": 2.2804131855745906e-05, "loss": 0.0346, "step": 110320 }, { "epoch": 0.15165, "grad_norm": 0.0786777138710022, "learning_rate": 2.2800014187635057e-05, "loss": 0.0352, "step": 110330 }, { "epoch": 0.1517, "grad_norm": 0.07005493342876434, "learning_rate": 2.2795896579670987e-05, "loss": 0.0352, "step": 110340 }, { "epoch": 0.15175, "grad_norm": 0.08562647551298141, "learning_rate": 2.279177903196629e-05, "loss": 0.0342, "step": 110350 }, { "epoch": 0.1518, "grad_norm": 0.07134796679019928, "learning_rate": 2.278766154463351e-05, "loss": 0.0372, "step": 110360 }, { "epoch": 0.15185, "grad_norm": 0.0698230117559433, "learning_rate": 2.2783544117785237e-05, "loss": 0.0373, "step": 110370 }, { "epoch": 0.1519, "grad_norm": 0.06944792717695236, "learning_rate": 2.277942675153404e-05, "loss": 0.034, "step": 110380 }, { "epoch": 0.15195, "grad_norm": 0.07428590208292007, "learning_rate": 2.2775309445992476e-05, "loss": 0.0367, "step": 110390 }, { "epoch": 0.152, "grad_norm": 0.0738653689622879, "learning_rate": 2.2771192201273122e-05, "loss": 0.0352, "step": 110400 }, { "epoch": 0.15205, "grad_norm": 0.06859836727380753, "learning_rate": 2.276707501748853e-05, "loss": 0.0338, "step": 110410 }, { "epoch": 0.1521, "grad_norm": 0.070834681391716, "learning_rate": 2.2762957894751264e-05, "loss": 0.0341, "step": 110420 }, { "epoch": 0.15215, "grad_norm": 0.07761197537183762, "learning_rate": 2.27588408331739e-05, "loss": 0.0348, "step": 110430 }, { "epoch": 0.1522, "grad_norm": 0.05937939137220383, "learning_rate": 2.2754723832868978e-05, "loss": 0.0349, "step": 110440 }, { "epoch": 0.15225, "grad_norm": 0.0805535688996315, "learning_rate": 2.2750606893949074e-05, "loss": 0.0358, "step": 110450 }, { "epoch": 0.1523, "grad_norm": 0.07439429312944412, "learning_rate": 2.2746490016526713e-05, "loss": 0.0354, "step": 110460 }, { "epoch": 0.15235, "grad_norm": 0.057942844927310944, "learning_rate": 2.2742373200714483e-05, "loss": 0.0338, "step": 110470 }, { "epoch": 0.1524, "grad_norm": 0.0685528889298439, "learning_rate": 2.2738256446624917e-05, "loss": 0.0328, "step": 110480 }, { "epoch": 0.15245, "grad_norm": 0.06431593745946884, "learning_rate": 2.273413975437057e-05, "loss": 0.0345, "step": 110490 }, { "epoch": 0.1525, "grad_norm": 0.07077065110206604, "learning_rate": 2.2730023124063995e-05, "loss": 0.0345, "step": 110500 }, { "epoch": 0.15255, "grad_norm": 0.07308831065893173, "learning_rate": 2.272590655581773e-05, "loss": 0.0349, "step": 110510 }, { "epoch": 0.1526, "grad_norm": 0.05925657972693443, "learning_rate": 2.2721790049744333e-05, "loss": 0.0352, "step": 110520 }, { "epoch": 0.15265, "grad_norm": 0.0709601491689682, "learning_rate": 2.271767360595633e-05, "loss": 0.0352, "step": 110530 }, { "epoch": 0.1527, "grad_norm": 0.08541488647460938, "learning_rate": 2.271355722456628e-05, "loss": 0.0358, "step": 110540 }, { "epoch": 0.15275, "grad_norm": 0.08020167052745819, "learning_rate": 2.270944090568671e-05, "loss": 0.0358, "step": 110550 }, { "epoch": 0.1528, "grad_norm": 0.0742366760969162, "learning_rate": 2.2705324649430166e-05, "loss": 0.0362, "step": 110560 }, { "epoch": 0.15285, "grad_norm": 0.07816912233829498, "learning_rate": 2.270120845590919e-05, "loss": 0.0357, "step": 110570 }, { "epoch": 0.1529, "grad_norm": 0.0812477096915245, "learning_rate": 2.2697092325236306e-05, "loss": 0.0359, "step": 110580 }, { "epoch": 0.15295, "grad_norm": 0.0714782103896141, "learning_rate": 2.269297625752406e-05, "loss": 0.0353, "step": 110590 }, { "epoch": 0.153, "grad_norm": 0.07697834074497223, "learning_rate": 2.268886025288497e-05, "loss": 0.0356, "step": 110600 }, { "epoch": 0.15305, "grad_norm": 0.0860663428902626, "learning_rate": 2.268474431143158e-05, "loss": 0.035, "step": 110610 }, { "epoch": 0.1531, "grad_norm": 0.07616297155618668, "learning_rate": 2.2680628433276398e-05, "loss": 0.0356, "step": 110620 }, { "epoch": 0.15315, "grad_norm": 0.07121719419956207, "learning_rate": 2.2676512618531964e-05, "loss": 0.034, "step": 110630 }, { "epoch": 0.1532, "grad_norm": 0.08533298224210739, "learning_rate": 2.2672396867310817e-05, "loss": 0.0364, "step": 110640 }, { "epoch": 0.15325, "grad_norm": 0.07880530506372452, "learning_rate": 2.266828117972545e-05, "loss": 0.0345, "step": 110650 }, { "epoch": 0.1533, "grad_norm": 0.07123337686061859, "learning_rate": 2.2664165555888414e-05, "loss": 0.0347, "step": 110660 }, { "epoch": 0.15335, "grad_norm": 0.0665459856390953, "learning_rate": 2.26600499959122e-05, "loss": 0.0341, "step": 110670 }, { "epoch": 0.1534, "grad_norm": 0.06696517020463943, "learning_rate": 2.2655934499909342e-05, "loss": 0.0344, "step": 110680 }, { "epoch": 0.15345, "grad_norm": 0.06726125627756119, "learning_rate": 2.2651819067992362e-05, "loss": 0.0342, "step": 110690 }, { "epoch": 0.1535, "grad_norm": 0.059936072677373886, "learning_rate": 2.264770370027376e-05, "loss": 0.0326, "step": 110700 }, { "epoch": 0.15355, "grad_norm": 0.0649225041270256, "learning_rate": 2.264358839686606e-05, "loss": 0.0332, "step": 110710 }, { "epoch": 0.1536, "grad_norm": 0.06835932284593582, "learning_rate": 2.2639473157881766e-05, "loss": 0.0339, "step": 110720 }, { "epoch": 0.15365, "grad_norm": 0.07362768799066544, "learning_rate": 2.2635357983433393e-05, "loss": 0.0334, "step": 110730 }, { "epoch": 0.1537, "grad_norm": 0.07793676108121872, "learning_rate": 2.2631242873633437e-05, "loss": 0.034, "step": 110740 }, { "epoch": 0.15375, "grad_norm": 0.08216965198516846, "learning_rate": 2.2627127828594408e-05, "loss": 0.0339, "step": 110750 }, { "epoch": 0.1538, "grad_norm": 0.08607140928506851, "learning_rate": 2.262301284842882e-05, "loss": 0.0349, "step": 110760 }, { "epoch": 0.15385, "grad_norm": 0.06718862056732178, "learning_rate": 2.2618897933249168e-05, "loss": 0.0344, "step": 110770 }, { "epoch": 0.1539, "grad_norm": 0.05782028287649155, "learning_rate": 2.2614783083167952e-05, "loss": 0.0334, "step": 110780 }, { "epoch": 0.15395, "grad_norm": 0.07843133807182312, "learning_rate": 2.2610668298297665e-05, "loss": 0.034, "step": 110790 }, { "epoch": 0.154, "grad_norm": 0.07167660444974899, "learning_rate": 2.260655357875082e-05, "loss": 0.035, "step": 110800 }, { "epoch": 0.15405, "grad_norm": 0.07105650752782822, "learning_rate": 2.260243892463989e-05, "loss": 0.0341, "step": 110810 }, { "epoch": 0.1541, "grad_norm": 0.06884513050317764, "learning_rate": 2.259832433607738e-05, "loss": 0.035, "step": 110820 }, { "epoch": 0.15415, "grad_norm": 0.06763109564781189, "learning_rate": 2.259420981317579e-05, "loss": 0.0343, "step": 110830 }, { "epoch": 0.1542, "grad_norm": 0.06715264171361923, "learning_rate": 2.2590095356047584e-05, "loss": 0.0339, "step": 110840 }, { "epoch": 0.15425, "grad_norm": 0.08633338660001755, "learning_rate": 2.2585980964805285e-05, "loss": 0.0331, "step": 110850 }, { "epoch": 0.1543, "grad_norm": 0.0647883340716362, "learning_rate": 2.2581866639561343e-05, "loss": 0.033, "step": 110860 }, { "epoch": 0.15435, "grad_norm": 0.07531040161848068, "learning_rate": 2.2577752380428265e-05, "loss": 0.0333, "step": 110870 }, { "epoch": 0.1544, "grad_norm": 0.06588154286146164, "learning_rate": 2.257363818751853e-05, "loss": 0.0347, "step": 110880 }, { "epoch": 0.15445, "grad_norm": 0.07107117772102356, "learning_rate": 2.2569524060944607e-05, "loss": 0.0351, "step": 110890 }, { "epoch": 0.1545, "grad_norm": 0.07523241639137268, "learning_rate": 2.2565410000818993e-05, "loss": 0.034, "step": 110900 }, { "epoch": 0.15455, "grad_norm": 0.08571995049715042, "learning_rate": 2.256129600725415e-05, "loss": 0.0344, "step": 110910 }, { "epoch": 0.1546, "grad_norm": 0.07107695937156677, "learning_rate": 2.255718208036256e-05, "loss": 0.0341, "step": 110920 }, { "epoch": 0.15465, "grad_norm": 0.07713331282138824, "learning_rate": 2.2553068220256693e-05, "loss": 0.0342, "step": 110930 }, { "epoch": 0.1547, "grad_norm": 0.081510029733181, "learning_rate": 2.2548954427049012e-05, "loss": 0.0328, "step": 110940 }, { "epoch": 0.15475, "grad_norm": 0.0826130211353302, "learning_rate": 2.2544840700852016e-05, "loss": 0.0345, "step": 110950 }, { "epoch": 0.1548, "grad_norm": 0.07099715620279312, "learning_rate": 2.2540727041778135e-05, "loss": 0.0325, "step": 110960 }, { "epoch": 0.15485, "grad_norm": 0.09759236872196198, "learning_rate": 2.2536613449939866e-05, "loss": 0.0359, "step": 110970 }, { "epoch": 0.1549, "grad_norm": 0.07107224315404892, "learning_rate": 2.253249992544965e-05, "loss": 0.0331, "step": 110980 }, { "epoch": 0.15495, "grad_norm": 0.06958385556936264, "learning_rate": 2.2528386468419965e-05, "loss": 0.0342, "step": 110990 }, { "epoch": 0.155, "grad_norm": 0.07820618152618408, "learning_rate": 2.252427307896326e-05, "loss": 0.0335, "step": 111000 }, { "epoch": 0.15505, "grad_norm": 0.06739425659179688, "learning_rate": 2.2520159757192004e-05, "loss": 0.0373, "step": 111010 }, { "epoch": 0.1551, "grad_norm": 0.07322181016206741, "learning_rate": 2.251604650321865e-05, "loss": 0.035, "step": 111020 }, { "epoch": 0.15515, "grad_norm": 0.06647425889968872, "learning_rate": 2.2511933317155645e-05, "loss": 0.0333, "step": 111030 }, { "epoch": 0.1552, "grad_norm": 0.06537932902574539, "learning_rate": 2.2507820199115457e-05, "loss": 0.0349, "step": 111040 }, { "epoch": 0.15525, "grad_norm": 0.06186918169260025, "learning_rate": 2.2503707149210514e-05, "loss": 0.0331, "step": 111050 }, { "epoch": 0.1553, "grad_norm": 0.06334685534238815, "learning_rate": 2.2499594167553286e-05, "loss": 0.0348, "step": 111060 }, { "epoch": 0.15535, "grad_norm": 0.07704874128103256, "learning_rate": 2.249548125425622e-05, "loss": 0.0355, "step": 111070 }, { "epoch": 0.1554, "grad_norm": 0.07260606437921524, "learning_rate": 2.249136840943175e-05, "loss": 0.0344, "step": 111080 }, { "epoch": 0.15545, "grad_norm": 0.08095771074295044, "learning_rate": 2.2487255633192335e-05, "loss": 0.0341, "step": 111090 }, { "epoch": 0.1555, "grad_norm": 0.07667740434408188, "learning_rate": 2.2483142925650398e-05, "loss": 0.0333, "step": 111100 }, { "epoch": 0.15555, "grad_norm": 0.08815068751573563, "learning_rate": 2.2479030286918392e-05, "loss": 0.0351, "step": 111110 }, { "epoch": 0.1556, "grad_norm": 0.07412435114383698, "learning_rate": 2.2474917717108746e-05, "loss": 0.0354, "step": 111120 }, { "epoch": 0.15565, "grad_norm": 0.06973432749509811, "learning_rate": 2.2470805216333894e-05, "loss": 0.0341, "step": 111130 }, { "epoch": 0.1557, "grad_norm": 0.08697531372308731, "learning_rate": 2.24666927847063e-05, "loss": 0.0384, "step": 111140 }, { "epoch": 0.15575, "grad_norm": 0.08577197045087814, "learning_rate": 2.2462580422338352e-05, "loss": 0.034, "step": 111150 }, { "epoch": 0.1558, "grad_norm": 0.1036408320069313, "learning_rate": 2.2458468129342518e-05, "loss": 0.0351, "step": 111160 }, { "epoch": 0.15585, "grad_norm": 0.08290373533964157, "learning_rate": 2.2454355905831196e-05, "loss": 0.0346, "step": 111170 }, { "epoch": 0.1559, "grad_norm": 0.06540827453136444, "learning_rate": 2.2450243751916838e-05, "loss": 0.0333, "step": 111180 }, { "epoch": 0.15595, "grad_norm": 0.07136257737874985, "learning_rate": 2.244613166771185e-05, "loss": 0.0353, "step": 111190 }, { "epoch": 0.156, "grad_norm": 0.07136812061071396, "learning_rate": 2.2442019653328667e-05, "loss": 0.0351, "step": 111200 }, { "epoch": 0.15605, "grad_norm": 0.07507219165563583, "learning_rate": 2.243790770887971e-05, "loss": 0.037, "step": 111210 }, { "epoch": 0.1561, "grad_norm": 0.0679161325097084, "learning_rate": 2.2433795834477386e-05, "loss": 0.0344, "step": 111220 }, { "epoch": 0.15615, "grad_norm": 0.062479760497808456, "learning_rate": 2.2429684030234125e-05, "loss": 0.0349, "step": 111230 }, { "epoch": 0.1562, "grad_norm": 0.07537438720464706, "learning_rate": 2.2425572296262334e-05, "loss": 0.035, "step": 111240 }, { "epoch": 0.15625, "grad_norm": 0.07191035896539688, "learning_rate": 2.2421460632674424e-05, "loss": 0.0349, "step": 111250 }, { "epoch": 0.1563, "grad_norm": 0.0660267248749733, "learning_rate": 2.2417349039582822e-05, "loss": 0.0347, "step": 111260 }, { "epoch": 0.15635, "grad_norm": 0.08792657405138016, "learning_rate": 2.2413237517099918e-05, "loss": 0.0344, "step": 111270 }, { "epoch": 0.1564, "grad_norm": 0.06152603402733803, "learning_rate": 2.2409126065338136e-05, "loss": 0.0336, "step": 111280 }, { "epoch": 0.15645, "grad_norm": 0.06601186096668243, "learning_rate": 2.2405014684409873e-05, "loss": 0.0346, "step": 111290 }, { "epoch": 0.1565, "grad_norm": 0.08689270168542862, "learning_rate": 2.2400903374427536e-05, "loss": 0.0351, "step": 111300 }, { "epoch": 0.15655, "grad_norm": 0.07292336970567703, "learning_rate": 2.2396792135503517e-05, "loss": 0.0333, "step": 111310 }, { "epoch": 0.1566, "grad_norm": 0.07466064393520355, "learning_rate": 2.2392680967750225e-05, "loss": 0.0333, "step": 111320 }, { "epoch": 0.15665, "grad_norm": 0.0883336216211319, "learning_rate": 2.238856987128006e-05, "loss": 0.0347, "step": 111330 }, { "epoch": 0.1567, "grad_norm": 0.06872241199016571, "learning_rate": 2.2384458846205404e-05, "loss": 0.0356, "step": 111340 }, { "epoch": 0.15675, "grad_norm": 0.08283447474241257, "learning_rate": 2.2380347892638677e-05, "loss": 0.0355, "step": 111350 }, { "epoch": 0.1568, "grad_norm": 0.09631136804819107, "learning_rate": 2.2376237010692235e-05, "loss": 0.0342, "step": 111360 }, { "epoch": 0.15685, "grad_norm": 0.0883079394698143, "learning_rate": 2.237212620047851e-05, "loss": 0.034, "step": 111370 }, { "epoch": 0.1569, "grad_norm": 0.0943932831287384, "learning_rate": 2.2368015462109844e-05, "loss": 0.0343, "step": 111380 }, { "epoch": 0.15695, "grad_norm": 0.07425844669342041, "learning_rate": 2.2363904795698653e-05, "loss": 0.0336, "step": 111390 }, { "epoch": 0.157, "grad_norm": 0.08522672951221466, "learning_rate": 2.2359794201357322e-05, "loss": 0.0352, "step": 111400 }, { "epoch": 0.15705, "grad_norm": 0.08357825130224228, "learning_rate": 2.235568367919822e-05, "loss": 0.0347, "step": 111410 }, { "epoch": 0.1571, "grad_norm": 0.07961345463991165, "learning_rate": 2.235157322933374e-05, "loss": 0.0344, "step": 111420 }, { "epoch": 0.15715, "grad_norm": 0.08275941759347916, "learning_rate": 2.2347462851876246e-05, "loss": 0.0341, "step": 111430 }, { "epoch": 0.1572, "grad_norm": 0.08862300217151642, "learning_rate": 2.2343352546938118e-05, "loss": 0.0351, "step": 111440 }, { "epoch": 0.15725, "grad_norm": 0.06781277805566788, "learning_rate": 2.2339242314631746e-05, "loss": 0.0339, "step": 111450 }, { "epoch": 0.1573, "grad_norm": 0.07251814752817154, "learning_rate": 2.2335132155069476e-05, "loss": 0.0333, "step": 111460 }, { "epoch": 0.15735, "grad_norm": 0.07582741230726242, "learning_rate": 2.23310220683637e-05, "loss": 0.0349, "step": 111470 }, { "epoch": 0.1574, "grad_norm": 0.06999965757131577, "learning_rate": 2.2326912054626772e-05, "loss": 0.0335, "step": 111480 }, { "epoch": 0.15745, "grad_norm": 0.07859835028648376, "learning_rate": 2.2322802113971073e-05, "loss": 0.0338, "step": 111490 }, { "epoch": 0.1575, "grad_norm": 0.05699661374092102, "learning_rate": 2.231869224650895e-05, "loss": 0.033, "step": 111500 }, { "epoch": 0.15755, "grad_norm": 0.08140408247709274, "learning_rate": 2.2314582452352774e-05, "loss": 0.0333, "step": 111510 }, { "epoch": 0.1576, "grad_norm": 0.07481715828180313, "learning_rate": 2.2310472731614912e-05, "loss": 0.035, "step": 111520 }, { "epoch": 0.15765, "grad_norm": 0.07342035323381424, "learning_rate": 2.230636308440771e-05, "loss": 0.0335, "step": 111530 }, { "epoch": 0.1577, "grad_norm": 0.08008212596178055, "learning_rate": 2.2302253510843534e-05, "loss": 0.0346, "step": 111540 }, { "epoch": 0.15775, "grad_norm": 0.09975617378950119, "learning_rate": 2.229814401103472e-05, "loss": 0.0374, "step": 111550 }, { "epoch": 0.1578, "grad_norm": 0.0929156094789505, "learning_rate": 2.2294034585093653e-05, "loss": 0.0363, "step": 111560 }, { "epoch": 0.15785, "grad_norm": 0.07880815863609314, "learning_rate": 2.228992523313265e-05, "loss": 0.035, "step": 111570 }, { "epoch": 0.1579, "grad_norm": 0.06929337233304977, "learning_rate": 2.2285815955264077e-05, "loss": 0.0346, "step": 111580 }, { "epoch": 0.15795, "grad_norm": 0.08029261231422424, "learning_rate": 2.228170675160028e-05, "loss": 0.0356, "step": 111590 }, { "epoch": 0.158, "grad_norm": 0.06501136720180511, "learning_rate": 2.227759762225359e-05, "loss": 0.0385, "step": 111600 }, { "epoch": 0.15805, "grad_norm": 0.0692141130566597, "learning_rate": 2.2273488567336366e-05, "loss": 0.0355, "step": 111610 }, { "epoch": 0.1581, "grad_norm": 0.0631387010216713, "learning_rate": 2.226937958696094e-05, "loss": 0.035, "step": 111620 }, { "epoch": 0.15815, "grad_norm": 0.0704304650425911, "learning_rate": 2.2265270681239637e-05, "loss": 0.0339, "step": 111630 }, { "epoch": 0.1582, "grad_norm": 0.078705333173275, "learning_rate": 2.2261161850284828e-05, "loss": 0.0346, "step": 111640 }, { "epoch": 0.15825, "grad_norm": 0.061205726116895676, "learning_rate": 2.2257053094208806e-05, "loss": 0.0349, "step": 111650 }, { "epoch": 0.1583, "grad_norm": 0.06317484378814697, "learning_rate": 2.225294441312394e-05, "loss": 0.0369, "step": 111660 }, { "epoch": 0.15835, "grad_norm": 0.06498158723115921, "learning_rate": 2.2248835807142525e-05, "loss": 0.0346, "step": 111670 }, { "epoch": 0.1584, "grad_norm": 0.07051656395196915, "learning_rate": 2.2244727276376918e-05, "loss": 0.0358, "step": 111680 }, { "epoch": 0.15845, "grad_norm": 0.0564606674015522, "learning_rate": 2.2240618820939424e-05, "loss": 0.034, "step": 111690 }, { "epoch": 0.1585, "grad_norm": 0.06933465600013733, "learning_rate": 2.2236510440942378e-05, "loss": 0.034, "step": 111700 }, { "epoch": 0.15855, "grad_norm": 0.06537031382322311, "learning_rate": 2.2232402136498102e-05, "loss": 0.035, "step": 111710 }, { "epoch": 0.1586, "grad_norm": 0.06910958886146545, "learning_rate": 2.2228293907718907e-05, "loss": 0.0348, "step": 111720 }, { "epoch": 0.15865, "grad_norm": 0.0797191634774208, "learning_rate": 2.2224185754717115e-05, "loss": 0.0345, "step": 111730 }, { "epoch": 0.1587, "grad_norm": 0.08333103358745575, "learning_rate": 2.2220077677605044e-05, "loss": 0.0346, "step": 111740 }, { "epoch": 0.15875, "grad_norm": 0.0689079686999321, "learning_rate": 2.2215969676495007e-05, "loss": 0.0339, "step": 111750 }, { "epoch": 0.1588, "grad_norm": 0.06846673786640167, "learning_rate": 2.2211861751499303e-05, "loss": 0.0346, "step": 111760 }, { "epoch": 0.15885, "grad_norm": 0.0739859864115715, "learning_rate": 2.2207753902730255e-05, "loss": 0.0333, "step": 111770 }, { "epoch": 0.1589, "grad_norm": 0.06391892582178116, "learning_rate": 2.2203646130300174e-05, "loss": 0.0336, "step": 111780 }, { "epoch": 0.15895, "grad_norm": 0.060105256736278534, "learning_rate": 2.2199538434321348e-05, "loss": 0.0343, "step": 111790 }, { "epoch": 0.159, "grad_norm": 0.06684567779302597, "learning_rate": 2.21954308149061e-05, "loss": 0.0328, "step": 111800 }, { "epoch": 0.15905, "grad_norm": 0.049714989960193634, "learning_rate": 2.219132327216671e-05, "loss": 0.0338, "step": 111810 }, { "epoch": 0.1591, "grad_norm": 0.06189383566379547, "learning_rate": 2.2187215806215494e-05, "loss": 0.0322, "step": 111820 }, { "epoch": 0.15915, "grad_norm": 0.05540673807263374, "learning_rate": 2.2183108417164736e-05, "loss": 0.0318, "step": 111830 }, { "epoch": 0.1592, "grad_norm": 0.06381874531507492, "learning_rate": 2.2179001105126728e-05, "loss": 0.0328, "step": 111840 }, { "epoch": 0.15925, "grad_norm": 0.06503371894359589, "learning_rate": 2.217489387021379e-05, "loss": 0.033, "step": 111850 }, { "epoch": 0.1593, "grad_norm": 0.09525395184755325, "learning_rate": 2.2170786712538176e-05, "loss": 0.0335, "step": 111860 }, { "epoch": 0.15935, "grad_norm": 0.06650565564632416, "learning_rate": 2.2166679632212203e-05, "loss": 0.0341, "step": 111870 }, { "epoch": 0.1594, "grad_norm": 0.06843181699514389, "learning_rate": 2.216257262934813e-05, "loss": 0.0337, "step": 111880 }, { "epoch": 0.15945, "grad_norm": 0.07026353478431702, "learning_rate": 2.215846570405826e-05, "loss": 0.0341, "step": 111890 }, { "epoch": 0.1595, "grad_norm": 0.0896812453866005, "learning_rate": 2.2154358856454875e-05, "loss": 0.0332, "step": 111900 }, { "epoch": 0.15955, "grad_norm": 0.07158301770687103, "learning_rate": 2.2150252086650246e-05, "loss": 0.0377, "step": 111910 }, { "epoch": 0.1596, "grad_norm": 0.07504840195178986, "learning_rate": 2.214614539475666e-05, "loss": 0.033, "step": 111920 }, { "epoch": 0.15965, "grad_norm": 0.06362754851579666, "learning_rate": 2.2142038780886382e-05, "loss": 0.0347, "step": 111930 }, { "epoch": 0.1597, "grad_norm": 0.059492312371730804, "learning_rate": 2.2137932245151692e-05, "loss": 0.0333, "step": 111940 }, { "epoch": 0.15975, "grad_norm": 0.05978335440158844, "learning_rate": 2.2133825787664855e-05, "loss": 0.0329, "step": 111950 }, { "epoch": 0.1598, "grad_norm": 0.07219391316175461, "learning_rate": 2.212971940853814e-05, "loss": 0.0345, "step": 111960 }, { "epoch": 0.15985, "grad_norm": 0.06794709712266922, "learning_rate": 2.212561310788383e-05, "loss": 0.0346, "step": 111970 }, { "epoch": 0.1599, "grad_norm": 0.06835256516933441, "learning_rate": 2.212150688581417e-05, "loss": 0.0338, "step": 111980 }, { "epoch": 0.15995, "grad_norm": 0.07163048535585403, "learning_rate": 2.2117400742441437e-05, "loss": 0.0351, "step": 111990 }, { "epoch": 0.16, "grad_norm": 0.06418144702911377, "learning_rate": 2.2113294677877877e-05, "loss": 0.0356, "step": 112000 }, { "epoch": 0.16005, "grad_norm": 0.06732147932052612, "learning_rate": 2.2109188692235767e-05, "loss": 0.0353, "step": 112010 }, { "epoch": 0.1601, "grad_norm": 0.07495000958442688, "learning_rate": 2.210508278562734e-05, "loss": 0.0352, "step": 112020 }, { "epoch": 0.16015, "grad_norm": 0.07209627330303192, "learning_rate": 2.2100976958164864e-05, "loss": 0.0357, "step": 112030 }, { "epoch": 0.1602, "grad_norm": 0.0804656371474266, "learning_rate": 2.2096871209960598e-05, "loss": 0.0347, "step": 112040 }, { "epoch": 0.16025, "grad_norm": 0.10187839716672897, "learning_rate": 2.209276554112677e-05, "loss": 0.0365, "step": 112050 }, { "epoch": 0.1603, "grad_norm": 0.0867825597524643, "learning_rate": 2.2088659951775654e-05, "loss": 0.0365, "step": 112060 }, { "epoch": 0.16035, "grad_norm": 0.1051550805568695, "learning_rate": 2.2084554442019467e-05, "loss": 0.0368, "step": 112070 }, { "epoch": 0.1604, "grad_norm": 0.0879400372505188, "learning_rate": 2.208044901197047e-05, "loss": 0.0378, "step": 112080 }, { "epoch": 0.16045, "grad_norm": 0.11434191465377808, "learning_rate": 2.2076343661740907e-05, "loss": 0.0352, "step": 112090 }, { "epoch": 0.1605, "grad_norm": 0.06594059616327286, "learning_rate": 2.2072238391443004e-05, "loss": 0.0351, "step": 112100 }, { "epoch": 0.16055, "grad_norm": 0.0907931923866272, "learning_rate": 2.206813320118901e-05, "loss": 0.0352, "step": 112110 }, { "epoch": 0.1606, "grad_norm": 0.06998275220394135, "learning_rate": 2.2064028091091144e-05, "loss": 0.033, "step": 112120 }, { "epoch": 0.16065, "grad_norm": 0.07440738379955292, "learning_rate": 2.2059923061261656e-05, "loss": 0.0333, "step": 112130 }, { "epoch": 0.1607, "grad_norm": 0.06170496344566345, "learning_rate": 2.205581811181276e-05, "loss": 0.0348, "step": 112140 }, { "epoch": 0.16075, "grad_norm": 0.06256181746721268, "learning_rate": 2.2051713242856682e-05, "loss": 0.0356, "step": 112150 }, { "epoch": 0.1608, "grad_norm": 0.07359161972999573, "learning_rate": 2.204760845450568e-05, "loss": 0.0348, "step": 112160 }, { "epoch": 0.16085, "grad_norm": 0.06576690077781677, "learning_rate": 2.2043503746871933e-05, "loss": 0.0338, "step": 112170 }, { "epoch": 0.1609, "grad_norm": 0.06027120724320412, "learning_rate": 2.2039399120067694e-05, "loss": 0.0344, "step": 112180 }, { "epoch": 0.16095, "grad_norm": 0.06426198035478592, "learning_rate": 2.2035294574205166e-05, "loss": 0.0362, "step": 112190 }, { "epoch": 0.161, "grad_norm": 0.0731712356209755, "learning_rate": 2.2031190109396575e-05, "loss": 0.0366, "step": 112200 }, { "epoch": 0.16105, "grad_norm": 0.0613652728497982, "learning_rate": 2.2027085725754126e-05, "loss": 0.0342, "step": 112210 }, { "epoch": 0.1611, "grad_norm": 0.061248842626810074, "learning_rate": 2.202298142339004e-05, "loss": 0.0336, "step": 112220 }, { "epoch": 0.16115, "grad_norm": 0.06405367702245712, "learning_rate": 2.201887720241652e-05, "loss": 0.0366, "step": 112230 }, { "epoch": 0.1612, "grad_norm": 0.062023404985666275, "learning_rate": 2.2014773062945777e-05, "loss": 0.0338, "step": 112240 }, { "epoch": 0.16125, "grad_norm": 0.06777895987033844, "learning_rate": 2.2010669005090025e-05, "loss": 0.0346, "step": 112250 }, { "epoch": 0.1613, "grad_norm": 0.06935904920101166, "learning_rate": 2.2006565028961447e-05, "loss": 0.036, "step": 112260 }, { "epoch": 0.16135, "grad_norm": 0.08053556829690933, "learning_rate": 2.200246113467226e-05, "loss": 0.035, "step": 112270 }, { "epoch": 0.1614, "grad_norm": 0.0714808776974678, "learning_rate": 2.1998357322334666e-05, "loss": 0.0351, "step": 112280 }, { "epoch": 0.16145, "grad_norm": 0.07520432770252228, "learning_rate": 2.199425359206085e-05, "loss": 0.0362, "step": 112290 }, { "epoch": 0.1615, "grad_norm": 0.06644560396671295, "learning_rate": 2.199014994396302e-05, "loss": 0.0346, "step": 112300 }, { "epoch": 0.16155, "grad_norm": 0.07510702311992645, "learning_rate": 2.1986046378153348e-05, "loss": 0.035, "step": 112310 }, { "epoch": 0.1616, "grad_norm": 0.05435965955257416, "learning_rate": 2.1981942894744045e-05, "loss": 0.0331, "step": 112320 }, { "epoch": 0.16165, "grad_norm": 0.06645798683166504, "learning_rate": 2.1977839493847284e-05, "loss": 0.034, "step": 112330 }, { "epoch": 0.1617, "grad_norm": 0.06540507078170776, "learning_rate": 2.197373617557525e-05, "loss": 0.0335, "step": 112340 }, { "epoch": 0.16175, "grad_norm": 0.06752929836511612, "learning_rate": 2.196963294004015e-05, "loss": 0.0339, "step": 112350 }, { "epoch": 0.1618, "grad_norm": 0.05909738317131996, "learning_rate": 2.1965529787354127e-05, "loss": 0.0327, "step": 112360 }, { "epoch": 0.16185, "grad_norm": 0.06119058281183243, "learning_rate": 2.19614267176294e-05, "loss": 0.0332, "step": 112370 }, { "epoch": 0.1619, "grad_norm": 0.06323596090078354, "learning_rate": 2.1957323730978104e-05, "loss": 0.0332, "step": 112380 }, { "epoch": 0.16195, "grad_norm": 0.07488784939050674, "learning_rate": 2.195322082751245e-05, "loss": 0.0338, "step": 112390 }, { "epoch": 0.162, "grad_norm": 0.06295579671859741, "learning_rate": 2.1949118007344584e-05, "loss": 0.0335, "step": 112400 }, { "epoch": 0.16205, "grad_norm": 0.05650020018219948, "learning_rate": 2.194501527058669e-05, "loss": 0.0331, "step": 112410 }, { "epoch": 0.1621, "grad_norm": 0.07044333964586258, "learning_rate": 2.1940912617350932e-05, "loss": 0.0334, "step": 112420 }, { "epoch": 0.16215, "grad_norm": 0.06065429747104645, "learning_rate": 2.193681004774947e-05, "loss": 0.0338, "step": 112430 }, { "epoch": 0.1622, "grad_norm": 0.06744283437728882, "learning_rate": 2.1932707561894474e-05, "loss": 0.0343, "step": 112440 }, { "epoch": 0.16225, "grad_norm": 0.06047586351633072, "learning_rate": 2.1928605159898098e-05, "loss": 0.0337, "step": 112450 }, { "epoch": 0.1623, "grad_norm": 0.07155334949493408, "learning_rate": 2.19245028418725e-05, "loss": 0.0359, "step": 112460 }, { "epoch": 0.16235, "grad_norm": 0.08805252611637115, "learning_rate": 2.1920400607929845e-05, "loss": 0.0356, "step": 112470 }, { "epoch": 0.1624, "grad_norm": 0.07390435039997101, "learning_rate": 2.1916298458182276e-05, "loss": 0.035, "step": 112480 }, { "epoch": 0.16245, "grad_norm": 0.09888370335102081, "learning_rate": 2.1912196392741956e-05, "loss": 0.0361, "step": 112490 }, { "epoch": 0.1625, "grad_norm": 0.06713777035474777, "learning_rate": 2.190809441172102e-05, "loss": 0.0349, "step": 112500 }, { "epoch": 0.16255, "grad_norm": 0.07125592231750488, "learning_rate": 2.1903992515231626e-05, "loss": 0.0342, "step": 112510 }, { "epoch": 0.1626, "grad_norm": 0.0708683654665947, "learning_rate": 2.189989070338591e-05, "loss": 0.0356, "step": 112520 }, { "epoch": 0.16265, "grad_norm": 0.0808732882142067, "learning_rate": 2.1895788976296018e-05, "loss": 0.0346, "step": 112530 }, { "epoch": 0.1627, "grad_norm": 0.08089447021484375, "learning_rate": 2.189168733407409e-05, "loss": 0.0346, "step": 112540 }, { "epoch": 0.16275, "grad_norm": 0.07411898672580719, "learning_rate": 2.1887585776832255e-05, "loss": 0.0346, "step": 112550 }, { "epoch": 0.1628, "grad_norm": 0.06882666796445847, "learning_rate": 2.1883484304682672e-05, "loss": 0.0356, "step": 112560 }, { "epoch": 0.16285, "grad_norm": 0.0912306159734726, "learning_rate": 2.187938291773744e-05, "loss": 0.0365, "step": 112570 }, { "epoch": 0.1629, "grad_norm": 0.06964357942342758, "learning_rate": 2.1875281616108725e-05, "loss": 0.0334, "step": 112580 }, { "epoch": 0.16295, "grad_norm": 0.06867384910583496, "learning_rate": 2.187118039990862e-05, "loss": 0.037, "step": 112590 }, { "epoch": 0.163, "grad_norm": 0.06882866472005844, "learning_rate": 2.186707926924927e-05, "loss": 0.0346, "step": 112600 }, { "epoch": 0.16305, "grad_norm": 0.06393932551145554, "learning_rate": 2.18629782242428e-05, "loss": 0.034, "step": 112610 }, { "epoch": 0.1631, "grad_norm": 0.07947162538766861, "learning_rate": 2.1858877265001327e-05, "loss": 0.0337, "step": 112620 }, { "epoch": 0.16315, "grad_norm": 0.06633418053388596, "learning_rate": 2.1854776391636973e-05, "loss": 0.0347, "step": 112630 }, { "epoch": 0.1632, "grad_norm": 0.06981424242258072, "learning_rate": 2.1850675604261845e-05, "loss": 0.036, "step": 112640 }, { "epoch": 0.16325, "grad_norm": 0.06863073259592056, "learning_rate": 2.1846574902988056e-05, "loss": 0.0348, "step": 112650 }, { "epoch": 0.1633, "grad_norm": 0.06586696952581406, "learning_rate": 2.1842474287927744e-05, "loss": 0.0347, "step": 112660 }, { "epoch": 0.16335, "grad_norm": 0.08072131872177124, "learning_rate": 2.1838373759192978e-05, "loss": 0.0362, "step": 112670 }, { "epoch": 0.1634, "grad_norm": 0.1065782681107521, "learning_rate": 2.18342733168959e-05, "loss": 0.0379, "step": 112680 }, { "epoch": 0.16345, "grad_norm": 0.08189694583415985, "learning_rate": 2.1830172961148594e-05, "loss": 0.0363, "step": 112690 }, { "epoch": 0.1635, "grad_norm": 0.07649283111095428, "learning_rate": 2.1826072692063175e-05, "loss": 0.0345, "step": 112700 }, { "epoch": 0.16355, "grad_norm": 0.08806269615888596, "learning_rate": 2.1821972509751728e-05, "loss": 0.0339, "step": 112710 }, { "epoch": 0.1636, "grad_norm": 0.10222853720188141, "learning_rate": 2.181787241432636e-05, "loss": 0.0358, "step": 112720 }, { "epoch": 0.16365, "grad_norm": 0.07381557673215866, "learning_rate": 2.181377240589917e-05, "loss": 0.0348, "step": 112730 }, { "epoch": 0.1637, "grad_norm": 0.08653289079666138, "learning_rate": 2.180967248458224e-05, "loss": 0.0346, "step": 112740 }, { "epoch": 0.16375, "grad_norm": 0.06406396627426147, "learning_rate": 2.180557265048767e-05, "loss": 0.0342, "step": 112750 }, { "epoch": 0.1638, "grad_norm": 0.07045432925224304, "learning_rate": 2.1801472903727534e-05, "loss": 0.0339, "step": 112760 }, { "epoch": 0.16385, "grad_norm": 0.07248934358358383, "learning_rate": 2.1797373244413947e-05, "loss": 0.0348, "step": 112770 }, { "epoch": 0.1639, "grad_norm": 0.07276508212089539, "learning_rate": 2.179327367265895e-05, "loss": 0.0369, "step": 112780 }, { "epoch": 0.16395, "grad_norm": 0.06122051179409027, "learning_rate": 2.1789174188574654e-05, "loss": 0.0345, "step": 112790 }, { "epoch": 0.164, "grad_norm": 0.07430427521467209, "learning_rate": 2.1785074792273136e-05, "loss": 0.0349, "step": 112800 }, { "epoch": 0.16405, "grad_norm": 0.07259289920330048, "learning_rate": 2.178097548386646e-05, "loss": 0.0337, "step": 112810 }, { "epoch": 0.1641, "grad_norm": 0.06403262168169022, "learning_rate": 2.1776876263466707e-05, "loss": 0.0349, "step": 112820 }, { "epoch": 0.16415, "grad_norm": 0.07015439867973328, "learning_rate": 2.1772777131185945e-05, "loss": 0.0346, "step": 112830 }, { "epoch": 0.1642, "grad_norm": 0.06901204586029053, "learning_rate": 2.1768678087136235e-05, "loss": 0.0338, "step": 112840 }, { "epoch": 0.16425, "grad_norm": 0.07864746451377869, "learning_rate": 2.1764579131429668e-05, "loss": 0.0347, "step": 112850 }, { "epoch": 0.1643, "grad_norm": 0.06460756063461304, "learning_rate": 2.1760480264178278e-05, "loss": 0.0333, "step": 112860 }, { "epoch": 0.16435, "grad_norm": 0.06273296475410461, "learning_rate": 2.1756381485494158e-05, "loss": 0.0336, "step": 112870 }, { "epoch": 0.1644, "grad_norm": 0.06931041181087494, "learning_rate": 2.175228279548933e-05, "loss": 0.0341, "step": 112880 }, { "epoch": 0.16445, "grad_norm": 0.07009255886077881, "learning_rate": 2.1748184194275882e-05, "loss": 0.0343, "step": 112890 }, { "epoch": 0.1645, "grad_norm": 0.08870544284582138, "learning_rate": 2.1744085681965853e-05, "loss": 0.0334, "step": 112900 }, { "epoch": 0.16455, "grad_norm": 0.08789685368537903, "learning_rate": 2.1739987258671295e-05, "loss": 0.0347, "step": 112910 }, { "epoch": 0.1646, "grad_norm": 0.0757160559296608, "learning_rate": 2.173588892450427e-05, "loss": 0.0338, "step": 112920 }, { "epoch": 0.16465, "grad_norm": 0.05919770896434784, "learning_rate": 2.1731790679576807e-05, "loss": 0.0334, "step": 112930 }, { "epoch": 0.1647, "grad_norm": 0.0561647042632103, "learning_rate": 2.1727692524000968e-05, "loss": 0.0348, "step": 112940 }, { "epoch": 0.16475, "grad_norm": 0.06535935401916504, "learning_rate": 2.172359445788878e-05, "loss": 0.0335, "step": 112950 }, { "epoch": 0.1648, "grad_norm": 0.06926766037940979, "learning_rate": 2.1719496481352293e-05, "loss": 0.0336, "step": 112960 }, { "epoch": 0.16485, "grad_norm": 0.07666823267936707, "learning_rate": 2.1715398594503525e-05, "loss": 0.0333, "step": 112970 }, { "epoch": 0.1649, "grad_norm": 0.07533027976751328, "learning_rate": 2.1711300797454533e-05, "loss": 0.0349, "step": 112980 }, { "epoch": 0.16495, "grad_norm": 0.06927791237831116, "learning_rate": 2.170720309031735e-05, "loss": 0.0343, "step": 112990 }, { "epoch": 0.165, "grad_norm": 0.06924304366111755, "learning_rate": 2.1703105473203988e-05, "loss": 0.0338, "step": 113000 }, { "epoch": 0.16505, "grad_norm": 0.0706871822476387, "learning_rate": 2.1699007946226495e-05, "loss": 0.0362, "step": 113010 }, { "epoch": 0.1651, "grad_norm": 0.08440165221691132, "learning_rate": 2.1694910509496872e-05, "loss": 0.0342, "step": 113020 }, { "epoch": 0.16515, "grad_norm": 0.07027032971382141, "learning_rate": 2.1690813163127166e-05, "loss": 0.0348, "step": 113030 }, { "epoch": 0.1652, "grad_norm": 0.07992295175790787, "learning_rate": 2.1686715907229378e-05, "loss": 0.0332, "step": 113040 }, { "epoch": 0.16525, "grad_norm": 0.07327957451343536, "learning_rate": 2.1682618741915522e-05, "loss": 0.0338, "step": 113050 }, { "epoch": 0.1653, "grad_norm": 0.07747048139572144, "learning_rate": 2.1678521667297648e-05, "loss": 0.0337, "step": 113060 }, { "epoch": 0.16535, "grad_norm": 0.07693877816200256, "learning_rate": 2.167442468348772e-05, "loss": 0.0336, "step": 113070 }, { "epoch": 0.1654, "grad_norm": 0.0580328106880188, "learning_rate": 2.167032779059779e-05, "loss": 0.0337, "step": 113080 }, { "epoch": 0.16545, "grad_norm": 0.0614556185901165, "learning_rate": 2.1666230988739833e-05, "loss": 0.0328, "step": 113090 }, { "epoch": 0.1655, "grad_norm": 0.06618588417768478, "learning_rate": 2.1662134278025873e-05, "loss": 0.0332, "step": 113100 }, { "epoch": 0.16555, "grad_norm": 0.06605345755815506, "learning_rate": 2.1658037658567913e-05, "loss": 0.0326, "step": 113110 }, { "epoch": 0.1656, "grad_norm": 0.11311843246221542, "learning_rate": 2.165394113047794e-05, "loss": 0.0351, "step": 113120 }, { "epoch": 0.16565, "grad_norm": 0.06903208792209625, "learning_rate": 2.1649844693867968e-05, "loss": 0.0332, "step": 113130 }, { "epoch": 0.1657, "grad_norm": 0.07519102841615677, "learning_rate": 2.1645748348849977e-05, "loss": 0.0329, "step": 113140 }, { "epoch": 0.16575, "grad_norm": 0.07155120372772217, "learning_rate": 2.164165209553597e-05, "loss": 0.0331, "step": 113150 }, { "epoch": 0.1658, "grad_norm": 0.0691385492682457, "learning_rate": 2.163755593403793e-05, "loss": 0.0339, "step": 113160 }, { "epoch": 0.16585, "grad_norm": 0.08293487131595612, "learning_rate": 2.1633459864467843e-05, "loss": 0.0344, "step": 113170 }, { "epoch": 0.1659, "grad_norm": 0.07103066146373749, "learning_rate": 2.1629363886937705e-05, "loss": 0.0353, "step": 113180 }, { "epoch": 0.16595, "grad_norm": 0.07060660421848297, "learning_rate": 2.162526800155949e-05, "loss": 0.0347, "step": 113190 }, { "epoch": 0.166, "grad_norm": 0.06417767703533173, "learning_rate": 2.162117220844519e-05, "loss": 0.0338, "step": 113200 }, { "epoch": 0.16605, "grad_norm": 0.0670657530426979, "learning_rate": 2.1617076507706756e-05, "loss": 0.0343, "step": 113210 }, { "epoch": 0.1661, "grad_norm": 0.06350884586572647, "learning_rate": 2.1612980899456192e-05, "loss": 0.035, "step": 113220 }, { "epoch": 0.16615, "grad_norm": 0.06385102868080139, "learning_rate": 2.1608885383805453e-05, "loss": 0.0354, "step": 113230 }, { "epoch": 0.1662, "grad_norm": 0.06831283122301102, "learning_rate": 2.160478996086651e-05, "loss": 0.0354, "step": 113240 }, { "epoch": 0.16625, "grad_norm": 0.06666000932455063, "learning_rate": 2.1600694630751343e-05, "loss": 0.0345, "step": 113250 }, { "epoch": 0.1663, "grad_norm": 0.06278067827224731, "learning_rate": 2.1596599393571894e-05, "loss": 0.0339, "step": 113260 }, { "epoch": 0.16635, "grad_norm": 0.05852845311164856, "learning_rate": 2.1592504249440156e-05, "loss": 0.0377, "step": 113270 }, { "epoch": 0.1664, "grad_norm": 0.05839408561587334, "learning_rate": 2.1588409198468056e-05, "loss": 0.0342, "step": 113280 }, { "epoch": 0.16645, "grad_norm": 0.06353548914194107, "learning_rate": 2.158431424076757e-05, "loss": 0.0339, "step": 113290 }, { "epoch": 0.1665, "grad_norm": 0.0663408488035202, "learning_rate": 2.1580219376450657e-05, "loss": 0.0339, "step": 113300 }, { "epoch": 0.16655, "grad_norm": 0.07321648299694061, "learning_rate": 2.1576124605629256e-05, "loss": 0.0346, "step": 113310 }, { "epoch": 0.1666, "grad_norm": 0.07605119794607162, "learning_rate": 2.1572029928415326e-05, "loss": 0.035, "step": 113320 }, { "epoch": 0.16665, "grad_norm": 0.060462381690740585, "learning_rate": 2.1567935344920805e-05, "loss": 0.0342, "step": 113330 }, { "epoch": 0.1667, "grad_norm": 0.0616709403693676, "learning_rate": 2.156384085525765e-05, "loss": 0.0334, "step": 113340 }, { "epoch": 0.16675, "grad_norm": 0.06495338678359985, "learning_rate": 2.155974645953779e-05, "loss": 0.0331, "step": 113350 }, { "epoch": 0.1668, "grad_norm": 0.06545952707529068, "learning_rate": 2.155565215787316e-05, "loss": 0.0332, "step": 113360 }, { "epoch": 0.16685, "grad_norm": 0.07933729887008667, "learning_rate": 2.1551557950375725e-05, "loss": 0.0334, "step": 113370 }, { "epoch": 0.1669, "grad_norm": 0.07859945297241211, "learning_rate": 2.1547463837157382e-05, "loss": 0.0336, "step": 113380 }, { "epoch": 0.16695, "grad_norm": 0.05592895671725273, "learning_rate": 2.1543369818330094e-05, "loss": 0.0331, "step": 113390 }, { "epoch": 0.167, "grad_norm": 0.07539980113506317, "learning_rate": 2.153927589400577e-05, "loss": 0.0339, "step": 113400 }, { "epoch": 0.16705, "grad_norm": 0.05995357781648636, "learning_rate": 2.1535182064296347e-05, "loss": 0.0325, "step": 113410 }, { "epoch": 0.1671, "grad_norm": 0.06396167725324631, "learning_rate": 2.1531088329313743e-05, "loss": 0.033, "step": 113420 }, { "epoch": 0.16715, "grad_norm": 0.06876359134912491, "learning_rate": 2.1526994689169878e-05, "loss": 0.0344, "step": 113430 }, { "epoch": 0.1672, "grad_norm": 0.07557900995016098, "learning_rate": 2.1522901143976675e-05, "loss": 0.0345, "step": 113440 }, { "epoch": 0.16725, "grad_norm": 0.0743618831038475, "learning_rate": 2.151880769384605e-05, "loss": 0.0339, "step": 113450 }, { "epoch": 0.1673, "grad_norm": 0.06382055580615997, "learning_rate": 2.1514714338889914e-05, "loss": 0.0354, "step": 113460 }, { "epoch": 0.16735, "grad_norm": 0.06299655139446259, "learning_rate": 2.1510621079220174e-05, "loss": 0.0335, "step": 113470 }, { "epoch": 0.1674, "grad_norm": 0.05774754658341408, "learning_rate": 2.1506527914948746e-05, "loss": 0.0338, "step": 113480 }, { "epoch": 0.16745, "grad_norm": 0.05452101677656174, "learning_rate": 2.1502434846187536e-05, "loss": 0.0376, "step": 113490 }, { "epoch": 0.1675, "grad_norm": 0.06228433549404144, "learning_rate": 2.149834187304844e-05, "loss": 0.0349, "step": 113500 }, { "epoch": 0.16755, "grad_norm": 0.06229841336607933, "learning_rate": 2.1494248995643366e-05, "loss": 0.0345, "step": 113510 }, { "epoch": 0.1676, "grad_norm": 0.05689483880996704, "learning_rate": 2.1490156214084202e-05, "loss": 0.0351, "step": 113520 }, { "epoch": 0.16765, "grad_norm": 0.060247309505939484, "learning_rate": 2.1486063528482853e-05, "loss": 0.0349, "step": 113530 }, { "epoch": 0.1677, "grad_norm": 0.06565812230110168, "learning_rate": 2.1481970938951204e-05, "loss": 0.0345, "step": 113540 }, { "epoch": 0.16775, "grad_norm": 0.05560121685266495, "learning_rate": 2.147787844560114e-05, "loss": 0.033, "step": 113550 }, { "epoch": 0.1678, "grad_norm": 0.07955660670995712, "learning_rate": 2.1473786048544576e-05, "loss": 0.0343, "step": 113560 }, { "epoch": 0.16785, "grad_norm": 0.06279899179935455, "learning_rate": 2.1469693747893355e-05, "loss": 0.0341, "step": 113570 }, { "epoch": 0.1679, "grad_norm": 0.07042830437421799, "learning_rate": 2.14656015437594e-05, "loss": 0.0356, "step": 113580 }, { "epoch": 0.16795, "grad_norm": 0.08455251157283783, "learning_rate": 2.1461509436254557e-05, "loss": 0.0355, "step": 113590 }, { "epoch": 0.168, "grad_norm": 0.07407843321561813, "learning_rate": 2.1457417425490723e-05, "loss": 0.0361, "step": 113600 }, { "epoch": 0.16805, "grad_norm": 0.06888583302497864, "learning_rate": 2.1453325511579764e-05, "loss": 0.0351, "step": 113610 }, { "epoch": 0.1681, "grad_norm": 0.0661529004573822, "learning_rate": 2.144923369463355e-05, "loss": 0.0342, "step": 113620 }, { "epoch": 0.16815, "grad_norm": 0.07152344286441803, "learning_rate": 2.1445141974763962e-05, "loss": 0.0369, "step": 113630 }, { "epoch": 0.1682, "grad_norm": 0.07802043855190277, "learning_rate": 2.1441050352082848e-05, "loss": 0.0364, "step": 113640 }, { "epoch": 0.16825, "grad_norm": 0.06816740334033966, "learning_rate": 2.1436958826702086e-05, "loss": 0.035, "step": 113650 }, { "epoch": 0.1683, "grad_norm": 0.07369918376207352, "learning_rate": 2.143286739873353e-05, "loss": 0.0359, "step": 113660 }, { "epoch": 0.16835, "grad_norm": 0.07201619446277618, "learning_rate": 2.1428776068289028e-05, "loss": 0.0348, "step": 113670 }, { "epoch": 0.1684, "grad_norm": 0.06485801190137863, "learning_rate": 2.1424684835480456e-05, "loss": 0.0348, "step": 113680 }, { "epoch": 0.16845, "grad_norm": 0.07644911110401154, "learning_rate": 2.142059370041966e-05, "loss": 0.0398, "step": 113690 }, { "epoch": 0.1685, "grad_norm": 0.08884541690349579, "learning_rate": 2.141650266321849e-05, "loss": 0.0362, "step": 113700 }, { "epoch": 0.16855, "grad_norm": 0.060787659138441086, "learning_rate": 2.1412411723988783e-05, "loss": 0.0345, "step": 113710 }, { "epoch": 0.1686, "grad_norm": 0.05763450637459755, "learning_rate": 2.1408320882842398e-05, "loss": 0.0342, "step": 113720 }, { "epoch": 0.16865, "grad_norm": 0.07546335458755493, "learning_rate": 2.1404230139891167e-05, "loss": 0.0378, "step": 113730 }, { "epoch": 0.1687, "grad_norm": 0.06813129037618637, "learning_rate": 2.140013949524693e-05, "loss": 0.034, "step": 113740 }, { "epoch": 0.16875, "grad_norm": 0.06949824094772339, "learning_rate": 2.1396048949021536e-05, "loss": 0.0341, "step": 113750 }, { "epoch": 0.1688, "grad_norm": 0.07007627934217453, "learning_rate": 2.1391958501326793e-05, "loss": 0.0343, "step": 113760 }, { "epoch": 0.16885, "grad_norm": 0.0673021450638771, "learning_rate": 2.138786815227457e-05, "loss": 0.0342, "step": 113770 }, { "epoch": 0.1689, "grad_norm": 0.058130063116550446, "learning_rate": 2.1383777901976658e-05, "loss": 0.0336, "step": 113780 }, { "epoch": 0.16895, "grad_norm": 0.05610980838537216, "learning_rate": 2.137968775054492e-05, "loss": 0.0341, "step": 113790 }, { "epoch": 0.169, "grad_norm": 0.07827328145503998, "learning_rate": 2.1375597698091136e-05, "loss": 0.0361, "step": 113800 }, { "epoch": 0.16905, "grad_norm": 0.06286213546991348, "learning_rate": 2.1371507744727155e-05, "loss": 0.0342, "step": 113810 }, { "epoch": 0.1691, "grad_norm": 0.0825994536280632, "learning_rate": 2.1367417890564797e-05, "loss": 0.0357, "step": 113820 }, { "epoch": 0.16915, "grad_norm": 0.07677712291479111, "learning_rate": 2.1363328135715867e-05, "loss": 0.0332, "step": 113830 }, { "epoch": 0.1692, "grad_norm": 0.09539990872144699, "learning_rate": 2.135923848029218e-05, "loss": 0.036, "step": 113840 }, { "epoch": 0.16925, "grad_norm": 0.08900941163301468, "learning_rate": 2.1355148924405537e-05, "loss": 0.0348, "step": 113850 }, { "epoch": 0.1693, "grad_norm": 0.08057425916194916, "learning_rate": 2.1351059468167755e-05, "loss": 0.0345, "step": 113860 }, { "epoch": 0.16935, "grad_norm": 0.0812041163444519, "learning_rate": 2.1346970111690647e-05, "loss": 0.0352, "step": 113870 }, { "epoch": 0.1694, "grad_norm": 0.07820922881364822, "learning_rate": 2.1342880855085988e-05, "loss": 0.035, "step": 113880 }, { "epoch": 0.16945, "grad_norm": 0.08901207149028778, "learning_rate": 2.1338791698465606e-05, "loss": 0.034, "step": 113890 }, { "epoch": 0.1695, "grad_norm": 0.07280625402927399, "learning_rate": 2.133470264194128e-05, "loss": 0.0339, "step": 113900 }, { "epoch": 0.16955, "grad_norm": 0.06772835552692413, "learning_rate": 2.133061368562481e-05, "loss": 0.0344, "step": 113910 }, { "epoch": 0.1696, "grad_norm": 0.07976733148097992, "learning_rate": 2.1326524829627974e-05, "loss": 0.0348, "step": 113920 }, { "epoch": 0.16965, "grad_norm": 0.0896020233631134, "learning_rate": 2.1322436074062575e-05, "loss": 0.0364, "step": 113930 }, { "epoch": 0.1697, "grad_norm": 0.0783344954252243, "learning_rate": 2.1318347419040393e-05, "loss": 0.0344, "step": 113940 }, { "epoch": 0.16975, "grad_norm": 0.06524500995874405, "learning_rate": 2.1314258864673207e-05, "loss": 0.0337, "step": 113950 }, { "epoch": 0.1698, "grad_norm": 0.0689595490694046, "learning_rate": 2.1310170411072803e-05, "loss": 0.0343, "step": 113960 }, { "epoch": 0.16985, "grad_norm": 0.06631206721067429, "learning_rate": 2.1306082058350944e-05, "loss": 0.033, "step": 113970 }, { "epoch": 0.1699, "grad_norm": 0.06748275458812714, "learning_rate": 2.130199380661943e-05, "loss": 0.034, "step": 113980 }, { "epoch": 0.16995, "grad_norm": 0.07014484703540802, "learning_rate": 2.1297905655989997e-05, "loss": 0.0343, "step": 113990 }, { "epoch": 0.17, "grad_norm": 0.0744910016655922, "learning_rate": 2.129381760657444e-05, "loss": 0.0331, "step": 114000 }, { "epoch": 0.17005, "grad_norm": 0.06807012856006622, "learning_rate": 2.128972965848452e-05, "loss": 0.0353, "step": 114010 }, { "epoch": 0.1701, "grad_norm": 0.07064221054315567, "learning_rate": 2.1285641811831997e-05, "loss": 0.0336, "step": 114020 }, { "epoch": 0.17015, "grad_norm": 0.08048601448535919, "learning_rate": 2.1281554066728636e-05, "loss": 0.0342, "step": 114030 }, { "epoch": 0.1702, "grad_norm": 0.08655247092247009, "learning_rate": 2.1277466423286183e-05, "loss": 0.0351, "step": 114040 }, { "epoch": 0.17025, "grad_norm": 0.06995043903589249, "learning_rate": 2.1273378881616393e-05, "loss": 0.0334, "step": 114050 }, { "epoch": 0.1703, "grad_norm": 0.05340658500790596, "learning_rate": 2.1269291441831042e-05, "loss": 0.0335, "step": 114060 }, { "epoch": 0.17035, "grad_norm": 0.06602095067501068, "learning_rate": 2.1265204104041845e-05, "loss": 0.0329, "step": 114070 }, { "epoch": 0.1704, "grad_norm": 0.06772585213184357, "learning_rate": 2.1261116868360582e-05, "loss": 0.0338, "step": 114080 }, { "epoch": 0.17045, "grad_norm": 0.07455842196941376, "learning_rate": 2.1257029734898957e-05, "loss": 0.0342, "step": 114090 }, { "epoch": 0.1705, "grad_norm": 0.08810683339834213, "learning_rate": 2.1252942703768752e-05, "loss": 0.035, "step": 114100 }, { "epoch": 0.17055, "grad_norm": 0.0923193097114563, "learning_rate": 2.1248855775081675e-05, "loss": 0.0359, "step": 114110 }, { "epoch": 0.1706, "grad_norm": 0.08906389772891998, "learning_rate": 2.1244768948949472e-05, "loss": 0.0332, "step": 114120 }, { "epoch": 0.17065, "grad_norm": 0.07592211663722992, "learning_rate": 2.1240682225483886e-05, "loss": 0.0339, "step": 114130 }, { "epoch": 0.1707, "grad_norm": 0.07437780499458313, "learning_rate": 2.1236595604796624e-05, "loss": 0.033, "step": 114140 }, { "epoch": 0.17075, "grad_norm": 0.08547678589820862, "learning_rate": 2.1232509086999433e-05, "loss": 0.0324, "step": 114150 }, { "epoch": 0.1708, "grad_norm": 0.07600551843643188, "learning_rate": 2.122842267220402e-05, "loss": 0.0333, "step": 114160 }, { "epoch": 0.17085, "grad_norm": 0.0664534717798233, "learning_rate": 2.1224336360522123e-05, "loss": 0.0328, "step": 114170 }, { "epoch": 0.1709, "grad_norm": 0.08195389807224274, "learning_rate": 2.122025015206544e-05, "loss": 0.0335, "step": 114180 }, { "epoch": 0.17095, "grad_norm": 0.08264941722154617, "learning_rate": 2.1216164046945703e-05, "loss": 0.0341, "step": 114190 }, { "epoch": 0.171, "grad_norm": 0.06394562870264053, "learning_rate": 2.1212078045274622e-05, "loss": 0.0337, "step": 114200 }, { "epoch": 0.17105, "grad_norm": 0.06541323661804199, "learning_rate": 2.1207992147163903e-05, "loss": 0.0336, "step": 114210 }, { "epoch": 0.1711, "grad_norm": 0.07025276869535446, "learning_rate": 2.1203906352725256e-05, "loss": 0.0332, "step": 114220 }, { "epoch": 0.17115, "grad_norm": 0.08636878430843353, "learning_rate": 2.119982066207038e-05, "loss": 0.0341, "step": 114230 }, { "epoch": 0.1712, "grad_norm": 0.06825531274080276, "learning_rate": 2.119573507531098e-05, "loss": 0.0359, "step": 114240 }, { "epoch": 0.17125, "grad_norm": 0.06317528337240219, "learning_rate": 2.1191649592558757e-05, "loss": 0.0333, "step": 114250 }, { "epoch": 0.1713, "grad_norm": 0.06706538796424866, "learning_rate": 2.1187564213925393e-05, "loss": 0.0329, "step": 114260 }, { "epoch": 0.17135, "grad_norm": 0.06396844983100891, "learning_rate": 2.1183478939522607e-05, "loss": 0.033, "step": 114270 }, { "epoch": 0.1714, "grad_norm": 0.061988480389118195, "learning_rate": 2.117939376946206e-05, "loss": 0.0346, "step": 114280 }, { "epoch": 0.17145, "grad_norm": 0.06188317760825157, "learning_rate": 2.1175308703855464e-05, "loss": 0.0355, "step": 114290 }, { "epoch": 0.1715, "grad_norm": 0.06005796045064926, "learning_rate": 2.117122374281448e-05, "loss": 0.0353, "step": 114300 }, { "epoch": 0.17155, "grad_norm": 0.059571754187345505, "learning_rate": 2.1167138886450798e-05, "loss": 0.0343, "step": 114310 }, { "epoch": 0.1716, "grad_norm": 0.0594683401286602, "learning_rate": 2.116305413487611e-05, "loss": 0.0336, "step": 114320 }, { "epoch": 0.17165, "grad_norm": 0.10240515321493149, "learning_rate": 2.1158969488202073e-05, "loss": 0.0351, "step": 114330 }, { "epoch": 0.1717, "grad_norm": 0.07099906355142593, "learning_rate": 2.1154884946540378e-05, "loss": 0.0346, "step": 114340 }, { "epoch": 0.17175, "grad_norm": 0.09732221066951752, "learning_rate": 2.1150800510002672e-05, "loss": 0.036, "step": 114350 }, { "epoch": 0.1718, "grad_norm": 0.06488008052110672, "learning_rate": 2.1146716178700644e-05, "loss": 0.0387, "step": 114360 }, { "epoch": 0.17185, "grad_norm": 0.06984580308198929, "learning_rate": 2.114263195274594e-05, "loss": 0.0357, "step": 114370 }, { "epoch": 0.1719, "grad_norm": 0.09493076056241989, "learning_rate": 2.1138547832250223e-05, "loss": 0.0379, "step": 114380 }, { "epoch": 0.17195, "grad_norm": 0.11810750514268875, "learning_rate": 2.1134463817325172e-05, "loss": 0.0357, "step": 114390 }, { "epoch": 0.172, "grad_norm": 0.08848319947719574, "learning_rate": 2.113037990808242e-05, "loss": 0.0349, "step": 114400 }, { "epoch": 0.17205, "grad_norm": 0.07288021594285965, "learning_rate": 2.112629610463363e-05, "loss": 0.0351, "step": 114410 }, { "epoch": 0.1721, "grad_norm": 0.0753333792090416, "learning_rate": 2.1122212407090447e-05, "loss": 0.0356, "step": 114420 }, { "epoch": 0.17215, "grad_norm": 0.1015729233622551, "learning_rate": 2.1118128815564525e-05, "loss": 0.0355, "step": 114430 }, { "epoch": 0.1722, "grad_norm": 0.07044872641563416, "learning_rate": 2.1114045330167498e-05, "loss": 0.0355, "step": 114440 }, { "epoch": 0.17225, "grad_norm": 0.06678333878517151, "learning_rate": 2.110996195101101e-05, "loss": 0.0337, "step": 114450 }, { "epoch": 0.1723, "grad_norm": 0.07907833904027939, "learning_rate": 2.110587867820671e-05, "loss": 0.0345, "step": 114460 }, { "epoch": 0.17235, "grad_norm": 0.07277830690145493, "learning_rate": 2.1101795511866213e-05, "loss": 0.0348, "step": 114470 }, { "epoch": 0.1724, "grad_norm": 0.0655599981546402, "learning_rate": 2.1097712452101175e-05, "loss": 0.0349, "step": 114480 }, { "epoch": 0.17245, "grad_norm": 0.07155159115791321, "learning_rate": 2.10936294990232e-05, "loss": 0.0336, "step": 114490 }, { "epoch": 0.1725, "grad_norm": 0.060851093381643295, "learning_rate": 2.1089546652743926e-05, "loss": 0.034, "step": 114500 }, { "epoch": 0.17255, "grad_norm": 0.06356131285429001, "learning_rate": 2.108546391337499e-05, "loss": 0.0335, "step": 114510 }, { "epoch": 0.1726, "grad_norm": 0.06545457243919373, "learning_rate": 2.108138128102799e-05, "loss": 0.0321, "step": 114520 }, { "epoch": 0.17265, "grad_norm": 0.06263414770364761, "learning_rate": 2.1077298755814563e-05, "loss": 0.0338, "step": 114530 }, { "epoch": 0.1727, "grad_norm": 0.07915288954973221, "learning_rate": 2.1073216337846305e-05, "loss": 0.0343, "step": 114540 }, { "epoch": 0.17275, "grad_norm": 0.07472743093967438, "learning_rate": 2.1069134027234844e-05, "loss": 0.0347, "step": 114550 }, { "epoch": 0.1728, "grad_norm": 0.08519298583269119, "learning_rate": 2.1065051824091773e-05, "loss": 0.036, "step": 114560 }, { "epoch": 0.17285, "grad_norm": 0.06525319069623947, "learning_rate": 2.1060969728528707e-05, "loss": 0.0365, "step": 114570 }, { "epoch": 0.1729, "grad_norm": 0.07866008579730988, "learning_rate": 2.1056887740657264e-05, "loss": 0.0357, "step": 114580 }, { "epoch": 0.17295, "grad_norm": 0.06493719667196274, "learning_rate": 2.105280586058901e-05, "loss": 0.0327, "step": 114590 }, { "epoch": 0.173, "grad_norm": 0.0681166872382164, "learning_rate": 2.1048724088435576e-05, "loss": 0.0339, "step": 114600 }, { "epoch": 0.17305, "grad_norm": 0.08641801029443741, "learning_rate": 2.104464242430853e-05, "loss": 0.0356, "step": 114610 }, { "epoch": 0.1731, "grad_norm": 0.08253999799489975, "learning_rate": 2.1040560868319485e-05, "loss": 0.0334, "step": 114620 }, { "epoch": 0.17315, "grad_norm": 0.08029817044734955, "learning_rate": 2.103647942058001e-05, "loss": 0.0343, "step": 114630 }, { "epoch": 0.1732, "grad_norm": 0.08552084118127823, "learning_rate": 2.1032398081201698e-05, "loss": 0.034, "step": 114640 }, { "epoch": 0.17325, "grad_norm": 0.06611842662096024, "learning_rate": 2.102831685029614e-05, "loss": 0.0333, "step": 114650 }, { "epoch": 0.1733, "grad_norm": 0.09615202993154526, "learning_rate": 2.10242357279749e-05, "loss": 0.0348, "step": 114660 }, { "epoch": 0.17335, "grad_norm": 0.06889908015727997, "learning_rate": 2.1020154714349566e-05, "loss": 0.0333, "step": 114670 }, { "epoch": 0.1734, "grad_norm": 0.08466773480176926, "learning_rate": 2.1016073809531698e-05, "loss": 0.036, "step": 114680 }, { "epoch": 0.17345, "grad_norm": 0.09294584393501282, "learning_rate": 2.101199301363288e-05, "loss": 0.0339, "step": 114690 }, { "epoch": 0.1735, "grad_norm": 0.1027880311012268, "learning_rate": 2.100791232676468e-05, "loss": 0.0343, "step": 114700 }, { "epoch": 0.17355, "grad_norm": 0.07762772589921951, "learning_rate": 2.1003831749038654e-05, "loss": 0.0363, "step": 114710 }, { "epoch": 0.1736, "grad_norm": 0.08277373015880585, "learning_rate": 2.099975128056637e-05, "loss": 0.0333, "step": 114720 }, { "epoch": 0.17365, "grad_norm": 0.07976562529802322, "learning_rate": 2.0995670921459375e-05, "loss": 0.0332, "step": 114730 }, { "epoch": 0.1737, "grad_norm": 0.0763949528336525, "learning_rate": 2.099159067182924e-05, "loss": 0.0339, "step": 114740 }, { "epoch": 0.17375, "grad_norm": 0.0686565712094307, "learning_rate": 2.0987510531787507e-05, "loss": 0.0351, "step": 114750 }, { "epoch": 0.1738, "grad_norm": 0.06873409450054169, "learning_rate": 2.0983430501445722e-05, "loss": 0.0336, "step": 114760 }, { "epoch": 0.17385, "grad_norm": 0.07115428894758224, "learning_rate": 2.0979350580915454e-05, "loss": 0.034, "step": 114770 }, { "epoch": 0.1739, "grad_norm": 0.05651280656456947, "learning_rate": 2.0975270770308215e-05, "loss": 0.0333, "step": 114780 }, { "epoch": 0.17395, "grad_norm": 0.06276542693376541, "learning_rate": 2.0971191069735578e-05, "loss": 0.0367, "step": 114790 }, { "epoch": 0.174, "grad_norm": 0.061557210981845856, "learning_rate": 2.0967111479309044e-05, "loss": 0.0349, "step": 114800 }, { "epoch": 0.17405, "grad_norm": 0.05692186579108238, "learning_rate": 2.096303199914018e-05, "loss": 0.0343, "step": 114810 }, { "epoch": 0.1741, "grad_norm": 0.06463827937841415, "learning_rate": 2.0958952629340502e-05, "loss": 0.0358, "step": 114820 }, { "epoch": 0.17415, "grad_norm": 0.075020931661129, "learning_rate": 2.095487337002154e-05, "loss": 0.0353, "step": 114830 }, { "epoch": 0.1742, "grad_norm": 0.06175791844725609, "learning_rate": 2.095079422129482e-05, "loss": 0.0362, "step": 114840 }, { "epoch": 0.17425, "grad_norm": 0.0771561786532402, "learning_rate": 2.0946715183271863e-05, "loss": 0.0364, "step": 114850 }, { "epoch": 0.1743, "grad_norm": 0.07464942336082458, "learning_rate": 2.09426362560642e-05, "loss": 0.0344, "step": 114860 }, { "epoch": 0.17435, "grad_norm": 0.07180729508399963, "learning_rate": 2.0938557439783327e-05, "loss": 0.0335, "step": 114870 }, { "epoch": 0.1744, "grad_norm": 0.07695086300373077, "learning_rate": 2.0934478734540762e-05, "loss": 0.0361, "step": 114880 }, { "epoch": 0.17445, "grad_norm": 0.06530208885669708, "learning_rate": 2.0930400140448033e-05, "loss": 0.0354, "step": 114890 }, { "epoch": 0.1745, "grad_norm": 0.07088170200586319, "learning_rate": 2.092632165761663e-05, "loss": 0.0336, "step": 114900 }, { "epoch": 0.17455, "grad_norm": 0.06989553570747375, "learning_rate": 2.092224328615807e-05, "loss": 0.0337, "step": 114910 }, { "epoch": 0.1746, "grad_norm": 0.0927966833114624, "learning_rate": 2.0918165026183838e-05, "loss": 0.0351, "step": 114920 }, { "epoch": 0.17465, "grad_norm": 0.06884682178497314, "learning_rate": 2.091408687780545e-05, "loss": 0.0337, "step": 114930 }, { "epoch": 0.1747, "grad_norm": 0.06937593966722488, "learning_rate": 2.0910008841134383e-05, "loss": 0.0349, "step": 114940 }, { "epoch": 0.17475, "grad_norm": 0.07575168460607529, "learning_rate": 2.090593091628213e-05, "loss": 0.0326, "step": 114950 }, { "epoch": 0.1748, "grad_norm": 0.07248397171497345, "learning_rate": 2.0901853103360207e-05, "loss": 0.0339, "step": 114960 }, { "epoch": 0.17485, "grad_norm": 0.0727832019329071, "learning_rate": 2.0897775402480065e-05, "loss": 0.0331, "step": 114970 }, { "epoch": 0.1749, "grad_norm": 0.06162170693278313, "learning_rate": 2.089369781375322e-05, "loss": 0.0349, "step": 114980 }, { "epoch": 0.17495, "grad_norm": 0.056191302835941315, "learning_rate": 2.0889620337291117e-05, "loss": 0.0324, "step": 114990 }, { "epoch": 0.175, "grad_norm": 0.06705120950937271, "learning_rate": 2.0885542973205264e-05, "loss": 0.0331, "step": 115000 }, { "epoch": 0.17505, "grad_norm": 0.057472214102745056, "learning_rate": 2.0881465721607104e-05, "loss": 0.0325, "step": 115010 }, { "epoch": 0.1751, "grad_norm": 0.06392459571361542, "learning_rate": 2.087738858260813e-05, "loss": 0.0331, "step": 115020 }, { "epoch": 0.17515, "grad_norm": 0.057952750474214554, "learning_rate": 2.087331155631981e-05, "loss": 0.0331, "step": 115030 }, { "epoch": 0.1752, "grad_norm": 0.06256795674562454, "learning_rate": 2.08692346428536e-05, "loss": 0.0341, "step": 115040 }, { "epoch": 0.17525, "grad_norm": 0.07768010348081589, "learning_rate": 2.0865157842320958e-05, "loss": 0.0341, "step": 115050 }, { "epoch": 0.1753, "grad_norm": 0.06745045632123947, "learning_rate": 2.0861081154833348e-05, "loss": 0.0336, "step": 115060 }, { "epoch": 0.17535, "grad_norm": 0.06342687457799911, "learning_rate": 2.0857004580502217e-05, "loss": 0.0329, "step": 115070 }, { "epoch": 0.1754, "grad_norm": 0.06732375174760818, "learning_rate": 2.0852928119439043e-05, "loss": 0.0331, "step": 115080 }, { "epoch": 0.17545, "grad_norm": 0.06038998067378998, "learning_rate": 2.084885177175524e-05, "loss": 0.0347, "step": 115090 }, { "epoch": 0.1755, "grad_norm": 0.0717553123831749, "learning_rate": 2.084477553756228e-05, "loss": 0.0349, "step": 115100 }, { "epoch": 0.17555, "grad_norm": 0.06540452688932419, "learning_rate": 2.084069941697159e-05, "loss": 0.0355, "step": 115110 }, { "epoch": 0.1756, "grad_norm": 0.07494036853313446, "learning_rate": 2.0836623410094623e-05, "loss": 0.0359, "step": 115120 }, { "epoch": 0.17565, "grad_norm": 0.07349668443202972, "learning_rate": 2.08325475170428e-05, "loss": 0.0368, "step": 115130 }, { "epoch": 0.1757, "grad_norm": 0.06400984525680542, "learning_rate": 2.082847173792756e-05, "loss": 0.035, "step": 115140 }, { "epoch": 0.17575, "grad_norm": 0.061655063182115555, "learning_rate": 2.0824396072860343e-05, "loss": 0.0366, "step": 115150 }, { "epoch": 0.1758, "grad_norm": 0.07127080112695694, "learning_rate": 2.082032052195256e-05, "loss": 0.0341, "step": 115160 }, { "epoch": 0.17585, "grad_norm": 0.06664267182350159, "learning_rate": 2.081624508531566e-05, "loss": 0.035, "step": 115170 }, { "epoch": 0.1759, "grad_norm": 0.06681042164564133, "learning_rate": 2.081216976306103e-05, "loss": 0.034, "step": 115180 }, { "epoch": 0.17595, "grad_norm": 0.07515434920787811, "learning_rate": 2.080809455530012e-05, "loss": 0.035, "step": 115190 }, { "epoch": 0.176, "grad_norm": 0.06582251936197281, "learning_rate": 2.0804019462144315e-05, "loss": 0.034, "step": 115200 }, { "epoch": 0.17605, "grad_norm": 0.06440650671720505, "learning_rate": 2.0799944483705047e-05, "loss": 0.0353, "step": 115210 }, { "epoch": 0.1761, "grad_norm": 0.06627684086561203, "learning_rate": 2.0795869620093726e-05, "loss": 0.0335, "step": 115220 }, { "epoch": 0.17615, "grad_norm": 0.061812873929739, "learning_rate": 2.0791794871421743e-05, "loss": 0.0346, "step": 115230 }, { "epoch": 0.1762, "grad_norm": 0.06207577511668205, "learning_rate": 2.078772023780051e-05, "loss": 0.0332, "step": 115240 }, { "epoch": 0.17625, "grad_norm": 0.07133428752422333, "learning_rate": 2.0783645719341424e-05, "loss": 0.0355, "step": 115250 }, { "epoch": 0.1763, "grad_norm": 0.06522677093744278, "learning_rate": 2.077957131615587e-05, "loss": 0.0333, "step": 115260 }, { "epoch": 0.17635, "grad_norm": 0.06236393749713898, "learning_rate": 2.0775497028355268e-05, "loss": 0.0338, "step": 115270 }, { "epoch": 0.1764, "grad_norm": 0.08299710601568222, "learning_rate": 2.0771422856050978e-05, "loss": 0.0374, "step": 115280 }, { "epoch": 0.17645, "grad_norm": 0.08262594044208527, "learning_rate": 2.076734879935441e-05, "loss": 0.0345, "step": 115290 }, { "epoch": 0.1765, "grad_norm": 0.07234200090169907, "learning_rate": 2.0763274858376918e-05, "loss": 0.0343, "step": 115300 }, { "epoch": 0.17655, "grad_norm": 0.06531044840812683, "learning_rate": 2.0759201033229914e-05, "loss": 0.034, "step": 115310 }, { "epoch": 0.1766, "grad_norm": 0.06469815969467163, "learning_rate": 2.0755127324024754e-05, "loss": 0.0331, "step": 115320 }, { "epoch": 0.17665, "grad_norm": 0.06525961309671402, "learning_rate": 2.0751053730872817e-05, "loss": 0.0325, "step": 115330 }, { "epoch": 0.1767, "grad_norm": 0.058061715215444565, "learning_rate": 2.0746980253885483e-05, "loss": 0.0342, "step": 115340 }, { "epoch": 0.17675, "grad_norm": 0.09244202822446823, "learning_rate": 2.0742906893174102e-05, "loss": 0.0345, "step": 115350 }, { "epoch": 0.1768, "grad_norm": 0.08636010438203812, "learning_rate": 2.0738833648850056e-05, "loss": 0.0335, "step": 115360 }, { "epoch": 0.17685, "grad_norm": 0.06512778997421265, "learning_rate": 2.0734760521024685e-05, "loss": 0.0339, "step": 115370 }, { "epoch": 0.1769, "grad_norm": 0.06986937671899796, "learning_rate": 2.0730687509809377e-05, "loss": 0.0346, "step": 115380 }, { "epoch": 0.17695, "grad_norm": 0.07900271564722061, "learning_rate": 2.0726614615315447e-05, "loss": 0.0347, "step": 115390 }, { "epoch": 0.177, "grad_norm": 0.06363274157047272, "learning_rate": 2.072254183765428e-05, "loss": 0.0323, "step": 115400 }, { "epoch": 0.17705, "grad_norm": 0.09011182934045792, "learning_rate": 2.0718469176937214e-05, "loss": 0.037, "step": 115410 }, { "epoch": 0.1771, "grad_norm": 0.11731145530939102, "learning_rate": 2.0714396633275586e-05, "loss": 0.0343, "step": 115420 }, { "epoch": 0.17715, "grad_norm": 0.09642758220434189, "learning_rate": 2.0710324206780756e-05, "loss": 0.0338, "step": 115430 }, { "epoch": 0.1772, "grad_norm": 0.10255391150712967, "learning_rate": 2.0706251897564037e-05, "loss": 0.035, "step": 115440 }, { "epoch": 0.17725, "grad_norm": 0.08662714809179306, "learning_rate": 2.0702179705736778e-05, "loss": 0.0334, "step": 115450 }, { "epoch": 0.1773, "grad_norm": 0.06695021688938141, "learning_rate": 2.0698107631410323e-05, "loss": 0.033, "step": 115460 }, { "epoch": 0.17735, "grad_norm": 0.06570626050233841, "learning_rate": 2.0694035674695974e-05, "loss": 0.0349, "step": 115470 }, { "epoch": 0.1774, "grad_norm": 0.06275024265050888, "learning_rate": 2.068996383570509e-05, "loss": 0.033, "step": 115480 }, { "epoch": 0.17745, "grad_norm": 0.055382829159498215, "learning_rate": 2.068589211454896e-05, "loss": 0.0328, "step": 115490 }, { "epoch": 0.1775, "grad_norm": 0.062211498618125916, "learning_rate": 2.0681820511338927e-05, "loss": 0.0337, "step": 115500 }, { "epoch": 0.17755, "grad_norm": 0.062481772154569626, "learning_rate": 2.0677749026186296e-05, "loss": 0.0328, "step": 115510 }, { "epoch": 0.1776, "grad_norm": 0.06326182186603546, "learning_rate": 2.067367765920238e-05, "loss": 0.0325, "step": 115520 }, { "epoch": 0.17765, "grad_norm": 0.061724428087472916, "learning_rate": 2.0669606410498498e-05, "loss": 0.0328, "step": 115530 }, { "epoch": 0.1777, "grad_norm": 0.06887421011924744, "learning_rate": 2.066553528018595e-05, "loss": 0.0337, "step": 115540 }, { "epoch": 0.17775, "grad_norm": 0.06955559551715851, "learning_rate": 2.0661464268376037e-05, "loss": 0.0332, "step": 115550 }, { "epoch": 0.1778, "grad_norm": 0.06526818871498108, "learning_rate": 2.0657393375180058e-05, "loss": 0.0344, "step": 115560 }, { "epoch": 0.17785, "grad_norm": 0.07432231307029724, "learning_rate": 2.065332260070932e-05, "loss": 0.0338, "step": 115570 }, { "epoch": 0.1779, "grad_norm": 0.09075212478637695, "learning_rate": 2.0649251945075095e-05, "loss": 0.0333, "step": 115580 }, { "epoch": 0.17795, "grad_norm": 0.06763681769371033, "learning_rate": 2.0645181408388694e-05, "loss": 0.0333, "step": 115590 }, { "epoch": 0.178, "grad_norm": 0.07734362781047821, "learning_rate": 2.0641110990761403e-05, "loss": 0.0332, "step": 115600 }, { "epoch": 0.17805, "grad_norm": 0.06649279594421387, "learning_rate": 2.0637040692304492e-05, "loss": 0.0346, "step": 115610 }, { "epoch": 0.1781, "grad_norm": 0.06801921129226685, "learning_rate": 2.063297051312926e-05, "loss": 0.0339, "step": 115620 }, { "epoch": 0.17815, "grad_norm": 0.08945854008197784, "learning_rate": 2.0628900453346967e-05, "loss": 0.0337, "step": 115630 }, { "epoch": 0.1782, "grad_norm": 0.07723744213581085, "learning_rate": 2.0624830513068895e-05, "loss": 0.035, "step": 115640 }, { "epoch": 0.17825, "grad_norm": 0.06963032484054565, "learning_rate": 2.062076069240631e-05, "loss": 0.0333, "step": 115650 }, { "epoch": 0.1783, "grad_norm": 0.06304597109556198, "learning_rate": 2.0616690991470477e-05, "loss": 0.0359, "step": 115660 }, { "epoch": 0.17835, "grad_norm": 0.06315404921770096, "learning_rate": 2.0612621410372685e-05, "loss": 0.034, "step": 115670 }, { "epoch": 0.1784, "grad_norm": 0.05911766365170479, "learning_rate": 2.0608551949224152e-05, "loss": 0.0342, "step": 115680 }, { "epoch": 0.17845, "grad_norm": 0.06626007705926895, "learning_rate": 2.0604482608136185e-05, "loss": 0.0332, "step": 115690 }, { "epoch": 0.1785, "grad_norm": 0.0709712877869606, "learning_rate": 2.0600413387219986e-05, "loss": 0.0341, "step": 115700 }, { "epoch": 0.17855, "grad_norm": 0.06101493537425995, "learning_rate": 2.059634428658684e-05, "loss": 0.0338, "step": 115710 }, { "epoch": 0.1786, "grad_norm": 0.07519076019525528, "learning_rate": 2.0592275306347996e-05, "loss": 0.0365, "step": 115720 }, { "epoch": 0.17865, "grad_norm": 0.06127850338816643, "learning_rate": 2.0588206446614683e-05, "loss": 0.0342, "step": 115730 }, { "epoch": 0.1787, "grad_norm": 0.06362968683242798, "learning_rate": 2.0584137707498153e-05, "loss": 0.0337, "step": 115740 }, { "epoch": 0.17875, "grad_norm": 0.05607949197292328, "learning_rate": 2.0580069089109633e-05, "loss": 0.0337, "step": 115750 }, { "epoch": 0.1788, "grad_norm": 0.08142589032649994, "learning_rate": 2.0576000591560368e-05, "loss": 0.0339, "step": 115760 }, { "epoch": 0.17885, "grad_norm": 0.06536273658275604, "learning_rate": 2.0571932214961583e-05, "loss": 0.0344, "step": 115770 }, { "epoch": 0.1789, "grad_norm": 0.07052770256996155, "learning_rate": 2.0567863959424498e-05, "loss": 0.035, "step": 115780 }, { "epoch": 0.17895, "grad_norm": 0.07326888293027878, "learning_rate": 2.0563795825060358e-05, "loss": 0.0358, "step": 115790 }, { "epoch": 0.179, "grad_norm": 0.06202247738838196, "learning_rate": 2.055972781198037e-05, "loss": 0.0341, "step": 115800 }, { "epoch": 0.17905, "grad_norm": 0.06690514087677002, "learning_rate": 2.0555659920295763e-05, "loss": 0.0357, "step": 115810 }, { "epoch": 0.1791, "grad_norm": 0.06660515069961548, "learning_rate": 2.0551592150117735e-05, "loss": 0.0345, "step": 115820 }, { "epoch": 0.17915, "grad_norm": 0.07260756194591522, "learning_rate": 2.0547524501557514e-05, "loss": 0.0359, "step": 115830 }, { "epoch": 0.1792, "grad_norm": 0.070611372590065, "learning_rate": 2.0543456974726295e-05, "loss": 0.0366, "step": 115840 }, { "epoch": 0.17925, "grad_norm": 0.09465611726045609, "learning_rate": 2.0539389569735287e-05, "loss": 0.0358, "step": 115850 }, { "epoch": 0.1793, "grad_norm": 0.08132003247737885, "learning_rate": 2.05353222866957e-05, "loss": 0.0369, "step": 115860 }, { "epoch": 0.17935, "grad_norm": 0.12759603559970856, "learning_rate": 2.0531255125718708e-05, "loss": 0.0373, "step": 115870 }, { "epoch": 0.1794, "grad_norm": 0.07082096487283707, "learning_rate": 2.0527188086915544e-05, "loss": 0.0357, "step": 115880 }, { "epoch": 0.17945, "grad_norm": 0.07586158066987991, "learning_rate": 2.052312117039736e-05, "loss": 0.0361, "step": 115890 }, { "epoch": 0.1795, "grad_norm": 0.07408928126096725, "learning_rate": 2.0519054376275365e-05, "loss": 0.0378, "step": 115900 }, { "epoch": 0.17955, "grad_norm": 0.0980948954820633, "learning_rate": 2.051498770466075e-05, "loss": 0.0371, "step": 115910 }, { "epoch": 0.1796, "grad_norm": 0.0799083560705185, "learning_rate": 2.0510921155664674e-05, "loss": 0.0364, "step": 115920 }, { "epoch": 0.17965, "grad_norm": 0.07427407801151276, "learning_rate": 2.0506854729398336e-05, "loss": 0.0367, "step": 115930 }, { "epoch": 0.1797, "grad_norm": 0.07316645979881287, "learning_rate": 2.0502788425972896e-05, "loss": 0.0351, "step": 115940 }, { "epoch": 0.17975, "grad_norm": 0.06873858720064163, "learning_rate": 2.0498722245499534e-05, "loss": 0.0383, "step": 115950 }, { "epoch": 0.1798, "grad_norm": 0.0818914845585823, "learning_rate": 2.0494656188089414e-05, "loss": 0.0361, "step": 115960 }, { "epoch": 0.17985, "grad_norm": 0.05910930410027504, "learning_rate": 2.0490590253853693e-05, "loss": 0.035, "step": 115970 }, { "epoch": 0.1799, "grad_norm": 0.07244950532913208, "learning_rate": 2.048652444290356e-05, "loss": 0.0355, "step": 115980 }, { "epoch": 0.17995, "grad_norm": 0.07029040157794952, "learning_rate": 2.0482458755350132e-05, "loss": 0.0348, "step": 115990 }, { "epoch": 0.18, "grad_norm": 0.06852120161056519, "learning_rate": 2.0478393191304598e-05, "loss": 0.0349, "step": 116000 }, { "epoch": 0.18005, "grad_norm": 0.07397285103797913, "learning_rate": 2.0474327750878088e-05, "loss": 0.0353, "step": 116010 }, { "epoch": 0.1801, "grad_norm": 0.06840717792510986, "learning_rate": 2.0470262434181762e-05, "loss": 0.0343, "step": 116020 }, { "epoch": 0.18015, "grad_norm": 0.07946541160345078, "learning_rate": 2.0466197241326757e-05, "loss": 0.0359, "step": 116030 }, { "epoch": 0.1802, "grad_norm": 0.07225006818771362, "learning_rate": 2.0462132172424218e-05, "loss": 0.0348, "step": 116040 }, { "epoch": 0.18025, "grad_norm": 0.06738066673278809, "learning_rate": 2.045806722758528e-05, "loss": 0.0347, "step": 116050 }, { "epoch": 0.1803, "grad_norm": 0.06892020255327225, "learning_rate": 2.0454002406921075e-05, "loss": 0.0342, "step": 116060 }, { "epoch": 0.18035, "grad_norm": 0.07145651429891586, "learning_rate": 2.0449937710542743e-05, "loss": 0.0332, "step": 116070 }, { "epoch": 0.1804, "grad_norm": 0.06717827171087265, "learning_rate": 2.0445873138561393e-05, "loss": 0.0344, "step": 116080 }, { "epoch": 0.18045, "grad_norm": 0.07199730724096298, "learning_rate": 2.0441808691088164e-05, "loss": 0.0336, "step": 116090 }, { "epoch": 0.1805, "grad_norm": 0.060535430908203125, "learning_rate": 2.043774436823418e-05, "loss": 0.0332, "step": 116100 }, { "epoch": 0.18055, "grad_norm": 0.05504748225212097, "learning_rate": 2.0433680170110548e-05, "loss": 0.0335, "step": 116110 }, { "epoch": 0.1806, "grad_norm": 0.0665460079908371, "learning_rate": 2.0429616096828387e-05, "loss": 0.0354, "step": 116120 }, { "epoch": 0.18065, "grad_norm": 0.05895334109663963, "learning_rate": 2.04255521484988e-05, "loss": 0.0335, "step": 116130 }, { "epoch": 0.1807, "grad_norm": 0.05527698993682861, "learning_rate": 2.0421488325232904e-05, "loss": 0.0333, "step": 116140 }, { "epoch": 0.18075, "grad_norm": 0.06963811814785004, "learning_rate": 2.041742462714179e-05, "loss": 0.0344, "step": 116150 }, { "epoch": 0.1808, "grad_norm": 0.05897054448723793, "learning_rate": 2.0413361054336564e-05, "loss": 0.0336, "step": 116160 }, { "epoch": 0.18085, "grad_norm": 0.05657659471035004, "learning_rate": 2.040929760692834e-05, "loss": 0.0336, "step": 116170 }, { "epoch": 0.1809, "grad_norm": 0.05925530940294266, "learning_rate": 2.0405234285028174e-05, "loss": 0.0333, "step": 116180 }, { "epoch": 0.18095, "grad_norm": 0.06751128286123276, "learning_rate": 2.0401171088747194e-05, "loss": 0.0362, "step": 116190 }, { "epoch": 0.181, "grad_norm": 0.08569232374429703, "learning_rate": 2.0397108018196453e-05, "loss": 0.0338, "step": 116200 }, { "epoch": 0.18105, "grad_norm": 0.07641440629959106, "learning_rate": 2.039304507348706e-05, "loss": 0.034, "step": 116210 }, { "epoch": 0.1811, "grad_norm": 0.0684174969792366, "learning_rate": 2.038898225473008e-05, "loss": 0.0351, "step": 116220 }, { "epoch": 0.18115, "grad_norm": 0.07047194242477417, "learning_rate": 2.0384919562036593e-05, "loss": 0.0334, "step": 116230 }, { "epoch": 0.1812, "grad_norm": 0.06977847218513489, "learning_rate": 2.0380856995517673e-05, "loss": 0.0342, "step": 116240 }, { "epoch": 0.18125, "grad_norm": 0.080093152821064, "learning_rate": 2.0376794555284386e-05, "loss": 0.0343, "step": 116250 }, { "epoch": 0.1813, "grad_norm": 0.05909072235226631, "learning_rate": 2.0372732241447802e-05, "loss": 0.0344, "step": 116260 }, { "epoch": 0.18135, "grad_norm": 0.0653337761759758, "learning_rate": 2.0368670054118976e-05, "loss": 0.0332, "step": 116270 }, { "epoch": 0.1814, "grad_norm": 0.07813318073749542, "learning_rate": 2.036460799340897e-05, "loss": 0.0329, "step": 116280 }, { "epoch": 0.18145, "grad_norm": 0.06988602876663208, "learning_rate": 2.0360546059428843e-05, "loss": 0.0341, "step": 116290 }, { "epoch": 0.1815, "grad_norm": 0.06033988296985626, "learning_rate": 2.035648425228964e-05, "loss": 0.0332, "step": 116300 }, { "epoch": 0.18155, "grad_norm": 0.07113895565271378, "learning_rate": 2.0352422572102423e-05, "loss": 0.0357, "step": 116310 }, { "epoch": 0.1816, "grad_norm": 0.0769420638680458, "learning_rate": 2.0348361018978217e-05, "loss": 0.0349, "step": 116320 }, { "epoch": 0.18165, "grad_norm": 0.06596451252698898, "learning_rate": 2.0344299593028083e-05, "loss": 0.0336, "step": 116330 }, { "epoch": 0.1817, "grad_norm": 0.07285913079977036, "learning_rate": 2.034023829436304e-05, "loss": 0.0329, "step": 116340 }, { "epoch": 0.18175, "grad_norm": 0.07244844734668732, "learning_rate": 2.033617712309413e-05, "loss": 0.0349, "step": 116350 }, { "epoch": 0.1818, "grad_norm": 0.09303736686706543, "learning_rate": 2.0332116079332396e-05, "loss": 0.0348, "step": 116360 }, { "epoch": 0.18185, "grad_norm": 0.07680649310350418, "learning_rate": 2.032805516318884e-05, "loss": 0.0347, "step": 116370 }, { "epoch": 0.1819, "grad_norm": 0.06392459571361542, "learning_rate": 2.0323994374774516e-05, "loss": 0.0337, "step": 116380 }, { "epoch": 0.18195, "grad_norm": 0.059849537909030914, "learning_rate": 2.0319933714200416e-05, "loss": 0.0357, "step": 116390 }, { "epoch": 0.182, "grad_norm": 0.05902472510933876, "learning_rate": 2.031587318157758e-05, "loss": 0.0342, "step": 116400 }, { "epoch": 0.18205, "grad_norm": 0.07465706765651703, "learning_rate": 2.0311812777017004e-05, "loss": 0.0353, "step": 116410 }, { "epoch": 0.1821, "grad_norm": 0.0838695615530014, "learning_rate": 2.0307752500629707e-05, "loss": 0.0368, "step": 116420 }, { "epoch": 0.18215, "grad_norm": 0.06150379031896591, "learning_rate": 2.0303692352526698e-05, "loss": 0.0359, "step": 116430 }, { "epoch": 0.1822, "grad_norm": 0.07571075856685638, "learning_rate": 2.0299632332818973e-05, "loss": 0.0357, "step": 116440 }, { "epoch": 0.18225, "grad_norm": 0.10019955784082413, "learning_rate": 2.029557244161754e-05, "loss": 0.0346, "step": 116450 }, { "epoch": 0.1823, "grad_norm": 0.07792889326810837, "learning_rate": 2.029151267903338e-05, "loss": 0.0343, "step": 116460 }, { "epoch": 0.18235, "grad_norm": 0.07031518220901489, "learning_rate": 2.028745304517749e-05, "loss": 0.0331, "step": 116470 }, { "epoch": 0.1824, "grad_norm": 0.06752107292413712, "learning_rate": 2.028339354016088e-05, "loss": 0.0358, "step": 116480 }, { "epoch": 0.18245, "grad_norm": 0.06869736313819885, "learning_rate": 2.0279334164094504e-05, "loss": 0.0359, "step": 116490 }, { "epoch": 0.1825, "grad_norm": 0.07195311039686203, "learning_rate": 2.027527491708937e-05, "loss": 0.035, "step": 116500 }, { "epoch": 0.18255, "grad_norm": 0.07395985722541809, "learning_rate": 2.0271215799256434e-05, "loss": 0.0341, "step": 116510 }, { "epoch": 0.1826, "grad_norm": 0.05496805161237717, "learning_rate": 2.026715681070669e-05, "loss": 0.0337, "step": 116520 }, { "epoch": 0.18265, "grad_norm": 0.07406099140644073, "learning_rate": 2.0263097951551098e-05, "loss": 0.0342, "step": 116530 }, { "epoch": 0.1827, "grad_norm": 0.05828932300209999, "learning_rate": 2.0259039221900627e-05, "loss": 0.034, "step": 116540 }, { "epoch": 0.18275, "grad_norm": 0.06917819380760193, "learning_rate": 2.0254980621866247e-05, "loss": 0.0352, "step": 116550 }, { "epoch": 0.1828, "grad_norm": 0.06745157390832901, "learning_rate": 2.025092215155891e-05, "loss": 0.0357, "step": 116560 }, { "epoch": 0.18285, "grad_norm": 0.07505171000957489, "learning_rate": 2.024686381108958e-05, "loss": 0.0351, "step": 116570 }, { "epoch": 0.1829, "grad_norm": 0.06549634039402008, "learning_rate": 2.0242805600569198e-05, "loss": 0.0339, "step": 116580 }, { "epoch": 0.18295, "grad_norm": 0.06527920067310333, "learning_rate": 2.023874752010874e-05, "loss": 0.0354, "step": 116590 }, { "epoch": 0.183, "grad_norm": 0.052015628665685654, "learning_rate": 2.023468956981912e-05, "loss": 0.0344, "step": 116600 }, { "epoch": 0.18305, "grad_norm": 0.05962091684341431, "learning_rate": 2.0230631749811306e-05, "loss": 0.0349, "step": 116610 }, { "epoch": 0.1831, "grad_norm": 0.062450990080833435, "learning_rate": 2.022657406019623e-05, "loss": 0.035, "step": 116620 }, { "epoch": 0.18315, "grad_norm": 0.06071823462843895, "learning_rate": 2.022251650108482e-05, "loss": 0.0352, "step": 116630 }, { "epoch": 0.1832, "grad_norm": 0.059769246727228165, "learning_rate": 2.021845907258802e-05, "loss": 0.0348, "step": 116640 }, { "epoch": 0.18325, "grad_norm": 0.056287750601768494, "learning_rate": 2.0214401774816748e-05, "loss": 0.0377, "step": 116650 }, { "epoch": 0.1833, "grad_norm": 0.07899390161037445, "learning_rate": 2.0210344607881925e-05, "loss": 0.0351, "step": 116660 }, { "epoch": 0.18335, "grad_norm": 0.0634530559182167, "learning_rate": 2.02062875718945e-05, "loss": 0.0357, "step": 116670 }, { "epoch": 0.1834, "grad_norm": 0.060274887830019, "learning_rate": 2.0202230666965354e-05, "loss": 0.0346, "step": 116680 }, { "epoch": 0.18345, "grad_norm": 0.061985548585653305, "learning_rate": 2.019817389320544e-05, "loss": 0.0348, "step": 116690 }, { "epoch": 0.1835, "grad_norm": 0.05522892624139786, "learning_rate": 2.019411725072563e-05, "loss": 0.0343, "step": 116700 }, { "epoch": 0.18355, "grad_norm": 0.06568529456853867, "learning_rate": 2.0190060739636856e-05, "loss": 0.0356, "step": 116710 }, { "epoch": 0.1836, "grad_norm": 0.06055418774485588, "learning_rate": 2.0186004360050013e-05, "loss": 0.0343, "step": 116720 }, { "epoch": 0.18365, "grad_norm": 0.07213877886533737, "learning_rate": 2.0181948112076e-05, "loss": 0.0335, "step": 116730 }, { "epoch": 0.1837, "grad_norm": 0.08859606087207794, "learning_rate": 2.017789199582572e-05, "loss": 0.0371, "step": 116740 }, { "epoch": 0.18375, "grad_norm": 0.0729413777589798, "learning_rate": 2.0173836011410057e-05, "loss": 0.0339, "step": 116750 }, { "epoch": 0.1838, "grad_norm": 0.05296160280704498, "learning_rate": 2.016978015893991e-05, "loss": 0.0341, "step": 116760 }, { "epoch": 0.18385, "grad_norm": 0.06408056616783142, "learning_rate": 2.0165724438526153e-05, "loss": 0.034, "step": 116770 }, { "epoch": 0.1839, "grad_norm": 0.05741770565509796, "learning_rate": 2.0161668850279682e-05, "loss": 0.0343, "step": 116780 }, { "epoch": 0.18395, "grad_norm": 0.11421132832765579, "learning_rate": 2.015761339431135e-05, "loss": 0.0364, "step": 116790 }, { "epoch": 0.184, "grad_norm": 0.08587351441383362, "learning_rate": 2.015355807073206e-05, "loss": 0.035, "step": 116800 }, { "epoch": 0.18405, "grad_norm": 0.06537584215402603, "learning_rate": 2.0149502879652674e-05, "loss": 0.0352, "step": 116810 }, { "epoch": 0.1841, "grad_norm": 0.057796284556388855, "learning_rate": 2.0145447821184053e-05, "loss": 0.0352, "step": 116820 }, { "epoch": 0.18415, "grad_norm": 0.0686817318201065, "learning_rate": 2.0141392895437067e-05, "loss": 0.0359, "step": 116830 }, { "epoch": 0.1842, "grad_norm": 0.05911717191338539, "learning_rate": 2.0137338102522573e-05, "loss": 0.0345, "step": 116840 }, { "epoch": 0.18425, "grad_norm": 0.09625113755464554, "learning_rate": 2.013328344255143e-05, "loss": 0.0364, "step": 116850 }, { "epoch": 0.1843, "grad_norm": 0.059197183698415756, "learning_rate": 2.0129228915634485e-05, "loss": 0.0351, "step": 116860 }, { "epoch": 0.18435, "grad_norm": 0.0632222518324852, "learning_rate": 2.012517452188259e-05, "loss": 0.0348, "step": 116870 }, { "epoch": 0.1844, "grad_norm": 0.0603092722594738, "learning_rate": 2.0121120261406603e-05, "loss": 0.0352, "step": 116880 }, { "epoch": 0.18445, "grad_norm": 0.07025669515132904, "learning_rate": 2.0117066134317343e-05, "loss": 0.0351, "step": 116890 }, { "epoch": 0.1845, "grad_norm": 0.0625699982047081, "learning_rate": 2.0113012140725673e-05, "loss": 0.0362, "step": 116900 }, { "epoch": 0.18455, "grad_norm": 0.06732776015996933, "learning_rate": 2.01089582807424e-05, "loss": 0.0353, "step": 116910 }, { "epoch": 0.1846, "grad_norm": 0.07922066003084183, "learning_rate": 2.0104904554478378e-05, "loss": 0.0355, "step": 116920 }, { "epoch": 0.18465, "grad_norm": 0.07065048813819885, "learning_rate": 2.0100850962044432e-05, "loss": 0.0353, "step": 116930 }, { "epoch": 0.1847, "grad_norm": 0.06404469907283783, "learning_rate": 2.0096797503551372e-05, "loss": 0.0338, "step": 116940 }, { "epoch": 0.18475, "grad_norm": 0.06592314690351486, "learning_rate": 2.009274417911003e-05, "loss": 0.0344, "step": 116950 }, { "epoch": 0.1848, "grad_norm": 0.0722537413239479, "learning_rate": 2.008869098883122e-05, "loss": 0.0347, "step": 116960 }, { "epoch": 0.18485, "grad_norm": 0.06831130385398865, "learning_rate": 2.0084637932825752e-05, "loss": 0.0336, "step": 116970 }, { "epoch": 0.1849, "grad_norm": 0.09930170327425003, "learning_rate": 2.0080585011204434e-05, "loss": 0.0356, "step": 116980 }, { "epoch": 0.18495, "grad_norm": 0.07288338989019394, "learning_rate": 2.0076532224078068e-05, "loss": 0.0358, "step": 116990 }, { "epoch": 0.185, "grad_norm": 0.09408794343471527, "learning_rate": 2.007247957155747e-05, "loss": 0.0347, "step": 117000 }, { "epoch": 0.18505, "grad_norm": 0.07589706778526306, "learning_rate": 2.006842705375343e-05, "loss": 0.0343, "step": 117010 }, { "epoch": 0.1851, "grad_norm": 0.05950513482093811, "learning_rate": 2.006437467077674e-05, "loss": 0.0364, "step": 117020 }, { "epoch": 0.18515, "grad_norm": 0.0669105276465416, "learning_rate": 2.006032242273819e-05, "loss": 0.0356, "step": 117030 }, { "epoch": 0.1852, "grad_norm": 0.07175637781620026, "learning_rate": 2.0056270309748572e-05, "loss": 0.0354, "step": 117040 }, { "epoch": 0.18525, "grad_norm": 0.07348711043596268, "learning_rate": 2.0052218331918666e-05, "loss": 0.0347, "step": 117050 }, { "epoch": 0.1853, "grad_norm": 0.07550489157438278, "learning_rate": 2.0048166489359247e-05, "loss": 0.0358, "step": 117060 }, { "epoch": 0.18535, "grad_norm": 0.0652247816324234, "learning_rate": 2.0044114782181105e-05, "loss": 0.035, "step": 117070 }, { "epoch": 0.1854, "grad_norm": 0.13588404655456543, "learning_rate": 2.0040063210494992e-05, "loss": 0.0342, "step": 117080 }, { "epoch": 0.18545, "grad_norm": 0.10506972670555115, "learning_rate": 2.00360117744117e-05, "loss": 0.0346, "step": 117090 }, { "epoch": 0.1855, "grad_norm": 0.08438332378864288, "learning_rate": 2.0031960474041966e-05, "loss": 0.033, "step": 117100 }, { "epoch": 0.18555, "grad_norm": 0.08282383531332016, "learning_rate": 2.0027909309496576e-05, "loss": 0.0331, "step": 117110 }, { "epoch": 0.1856, "grad_norm": 0.07057241350412369, "learning_rate": 2.0023858280886278e-05, "loss": 0.0332, "step": 117120 }, { "epoch": 0.18565, "grad_norm": 0.07022110372781754, "learning_rate": 2.0019807388321825e-05, "loss": 0.0335, "step": 117130 }, { "epoch": 0.1857, "grad_norm": 0.06743114441633224, "learning_rate": 2.0015756631913967e-05, "loss": 0.0342, "step": 117140 }, { "epoch": 0.18575, "grad_norm": 0.06218687444925308, "learning_rate": 2.0011706011773446e-05, "loss": 0.0337, "step": 117150 }, { "epoch": 0.1858, "grad_norm": 0.06828916817903519, "learning_rate": 2.0007655528011017e-05, "loss": 0.0328, "step": 117160 }, { "epoch": 0.18585, "grad_norm": 0.06903926283121109, "learning_rate": 2.0003605180737403e-05, "loss": 0.0339, "step": 117170 }, { "epoch": 0.1859, "grad_norm": 0.06969159841537476, "learning_rate": 1.999955497006334e-05, "loss": 0.0334, "step": 117180 }, { "epoch": 0.18595, "grad_norm": 0.06109282001852989, "learning_rate": 1.9995504896099583e-05, "loss": 0.0329, "step": 117190 }, { "epoch": 0.186, "grad_norm": 0.07328418642282486, "learning_rate": 1.9991454958956822e-05, "loss": 0.0332, "step": 117200 }, { "epoch": 0.18605, "grad_norm": 0.06142791733145714, "learning_rate": 1.9987405158745817e-05, "loss": 0.0334, "step": 117210 }, { "epoch": 0.1861, "grad_norm": 0.06849481165409088, "learning_rate": 1.9983355495577266e-05, "loss": 0.0333, "step": 117220 }, { "epoch": 0.18615, "grad_norm": 0.06934063136577606, "learning_rate": 1.9979305969561895e-05, "loss": 0.0323, "step": 117230 }, { "epoch": 0.1862, "grad_norm": 0.06079070642590523, "learning_rate": 1.9975256580810405e-05, "loss": 0.034, "step": 117240 }, { "epoch": 0.18625, "grad_norm": 0.06963855028152466, "learning_rate": 1.9971207329433518e-05, "loss": 0.0332, "step": 117250 }, { "epoch": 0.1863, "grad_norm": 0.0564182884991169, "learning_rate": 1.9967158215541936e-05, "loss": 0.0344, "step": 117260 }, { "epoch": 0.18635, "grad_norm": 0.06805352121591568, "learning_rate": 1.9963109239246346e-05, "loss": 0.0329, "step": 117270 }, { "epoch": 0.1864, "grad_norm": 0.058724112808704376, "learning_rate": 1.995906040065747e-05, "loss": 0.0327, "step": 117280 }, { "epoch": 0.18645, "grad_norm": 0.058111634105443954, "learning_rate": 1.995501169988598e-05, "loss": 0.0326, "step": 117290 }, { "epoch": 0.1865, "grad_norm": 0.05567874759435654, "learning_rate": 1.9950963137042573e-05, "loss": 0.0354, "step": 117300 }, { "epoch": 0.18655, "grad_norm": 0.06355543434619904, "learning_rate": 1.9946914712237946e-05, "loss": 0.0359, "step": 117310 }, { "epoch": 0.1866, "grad_norm": 0.08021455258131027, "learning_rate": 1.994286642558277e-05, "loss": 0.0339, "step": 117320 }, { "epoch": 0.18665, "grad_norm": 0.07066422700881958, "learning_rate": 1.9938818277187726e-05, "loss": 0.0337, "step": 117330 }, { "epoch": 0.1867, "grad_norm": 0.08102439343929291, "learning_rate": 1.9934770267163484e-05, "loss": 0.0349, "step": 117340 }, { "epoch": 0.18675, "grad_norm": 0.07358285784721375, "learning_rate": 1.9930722395620727e-05, "loss": 0.0336, "step": 117350 }, { "epoch": 0.1868, "grad_norm": 0.06806259602308273, "learning_rate": 1.992667466267011e-05, "loss": 0.0338, "step": 117360 }, { "epoch": 0.18685, "grad_norm": 0.056118763983249664, "learning_rate": 1.9922627068422297e-05, "loss": 0.0327, "step": 117370 }, { "epoch": 0.1869, "grad_norm": 0.06797992438077927, "learning_rate": 1.9918579612987968e-05, "loss": 0.0342, "step": 117380 }, { "epoch": 0.18695, "grad_norm": 0.07334133982658386, "learning_rate": 1.991453229647775e-05, "loss": 0.0344, "step": 117390 }, { "epoch": 0.187, "grad_norm": 0.06543901562690735, "learning_rate": 1.991048511900232e-05, "loss": 0.0339, "step": 117400 }, { "epoch": 0.18705, "grad_norm": 0.07021824270486832, "learning_rate": 1.99064380806723e-05, "loss": 0.0355, "step": 117410 }, { "epoch": 0.1871, "grad_norm": 0.06691295653581619, "learning_rate": 1.9902391181598358e-05, "loss": 0.0348, "step": 117420 }, { "epoch": 0.18715, "grad_norm": 0.06111136078834534, "learning_rate": 1.9898344421891125e-05, "loss": 0.0356, "step": 117430 }, { "epoch": 0.1872, "grad_norm": 0.07992721349000931, "learning_rate": 1.9894297801661236e-05, "loss": 0.0354, "step": 117440 }, { "epoch": 0.18725, "grad_norm": 0.07154236733913422, "learning_rate": 1.9890251321019335e-05, "loss": 0.034, "step": 117450 }, { "epoch": 0.1873, "grad_norm": 0.07538168132305145, "learning_rate": 1.9886204980076033e-05, "loss": 0.0334, "step": 117460 }, { "epoch": 0.18735, "grad_norm": 0.05986643582582474, "learning_rate": 1.9882158778941977e-05, "loss": 0.0337, "step": 117470 }, { "epoch": 0.1874, "grad_norm": 0.06718599796295166, "learning_rate": 1.987811271772777e-05, "loss": 0.0333, "step": 117480 }, { "epoch": 0.18745, "grad_norm": 0.06104372441768646, "learning_rate": 1.987406679654403e-05, "loss": 0.0337, "step": 117490 }, { "epoch": 0.1875, "grad_norm": 0.065663181245327, "learning_rate": 1.987002101550139e-05, "loss": 0.0328, "step": 117500 }, { "epoch": 0.18755, "grad_norm": 0.1018490120768547, "learning_rate": 1.9865975374710443e-05, "loss": 0.0346, "step": 117510 }, { "epoch": 0.1876, "grad_norm": 0.08078235387802124, "learning_rate": 1.9861929874281804e-05, "loss": 0.0376, "step": 117520 }, { "epoch": 0.18765, "grad_norm": 0.08387861400842667, "learning_rate": 1.985788451432607e-05, "loss": 0.0364, "step": 117530 }, { "epoch": 0.1877, "grad_norm": 0.06840310990810394, "learning_rate": 1.9853839294953843e-05, "loss": 0.0333, "step": 117540 }, { "epoch": 0.18775, "grad_norm": 0.07385700941085815, "learning_rate": 1.9849794216275712e-05, "loss": 0.0344, "step": 117550 }, { "epoch": 0.1878, "grad_norm": 0.07153363525867462, "learning_rate": 1.9845749278402277e-05, "loss": 0.0332, "step": 117560 }, { "epoch": 0.18785, "grad_norm": 0.07106166332960129, "learning_rate": 1.984170448144412e-05, "loss": 0.0325, "step": 117570 }, { "epoch": 0.1879, "grad_norm": 0.0803474560379982, "learning_rate": 1.9837659825511818e-05, "loss": 0.034, "step": 117580 }, { "epoch": 0.18795, "grad_norm": 0.08528460562229156, "learning_rate": 1.9833615310715968e-05, "loss": 0.0342, "step": 117590 }, { "epoch": 0.188, "grad_norm": 0.06444571167230606, "learning_rate": 1.982957093716712e-05, "loss": 0.0348, "step": 117600 }, { "epoch": 0.18805, "grad_norm": 0.07973705232143402, "learning_rate": 1.982552670497588e-05, "loss": 0.0343, "step": 117610 }, { "epoch": 0.1881, "grad_norm": 0.07987213879823685, "learning_rate": 1.982148261425278e-05, "loss": 0.0364, "step": 117620 }, { "epoch": 0.18815, "grad_norm": 0.08558136969804764, "learning_rate": 1.9817438665108402e-05, "loss": 0.0364, "step": 117630 }, { "epoch": 0.1882, "grad_norm": 0.06126723811030388, "learning_rate": 1.981339485765331e-05, "loss": 0.0335, "step": 117640 }, { "epoch": 0.18825, "grad_norm": 0.06406763195991516, "learning_rate": 1.9809351191998045e-05, "loss": 0.034, "step": 117650 }, { "epoch": 0.1883, "grad_norm": 0.04996425285935402, "learning_rate": 1.980530766825318e-05, "loss": 0.0331, "step": 117660 }, { "epoch": 0.18835, "grad_norm": 0.0618036724627018, "learning_rate": 1.980126428652924e-05, "loss": 0.0363, "step": 117670 }, { "epoch": 0.1884, "grad_norm": 0.06212411820888519, "learning_rate": 1.979722104693678e-05, "loss": 0.033, "step": 117680 }, { "epoch": 0.18845, "grad_norm": 0.06862057745456696, "learning_rate": 1.9793177949586363e-05, "loss": 0.0342, "step": 117690 }, { "epoch": 0.1885, "grad_norm": 0.06919129192829132, "learning_rate": 1.9789134994588482e-05, "loss": 0.0345, "step": 117700 }, { "epoch": 0.18855, "grad_norm": 0.06631922721862793, "learning_rate": 1.9785092182053702e-05, "loss": 0.0336, "step": 117710 }, { "epoch": 0.1886, "grad_norm": 0.054118335247039795, "learning_rate": 1.9781049512092542e-05, "loss": 0.0348, "step": 117720 }, { "epoch": 0.18865, "grad_norm": 0.06398317962884903, "learning_rate": 1.977700698481553e-05, "loss": 0.0328, "step": 117730 }, { "epoch": 0.1887, "grad_norm": 0.07057011872529984, "learning_rate": 1.977296460033318e-05, "loss": 0.0344, "step": 117740 }, { "epoch": 0.18875, "grad_norm": 0.0695476233959198, "learning_rate": 1.9768922358756014e-05, "loss": 0.0338, "step": 117750 }, { "epoch": 0.1888, "grad_norm": 0.06410462409257889, "learning_rate": 1.9764880260194552e-05, "loss": 0.0339, "step": 117760 }, { "epoch": 0.18885, "grad_norm": 0.08577097207307816, "learning_rate": 1.976083830475929e-05, "loss": 0.0346, "step": 117770 }, { "epoch": 0.1889, "grad_norm": 0.06604179739952087, "learning_rate": 1.9756796492560748e-05, "loss": 0.0337, "step": 117780 }, { "epoch": 0.18895, "grad_norm": 0.068792924284935, "learning_rate": 1.9752754823709406e-05, "loss": 0.0341, "step": 117790 }, { "epoch": 0.189, "grad_norm": 0.07170477509498596, "learning_rate": 1.9748713298315797e-05, "loss": 0.0345, "step": 117800 }, { "epoch": 0.18905, "grad_norm": 0.06775487214326859, "learning_rate": 1.9744671916490376e-05, "loss": 0.0342, "step": 117810 }, { "epoch": 0.1891, "grad_norm": 0.06951591372489929, "learning_rate": 1.9740630678343653e-05, "loss": 0.0329, "step": 117820 }, { "epoch": 0.18915, "grad_norm": 0.07820066064596176, "learning_rate": 1.973658958398612e-05, "loss": 0.0334, "step": 117830 }, { "epoch": 0.1892, "grad_norm": 0.06221409887075424, "learning_rate": 1.9732548633528243e-05, "loss": 0.0332, "step": 117840 }, { "epoch": 0.18925, "grad_norm": 0.06415358930826187, "learning_rate": 1.9728507827080512e-05, "loss": 0.0335, "step": 117850 }, { "epoch": 0.1893, "grad_norm": 0.052510686218738556, "learning_rate": 1.9724467164753394e-05, "loss": 0.033, "step": 117860 }, { "epoch": 0.18935, "grad_norm": 0.06607841700315475, "learning_rate": 1.9720426646657352e-05, "loss": 0.0336, "step": 117870 }, { "epoch": 0.1894, "grad_norm": 0.060421060770750046, "learning_rate": 1.971638627290288e-05, "loss": 0.0337, "step": 117880 }, { "epoch": 0.18945, "grad_norm": 0.07931596040725708, "learning_rate": 1.971234604360041e-05, "loss": 0.0354, "step": 117890 }, { "epoch": 0.1895, "grad_norm": 0.08308328688144684, "learning_rate": 1.9708305958860425e-05, "loss": 0.0349, "step": 117900 }, { "epoch": 0.18955, "grad_norm": 0.08788150548934937, "learning_rate": 1.9704266018793354e-05, "loss": 0.0338, "step": 117910 }, { "epoch": 0.1896, "grad_norm": 0.07126295566558838, "learning_rate": 1.970022622350967e-05, "loss": 0.0333, "step": 117920 }, { "epoch": 0.18965, "grad_norm": 0.06478600949048996, "learning_rate": 1.96961865731198e-05, "loss": 0.032, "step": 117930 }, { "epoch": 0.1897, "grad_norm": 0.07265316694974899, "learning_rate": 1.9692147067734202e-05, "loss": 0.0345, "step": 117940 }, { "epoch": 0.18975, "grad_norm": 0.06458255648612976, "learning_rate": 1.968810770746331e-05, "loss": 0.0345, "step": 117950 }, { "epoch": 0.1898, "grad_norm": 0.06969352811574936, "learning_rate": 1.9684068492417558e-05, "loss": 0.0343, "step": 117960 }, { "epoch": 0.18985, "grad_norm": 0.05881544575095177, "learning_rate": 1.968002942270738e-05, "loss": 0.0334, "step": 117970 }, { "epoch": 0.1899, "grad_norm": 0.06733879446983337, "learning_rate": 1.967599049844319e-05, "loss": 0.0345, "step": 117980 }, { "epoch": 0.18995, "grad_norm": 0.06642694771289825, "learning_rate": 1.967195171973543e-05, "loss": 0.0326, "step": 117990 }, { "epoch": 0.19, "grad_norm": 0.06816085427999496, "learning_rate": 1.9667913086694494e-05, "loss": 0.0328, "step": 118000 }, { "epoch": 0.19005, "grad_norm": 0.07988379895687103, "learning_rate": 1.966387459943082e-05, "loss": 0.0346, "step": 118010 }, { "epoch": 0.1901, "grad_norm": 0.05612655729055405, "learning_rate": 1.965983625805481e-05, "loss": 0.0331, "step": 118020 }, { "epoch": 0.19015, "grad_norm": 0.07056666165590286, "learning_rate": 1.965579806267687e-05, "loss": 0.0331, "step": 118030 }, { "epoch": 0.1902, "grad_norm": 0.07695815712213516, "learning_rate": 1.9651760013407404e-05, "loss": 0.0368, "step": 118040 }, { "epoch": 0.19025, "grad_norm": 0.06521902233362198, "learning_rate": 1.9647722110356807e-05, "loss": 0.0338, "step": 118050 }, { "epoch": 0.1903, "grad_norm": 0.06370382755994797, "learning_rate": 1.9643684353635482e-05, "loss": 0.0341, "step": 118060 }, { "epoch": 0.19035, "grad_norm": 0.07350403070449829, "learning_rate": 1.9639646743353814e-05, "loss": 0.0337, "step": 118070 }, { "epoch": 0.1904, "grad_norm": 0.07703772187232971, "learning_rate": 1.9635609279622178e-05, "loss": 0.0341, "step": 118080 }, { "epoch": 0.19045, "grad_norm": 0.07223615795373917, "learning_rate": 1.9631571962550986e-05, "loss": 0.0343, "step": 118090 }, { "epoch": 0.1905, "grad_norm": 0.06532876193523407, "learning_rate": 1.9627534792250584e-05, "loss": 0.034, "step": 118100 }, { "epoch": 0.19055, "grad_norm": 0.06084360554814339, "learning_rate": 1.962349776883138e-05, "loss": 0.0337, "step": 118110 }, { "epoch": 0.1906, "grad_norm": 0.06823130697011948, "learning_rate": 1.9619460892403713e-05, "loss": 0.0349, "step": 118120 }, { "epoch": 0.19065, "grad_norm": 0.06761247664690018, "learning_rate": 1.9615424163077963e-05, "loss": 0.0346, "step": 118130 }, { "epoch": 0.1907, "grad_norm": 0.08065170049667358, "learning_rate": 1.9611387580964504e-05, "loss": 0.0348, "step": 118140 }, { "epoch": 0.19075, "grad_norm": 0.062395017594099045, "learning_rate": 1.960735114617368e-05, "loss": 0.0356, "step": 118150 }, { "epoch": 0.1908, "grad_norm": 0.07153302431106567, "learning_rate": 1.960331485881585e-05, "loss": 0.0351, "step": 118160 }, { "epoch": 0.19085, "grad_norm": 0.06883693486452103, "learning_rate": 1.9599278719001363e-05, "loss": 0.0354, "step": 118170 }, { "epoch": 0.1909, "grad_norm": 0.06124531850218773, "learning_rate": 1.9595242726840568e-05, "loss": 0.0352, "step": 118180 }, { "epoch": 0.19095, "grad_norm": 0.05614418163895607, "learning_rate": 1.9591206882443806e-05, "loss": 0.0338, "step": 118190 }, { "epoch": 0.191, "grad_norm": 0.07188361138105392, "learning_rate": 1.9587171185921406e-05, "loss": 0.035, "step": 118200 }, { "epoch": 0.19105, "grad_norm": 0.06130637601017952, "learning_rate": 1.9583135637383726e-05, "loss": 0.0374, "step": 118210 }, { "epoch": 0.1911, "grad_norm": 0.06994622200727463, "learning_rate": 1.9579100236941076e-05, "loss": 0.0351, "step": 118220 }, { "epoch": 0.19115, "grad_norm": 0.06938885897397995, "learning_rate": 1.9575064984703794e-05, "loss": 0.0363, "step": 118230 }, { "epoch": 0.1912, "grad_norm": 0.06537797302007675, "learning_rate": 1.9571029880782195e-05, "loss": 0.0345, "step": 118240 }, { "epoch": 0.19125, "grad_norm": 0.07701186090707779, "learning_rate": 1.9566994925286602e-05, "loss": 0.0363, "step": 118250 }, { "epoch": 0.1913, "grad_norm": 0.06954272836446762, "learning_rate": 1.956296011832732e-05, "loss": 0.0336, "step": 118260 }, { "epoch": 0.19135, "grad_norm": 0.06378397345542908, "learning_rate": 1.9558925460014668e-05, "loss": 0.0356, "step": 118270 }, { "epoch": 0.1914, "grad_norm": 0.058148663491010666, "learning_rate": 1.9554890950458954e-05, "loss": 0.0347, "step": 118280 }, { "epoch": 0.19145, "grad_norm": 0.05619463324546814, "learning_rate": 1.9550856589770467e-05, "loss": 0.0343, "step": 118290 }, { "epoch": 0.1915, "grad_norm": 0.07490137219429016, "learning_rate": 1.954682237805953e-05, "loss": 0.0343, "step": 118300 }, { "epoch": 0.19155, "grad_norm": 0.06550087779760361, "learning_rate": 1.95427883154364e-05, "loss": 0.0342, "step": 118310 }, { "epoch": 0.1916, "grad_norm": 0.058537062257528305, "learning_rate": 1.9538754402011396e-05, "loss": 0.0343, "step": 118320 }, { "epoch": 0.19165, "grad_norm": 0.07029842585325241, "learning_rate": 1.95347206378948e-05, "loss": 0.0339, "step": 118330 }, { "epoch": 0.1917, "grad_norm": 0.07698415964841843, "learning_rate": 1.9530687023196885e-05, "loss": 0.0338, "step": 118340 }, { "epoch": 0.19175, "grad_norm": 0.07064062356948853, "learning_rate": 1.9526653558027937e-05, "loss": 0.0364, "step": 118350 }, { "epoch": 0.1918, "grad_norm": 0.06120840460062027, "learning_rate": 1.9522620242498214e-05, "loss": 0.0353, "step": 118360 }, { "epoch": 0.19185, "grad_norm": 0.06712406128644943, "learning_rate": 1.9518587076718008e-05, "loss": 0.0343, "step": 118370 }, { "epoch": 0.1919, "grad_norm": 0.06007279083132744, "learning_rate": 1.951455406079756e-05, "loss": 0.0344, "step": 118380 }, { "epoch": 0.19195, "grad_norm": 0.07222392410039902, "learning_rate": 1.9510521194847142e-05, "loss": 0.0339, "step": 118390 }, { "epoch": 0.192, "grad_norm": 0.06543446332216263, "learning_rate": 1.9506488478977027e-05, "loss": 0.0343, "step": 118400 }, { "epoch": 0.19205, "grad_norm": 0.06642667949199677, "learning_rate": 1.9502455913297438e-05, "loss": 0.0361, "step": 118410 }, { "epoch": 0.1921, "grad_norm": 0.06374597549438477, "learning_rate": 1.949842349791865e-05, "loss": 0.033, "step": 118420 }, { "epoch": 0.19215, "grad_norm": 0.08575686067342758, "learning_rate": 1.949439123295089e-05, "loss": 0.0327, "step": 118430 }, { "epoch": 0.1922, "grad_norm": 0.08806584030389786, "learning_rate": 1.9490359118504412e-05, "loss": 0.0366, "step": 118440 }, { "epoch": 0.19225, "grad_norm": 0.0780394971370697, "learning_rate": 1.948632715468944e-05, "loss": 0.0351, "step": 118450 }, { "epoch": 0.1923, "grad_norm": 0.07681611180305481, "learning_rate": 1.9482295341616212e-05, "loss": 0.034, "step": 118460 }, { "epoch": 0.19235, "grad_norm": 0.06136760860681534, "learning_rate": 1.947826367939496e-05, "loss": 0.0347, "step": 118470 }, { "epoch": 0.1924, "grad_norm": 0.09613508731126785, "learning_rate": 1.9474232168135903e-05, "loss": 0.0344, "step": 118480 }, { "epoch": 0.19245, "grad_norm": 0.061935920268297195, "learning_rate": 1.9470200807949267e-05, "loss": 0.0345, "step": 118490 }, { "epoch": 0.1925, "grad_norm": 0.06192437931895256, "learning_rate": 1.946616959894525e-05, "loss": 0.0345, "step": 118500 }, { "epoch": 0.19255, "grad_norm": 0.08948417007923126, "learning_rate": 1.946213854123409e-05, "loss": 0.035, "step": 118510 }, { "epoch": 0.1926, "grad_norm": 0.0753794014453888, "learning_rate": 1.9458107634925975e-05, "loss": 0.034, "step": 118520 }, { "epoch": 0.19265, "grad_norm": 0.05915827676653862, "learning_rate": 1.945407688013112e-05, "loss": 0.0347, "step": 118530 }, { "epoch": 0.1927, "grad_norm": 0.06349233537912369, "learning_rate": 1.945004627695972e-05, "loss": 0.0347, "step": 118540 }, { "epoch": 0.19275, "grad_norm": 0.06719525903463364, "learning_rate": 1.9446015825521967e-05, "loss": 0.0346, "step": 118550 }, { "epoch": 0.1928, "grad_norm": 0.06673227250576019, "learning_rate": 1.944198552592806e-05, "loss": 0.037, "step": 118560 }, { "epoch": 0.19285, "grad_norm": 0.06972498446702957, "learning_rate": 1.9437955378288173e-05, "loss": 0.0353, "step": 118570 }, { "epoch": 0.1929, "grad_norm": 0.06662328541278839, "learning_rate": 1.9433925382712493e-05, "loss": 0.0337, "step": 118580 }, { "epoch": 0.19295, "grad_norm": 0.05945248901844025, "learning_rate": 1.9429895539311215e-05, "loss": 0.034, "step": 118590 }, { "epoch": 0.193, "grad_norm": 0.06642909348011017, "learning_rate": 1.9425865848194488e-05, "loss": 0.0345, "step": 118600 }, { "epoch": 0.19305, "grad_norm": 0.052793245762586594, "learning_rate": 1.942183630947251e-05, "loss": 0.0364, "step": 118610 }, { "epoch": 0.1931, "grad_norm": 0.05456728860735893, "learning_rate": 1.9417806923255415e-05, "loss": 0.0339, "step": 118620 }, { "epoch": 0.19315, "grad_norm": 0.06805785000324249, "learning_rate": 1.9413777689653393e-05, "loss": 0.0339, "step": 118630 }, { "epoch": 0.1932, "grad_norm": 0.07911752909421921, "learning_rate": 1.9409748608776585e-05, "loss": 0.034, "step": 118640 }, { "epoch": 0.19325, "grad_norm": 0.07075954228639603, "learning_rate": 1.9405719680735146e-05, "loss": 0.0352, "step": 118650 }, { "epoch": 0.1933, "grad_norm": 0.06585540622472763, "learning_rate": 1.940169090563924e-05, "loss": 0.0342, "step": 118660 }, { "epoch": 0.19335, "grad_norm": 0.06112068518996239, "learning_rate": 1.9397662283598996e-05, "loss": 0.0338, "step": 118670 }, { "epoch": 0.1934, "grad_norm": 0.06887775659561157, "learning_rate": 1.939363381472456e-05, "loss": 0.0345, "step": 118680 }, { "epoch": 0.19345, "grad_norm": 0.058120857924222946, "learning_rate": 1.938960549912607e-05, "loss": 0.0344, "step": 118690 }, { "epoch": 0.1935, "grad_norm": 0.06258076429367065, "learning_rate": 1.938557733691365e-05, "loss": 0.0333, "step": 118700 }, { "epoch": 0.19355, "grad_norm": 0.06609176099300385, "learning_rate": 1.9381549328197445e-05, "loss": 0.0355, "step": 118710 }, { "epoch": 0.1936, "grad_norm": 0.0720101147890091, "learning_rate": 1.937752147308757e-05, "loss": 0.039, "step": 118720 }, { "epoch": 0.19365, "grad_norm": 0.07010449469089508, "learning_rate": 1.9373493771694145e-05, "loss": 0.0346, "step": 118730 }, { "epoch": 0.1937, "grad_norm": 0.07113895565271378, "learning_rate": 1.9369466224127285e-05, "loss": 0.0346, "step": 118740 }, { "epoch": 0.19375, "grad_norm": 0.06821592897176743, "learning_rate": 1.936543883049711e-05, "loss": 0.0351, "step": 118750 }, { "epoch": 0.1938, "grad_norm": 0.06012248992919922, "learning_rate": 1.9361411590913715e-05, "loss": 0.035, "step": 118760 }, { "epoch": 0.19385, "grad_norm": 0.06248101964592934, "learning_rate": 1.9357384505487204e-05, "loss": 0.0357, "step": 118770 }, { "epoch": 0.1939, "grad_norm": 0.07085489481687546, "learning_rate": 1.935335757432769e-05, "loss": 0.0343, "step": 118780 }, { "epoch": 0.19395, "grad_norm": 0.06061448156833649, "learning_rate": 1.9349330797545247e-05, "loss": 0.0342, "step": 118790 }, { "epoch": 0.194, "grad_norm": 0.06255852431058884, "learning_rate": 1.9345304175249996e-05, "loss": 0.0356, "step": 118800 }, { "epoch": 0.19405, "grad_norm": 0.06098503991961479, "learning_rate": 1.9341277707551982e-05, "loss": 0.0356, "step": 118810 }, { "epoch": 0.1941, "grad_norm": 0.06389370560646057, "learning_rate": 1.933725139456133e-05, "loss": 0.0365, "step": 118820 }, { "epoch": 0.19415, "grad_norm": 0.07680466026067734, "learning_rate": 1.933322523638808e-05, "loss": 0.0354, "step": 118830 }, { "epoch": 0.1942, "grad_norm": 0.0719519779086113, "learning_rate": 1.932919923314233e-05, "loss": 0.0341, "step": 118840 }, { "epoch": 0.19425, "grad_norm": 0.08088349550962448, "learning_rate": 1.932517338493415e-05, "loss": 0.0346, "step": 118850 }, { "epoch": 0.1943, "grad_norm": 0.06553579121828079, "learning_rate": 1.9321147691873586e-05, "loss": 0.0364, "step": 118860 }, { "epoch": 0.19435, "grad_norm": 0.06444582343101501, "learning_rate": 1.931712215407072e-05, "loss": 0.034, "step": 118870 }, { "epoch": 0.1944, "grad_norm": 0.06452842801809311, "learning_rate": 1.9313096771635596e-05, "loss": 0.0343, "step": 118880 }, { "epoch": 0.19445, "grad_norm": 0.0822884812951088, "learning_rate": 1.930907154467826e-05, "loss": 0.0347, "step": 118890 }, { "epoch": 0.1945, "grad_norm": 0.06797374784946442, "learning_rate": 1.9305046473308792e-05, "loss": 0.034, "step": 118900 }, { "epoch": 0.19455, "grad_norm": 0.06760447472333908, "learning_rate": 1.9301021557637193e-05, "loss": 0.0346, "step": 118910 }, { "epoch": 0.1946, "grad_norm": 0.06659888476133347, "learning_rate": 1.9296996797773534e-05, "loss": 0.0348, "step": 118920 }, { "epoch": 0.19465, "grad_norm": 0.05706968903541565, "learning_rate": 1.9292972193827837e-05, "loss": 0.0344, "step": 118930 }, { "epoch": 0.1947, "grad_norm": 0.07194396108388901, "learning_rate": 1.928894774591014e-05, "loss": 0.0343, "step": 118940 }, { "epoch": 0.19475, "grad_norm": 0.09228664636611938, "learning_rate": 1.928492345413046e-05, "loss": 0.0347, "step": 118950 }, { "epoch": 0.1948, "grad_norm": 0.06826335191726685, "learning_rate": 1.9280899318598827e-05, "loss": 0.0337, "step": 118960 }, { "epoch": 0.19485, "grad_norm": 0.053651031106710434, "learning_rate": 1.9276875339425262e-05, "loss": 0.0345, "step": 118970 }, { "epoch": 0.1949, "grad_norm": 0.07270044088363647, "learning_rate": 1.9272851516719773e-05, "loss": 0.0343, "step": 118980 }, { "epoch": 0.19495, "grad_norm": 0.08653170615434647, "learning_rate": 1.9268827850592374e-05, "loss": 0.0353, "step": 118990 }, { "epoch": 0.195, "grad_norm": 0.09220685809850693, "learning_rate": 1.926480434115306e-05, "loss": 0.0355, "step": 119000 }, { "epoch": 0.19505, "grad_norm": 0.09737882763147354, "learning_rate": 1.9260780988511856e-05, "loss": 0.0357, "step": 119010 }, { "epoch": 0.1951, "grad_norm": 0.0808955505490303, "learning_rate": 1.925675779277873e-05, "loss": 0.0353, "step": 119020 }, { "epoch": 0.19515, "grad_norm": 0.06855889409780502, "learning_rate": 1.925273475406369e-05, "loss": 0.0344, "step": 119030 }, { "epoch": 0.1952, "grad_norm": 0.07284665107727051, "learning_rate": 1.9248711872476727e-05, "loss": 0.0358, "step": 119040 }, { "epoch": 0.19525, "grad_norm": 0.06209972873330116, "learning_rate": 1.924468914812782e-05, "loss": 0.0328, "step": 119050 }, { "epoch": 0.1953, "grad_norm": 0.06067826226353645, "learning_rate": 1.924066658112695e-05, "loss": 0.0341, "step": 119060 }, { "epoch": 0.19535, "grad_norm": 0.0795523151755333, "learning_rate": 1.923664417158409e-05, "loss": 0.0334, "step": 119070 }, { "epoch": 0.1954, "grad_norm": 0.06257842481136322, "learning_rate": 1.9232621919609207e-05, "loss": 0.0377, "step": 119080 }, { "epoch": 0.19545, "grad_norm": 0.060376379638910294, "learning_rate": 1.922859982531229e-05, "loss": 0.0329, "step": 119090 }, { "epoch": 0.1955, "grad_norm": 0.06914056837558746, "learning_rate": 1.922457788880327e-05, "loss": 0.0348, "step": 119100 }, { "epoch": 0.19555, "grad_norm": 0.08333203196525574, "learning_rate": 1.9220556110192136e-05, "loss": 0.0335, "step": 119110 }, { "epoch": 0.1956, "grad_norm": 0.06205040588974953, "learning_rate": 1.9216534489588812e-05, "loss": 0.0336, "step": 119120 }, { "epoch": 0.19565, "grad_norm": 0.06603963673114777, "learning_rate": 1.921251302710327e-05, "loss": 0.0338, "step": 119130 }, { "epoch": 0.1957, "grad_norm": 0.057853810489177704, "learning_rate": 1.9208491722845445e-05, "loss": 0.0331, "step": 119140 }, { "epoch": 0.19575, "grad_norm": 0.09958021342754364, "learning_rate": 1.920447057692528e-05, "loss": 0.0332, "step": 119150 }, { "epoch": 0.1958, "grad_norm": 0.06875363737344742, "learning_rate": 1.920044958945272e-05, "loss": 0.0332, "step": 119160 }, { "epoch": 0.19585, "grad_norm": 0.060864198952913284, "learning_rate": 1.919642876053768e-05, "loss": 0.0354, "step": 119170 }, { "epoch": 0.1959, "grad_norm": 0.08430393040180206, "learning_rate": 1.9192408090290105e-05, "loss": 0.0354, "step": 119180 }, { "epoch": 0.19595, "grad_norm": 0.11097610741853714, "learning_rate": 1.9188387578819902e-05, "loss": 0.039, "step": 119190 }, { "epoch": 0.196, "grad_norm": 0.08503743261098862, "learning_rate": 1.918436722623701e-05, "loss": 0.0346, "step": 119200 }, { "epoch": 0.19605, "grad_norm": 0.07787574082612991, "learning_rate": 1.918034703265132e-05, "loss": 0.0334, "step": 119210 }, { "epoch": 0.1961, "grad_norm": 0.06750308722257614, "learning_rate": 1.917632699817276e-05, "loss": 0.0345, "step": 119220 }, { "epoch": 0.19615, "grad_norm": 0.07580902427434921, "learning_rate": 1.917230712291124e-05, "loss": 0.0341, "step": 119230 }, { "epoch": 0.1962, "grad_norm": 0.07479807734489441, "learning_rate": 1.9168287406976646e-05, "loss": 0.0347, "step": 119240 }, { "epoch": 0.19625, "grad_norm": 0.060865797102451324, "learning_rate": 1.916426785047889e-05, "loss": 0.0345, "step": 119250 }, { "epoch": 0.1963, "grad_norm": 0.06803198158740997, "learning_rate": 1.9160248453527852e-05, "loss": 0.034, "step": 119260 }, { "epoch": 0.19635, "grad_norm": 0.07381574809551239, "learning_rate": 1.9156229216233434e-05, "loss": 0.0348, "step": 119270 }, { "epoch": 0.1964, "grad_norm": 0.05769243463873863, "learning_rate": 1.9152210138705508e-05, "loss": 0.0331, "step": 119280 }, { "epoch": 0.19645, "grad_norm": 0.06551354378461838, "learning_rate": 1.9148191221053955e-05, "loss": 0.0342, "step": 119290 }, { "epoch": 0.1965, "grad_norm": 0.06820987910032272, "learning_rate": 1.914417246338867e-05, "loss": 0.0338, "step": 119300 }, { "epoch": 0.19655, "grad_norm": 0.05691422149538994, "learning_rate": 1.9140153865819496e-05, "loss": 0.0323, "step": 119310 }, { "epoch": 0.1966, "grad_norm": 0.06343129277229309, "learning_rate": 1.913613542845633e-05, "loss": 0.0338, "step": 119320 }, { "epoch": 0.19665, "grad_norm": 0.058741576969623566, "learning_rate": 1.9132117151409002e-05, "loss": 0.0327, "step": 119330 }, { "epoch": 0.1967, "grad_norm": 0.06788462400436401, "learning_rate": 1.912809903478739e-05, "loss": 0.0332, "step": 119340 }, { "epoch": 0.19675, "grad_norm": 0.07411598414182663, "learning_rate": 1.912408107870135e-05, "loss": 0.0347, "step": 119350 }, { "epoch": 0.1968, "grad_norm": 0.06857981532812119, "learning_rate": 1.9120063283260722e-05, "loss": 0.0332, "step": 119360 }, { "epoch": 0.19685, "grad_norm": 0.05873365327715874, "learning_rate": 1.9116045648575358e-05, "loss": 0.0333, "step": 119370 }, { "epoch": 0.1969, "grad_norm": 0.06224021315574646, "learning_rate": 1.9112028174755094e-05, "loss": 0.0343, "step": 119380 }, { "epoch": 0.19695, "grad_norm": 0.06388100236654282, "learning_rate": 1.910801086190977e-05, "loss": 0.0345, "step": 119390 }, { "epoch": 0.197, "grad_norm": 0.06213835999369621, "learning_rate": 1.910399371014921e-05, "loss": 0.0334, "step": 119400 }, { "epoch": 0.19705, "grad_norm": 0.054965998977422714, "learning_rate": 1.9099976719583245e-05, "loss": 0.0341, "step": 119410 }, { "epoch": 0.1971, "grad_norm": 0.06047357618808746, "learning_rate": 1.909595989032171e-05, "loss": 0.0348, "step": 119420 }, { "epoch": 0.19715, "grad_norm": 0.062203872948884964, "learning_rate": 1.9091943222474407e-05, "loss": 0.0351, "step": 119430 }, { "epoch": 0.1972, "grad_norm": 0.07518782466650009, "learning_rate": 1.908792671615116e-05, "loss": 0.0356, "step": 119440 }, { "epoch": 0.19725, "grad_norm": 0.08392120152711868, "learning_rate": 1.9083910371461772e-05, "loss": 0.0355, "step": 119450 }, { "epoch": 0.1973, "grad_norm": 0.09785937517881393, "learning_rate": 1.9079894188516056e-05, "loss": 0.0341, "step": 119460 }, { "epoch": 0.19735, "grad_norm": 0.07539359480142593, "learning_rate": 1.9075878167423805e-05, "loss": 0.0342, "step": 119470 }, { "epoch": 0.1974, "grad_norm": 0.10306426137685776, "learning_rate": 1.907186230829482e-05, "loss": 0.0349, "step": 119480 }, { "epoch": 0.19745, "grad_norm": 0.08121484518051147, "learning_rate": 1.90678466112389e-05, "loss": 0.0355, "step": 119490 }, { "epoch": 0.1975, "grad_norm": 0.07452220469713211, "learning_rate": 1.9063831076365807e-05, "loss": 0.0348, "step": 119500 }, { "epoch": 0.19755, "grad_norm": 0.0740089938044548, "learning_rate": 1.9059815703785362e-05, "loss": 0.0342, "step": 119510 }, { "epoch": 0.1976, "grad_norm": 0.09367933869361877, "learning_rate": 1.905580049360731e-05, "loss": 0.035, "step": 119520 }, { "epoch": 0.19765, "grad_norm": 0.07209538668394089, "learning_rate": 1.9051785445941446e-05, "loss": 0.0346, "step": 119530 }, { "epoch": 0.1977, "grad_norm": 0.06270518898963928, "learning_rate": 1.9047770560897532e-05, "loss": 0.0346, "step": 119540 }, { "epoch": 0.19775, "grad_norm": 0.06139799952507019, "learning_rate": 1.9043755838585334e-05, "loss": 0.0335, "step": 119550 }, { "epoch": 0.1978, "grad_norm": 0.05551351234316826, "learning_rate": 1.9039741279114617e-05, "loss": 0.0334, "step": 119560 }, { "epoch": 0.19785, "grad_norm": 0.06037990376353264, "learning_rate": 1.903572688259513e-05, "loss": 0.0344, "step": 119570 }, { "epoch": 0.1979, "grad_norm": 0.05958827584981918, "learning_rate": 1.9031712649136634e-05, "loss": 0.0334, "step": 119580 }, { "epoch": 0.19795, "grad_norm": 0.0590079165995121, "learning_rate": 1.9027698578848867e-05, "loss": 0.034, "step": 119590 }, { "epoch": 0.198, "grad_norm": 0.058510467410087585, "learning_rate": 1.9023684671841575e-05, "loss": 0.0345, "step": 119600 }, { "epoch": 0.19805, "grad_norm": 0.09045732766389847, "learning_rate": 1.9019670928224513e-05, "loss": 0.0336, "step": 119610 }, { "epoch": 0.1981, "grad_norm": 0.08185352385044098, "learning_rate": 1.9015657348107384e-05, "loss": 0.0345, "step": 119620 }, { "epoch": 0.19815, "grad_norm": 0.07754584401845932, "learning_rate": 1.901164393159994e-05, "loss": 0.0327, "step": 119630 }, { "epoch": 0.1982, "grad_norm": 0.09113860875368118, "learning_rate": 1.9007630678811905e-05, "loss": 0.0325, "step": 119640 }, { "epoch": 0.19825, "grad_norm": 0.06712755560874939, "learning_rate": 1.9003617589852998e-05, "loss": 0.0332, "step": 119650 }, { "epoch": 0.1983, "grad_norm": 0.08282347023487091, "learning_rate": 1.899960466483293e-05, "loss": 0.033, "step": 119660 }, { "epoch": 0.19835, "grad_norm": 0.06524205207824707, "learning_rate": 1.899559190386141e-05, "loss": 0.0317, "step": 119670 }, { "epoch": 0.1984, "grad_norm": 0.055411096662282944, "learning_rate": 1.899157930704816e-05, "loss": 0.0329, "step": 119680 }, { "epoch": 0.19845, "grad_norm": 0.07240267843008041, "learning_rate": 1.8987566874502874e-05, "loss": 0.0329, "step": 119690 }, { "epoch": 0.1985, "grad_norm": 0.055038850754499435, "learning_rate": 1.8983554606335254e-05, "loss": 0.0374, "step": 119700 }, { "epoch": 0.19855, "grad_norm": 0.08657161891460419, "learning_rate": 1.897954250265498e-05, "loss": 0.0331, "step": 119710 }, { "epoch": 0.1986, "grad_norm": 0.08538049459457397, "learning_rate": 1.8975530563571752e-05, "loss": 0.0341, "step": 119720 }, { "epoch": 0.19865, "grad_norm": 0.06875836849212646, "learning_rate": 1.8971518789195266e-05, "loss": 0.036, "step": 119730 }, { "epoch": 0.1987, "grad_norm": 0.07377111911773682, "learning_rate": 1.8967507179635187e-05, "loss": 0.0337, "step": 119740 }, { "epoch": 0.19875, "grad_norm": 0.07042787224054337, "learning_rate": 1.8963495735001197e-05, "loss": 0.0348, "step": 119750 }, { "epoch": 0.1988, "grad_norm": 0.06096513941884041, "learning_rate": 1.895948445540296e-05, "loss": 0.0331, "step": 119760 }, { "epoch": 0.19885, "grad_norm": 0.07015082240104675, "learning_rate": 1.895547334095016e-05, "loss": 0.0348, "step": 119770 }, { "epoch": 0.1989, "grad_norm": 0.06189883127808571, "learning_rate": 1.8951462391752436e-05, "loss": 0.0342, "step": 119780 }, { "epoch": 0.19895, "grad_norm": 0.06042921170592308, "learning_rate": 1.8947451607919457e-05, "loss": 0.0357, "step": 119790 }, { "epoch": 0.199, "grad_norm": 0.06005643680691719, "learning_rate": 1.894344098956089e-05, "loss": 0.0346, "step": 119800 }, { "epoch": 0.19905, "grad_norm": 0.06529024243354797, "learning_rate": 1.8939430536786357e-05, "loss": 0.0354, "step": 119810 }, { "epoch": 0.1991, "grad_norm": 0.06161453202366829, "learning_rate": 1.8935420249705533e-05, "loss": 0.0336, "step": 119820 }, { "epoch": 0.19915, "grad_norm": 0.06204698234796524, "learning_rate": 1.8931410128428024e-05, "loss": 0.034, "step": 119830 }, { "epoch": 0.1992, "grad_norm": 0.06497624516487122, "learning_rate": 1.8927400173063493e-05, "loss": 0.034, "step": 119840 }, { "epoch": 0.19925, "grad_norm": 0.07811807096004486, "learning_rate": 1.892339038372155e-05, "loss": 0.0326, "step": 119850 }, { "epoch": 0.1993, "grad_norm": 0.08065193146467209, "learning_rate": 1.8919380760511838e-05, "loss": 0.0332, "step": 119860 }, { "epoch": 0.19935, "grad_norm": 0.06446090340614319, "learning_rate": 1.8915371303543973e-05, "loss": 0.0318, "step": 119870 }, { "epoch": 0.1994, "grad_norm": 0.06551788002252579, "learning_rate": 1.8911362012927565e-05, "loss": 0.0327, "step": 119880 }, { "epoch": 0.19945, "grad_norm": 0.08348044008016586, "learning_rate": 1.890735288877224e-05, "loss": 0.0337, "step": 119890 }, { "epoch": 0.1995, "grad_norm": 0.08474072068929672, "learning_rate": 1.890334393118759e-05, "loss": 0.0334, "step": 119900 }, { "epoch": 0.19955, "grad_norm": 0.09361223876476288, "learning_rate": 1.8899335140283225e-05, "loss": 0.0335, "step": 119910 }, { "epoch": 0.1996, "grad_norm": 0.0642770305275917, "learning_rate": 1.8895326516168755e-05, "loss": 0.0327, "step": 119920 }, { "epoch": 0.19965, "grad_norm": 0.06142275780439377, "learning_rate": 1.8891318058953756e-05, "loss": 0.0329, "step": 119930 }, { "epoch": 0.1997, "grad_norm": 0.07025183737277985, "learning_rate": 1.8887309768747834e-05, "loss": 0.0321, "step": 119940 }, { "epoch": 0.19975, "grad_norm": 0.05500560998916626, "learning_rate": 1.8883301645660563e-05, "loss": 0.0328, "step": 119950 }, { "epoch": 0.1998, "grad_norm": 0.08264268934726715, "learning_rate": 1.887929368980153e-05, "loss": 0.0342, "step": 119960 }, { "epoch": 0.19985, "grad_norm": 0.07302163541316986, "learning_rate": 1.8875285901280303e-05, "loss": 0.032, "step": 119970 }, { "epoch": 0.1999, "grad_norm": 0.07754736393690109, "learning_rate": 1.8871278280206458e-05, "loss": 0.0341, "step": 119980 }, { "epoch": 0.19995, "grad_norm": 0.06747692078351974, "learning_rate": 1.886727082668957e-05, "loss": 0.035, "step": 119990 }, { "epoch": 0.2, "grad_norm": 0.07290555536746979, "learning_rate": 1.886326354083918e-05, "loss": 0.0361, "step": 120000 }, { "epoch": 0.20005, "grad_norm": 0.06327662616968155, "learning_rate": 1.8859256422764878e-05, "loss": 0.0329, "step": 120010 }, { "epoch": 0.2001, "grad_norm": 0.12843845784664154, "learning_rate": 1.885524947257618e-05, "loss": 0.0368, "step": 120020 }, { "epoch": 0.20015, "grad_norm": 0.1008138656616211, "learning_rate": 1.8851242690382672e-05, "loss": 0.0337, "step": 120030 }, { "epoch": 0.2002, "grad_norm": 0.09233039617538452, "learning_rate": 1.884723607629386e-05, "loss": 0.0358, "step": 120040 }, { "epoch": 0.20025, "grad_norm": 0.07306820154190063, "learning_rate": 1.884322963041931e-05, "loss": 0.034, "step": 120050 }, { "epoch": 0.2003, "grad_norm": 0.09731920063495636, "learning_rate": 1.8839223352868553e-05, "loss": 0.035, "step": 120060 }, { "epoch": 0.20035, "grad_norm": 0.07172708213329315, "learning_rate": 1.8835217243751107e-05, "loss": 0.0332, "step": 120070 }, { "epoch": 0.2004, "grad_norm": 0.0703640952706337, "learning_rate": 1.8831211303176514e-05, "loss": 0.0332, "step": 120080 }, { "epoch": 0.20045, "grad_norm": 0.06962746381759644, "learning_rate": 1.8827205531254282e-05, "loss": 0.0334, "step": 120090 }, { "epoch": 0.2005, "grad_norm": 0.07666989415884018, "learning_rate": 1.8823199928093923e-05, "loss": 0.037, "step": 120100 }, { "epoch": 0.20055, "grad_norm": 0.05687066167593002, "learning_rate": 1.8819194493804976e-05, "loss": 0.0333, "step": 120110 }, { "epoch": 0.2006, "grad_norm": 0.05135420337319374, "learning_rate": 1.881518922849691e-05, "loss": 0.0344, "step": 120120 }, { "epoch": 0.20065, "grad_norm": 0.06644924730062485, "learning_rate": 1.8811184132279265e-05, "loss": 0.0347, "step": 120130 }, { "epoch": 0.2007, "grad_norm": 0.07128880172967911, "learning_rate": 1.880717920526151e-05, "loss": 0.0342, "step": 120140 }, { "epoch": 0.20075, "grad_norm": 0.06110058352351189, "learning_rate": 1.8803174447553157e-05, "loss": 0.0369, "step": 120150 }, { "epoch": 0.2008, "grad_norm": 0.062450435012578964, "learning_rate": 1.8799169859263676e-05, "loss": 0.0347, "step": 120160 }, { "epoch": 0.20085, "grad_norm": 0.056189533323049545, "learning_rate": 1.8795165440502564e-05, "loss": 0.0338, "step": 120170 }, { "epoch": 0.2009, "grad_norm": 0.05827772989869118, "learning_rate": 1.879116119137931e-05, "loss": 0.0341, "step": 120180 }, { "epoch": 0.20095, "grad_norm": 0.0684100016951561, "learning_rate": 1.878715711200336e-05, "loss": 0.0334, "step": 120190 }, { "epoch": 0.201, "grad_norm": 0.06133823096752167, "learning_rate": 1.8783153202484213e-05, "loss": 0.0363, "step": 120200 }, { "epoch": 0.20105, "grad_norm": 0.06145724281668663, "learning_rate": 1.877914946293131e-05, "loss": 0.0353, "step": 120210 }, { "epoch": 0.2011, "grad_norm": 0.07646960020065308, "learning_rate": 1.877514589345414e-05, "loss": 0.0344, "step": 120220 }, { "epoch": 0.20115, "grad_norm": 0.057070545852184296, "learning_rate": 1.8771142494162124e-05, "loss": 0.0343, "step": 120230 }, { "epoch": 0.2012, "grad_norm": 0.06414657086133957, "learning_rate": 1.876713926516474e-05, "loss": 0.0347, "step": 120240 }, { "epoch": 0.20125, "grad_norm": 0.0708075612783432, "learning_rate": 1.8763136206571432e-05, "loss": 0.0346, "step": 120250 }, { "epoch": 0.2013, "grad_norm": 0.06626040488481522, "learning_rate": 1.875913331849163e-05, "loss": 0.0348, "step": 120260 }, { "epoch": 0.20135, "grad_norm": 0.06647317111492157, "learning_rate": 1.8755130601034787e-05, "loss": 0.0344, "step": 120270 }, { "epoch": 0.2014, "grad_norm": 0.06419264525175095, "learning_rate": 1.875112805431032e-05, "loss": 0.0334, "step": 120280 }, { "epoch": 0.20145, "grad_norm": 0.07849488407373428, "learning_rate": 1.8747125678427658e-05, "loss": 0.0345, "step": 120290 }, { "epoch": 0.2015, "grad_norm": 0.06297898292541504, "learning_rate": 1.874312347349625e-05, "loss": 0.0344, "step": 120300 }, { "epoch": 0.20155, "grad_norm": 0.0669475868344307, "learning_rate": 1.8739121439625474e-05, "loss": 0.0341, "step": 120310 }, { "epoch": 0.2016, "grad_norm": 0.07643059641122818, "learning_rate": 1.8735119576924787e-05, "loss": 0.0361, "step": 120320 }, { "epoch": 0.20165, "grad_norm": 0.06809680163860321, "learning_rate": 1.8731117885503558e-05, "loss": 0.0358, "step": 120330 }, { "epoch": 0.2017, "grad_norm": 0.06518039107322693, "learning_rate": 1.8727116365471226e-05, "loss": 0.0361, "step": 120340 }, { "epoch": 0.20175, "grad_norm": 0.05954527109861374, "learning_rate": 1.8723115016937164e-05, "loss": 0.0353, "step": 120350 }, { "epoch": 0.2018, "grad_norm": 0.07108394801616669, "learning_rate": 1.8719113840010784e-05, "loss": 0.0341, "step": 120360 }, { "epoch": 0.20185, "grad_norm": 0.07770578563213348, "learning_rate": 1.8715112834801476e-05, "loss": 0.0333, "step": 120370 }, { "epoch": 0.2019, "grad_norm": 0.07711070030927658, "learning_rate": 1.8711112001418618e-05, "loss": 0.0334, "step": 120380 }, { "epoch": 0.20195, "grad_norm": 0.07317481189966202, "learning_rate": 1.87071113399716e-05, "loss": 0.0357, "step": 120390 }, { "epoch": 0.202, "grad_norm": 0.06111655384302139, "learning_rate": 1.870311085056979e-05, "loss": 0.0348, "step": 120400 }, { "epoch": 0.20205, "grad_norm": 0.062063030898571014, "learning_rate": 1.8699110533322565e-05, "loss": 0.0332, "step": 120410 }, { "epoch": 0.2021, "grad_norm": 0.05810019373893738, "learning_rate": 1.869511038833928e-05, "loss": 0.0327, "step": 120420 }, { "epoch": 0.20215, "grad_norm": 0.05780354142189026, "learning_rate": 1.869111041572932e-05, "loss": 0.0339, "step": 120430 }, { "epoch": 0.2022, "grad_norm": 0.054892897605895996, "learning_rate": 1.868711061560203e-05, "loss": 0.0333, "step": 120440 }, { "epoch": 0.20225, "grad_norm": 0.05134418606758118, "learning_rate": 1.868311098806676e-05, "loss": 0.0324, "step": 120450 }, { "epoch": 0.2023, "grad_norm": 0.05112555995583534, "learning_rate": 1.8679111533232867e-05, "loss": 0.0313, "step": 120460 }, { "epoch": 0.20235, "grad_norm": 0.06146746128797531, "learning_rate": 1.867511225120969e-05, "loss": 0.0342, "step": 120470 }, { "epoch": 0.2024, "grad_norm": 0.06157961115241051, "learning_rate": 1.8671113142106566e-05, "loss": 0.0327, "step": 120480 }, { "epoch": 0.20245, "grad_norm": 0.07634248584508896, "learning_rate": 1.866711420603283e-05, "loss": 0.0336, "step": 120490 }, { "epoch": 0.2025, "grad_norm": 0.0633772686123848, "learning_rate": 1.866311544309781e-05, "loss": 0.0331, "step": 120500 }, { "epoch": 0.20255, "grad_norm": 0.08006856590509415, "learning_rate": 1.8659116853410847e-05, "loss": 0.0356, "step": 120510 }, { "epoch": 0.2026, "grad_norm": 0.06903557479381561, "learning_rate": 1.8655118437081225e-05, "loss": 0.0347, "step": 120520 }, { "epoch": 0.20265, "grad_norm": 0.05871882662177086, "learning_rate": 1.8651120194218305e-05, "loss": 0.0335, "step": 120530 }, { "epoch": 0.2027, "grad_norm": 0.07219228148460388, "learning_rate": 1.8647122124931356e-05, "loss": 0.0332, "step": 120540 }, { "epoch": 0.20275, "grad_norm": 0.06969571113586426, "learning_rate": 1.8643124229329705e-05, "loss": 0.0323, "step": 120550 }, { "epoch": 0.2028, "grad_norm": 0.06328929215669632, "learning_rate": 1.8639126507522654e-05, "loss": 0.0332, "step": 120560 }, { "epoch": 0.20285, "grad_norm": 0.09350479394197464, "learning_rate": 1.863512895961949e-05, "loss": 0.0354, "step": 120570 }, { "epoch": 0.2029, "grad_norm": 0.05679807439446449, "learning_rate": 1.8631131585729517e-05, "loss": 0.0338, "step": 120580 }, { "epoch": 0.20295, "grad_norm": 0.05669408664107323, "learning_rate": 1.8627134385962007e-05, "loss": 0.0337, "step": 120590 }, { "epoch": 0.203, "grad_norm": 0.07240969687700272, "learning_rate": 1.862313736042625e-05, "loss": 0.0353, "step": 120600 }, { "epoch": 0.20305, "grad_norm": 0.07339102029800415, "learning_rate": 1.861914050923152e-05, "loss": 0.0381, "step": 120610 }, { "epoch": 0.2031, "grad_norm": 0.06361488997936249, "learning_rate": 1.8615143832487086e-05, "loss": 0.0336, "step": 120620 }, { "epoch": 0.20315, "grad_norm": 0.07316234707832336, "learning_rate": 1.8611147330302233e-05, "loss": 0.0333, "step": 120630 }, { "epoch": 0.2032, "grad_norm": 0.058388516306877136, "learning_rate": 1.8607151002786206e-05, "loss": 0.0333, "step": 120640 }, { "epoch": 0.20325, "grad_norm": 0.06330445408821106, "learning_rate": 1.8603154850048275e-05, "loss": 0.0344, "step": 120650 }, { "epoch": 0.2033, "grad_norm": 0.07335477322340012, "learning_rate": 1.859915887219768e-05, "loss": 0.0343, "step": 120660 }, { "epoch": 0.20335, "grad_norm": 0.07023430615663528, "learning_rate": 1.859516306934368e-05, "loss": 0.0334, "step": 120670 }, { "epoch": 0.2034, "grad_norm": 0.06397784501314163, "learning_rate": 1.8591167441595513e-05, "loss": 0.0345, "step": 120680 }, { "epoch": 0.20345, "grad_norm": 0.13211436569690704, "learning_rate": 1.858717198906242e-05, "loss": 0.0368, "step": 120690 }, { "epoch": 0.2035, "grad_norm": 0.08479201048612595, "learning_rate": 1.858317671185364e-05, "loss": 0.0345, "step": 120700 }, { "epoch": 0.20355, "grad_norm": 0.0865594744682312, "learning_rate": 1.857918161007839e-05, "loss": 0.0335, "step": 120710 }, { "epoch": 0.2036, "grad_norm": 0.07507319748401642, "learning_rate": 1.8575186683845917e-05, "loss": 0.0335, "step": 120720 }, { "epoch": 0.20365, "grad_norm": 0.07103493064641953, "learning_rate": 1.857119193326541e-05, "loss": 0.0353, "step": 120730 }, { "epoch": 0.2037, "grad_norm": 0.062294647097587585, "learning_rate": 1.8567197358446108e-05, "loss": 0.0376, "step": 120740 }, { "epoch": 0.20375, "grad_norm": 0.06791318953037262, "learning_rate": 1.8563202959497212e-05, "loss": 0.0356, "step": 120750 }, { "epoch": 0.2038, "grad_norm": 0.06171085312962532, "learning_rate": 1.855920873652793e-05, "loss": 0.0342, "step": 120760 }, { "epoch": 0.20385, "grad_norm": 0.07706128060817719, "learning_rate": 1.8555214689647466e-05, "loss": 0.0351, "step": 120770 }, { "epoch": 0.2039, "grad_norm": 0.07189098745584488, "learning_rate": 1.8551220818965004e-05, "loss": 0.034, "step": 120780 }, { "epoch": 0.20395, "grad_norm": 0.08183571696281433, "learning_rate": 1.854722712458975e-05, "loss": 0.035, "step": 120790 }, { "epoch": 0.204, "grad_norm": 0.07607299834489822, "learning_rate": 1.8543233606630874e-05, "loss": 0.0364, "step": 120800 }, { "epoch": 0.20405, "grad_norm": 0.08183683454990387, "learning_rate": 1.8539240265197562e-05, "loss": 0.0356, "step": 120810 }, { "epoch": 0.2041, "grad_norm": 0.08728110045194626, "learning_rate": 1.8535247100399012e-05, "loss": 0.0353, "step": 120820 }, { "epoch": 0.20415, "grad_norm": 0.07391119748353958, "learning_rate": 1.8531254112344356e-05, "loss": 0.0372, "step": 120830 }, { "epoch": 0.2042, "grad_norm": 0.0634394958615303, "learning_rate": 1.8527261301142796e-05, "loss": 0.0354, "step": 120840 }, { "epoch": 0.20425, "grad_norm": 0.06386538594961166, "learning_rate": 1.8523268666903475e-05, "loss": 0.0355, "step": 120850 }, { "epoch": 0.2043, "grad_norm": 0.061842963099479675, "learning_rate": 1.851927620973556e-05, "loss": 0.0341, "step": 120860 }, { "epoch": 0.20435, "grad_norm": 0.07959350943565369, "learning_rate": 1.8515283929748194e-05, "loss": 0.0402, "step": 120870 }, { "epoch": 0.2044, "grad_norm": 0.06257916241884232, "learning_rate": 1.851129182705053e-05, "loss": 0.0342, "step": 120880 }, { "epoch": 0.20445, "grad_norm": 0.06070181727409363, "learning_rate": 1.8507299901751718e-05, "loss": 0.0342, "step": 120890 }, { "epoch": 0.2045, "grad_norm": 0.07804949581623077, "learning_rate": 1.850330815396087e-05, "loss": 0.0354, "step": 120900 }, { "epoch": 0.20455, "grad_norm": 0.06420771032571793, "learning_rate": 1.8499316583787157e-05, "loss": 0.0331, "step": 120910 }, { "epoch": 0.2046, "grad_norm": 0.05669880285859108, "learning_rate": 1.8495325191339668e-05, "loss": 0.0356, "step": 120920 }, { "epoch": 0.20465, "grad_norm": 0.0698152482509613, "learning_rate": 1.8491333976727553e-05, "loss": 0.0335, "step": 120930 }, { "epoch": 0.2047, "grad_norm": 0.06292180716991425, "learning_rate": 1.8487342940059926e-05, "loss": 0.0333, "step": 120940 }, { "epoch": 0.20475, "grad_norm": 0.06293244659900665, "learning_rate": 1.8483352081445886e-05, "loss": 0.0332, "step": 120950 }, { "epoch": 0.2048, "grad_norm": 0.059083491563797, "learning_rate": 1.8479361400994567e-05, "loss": 0.0343, "step": 120960 }, { "epoch": 0.20485, "grad_norm": 0.07200410962104797, "learning_rate": 1.8475370898815043e-05, "loss": 0.0327, "step": 120970 }, { "epoch": 0.2049, "grad_norm": 0.06585627049207687, "learning_rate": 1.847138057501644e-05, "loss": 0.034, "step": 120980 }, { "epoch": 0.20495, "grad_norm": 0.07143794000148773, "learning_rate": 1.8467390429707825e-05, "loss": 0.034, "step": 120990 }, { "epoch": 0.205, "grad_norm": 0.08228740096092224, "learning_rate": 1.8463400462998302e-05, "loss": 0.0336, "step": 121000 }, { "epoch": 0.20505, "grad_norm": 0.06945434957742691, "learning_rate": 1.8459410674996973e-05, "loss": 0.0336, "step": 121010 }, { "epoch": 0.2051, "grad_norm": 0.0655989870429039, "learning_rate": 1.845542106581288e-05, "loss": 0.0346, "step": 121020 }, { "epoch": 0.20515, "grad_norm": 0.060558054596185684, "learning_rate": 1.845143163555513e-05, "loss": 0.0348, "step": 121030 }, { "epoch": 0.2052, "grad_norm": 0.06830138713121414, "learning_rate": 1.844744238433277e-05, "loss": 0.0343, "step": 121040 }, { "epoch": 0.20525, "grad_norm": 0.07038040459156036, "learning_rate": 1.8443453312254876e-05, "loss": 0.0331, "step": 121050 }, { "epoch": 0.2053, "grad_norm": 0.06014655902981758, "learning_rate": 1.843946441943051e-05, "loss": 0.0358, "step": 121060 }, { "epoch": 0.20535, "grad_norm": 0.06151202321052551, "learning_rate": 1.8435475705968712e-05, "loss": 0.0351, "step": 121070 }, { "epoch": 0.2054, "grad_norm": 0.05301975831389427, "learning_rate": 1.843148717197855e-05, "loss": 0.0339, "step": 121080 }, { "epoch": 0.20545, "grad_norm": 0.05595868080854416, "learning_rate": 1.842749881756906e-05, "loss": 0.0346, "step": 121090 }, { "epoch": 0.2055, "grad_norm": 0.062360938638448715, "learning_rate": 1.8423510642849284e-05, "loss": 0.0355, "step": 121100 }, { "epoch": 0.20555, "grad_norm": 0.06301844865083694, "learning_rate": 1.8419522647928243e-05, "loss": 0.0351, "step": 121110 }, { "epoch": 0.2056, "grad_norm": 0.060403723269701004, "learning_rate": 1.8415534832914995e-05, "loss": 0.0329, "step": 121120 }, { "epoch": 0.20565, "grad_norm": 0.0839466080069542, "learning_rate": 1.841154719791855e-05, "loss": 0.04, "step": 121130 }, { "epoch": 0.2057, "grad_norm": 0.06863157451152802, "learning_rate": 1.8407559743047924e-05, "loss": 0.034, "step": 121140 }, { "epoch": 0.20575, "grad_norm": 0.06865214556455612, "learning_rate": 1.8403572468412145e-05, "loss": 0.0341, "step": 121150 }, { "epoch": 0.2058, "grad_norm": 0.06753120571374893, "learning_rate": 1.8399585374120214e-05, "loss": 0.0337, "step": 121160 }, { "epoch": 0.20585, "grad_norm": 0.06102924793958664, "learning_rate": 1.8395598460281137e-05, "loss": 0.0339, "step": 121170 }, { "epoch": 0.2059, "grad_norm": 0.07939987629652023, "learning_rate": 1.839161172700392e-05, "loss": 0.0342, "step": 121180 }, { "epoch": 0.20595, "grad_norm": 0.06291405856609344, "learning_rate": 1.8387625174397543e-05, "loss": 0.0335, "step": 121190 }, { "epoch": 0.206, "grad_norm": 0.05723979324102402, "learning_rate": 1.8383638802571028e-05, "loss": 0.0332, "step": 121200 }, { "epoch": 0.20605, "grad_norm": 0.062456369400024414, "learning_rate": 1.837965261163333e-05, "loss": 0.0331, "step": 121210 }, { "epoch": 0.2061, "grad_norm": 0.058690495789051056, "learning_rate": 1.837566660169346e-05, "loss": 0.0325, "step": 121220 }, { "epoch": 0.20615, "grad_norm": 0.06099254637956619, "learning_rate": 1.8371680772860353e-05, "loss": 0.0339, "step": 121230 }, { "epoch": 0.2062, "grad_norm": 0.05650908127427101, "learning_rate": 1.8367695125243023e-05, "loss": 0.0333, "step": 121240 }, { "epoch": 0.20625, "grad_norm": 0.05411923676729202, "learning_rate": 1.8363709658950402e-05, "loss": 0.0345, "step": 121250 }, { "epoch": 0.2063, "grad_norm": 0.07875292003154755, "learning_rate": 1.835972437409147e-05, "loss": 0.0344, "step": 121260 }, { "epoch": 0.20635, "grad_norm": 0.07497681677341461, "learning_rate": 1.8355739270775184e-05, "loss": 0.0342, "step": 121270 }, { "epoch": 0.2064, "grad_norm": 0.07842659205198288, "learning_rate": 1.8351754349110484e-05, "loss": 0.0345, "step": 121280 }, { "epoch": 0.20645, "grad_norm": 0.05954407900571823, "learning_rate": 1.834776960920633e-05, "loss": 0.0342, "step": 121290 }, { "epoch": 0.2065, "grad_norm": 0.057787492871284485, "learning_rate": 1.8343785051171647e-05, "loss": 0.0329, "step": 121300 }, { "epoch": 0.20655, "grad_norm": 0.06499254703521729, "learning_rate": 1.8339800675115376e-05, "loss": 0.0359, "step": 121310 }, { "epoch": 0.2066, "grad_norm": 0.06317613273859024, "learning_rate": 1.8335816481146466e-05, "loss": 0.033, "step": 121320 }, { "epoch": 0.20665, "grad_norm": 0.055468276143074036, "learning_rate": 1.833183246937382e-05, "loss": 0.0328, "step": 121330 }, { "epoch": 0.2067, "grad_norm": 0.05523278936743736, "learning_rate": 1.832784863990638e-05, "loss": 0.0318, "step": 121340 }, { "epoch": 0.20675, "grad_norm": 0.06277237832546234, "learning_rate": 1.832386499285304e-05, "loss": 0.0326, "step": 121350 }, { "epoch": 0.2068, "grad_norm": 0.05014962702989578, "learning_rate": 1.8319881528322735e-05, "loss": 0.0316, "step": 121360 }, { "epoch": 0.20685, "grad_norm": 0.050259605050086975, "learning_rate": 1.831589824642435e-05, "loss": 0.0325, "step": 121370 }, { "epoch": 0.2069, "grad_norm": 0.06408250331878662, "learning_rate": 1.8311915147266796e-05, "loss": 0.0358, "step": 121380 }, { "epoch": 0.20695, "grad_norm": 0.06439071148633957, "learning_rate": 1.8307932230958975e-05, "loss": 0.0329, "step": 121390 }, { "epoch": 0.207, "grad_norm": 0.06621982157230377, "learning_rate": 1.8303949497609763e-05, "loss": 0.0338, "step": 121400 }, { "epoch": 0.20705, "grad_norm": 0.06697060167789459, "learning_rate": 1.829996694732807e-05, "loss": 0.0343, "step": 121410 }, { "epoch": 0.2071, "grad_norm": 0.06745675951242447, "learning_rate": 1.829598458022275e-05, "loss": 0.0334, "step": 121420 }, { "epoch": 0.20715, "grad_norm": 0.07169412821531296, "learning_rate": 1.8292002396402708e-05, "loss": 0.0345, "step": 121430 }, { "epoch": 0.2072, "grad_norm": 0.053990427404642105, "learning_rate": 1.8288020395976786e-05, "loss": 0.0324, "step": 121440 }, { "epoch": 0.20725, "grad_norm": 0.057537153363227844, "learning_rate": 1.8284038579053865e-05, "loss": 0.0339, "step": 121450 }, { "epoch": 0.2073, "grad_norm": 0.06586366146802902, "learning_rate": 1.8280056945742817e-05, "loss": 0.0346, "step": 121460 }, { "epoch": 0.20735, "grad_norm": 0.05934857577085495, "learning_rate": 1.8276075496152477e-05, "loss": 0.0337, "step": 121470 }, { "epoch": 0.2074, "grad_norm": 0.050682567059993744, "learning_rate": 1.8272094230391716e-05, "loss": 0.0334, "step": 121480 }, { "epoch": 0.20745, "grad_norm": 0.05052315071225166, "learning_rate": 1.8268113148569367e-05, "loss": 0.0342, "step": 121490 }, { "epoch": 0.2075, "grad_norm": 0.06679129600524902, "learning_rate": 1.826413225079427e-05, "loss": 0.0345, "step": 121500 }, { "epoch": 0.20755, "grad_norm": 0.06831441074609756, "learning_rate": 1.826015153717528e-05, "loss": 0.0334, "step": 121510 }, { "epoch": 0.2076, "grad_norm": 0.0557723343372345, "learning_rate": 1.8256171007821198e-05, "loss": 0.033, "step": 121520 }, { "epoch": 0.20765, "grad_norm": 0.07006657123565674, "learning_rate": 1.825219066284088e-05, "loss": 0.0347, "step": 121530 }, { "epoch": 0.2077, "grad_norm": 0.061841513961553574, "learning_rate": 1.8248210502343128e-05, "loss": 0.0332, "step": 121540 }, { "epoch": 0.20775, "grad_norm": 0.07461114972829819, "learning_rate": 1.824423052643677e-05, "loss": 0.0321, "step": 121550 }, { "epoch": 0.2078, "grad_norm": 0.0667690858244896, "learning_rate": 1.8240250735230607e-05, "loss": 0.0332, "step": 121560 }, { "epoch": 0.20785, "grad_norm": 0.07005876302719116, "learning_rate": 1.8236271128833448e-05, "loss": 0.0352, "step": 121570 }, { "epoch": 0.2079, "grad_norm": 0.07365118712186813, "learning_rate": 1.82322917073541e-05, "loss": 0.0332, "step": 121580 }, { "epoch": 0.20795, "grad_norm": 0.05793159827589989, "learning_rate": 1.8228312470901356e-05, "loss": 0.0326, "step": 121590 }, { "epoch": 0.208, "grad_norm": 0.06920211762189865, "learning_rate": 1.8224333419584e-05, "loss": 0.0323, "step": 121600 }, { "epoch": 0.20805, "grad_norm": 0.06781196594238281, "learning_rate": 1.822035455351082e-05, "loss": 0.0337, "step": 121610 }, { "epoch": 0.2081, "grad_norm": 0.06084830313920975, "learning_rate": 1.8216375872790608e-05, "loss": 0.0326, "step": 121620 }, { "epoch": 0.20815, "grad_norm": 0.06041372939944267, "learning_rate": 1.821239737753212e-05, "loss": 0.0342, "step": 121630 }, { "epoch": 0.2082, "grad_norm": 0.06792076677083969, "learning_rate": 1.8208419067844146e-05, "loss": 0.0325, "step": 121640 }, { "epoch": 0.20825, "grad_norm": 0.06462553143501282, "learning_rate": 1.8204440943835444e-05, "loss": 0.0337, "step": 121650 }, { "epoch": 0.2083, "grad_norm": 0.05725152790546417, "learning_rate": 1.8200463005614766e-05, "loss": 0.0333, "step": 121660 }, { "epoch": 0.20835, "grad_norm": 0.060946546494960785, "learning_rate": 1.8196485253290885e-05, "loss": 0.0337, "step": 121670 }, { "epoch": 0.2084, "grad_norm": 0.06666068732738495, "learning_rate": 1.8192507686972534e-05, "loss": 0.0335, "step": 121680 }, { "epoch": 0.20845, "grad_norm": 0.052066754549741745, "learning_rate": 1.818853030676846e-05, "loss": 0.0328, "step": 121690 }, { "epoch": 0.2085, "grad_norm": 0.06224596127867699, "learning_rate": 1.8184553112787428e-05, "loss": 0.0332, "step": 121700 }, { "epoch": 0.20855, "grad_norm": 0.057339563965797424, "learning_rate": 1.8180576105138135e-05, "loss": 0.033, "step": 121710 }, { "epoch": 0.2086, "grad_norm": 0.05593106895685196, "learning_rate": 1.8176599283929342e-05, "loss": 0.0341, "step": 121720 }, { "epoch": 0.20865, "grad_norm": 0.06946161389350891, "learning_rate": 1.817262264926975e-05, "loss": 0.0341, "step": 121730 }, { "epoch": 0.2087, "grad_norm": 0.0783376544713974, "learning_rate": 1.8168646201268096e-05, "loss": 0.0337, "step": 121740 }, { "epoch": 0.20875, "grad_norm": 0.06264694035053253, "learning_rate": 1.8164669940033087e-05, "loss": 0.0376, "step": 121750 }, { "epoch": 0.2088, "grad_norm": 0.05206596851348877, "learning_rate": 1.8160693865673433e-05, "loss": 0.0337, "step": 121760 }, { "epoch": 0.20885, "grad_norm": 0.060494888573884964, "learning_rate": 1.8156717978297845e-05, "loss": 0.0324, "step": 121770 }, { "epoch": 0.2089, "grad_norm": 0.06030144542455673, "learning_rate": 1.815274227801501e-05, "loss": 0.0345, "step": 121780 }, { "epoch": 0.20895, "grad_norm": 0.057702165096998215, "learning_rate": 1.8148766764933634e-05, "loss": 0.0332, "step": 121790 }, { "epoch": 0.209, "grad_norm": 0.06220393255352974, "learning_rate": 1.8144791439162397e-05, "loss": 0.0337, "step": 121800 }, { "epoch": 0.20905, "grad_norm": 0.061017636209726334, "learning_rate": 1.8140816300809987e-05, "loss": 0.0346, "step": 121810 }, { "epoch": 0.2091, "grad_norm": 0.07539673149585724, "learning_rate": 1.8136841349985077e-05, "loss": 0.0342, "step": 121820 }, { "epoch": 0.20915, "grad_norm": 0.06629452854394913, "learning_rate": 1.813286658679635e-05, "loss": 0.0346, "step": 121830 }, { "epoch": 0.2092, "grad_norm": 0.0765426754951477, "learning_rate": 1.8128892011352478e-05, "loss": 0.0362, "step": 121840 }, { "epoch": 0.20925, "grad_norm": 0.12590640783309937, "learning_rate": 1.812491762376211e-05, "loss": 0.0351, "step": 121850 }, { "epoch": 0.2093, "grad_norm": 0.0840090662240982, "learning_rate": 1.8120943424133915e-05, "loss": 0.0357, "step": 121860 }, { "epoch": 0.20935, "grad_norm": 0.08788575232028961, "learning_rate": 1.811696941257654e-05, "loss": 0.0352, "step": 121870 }, { "epoch": 0.2094, "grad_norm": 0.06937284022569656, "learning_rate": 1.811299558919864e-05, "loss": 0.0346, "step": 121880 }, { "epoch": 0.20945, "grad_norm": 0.05698763206601143, "learning_rate": 1.8109021954108845e-05, "loss": 0.034, "step": 121890 }, { "epoch": 0.2095, "grad_norm": 0.06224209442734718, "learning_rate": 1.8105048507415797e-05, "loss": 0.0341, "step": 121900 }, { "epoch": 0.20955, "grad_norm": 0.09045542776584625, "learning_rate": 1.810107524922815e-05, "loss": 0.0346, "step": 121910 }, { "epoch": 0.2096, "grad_norm": 0.0724298283457756, "learning_rate": 1.8097102179654498e-05, "loss": 0.0359, "step": 121920 }, { "epoch": 0.20965, "grad_norm": 0.06703891605138779, "learning_rate": 1.8093129298803494e-05, "loss": 0.0341, "step": 121930 }, { "epoch": 0.2097, "grad_norm": 0.06555094569921494, "learning_rate": 1.8089156606783726e-05, "loss": 0.0342, "step": 121940 }, { "epoch": 0.20975, "grad_norm": 0.055910781025886536, "learning_rate": 1.8085184103703823e-05, "loss": 0.0328, "step": 121950 }, { "epoch": 0.2098, "grad_norm": 0.07043515890836716, "learning_rate": 1.8081211789672393e-05, "loss": 0.0337, "step": 121960 }, { "epoch": 0.20985, "grad_norm": 0.07362595200538635, "learning_rate": 1.807723966479803e-05, "loss": 0.0349, "step": 121970 }, { "epoch": 0.2099, "grad_norm": 0.06403699517250061, "learning_rate": 1.807326772918934e-05, "loss": 0.0334, "step": 121980 }, { "epoch": 0.20995, "grad_norm": 0.06171891465783119, "learning_rate": 1.8069295982954904e-05, "loss": 0.0334, "step": 121990 }, { "epoch": 0.21, "grad_norm": 0.0766458809375763, "learning_rate": 1.8065324426203313e-05, "loss": 0.0327, "step": 122000 }, { "epoch": 0.21005, "grad_norm": 0.08834163844585419, "learning_rate": 1.8061353059043144e-05, "loss": 0.0341, "step": 122010 }, { "epoch": 0.2101, "grad_norm": 0.08196330815553665, "learning_rate": 1.8057381881582973e-05, "loss": 0.0336, "step": 122020 }, { "epoch": 0.21015, "grad_norm": 0.057843003422021866, "learning_rate": 1.805341089393138e-05, "loss": 0.0322, "step": 122030 }, { "epoch": 0.2102, "grad_norm": 0.056796714663505554, "learning_rate": 1.804944009619692e-05, "loss": 0.0342, "step": 122040 }, { "epoch": 0.21025, "grad_norm": 0.06158505007624626, "learning_rate": 1.804546948848816e-05, "loss": 0.0347, "step": 122050 }, { "epoch": 0.2103, "grad_norm": 0.07728610932826996, "learning_rate": 1.8041499070913646e-05, "loss": 0.0342, "step": 122060 }, { "epoch": 0.21035, "grad_norm": 0.06384309381246567, "learning_rate": 1.803752884358194e-05, "loss": 0.033, "step": 122070 }, { "epoch": 0.2104, "grad_norm": 0.05880480259656906, "learning_rate": 1.8033558806601576e-05, "loss": 0.0334, "step": 122080 }, { "epoch": 0.21045, "grad_norm": 0.053635504096746445, "learning_rate": 1.8029588960081097e-05, "loss": 0.0326, "step": 122090 }, { "epoch": 0.2105, "grad_norm": 0.05860619246959686, "learning_rate": 1.8025619304129037e-05, "loss": 0.0341, "step": 122100 }, { "epoch": 0.21055, "grad_norm": 0.06333498656749725, "learning_rate": 1.802164983885392e-05, "loss": 0.0341, "step": 122110 }, { "epoch": 0.2106, "grad_norm": 0.06388422846794128, "learning_rate": 1.801768056436429e-05, "loss": 0.033, "step": 122120 }, { "epoch": 0.21065, "grad_norm": 0.06269916892051697, "learning_rate": 1.801371148076863e-05, "loss": 0.033, "step": 122130 }, { "epoch": 0.2107, "grad_norm": 0.06834321469068527, "learning_rate": 1.8009742588175484e-05, "loss": 0.0333, "step": 122140 }, { "epoch": 0.21075, "grad_norm": 0.05867002159357071, "learning_rate": 1.8005773886693353e-05, "loss": 0.0341, "step": 122150 }, { "epoch": 0.2108, "grad_norm": 0.07019494473934174, "learning_rate": 1.800180537643073e-05, "loss": 0.0328, "step": 122160 }, { "epoch": 0.21085, "grad_norm": 0.06627378612756729, "learning_rate": 1.7997837057496126e-05, "loss": 0.0337, "step": 122170 }, { "epoch": 0.2109, "grad_norm": 0.05703491345047951, "learning_rate": 1.7993868929998022e-05, "loss": 0.0328, "step": 122180 }, { "epoch": 0.21095, "grad_norm": 0.05551630258560181, "learning_rate": 1.7989900994044913e-05, "loss": 0.0325, "step": 122190 }, { "epoch": 0.211, "grad_norm": 0.05379296839237213, "learning_rate": 1.798593324974527e-05, "loss": 0.0352, "step": 122200 }, { "epoch": 0.21105, "grad_norm": 0.06293535977602005, "learning_rate": 1.7981965697207573e-05, "loss": 0.033, "step": 122210 }, { "epoch": 0.2111, "grad_norm": 0.06423439085483551, "learning_rate": 1.7977998336540313e-05, "loss": 0.0328, "step": 122220 }, { "epoch": 0.21115, "grad_norm": 0.06156112998723984, "learning_rate": 1.7974031167851924e-05, "loss": 0.0336, "step": 122230 }, { "epoch": 0.2112, "grad_norm": 0.06134914979338646, "learning_rate": 1.7970064191250896e-05, "loss": 0.0337, "step": 122240 }, { "epoch": 0.21125, "grad_norm": 0.06287740916013718, "learning_rate": 1.796609740684567e-05, "loss": 0.0333, "step": 122250 }, { "epoch": 0.2113, "grad_norm": 0.055571284145116806, "learning_rate": 1.7962130814744696e-05, "loss": 0.0331, "step": 122260 }, { "epoch": 0.21135, "grad_norm": 0.059910066425800323, "learning_rate": 1.795816441505642e-05, "loss": 0.0354, "step": 122270 }, { "epoch": 0.2114, "grad_norm": 0.056272391229867935, "learning_rate": 1.7954198207889285e-05, "loss": 0.0338, "step": 122280 }, { "epoch": 0.21145, "grad_norm": 0.050958115607500076, "learning_rate": 1.7950232193351724e-05, "loss": 0.033, "step": 122290 }, { "epoch": 0.2115, "grad_norm": 0.05666881054639816, "learning_rate": 1.7946266371552166e-05, "loss": 0.0334, "step": 122300 }, { "epoch": 0.21155, "grad_norm": 0.09566666930913925, "learning_rate": 1.794230074259904e-05, "loss": 0.0351, "step": 122310 }, { "epoch": 0.2116, "grad_norm": 0.07883096486330032, "learning_rate": 1.7938335306600746e-05, "loss": 0.0347, "step": 122320 }, { "epoch": 0.21165, "grad_norm": 0.06914518028497696, "learning_rate": 1.793437006366572e-05, "loss": 0.0341, "step": 122330 }, { "epoch": 0.2117, "grad_norm": 0.08925186842679977, "learning_rate": 1.793040501390237e-05, "loss": 0.0349, "step": 122340 }, { "epoch": 0.21175, "grad_norm": 0.06953421980142593, "learning_rate": 1.792644015741908e-05, "loss": 0.0347, "step": 122350 }, { "epoch": 0.2118, "grad_norm": 0.0675453245639801, "learning_rate": 1.7922475494324266e-05, "loss": 0.0344, "step": 122360 }, { "epoch": 0.21185, "grad_norm": 0.07842274755239487, "learning_rate": 1.791851102472631e-05, "loss": 0.0346, "step": 122370 }, { "epoch": 0.2119, "grad_norm": 0.07493267953395844, "learning_rate": 1.7914546748733607e-05, "loss": 0.035, "step": 122380 }, { "epoch": 0.21195, "grad_norm": 0.061492398381233215, "learning_rate": 1.7910582666454527e-05, "loss": 0.0345, "step": 122390 }, { "epoch": 0.212, "grad_norm": 0.06904582679271698, "learning_rate": 1.7906618777997446e-05, "loss": 0.0367, "step": 122400 }, { "epoch": 0.21205, "grad_norm": 0.0825248658657074, "learning_rate": 1.7902655083470764e-05, "loss": 0.0349, "step": 122410 }, { "epoch": 0.2121, "grad_norm": 0.06248489394783974, "learning_rate": 1.7898691582982807e-05, "loss": 0.0336, "step": 122420 }, { "epoch": 0.21215, "grad_norm": 0.08642593026161194, "learning_rate": 1.789472827664197e-05, "loss": 0.034, "step": 122430 }, { "epoch": 0.2122, "grad_norm": 0.059294890612363815, "learning_rate": 1.7890765164556576e-05, "loss": 0.0341, "step": 122440 }, { "epoch": 0.21225, "grad_norm": 0.0638987347483635, "learning_rate": 1.7886802246835005e-05, "loss": 0.0341, "step": 122450 }, { "epoch": 0.2123, "grad_norm": 0.05983492732048035, "learning_rate": 1.788283952358558e-05, "loss": 0.0341, "step": 122460 }, { "epoch": 0.21235, "grad_norm": 0.06589211523532867, "learning_rate": 1.7878876994916653e-05, "loss": 0.0348, "step": 122470 }, { "epoch": 0.2124, "grad_norm": 0.054842691868543625, "learning_rate": 1.7874914660936555e-05, "loss": 0.0341, "step": 122480 }, { "epoch": 0.21245, "grad_norm": 0.06396602094173431, "learning_rate": 1.7870952521753607e-05, "loss": 0.0356, "step": 122490 }, { "epoch": 0.2125, "grad_norm": 0.06473995745182037, "learning_rate": 1.7866990577476146e-05, "loss": 0.0351, "step": 122500 }, { "epoch": 0.21255, "grad_norm": 0.07215896993875504, "learning_rate": 1.786302882821248e-05, "loss": 0.0349, "step": 122510 }, { "epoch": 0.2126, "grad_norm": 0.062243200838565826, "learning_rate": 1.7859067274070916e-05, "loss": 0.0346, "step": 122520 }, { "epoch": 0.21265, "grad_norm": 0.06540673226118088, "learning_rate": 1.785510591515978e-05, "loss": 0.0333, "step": 122530 }, { "epoch": 0.2127, "grad_norm": 0.06943630427122116, "learning_rate": 1.7851144751587363e-05, "loss": 0.034, "step": 122540 }, { "epoch": 0.21275, "grad_norm": 0.05753122642636299, "learning_rate": 1.7847183783461967e-05, "loss": 0.0347, "step": 122550 }, { "epoch": 0.2128, "grad_norm": 0.06939942389726639, "learning_rate": 1.784322301089187e-05, "loss": 0.0344, "step": 122560 }, { "epoch": 0.21285, "grad_norm": 0.0671052634716034, "learning_rate": 1.7839262433985377e-05, "loss": 0.0352, "step": 122570 }, { "epoch": 0.2129, "grad_norm": 0.07446414977312088, "learning_rate": 1.783530205285075e-05, "loss": 0.0344, "step": 122580 }, { "epoch": 0.21295, "grad_norm": 0.056451063603162766, "learning_rate": 1.7831341867596273e-05, "loss": 0.0336, "step": 122590 }, { "epoch": 0.213, "grad_norm": 0.05866613984107971, "learning_rate": 1.7827381878330225e-05, "loss": 0.036, "step": 122600 }, { "epoch": 0.21305, "grad_norm": 0.07412475347518921, "learning_rate": 1.782342208516085e-05, "loss": 0.0336, "step": 122610 }, { "epoch": 0.2131, "grad_norm": 0.0759148970246315, "learning_rate": 1.7819462488196435e-05, "loss": 0.0346, "step": 122620 }, { "epoch": 0.21315, "grad_norm": 0.06466677784919739, "learning_rate": 1.7815503087545203e-05, "loss": 0.0334, "step": 122630 }, { "epoch": 0.2132, "grad_norm": 0.08377153426408768, "learning_rate": 1.781154388331543e-05, "loss": 0.0359, "step": 122640 }, { "epoch": 0.21325, "grad_norm": 0.06291453540325165, "learning_rate": 1.7807584875615334e-05, "loss": 0.0356, "step": 122650 }, { "epoch": 0.2133, "grad_norm": 0.05173429474234581, "learning_rate": 1.7803626064553168e-05, "loss": 0.0345, "step": 122660 }, { "epoch": 0.21335, "grad_norm": 0.0746605321764946, "learning_rate": 1.7799667450237166e-05, "loss": 0.0342, "step": 122670 }, { "epoch": 0.2134, "grad_norm": 0.06961067020893097, "learning_rate": 1.779570903277555e-05, "loss": 0.0351, "step": 122680 }, { "epoch": 0.21345, "grad_norm": 0.08081655949354172, "learning_rate": 1.7791750812276547e-05, "loss": 0.037, "step": 122690 }, { "epoch": 0.2135, "grad_norm": 0.06566688418388367, "learning_rate": 1.778779278884836e-05, "loss": 0.0342, "step": 122700 }, { "epoch": 0.21355, "grad_norm": 0.06508004665374756, "learning_rate": 1.778383496259921e-05, "loss": 0.034, "step": 122710 }, { "epoch": 0.2136, "grad_norm": 0.0744289830327034, "learning_rate": 1.7779877333637312e-05, "loss": 0.0354, "step": 122720 }, { "epoch": 0.21365, "grad_norm": 0.06905969232320786, "learning_rate": 1.7775919902070836e-05, "loss": 0.0348, "step": 122730 }, { "epoch": 0.2137, "grad_norm": 0.06146163120865822, "learning_rate": 1.7771962668008012e-05, "loss": 0.0349, "step": 122740 }, { "epoch": 0.21375, "grad_norm": 0.06307521462440491, "learning_rate": 1.776800563155701e-05, "loss": 0.0347, "step": 122750 }, { "epoch": 0.2138, "grad_norm": 0.053822193294763565, "learning_rate": 1.7764048792826016e-05, "loss": 0.034, "step": 122760 }, { "epoch": 0.21385, "grad_norm": 0.06739028543233871, "learning_rate": 1.7760092151923207e-05, "loss": 0.035, "step": 122770 }, { "epoch": 0.2139, "grad_norm": 0.047717057168483734, "learning_rate": 1.775613570895676e-05, "loss": 0.0342, "step": 122780 }, { "epoch": 0.21395, "grad_norm": 0.13976827263832092, "learning_rate": 1.7752179464034845e-05, "loss": 0.0379, "step": 122790 }, { "epoch": 0.214, "grad_norm": 0.09999356418848038, "learning_rate": 1.7748223417265618e-05, "loss": 0.035, "step": 122800 }, { "epoch": 0.21405, "grad_norm": 0.062376927584409714, "learning_rate": 1.774426756875724e-05, "loss": 0.0338, "step": 122810 }, { "epoch": 0.2141, "grad_norm": 0.08710140734910965, "learning_rate": 1.7740311918617853e-05, "loss": 0.0352, "step": 122820 }, { "epoch": 0.21415, "grad_norm": 0.06661096215248108, "learning_rate": 1.773635646695562e-05, "loss": 0.0338, "step": 122830 }, { "epoch": 0.2142, "grad_norm": 0.0639859214425087, "learning_rate": 1.7732401213878664e-05, "loss": 0.0341, "step": 122840 }, { "epoch": 0.21425, "grad_norm": 0.06335785239934921, "learning_rate": 1.7728446159495132e-05, "loss": 0.0336, "step": 122850 }, { "epoch": 0.2143, "grad_norm": 0.06624232232570648, "learning_rate": 1.7724491303913156e-05, "loss": 0.0352, "step": 122860 }, { "epoch": 0.21435, "grad_norm": 0.06434208154678345, "learning_rate": 1.772053664724085e-05, "loss": 0.0352, "step": 122870 }, { "epoch": 0.2144, "grad_norm": 0.06184322386980057, "learning_rate": 1.771658218958634e-05, "loss": 0.036, "step": 122880 }, { "epoch": 0.21445, "grad_norm": 0.07136630266904831, "learning_rate": 1.7712627931057732e-05, "loss": 0.0352, "step": 122890 }, { "epoch": 0.2145, "grad_norm": 0.09077708423137665, "learning_rate": 1.7708673871763136e-05, "loss": 0.035, "step": 122900 }, { "epoch": 0.21455, "grad_norm": 0.07337837666273117, "learning_rate": 1.770472001181067e-05, "loss": 0.035, "step": 122910 }, { "epoch": 0.2146, "grad_norm": 0.06152607500553131, "learning_rate": 1.77007663513084e-05, "loss": 0.0344, "step": 122920 }, { "epoch": 0.21465, "grad_norm": 0.08395437896251678, "learning_rate": 1.7696812890364455e-05, "loss": 0.0358, "step": 122930 }, { "epoch": 0.2147, "grad_norm": 0.0598633699119091, "learning_rate": 1.769285962908689e-05, "loss": 0.0377, "step": 122940 }, { "epoch": 0.21475, "grad_norm": 0.07186885178089142, "learning_rate": 1.76889065675838e-05, "loss": 0.034, "step": 122950 }, { "epoch": 0.2148, "grad_norm": 0.05980784073472023, "learning_rate": 1.7684953705963258e-05, "loss": 0.0324, "step": 122960 }, { "epoch": 0.21485, "grad_norm": 0.050015125423669815, "learning_rate": 1.768100104433333e-05, "loss": 0.0333, "step": 122970 }, { "epoch": 0.2149, "grad_norm": 0.0592203326523304, "learning_rate": 1.767704858280209e-05, "loss": 0.0337, "step": 122980 }, { "epoch": 0.21495, "grad_norm": 0.0700206533074379, "learning_rate": 1.767309632147759e-05, "loss": 0.0363, "step": 122990 }, { "epoch": 0.215, "grad_norm": 0.06486654281616211, "learning_rate": 1.7669144260467883e-05, "loss": 0.0338, "step": 123000 }, { "epoch": 0.21505, "grad_norm": 0.06818637251853943, "learning_rate": 1.7665192399881015e-05, "loss": 0.0343, "step": 123010 }, { "epoch": 0.2151, "grad_norm": 0.06002725660800934, "learning_rate": 1.7661240739825036e-05, "loss": 0.0325, "step": 123020 }, { "epoch": 0.21515, "grad_norm": 0.06803982704877853, "learning_rate": 1.7657289280407968e-05, "loss": 0.0352, "step": 123030 }, { "epoch": 0.2152, "grad_norm": 0.06577999144792557, "learning_rate": 1.7653338021737857e-05, "loss": 0.0331, "step": 123040 }, { "epoch": 0.21525, "grad_norm": 0.06379947811365128, "learning_rate": 1.764938696392273e-05, "loss": 0.0328, "step": 123050 }, { "epoch": 0.2153, "grad_norm": 0.05803842097520828, "learning_rate": 1.76454361070706e-05, "loss": 0.0337, "step": 123060 }, { "epoch": 0.21535, "grad_norm": 0.08368424326181412, "learning_rate": 1.7641485451289484e-05, "loss": 0.0349, "step": 123070 }, { "epoch": 0.2154, "grad_norm": 0.07134660333395004, "learning_rate": 1.7637534996687394e-05, "loss": 0.0337, "step": 123080 }, { "epoch": 0.21545, "grad_norm": 0.07949186861515045, "learning_rate": 1.7633584743372326e-05, "loss": 0.035, "step": 123090 }, { "epoch": 0.2155, "grad_norm": 0.061419837176799774, "learning_rate": 1.7629634691452285e-05, "loss": 0.0328, "step": 123100 }, { "epoch": 0.21555, "grad_norm": 0.054267480969429016, "learning_rate": 1.7625684841035255e-05, "loss": 0.0339, "step": 123110 }, { "epoch": 0.2156, "grad_norm": 0.05243111029267311, "learning_rate": 1.762173519222925e-05, "loss": 0.0332, "step": 123120 }, { "epoch": 0.21565, "grad_norm": 0.06539767235517502, "learning_rate": 1.7617785745142214e-05, "loss": 0.0339, "step": 123130 }, { "epoch": 0.2157, "grad_norm": 0.07243485003709793, "learning_rate": 1.7613836499882158e-05, "loss": 0.0346, "step": 123140 }, { "epoch": 0.21575, "grad_norm": 0.07234058529138565, "learning_rate": 1.7609887456557023e-05, "loss": 0.0345, "step": 123150 }, { "epoch": 0.2158, "grad_norm": 0.11803265661001205, "learning_rate": 1.7605938615274793e-05, "loss": 0.0338, "step": 123160 }, { "epoch": 0.21585, "grad_norm": 0.11944836378097534, "learning_rate": 1.760198997614343e-05, "loss": 0.0354, "step": 123170 }, { "epoch": 0.2159, "grad_norm": 0.08904100209474564, "learning_rate": 1.7598041539270877e-05, "loss": 0.0345, "step": 123180 }, { "epoch": 0.21595, "grad_norm": 0.0659065842628479, "learning_rate": 1.7594093304765093e-05, "loss": 0.0358, "step": 123190 }, { "epoch": 0.216, "grad_norm": 0.08313240110874176, "learning_rate": 1.7590145272734007e-05, "loss": 0.0356, "step": 123200 }, { "epoch": 0.21605, "grad_norm": 0.05321460962295532, "learning_rate": 1.7586197443285575e-05, "loss": 0.0342, "step": 123210 }, { "epoch": 0.2161, "grad_norm": 0.06692170351743698, "learning_rate": 1.7582249816527706e-05, "loss": 0.0352, "step": 123220 }, { "epoch": 0.21615, "grad_norm": 0.06521233916282654, "learning_rate": 1.7578302392568342e-05, "loss": 0.0351, "step": 123230 }, { "epoch": 0.2162, "grad_norm": 0.06535640358924866, "learning_rate": 1.7574355171515413e-05, "loss": 0.0343, "step": 123240 }, { "epoch": 0.21625, "grad_norm": 0.06495814770460129, "learning_rate": 1.757040815347682e-05, "loss": 0.0377, "step": 123250 }, { "epoch": 0.2163, "grad_norm": 0.07342529296875, "learning_rate": 1.756646133856048e-05, "loss": 0.0357, "step": 123260 }, { "epoch": 0.21635, "grad_norm": 0.06682030856609344, "learning_rate": 1.7562514726874288e-05, "loss": 0.0338, "step": 123270 }, { "epoch": 0.2164, "grad_norm": 0.05626925081014633, "learning_rate": 1.7558568318526154e-05, "loss": 0.0341, "step": 123280 }, { "epoch": 0.21645, "grad_norm": 0.069165900349617, "learning_rate": 1.7554622113623964e-05, "loss": 0.0372, "step": 123290 }, { "epoch": 0.2165, "grad_norm": 0.05860066041350365, "learning_rate": 1.7550676112275605e-05, "loss": 0.0335, "step": 123300 }, { "epoch": 0.21655, "grad_norm": 0.057062942534685135, "learning_rate": 1.7546730314588973e-05, "loss": 0.034, "step": 123310 }, { "epoch": 0.2166, "grad_norm": 0.05711813271045685, "learning_rate": 1.754278472067192e-05, "loss": 0.0342, "step": 123320 }, { "epoch": 0.21665, "grad_norm": 0.06175006553530693, "learning_rate": 1.7538839330632344e-05, "loss": 0.0333, "step": 123330 }, { "epoch": 0.2167, "grad_norm": 0.058298323303461075, "learning_rate": 1.7534894144578086e-05, "loss": 0.0337, "step": 123340 }, { "epoch": 0.21675, "grad_norm": 0.06874939799308777, "learning_rate": 1.7530949162617023e-05, "loss": 0.0345, "step": 123350 }, { "epoch": 0.2168, "grad_norm": 0.05856994166970253, "learning_rate": 1.752700438485701e-05, "loss": 0.0366, "step": 123360 }, { "epoch": 0.21685, "grad_norm": 0.07017688453197479, "learning_rate": 1.7523059811405877e-05, "loss": 0.0352, "step": 123370 }, { "epoch": 0.2169, "grad_norm": 0.07073337584733963, "learning_rate": 1.7519115442371496e-05, "loss": 0.0346, "step": 123380 }, { "epoch": 0.21695, "grad_norm": 0.06469359248876572, "learning_rate": 1.7515171277861676e-05, "loss": 0.0335, "step": 123390 }, { "epoch": 0.217, "grad_norm": 0.0709473267197609, "learning_rate": 1.751122731798427e-05, "loss": 0.0358, "step": 123400 }, { "epoch": 0.21705, "grad_norm": 0.07505214214324951, "learning_rate": 1.750728356284709e-05, "loss": 0.0351, "step": 123410 }, { "epoch": 0.2171, "grad_norm": 0.06235995888710022, "learning_rate": 1.7503340012557953e-05, "loss": 0.0347, "step": 123420 }, { "epoch": 0.21715, "grad_norm": 0.0804169625043869, "learning_rate": 1.7499396667224705e-05, "loss": 0.0362, "step": 123430 }, { "epoch": 0.2172, "grad_norm": 0.07404050976037979, "learning_rate": 1.7495453526955114e-05, "loss": 0.0362, "step": 123440 }, { "epoch": 0.21725, "grad_norm": 0.07752832025289536, "learning_rate": 1.7491510591857015e-05, "loss": 0.0386, "step": 123450 }, { "epoch": 0.2173, "grad_norm": 0.0669313594698906, "learning_rate": 1.7487567862038195e-05, "loss": 0.0365, "step": 123460 }, { "epoch": 0.21735, "grad_norm": 0.06261171400547028, "learning_rate": 1.748362533760645e-05, "loss": 0.035, "step": 123470 }, { "epoch": 0.2174, "grad_norm": 0.0605204738676548, "learning_rate": 1.7479683018669556e-05, "loss": 0.0347, "step": 123480 }, { "epoch": 0.21745, "grad_norm": 0.06800848990678787, "learning_rate": 1.7475740905335308e-05, "loss": 0.0346, "step": 123490 }, { "epoch": 0.2175, "grad_norm": 0.06989282369613647, "learning_rate": 1.747179899771148e-05, "loss": 0.0361, "step": 123500 }, { "epoch": 0.21755, "grad_norm": 0.07043875008821487, "learning_rate": 1.7467857295905836e-05, "loss": 0.0341, "step": 123510 }, { "epoch": 0.2176, "grad_norm": 0.05971217155456543, "learning_rate": 1.7463915800026144e-05, "loss": 0.0333, "step": 123520 }, { "epoch": 0.21765, "grad_norm": 0.05468961223959923, "learning_rate": 1.7459974510180156e-05, "loss": 0.0329, "step": 123530 }, { "epoch": 0.2177, "grad_norm": 0.08345212787389755, "learning_rate": 1.7456033426475638e-05, "loss": 0.0352, "step": 123540 }, { "epoch": 0.21775, "grad_norm": 0.07348014414310455, "learning_rate": 1.745209254902034e-05, "loss": 0.034, "step": 123550 }, { "epoch": 0.2178, "grad_norm": 0.07218588143587112, "learning_rate": 1.7448151877921985e-05, "loss": 0.0336, "step": 123560 }, { "epoch": 0.21785, "grad_norm": 0.06379861384630203, "learning_rate": 1.7444211413288325e-05, "loss": 0.0347, "step": 123570 }, { "epoch": 0.2179, "grad_norm": 0.07845243811607361, "learning_rate": 1.7440271155227082e-05, "loss": 0.0351, "step": 123580 }, { "epoch": 0.21795, "grad_norm": 0.05751929059624672, "learning_rate": 1.7436331103845995e-05, "loss": 0.0341, "step": 123590 }, { "epoch": 0.218, "grad_norm": 0.06091049313545227, "learning_rate": 1.7432391259252768e-05, "loss": 0.0335, "step": 123600 }, { "epoch": 0.21805, "grad_norm": 0.06685923784971237, "learning_rate": 1.7428451621555115e-05, "loss": 0.0371, "step": 123610 }, { "epoch": 0.2181, "grad_norm": 0.062011849135160446, "learning_rate": 1.7424512190860764e-05, "loss": 0.0326, "step": 123620 }, { "epoch": 0.21815, "grad_norm": 0.06231192871928215, "learning_rate": 1.742057296727739e-05, "loss": 0.0337, "step": 123630 }, { "epoch": 0.2182, "grad_norm": 0.07024695724248886, "learning_rate": 1.741663395091272e-05, "loss": 0.0342, "step": 123640 }, { "epoch": 0.21825, "grad_norm": 0.07106582075357437, "learning_rate": 1.741269514187441e-05, "loss": 0.0337, "step": 123650 }, { "epoch": 0.2183, "grad_norm": 0.06409791857004166, "learning_rate": 1.740875654027018e-05, "loss": 0.0336, "step": 123660 }, { "epoch": 0.21835, "grad_norm": 0.052089840173721313, "learning_rate": 1.7404818146207684e-05, "loss": 0.0336, "step": 123670 }, { "epoch": 0.2184, "grad_norm": 0.05470779910683632, "learning_rate": 1.740087995979461e-05, "loss": 0.0332, "step": 123680 }, { "epoch": 0.21845, "grad_norm": 0.057459279894828796, "learning_rate": 1.7396941981138624e-05, "loss": 0.0347, "step": 123690 }, { "epoch": 0.2185, "grad_norm": 0.06484170258045197, "learning_rate": 1.7393004210347387e-05, "loss": 0.0334, "step": 123700 }, { "epoch": 0.21855, "grad_norm": 0.06167706847190857, "learning_rate": 1.7389066647528556e-05, "loss": 0.0347, "step": 123710 }, { "epoch": 0.2186, "grad_norm": 0.05439428612589836, "learning_rate": 1.7385129292789786e-05, "loss": 0.0346, "step": 123720 }, { "epoch": 0.21865, "grad_norm": 0.052550043910741806, "learning_rate": 1.738119214623871e-05, "loss": 0.0346, "step": 123730 }, { "epoch": 0.2187, "grad_norm": 0.049942102283239365, "learning_rate": 1.737725520798299e-05, "loss": 0.0349, "step": 123740 }, { "epoch": 0.21875, "grad_norm": 0.0705181211233139, "learning_rate": 1.7373318478130245e-05, "loss": 0.0351, "step": 123750 }, { "epoch": 0.2188, "grad_norm": 0.08208610117435455, "learning_rate": 1.7369381956788114e-05, "loss": 0.0338, "step": 123760 }, { "epoch": 0.21885, "grad_norm": 0.06833874434232712, "learning_rate": 1.7365445644064207e-05, "loss": 0.0342, "step": 123770 }, { "epoch": 0.2189, "grad_norm": 0.07863493263721466, "learning_rate": 1.736150954006615e-05, "loss": 0.0345, "step": 123780 }, { "epoch": 0.21895, "grad_norm": 0.05749135464429855, "learning_rate": 1.7357573644901552e-05, "loss": 0.0335, "step": 123790 }, { "epoch": 0.219, "grad_norm": 0.060620639473199844, "learning_rate": 1.735363795867802e-05, "loss": 0.0356, "step": 123800 }, { "epoch": 0.21905, "grad_norm": 0.06407640129327774, "learning_rate": 1.7349702481503156e-05, "loss": 0.034, "step": 123810 }, { "epoch": 0.2191, "grad_norm": 0.056964900344610214, "learning_rate": 1.734576721348454e-05, "loss": 0.0333, "step": 123820 }, { "epoch": 0.21915, "grad_norm": 0.10963110625743866, "learning_rate": 1.7341832154729794e-05, "loss": 0.035, "step": 123830 }, { "epoch": 0.2192, "grad_norm": 0.08259830623865128, "learning_rate": 1.7337897305346457e-05, "loss": 0.0341, "step": 123840 }, { "epoch": 0.21925, "grad_norm": 0.07771535217761993, "learning_rate": 1.7333962665442148e-05, "loss": 0.0338, "step": 123850 }, { "epoch": 0.2193, "grad_norm": 0.065434031188488, "learning_rate": 1.7330028235124408e-05, "loss": 0.0341, "step": 123860 }, { "epoch": 0.21935, "grad_norm": 0.06774911284446716, "learning_rate": 1.7326094014500815e-05, "loss": 0.0341, "step": 123870 }, { "epoch": 0.2194, "grad_norm": 0.0658387616276741, "learning_rate": 1.7322160003678934e-05, "loss": 0.0344, "step": 123880 }, { "epoch": 0.21945, "grad_norm": 0.06059543788433075, "learning_rate": 1.7318226202766312e-05, "loss": 0.0326, "step": 123890 }, { "epoch": 0.2195, "grad_norm": 0.06279711425304413, "learning_rate": 1.73142926118705e-05, "loss": 0.0337, "step": 123900 }, { "epoch": 0.21955, "grad_norm": 0.07527292519807816, "learning_rate": 1.7310359231099042e-05, "loss": 0.0337, "step": 123910 }, { "epoch": 0.2196, "grad_norm": 0.07258503139019012, "learning_rate": 1.7306426060559463e-05, "loss": 0.035, "step": 123920 }, { "epoch": 0.21965, "grad_norm": 0.07843124866485596, "learning_rate": 1.7302493100359323e-05, "loss": 0.0359, "step": 123930 }, { "epoch": 0.2197, "grad_norm": 0.07263282686471939, "learning_rate": 1.7298560350606115e-05, "loss": 0.0358, "step": 123940 }, { "epoch": 0.21975, "grad_norm": 0.0668807402253151, "learning_rate": 1.729462781140738e-05, "loss": 0.0336, "step": 123950 }, { "epoch": 0.2198, "grad_norm": 0.0571887269616127, "learning_rate": 1.7290695482870627e-05, "loss": 0.0349, "step": 123960 }, { "epoch": 0.21985, "grad_norm": 0.0678820088505745, "learning_rate": 1.7286763365103364e-05, "loss": 0.0349, "step": 123970 }, { "epoch": 0.2199, "grad_norm": 0.06259248405694962, "learning_rate": 1.728283145821309e-05, "loss": 0.0336, "step": 123980 }, { "epoch": 0.21995, "grad_norm": 0.05824877321720123, "learning_rate": 1.7278899762307303e-05, "loss": 0.0342, "step": 123990 }, { "epoch": 0.22, "grad_norm": 0.07384226471185684, "learning_rate": 1.72749682774935e-05, "loss": 0.0339, "step": 124000 }, { "epoch": 0.22005, "grad_norm": 0.06599892675876617, "learning_rate": 1.727103700387916e-05, "loss": 0.033, "step": 124010 }, { "epoch": 0.2201, "grad_norm": 0.06363017857074738, "learning_rate": 1.726710594157177e-05, "loss": 0.0335, "step": 124020 }, { "epoch": 0.22015, "grad_norm": 0.06069604679942131, "learning_rate": 1.7263175090678786e-05, "loss": 0.0338, "step": 124030 }, { "epoch": 0.2202, "grad_norm": 0.06854899972677231, "learning_rate": 1.7259244451307705e-05, "loss": 0.0336, "step": 124040 }, { "epoch": 0.22025, "grad_norm": 0.07456790655851364, "learning_rate": 1.7255314023565956e-05, "loss": 0.0343, "step": 124050 }, { "epoch": 0.2203, "grad_norm": 0.06498544663190842, "learning_rate": 1.7251383807561018e-05, "loss": 0.0331, "step": 124060 }, { "epoch": 0.22035, "grad_norm": 0.05230363830924034, "learning_rate": 1.724745380340034e-05, "loss": 0.0331, "step": 124070 }, { "epoch": 0.2204, "grad_norm": 0.05821483954787254, "learning_rate": 1.724352401119136e-05, "loss": 0.034, "step": 124080 }, { "epoch": 0.22045, "grad_norm": 0.055370673537254333, "learning_rate": 1.723959443104152e-05, "loss": 0.0334, "step": 124090 }, { "epoch": 0.2205, "grad_norm": 0.05448485538363457, "learning_rate": 1.723566506305825e-05, "loss": 0.0346, "step": 124100 }, { "epoch": 0.22055, "grad_norm": 0.06155163422226906, "learning_rate": 1.723173590734898e-05, "loss": 0.0336, "step": 124110 }, { "epoch": 0.2206, "grad_norm": 0.05619033798575401, "learning_rate": 1.722780696402114e-05, "loss": 0.0331, "step": 124120 }, { "epoch": 0.22065, "grad_norm": 0.07716576755046844, "learning_rate": 1.7223878233182125e-05, "loss": 0.0345, "step": 124130 }, { "epoch": 0.2207, "grad_norm": 0.06452678143978119, "learning_rate": 1.7219949714939374e-05, "loss": 0.034, "step": 124140 }, { "epoch": 0.22075, "grad_norm": 0.06073189154267311, "learning_rate": 1.7216021409400256e-05, "loss": 0.0335, "step": 124150 }, { "epoch": 0.2208, "grad_norm": 0.06564535200595856, "learning_rate": 1.7212093316672203e-05, "loss": 0.0334, "step": 124160 }, { "epoch": 0.22085, "grad_norm": 0.06574677675962448, "learning_rate": 1.720816543686259e-05, "loss": 0.0345, "step": 124170 }, { "epoch": 0.2209, "grad_norm": 0.06860848516225815, "learning_rate": 1.7204237770078803e-05, "loss": 0.0344, "step": 124180 }, { "epoch": 0.22095, "grad_norm": 0.07889265567064285, "learning_rate": 1.7200310316428232e-05, "loss": 0.0361, "step": 124190 }, { "epoch": 0.221, "grad_norm": 0.07528276741504669, "learning_rate": 1.719638307601824e-05, "loss": 0.0351, "step": 124200 }, { "epoch": 0.22105, "grad_norm": 0.0553547702729702, "learning_rate": 1.719245604895621e-05, "loss": 0.0347, "step": 124210 }, { "epoch": 0.2211, "grad_norm": 0.06014566123485565, "learning_rate": 1.7188529235349493e-05, "loss": 0.0337, "step": 124220 }, { "epoch": 0.22115, "grad_norm": 0.060287296772003174, "learning_rate": 1.7184602635305455e-05, "loss": 0.0341, "step": 124230 }, { "epoch": 0.2212, "grad_norm": 0.06788293272256851, "learning_rate": 1.7180676248931437e-05, "loss": 0.0338, "step": 124240 }, { "epoch": 0.22125, "grad_norm": 0.06208663061261177, "learning_rate": 1.7176750076334797e-05, "loss": 0.0337, "step": 124250 }, { "epoch": 0.2213, "grad_norm": 0.057153813540935516, "learning_rate": 1.7172824117622876e-05, "loss": 0.0346, "step": 124260 }, { "epoch": 0.22135, "grad_norm": 0.061345912516117096, "learning_rate": 1.7168898372903e-05, "loss": 0.0331, "step": 124270 }, { "epoch": 0.2214, "grad_norm": 0.055477313697338104, "learning_rate": 1.7164972842282504e-05, "loss": 0.0328, "step": 124280 }, { "epoch": 0.22145, "grad_norm": 0.06002333015203476, "learning_rate": 1.7161047525868702e-05, "loss": 0.035, "step": 124290 }, { "epoch": 0.2215, "grad_norm": 0.06831765919923782, "learning_rate": 1.715712242376892e-05, "loss": 0.0367, "step": 124300 }, { "epoch": 0.22155, "grad_norm": 0.059336576610803604, "learning_rate": 1.7153197536090458e-05, "loss": 0.0349, "step": 124310 }, { "epoch": 0.2216, "grad_norm": 0.06508708000183105, "learning_rate": 1.7149272862940628e-05, "loss": 0.0338, "step": 124320 }, { "epoch": 0.22165, "grad_norm": 0.06287387758493423, "learning_rate": 1.714534840442674e-05, "loss": 0.0352, "step": 124330 }, { "epoch": 0.2217, "grad_norm": 0.05168507993221283, "learning_rate": 1.7141424160656062e-05, "loss": 0.0346, "step": 124340 }, { "epoch": 0.22175, "grad_norm": 0.07266610860824585, "learning_rate": 1.7137500131735907e-05, "loss": 0.0378, "step": 124350 }, { "epoch": 0.2218, "grad_norm": 0.10454844683408737, "learning_rate": 1.713357631777353e-05, "loss": 0.0372, "step": 124360 }, { "epoch": 0.22185, "grad_norm": 0.0826743096113205, "learning_rate": 1.712965271887623e-05, "loss": 0.0376, "step": 124370 }, { "epoch": 0.2219, "grad_norm": 0.06038236618041992, "learning_rate": 1.712572933515127e-05, "loss": 0.0363, "step": 124380 }, { "epoch": 0.22195, "grad_norm": 0.06790652126073837, "learning_rate": 1.712180616670591e-05, "loss": 0.0346, "step": 124390 }, { "epoch": 0.222, "grad_norm": 0.06319904327392578, "learning_rate": 1.7117883213647413e-05, "loss": 0.0348, "step": 124400 }, { "epoch": 0.22205, "grad_norm": 0.07048717141151428, "learning_rate": 1.711396047608302e-05, "loss": 0.0357, "step": 124410 }, { "epoch": 0.2221, "grad_norm": 0.06790277361869812, "learning_rate": 1.7110037954119994e-05, "loss": 0.0347, "step": 124420 }, { "epoch": 0.22215, "grad_norm": 0.05983925983309746, "learning_rate": 1.7106115647865557e-05, "loss": 0.0378, "step": 124430 }, { "epoch": 0.2222, "grad_norm": 0.06110047176480293, "learning_rate": 1.710219355742695e-05, "loss": 0.035, "step": 124440 }, { "epoch": 0.22225, "grad_norm": 0.08017834275960922, "learning_rate": 1.7098271682911416e-05, "loss": 0.0343, "step": 124450 }, { "epoch": 0.2223, "grad_norm": 0.06500820070505142, "learning_rate": 1.7094350024426157e-05, "loss": 0.0341, "step": 124460 }, { "epoch": 0.22235, "grad_norm": 0.05860864371061325, "learning_rate": 1.7090428582078403e-05, "loss": 0.0343, "step": 124470 }, { "epoch": 0.2224, "grad_norm": 0.07563772797584534, "learning_rate": 1.7086507355975356e-05, "loss": 0.0338, "step": 124480 }, { "epoch": 0.22245, "grad_norm": 0.05746683478355408, "learning_rate": 1.7082586346224232e-05, "loss": 0.0336, "step": 124490 }, { "epoch": 0.2225, "grad_norm": 0.0540003776550293, "learning_rate": 1.7078665552932216e-05, "loss": 0.0344, "step": 124500 }, { "epoch": 0.22255, "grad_norm": 0.05797993019223213, "learning_rate": 1.7074744976206506e-05, "loss": 0.0329, "step": 124510 }, { "epoch": 0.2226, "grad_norm": 0.06687459349632263, "learning_rate": 1.7070824616154302e-05, "loss": 0.0339, "step": 124520 }, { "epoch": 0.22265, "grad_norm": 0.05583649501204491, "learning_rate": 1.7066904472882762e-05, "loss": 0.0332, "step": 124530 }, { "epoch": 0.2227, "grad_norm": 0.06575287133455276, "learning_rate": 1.7062984546499087e-05, "loss": 0.0338, "step": 124540 }, { "epoch": 0.22275, "grad_norm": 0.055947426706552505, "learning_rate": 1.7059064837110416e-05, "loss": 0.0341, "step": 124550 }, { "epoch": 0.2228, "grad_norm": 0.06190335005521774, "learning_rate": 1.7055145344823937e-05, "loss": 0.034, "step": 124560 }, { "epoch": 0.22285, "grad_norm": 0.06709223985671997, "learning_rate": 1.7051226069746805e-05, "loss": 0.0351, "step": 124570 }, { "epoch": 0.2229, "grad_norm": 0.059285055845975876, "learning_rate": 1.7047307011986158e-05, "loss": 0.0336, "step": 124580 }, { "epoch": 0.22295, "grad_norm": 0.0603620707988739, "learning_rate": 1.7043388171649154e-05, "loss": 0.0354, "step": 124590 }, { "epoch": 0.223, "grad_norm": 0.0807933658361435, "learning_rate": 1.703946954884293e-05, "loss": 0.0358, "step": 124600 }, { "epoch": 0.22305, "grad_norm": 0.06559517234563828, "learning_rate": 1.7035551143674615e-05, "loss": 0.0344, "step": 124610 }, { "epoch": 0.2231, "grad_norm": 0.06602256000041962, "learning_rate": 1.7031632956251336e-05, "loss": 0.0373, "step": 124620 }, { "epoch": 0.22315, "grad_norm": 0.06404702365398407, "learning_rate": 1.7027714986680214e-05, "loss": 0.0344, "step": 124630 }, { "epoch": 0.2232, "grad_norm": 0.06762048602104187, "learning_rate": 1.702379723506839e-05, "loss": 0.0362, "step": 124640 }, { "epoch": 0.22325, "grad_norm": 0.0721345841884613, "learning_rate": 1.701987970152293e-05, "loss": 0.0334, "step": 124650 }, { "epoch": 0.2233, "grad_norm": 0.06913810223340988, "learning_rate": 1.7015962386150978e-05, "loss": 0.035, "step": 124660 }, { "epoch": 0.22335, "grad_norm": 0.07465165853500366, "learning_rate": 1.7012045289059604e-05, "loss": 0.0342, "step": 124670 }, { "epoch": 0.2234, "grad_norm": 0.06738516688346863, "learning_rate": 1.700812841035592e-05, "loss": 0.0332, "step": 124680 }, { "epoch": 0.22345, "grad_norm": 0.06403343379497528, "learning_rate": 1.7004211750146993e-05, "loss": 0.0341, "step": 124690 }, { "epoch": 0.2235, "grad_norm": 0.05525503680109978, "learning_rate": 1.7000295308539917e-05, "loss": 0.0345, "step": 124700 }, { "epoch": 0.22355, "grad_norm": 0.06048808991909027, "learning_rate": 1.6996379085641768e-05, "loss": 0.0337, "step": 124710 }, { "epoch": 0.2236, "grad_norm": 0.05922717973589897, "learning_rate": 1.6992463081559602e-05, "loss": 0.034, "step": 124720 }, { "epoch": 0.22365, "grad_norm": 0.05505324527621269, "learning_rate": 1.6988547296400488e-05, "loss": 0.0328, "step": 124730 }, { "epoch": 0.2237, "grad_norm": 0.05941491946578026, "learning_rate": 1.6984631730271476e-05, "loss": 0.0343, "step": 124740 }, { "epoch": 0.22375, "grad_norm": 0.058640215545892715, "learning_rate": 1.6980716383279622e-05, "loss": 0.0325, "step": 124750 }, { "epoch": 0.2238, "grad_norm": 0.05964535102248192, "learning_rate": 1.6976801255531977e-05, "loss": 0.0351, "step": 124760 }, { "epoch": 0.22385, "grad_norm": 0.05463533103466034, "learning_rate": 1.6972886347135565e-05, "loss": 0.0333, "step": 124770 }, { "epoch": 0.2239, "grad_norm": 0.06249071657657623, "learning_rate": 1.696897165819743e-05, "loss": 0.0344, "step": 124780 }, { "epoch": 0.22395, "grad_norm": 0.07721731066703796, "learning_rate": 1.696505718882459e-05, "loss": 0.0338, "step": 124790 }, { "epoch": 0.224, "grad_norm": 0.06688842922449112, "learning_rate": 1.696114293912407e-05, "loss": 0.0348, "step": 124800 }, { "epoch": 0.22405, "grad_norm": 0.06384867429733276, "learning_rate": 1.6957228909202883e-05, "loss": 0.0343, "step": 124810 }, { "epoch": 0.2241, "grad_norm": 0.06892948597669601, "learning_rate": 1.6953315099168022e-05, "loss": 0.0358, "step": 124820 }, { "epoch": 0.22415, "grad_norm": 0.06952431052923203, "learning_rate": 1.6949401509126524e-05, "loss": 0.0341, "step": 124830 }, { "epoch": 0.2242, "grad_norm": 0.06377065181732178, "learning_rate": 1.694548813918535e-05, "loss": 0.033, "step": 124840 }, { "epoch": 0.22425, "grad_norm": 0.06937036663293839, "learning_rate": 1.6941574989451518e-05, "loss": 0.0334, "step": 124850 }, { "epoch": 0.2243, "grad_norm": 0.06026379391551018, "learning_rate": 1.693766206003198e-05, "loss": 0.0359, "step": 124860 }, { "epoch": 0.22435, "grad_norm": 0.06354683637619019, "learning_rate": 1.6933749351033744e-05, "loss": 0.036, "step": 124870 }, { "epoch": 0.2244, "grad_norm": 0.06264355033636093, "learning_rate": 1.692983686256377e-05, "loss": 0.0342, "step": 124880 }, { "epoch": 0.22445, "grad_norm": 0.067869171500206, "learning_rate": 1.692592459472902e-05, "loss": 0.0352, "step": 124890 }, { "epoch": 0.2245, "grad_norm": 0.0714171975851059, "learning_rate": 1.692201254763646e-05, "loss": 0.0349, "step": 124900 }, { "epoch": 0.22455, "grad_norm": 0.08100499957799911, "learning_rate": 1.6918100721393045e-05, "loss": 0.034, "step": 124910 }, { "epoch": 0.2246, "grad_norm": 0.07563242316246033, "learning_rate": 1.691418911610572e-05, "loss": 0.0381, "step": 124920 }, { "epoch": 0.22465, "grad_norm": 0.0738987997174263, "learning_rate": 1.6910277731881424e-05, "loss": 0.0353, "step": 124930 }, { "epoch": 0.2247, "grad_norm": 0.06895702332258224, "learning_rate": 1.6906366568827088e-05, "loss": 0.036, "step": 124940 }, { "epoch": 0.22475, "grad_norm": 0.09274575859308243, "learning_rate": 1.690245562704966e-05, "loss": 0.035, "step": 124950 }, { "epoch": 0.2248, "grad_norm": 0.07356208562850952, "learning_rate": 1.6898544906656052e-05, "loss": 0.0351, "step": 124960 }, { "epoch": 0.22485, "grad_norm": 0.06885560601949692, "learning_rate": 1.6894634407753186e-05, "loss": 0.0374, "step": 124970 }, { "epoch": 0.2249, "grad_norm": 0.06371462345123291, "learning_rate": 1.6890724130447963e-05, "loss": 0.0353, "step": 124980 }, { "epoch": 0.22495, "grad_norm": 0.06379080563783646, "learning_rate": 1.68868140748473e-05, "loss": 0.0341, "step": 124990 }, { "epoch": 0.225, "grad_norm": 0.06789926439523697, "learning_rate": 1.688290424105809e-05, "loss": 0.0345, "step": 125000 }, { "epoch": 0.22505, "grad_norm": 0.06488768756389618, "learning_rate": 1.687899462918723e-05, "loss": 0.0338, "step": 125010 }, { "epoch": 0.2251, "grad_norm": 0.06970378756523132, "learning_rate": 1.687508523934161e-05, "loss": 0.0346, "step": 125020 }, { "epoch": 0.22515, "grad_norm": 0.054153922945261, "learning_rate": 1.687117607162809e-05, "loss": 0.0353, "step": 125030 }, { "epoch": 0.2252, "grad_norm": 0.06287054717540741, "learning_rate": 1.6867267126153584e-05, "loss": 0.0348, "step": 125040 }, { "epoch": 0.22525, "grad_norm": 0.05829636752605438, "learning_rate": 1.6863358403024928e-05, "loss": 0.0344, "step": 125050 }, { "epoch": 0.2253, "grad_norm": 0.049323271960020065, "learning_rate": 1.6859449902349007e-05, "loss": 0.0338, "step": 125060 }, { "epoch": 0.22535, "grad_norm": 0.05087485536932945, "learning_rate": 1.685554162423265e-05, "loss": 0.034, "step": 125070 }, { "epoch": 0.2254, "grad_norm": 0.06357801705598831, "learning_rate": 1.6851633568782733e-05, "loss": 0.034, "step": 125080 }, { "epoch": 0.22545, "grad_norm": 0.06831253319978714, "learning_rate": 1.68477257361061e-05, "loss": 0.0351, "step": 125090 }, { "epoch": 0.2255, "grad_norm": 0.049976907670497894, "learning_rate": 1.6843818126309576e-05, "loss": 0.0336, "step": 125100 }, { "epoch": 0.22555, "grad_norm": 0.1038125678896904, "learning_rate": 1.6839910739500002e-05, "loss": 0.0362, "step": 125110 }, { "epoch": 0.2256, "grad_norm": 0.07736409455537796, "learning_rate": 1.68360035757842e-05, "loss": 0.0344, "step": 125120 }, { "epoch": 0.22565, "grad_norm": 0.06509806215763092, "learning_rate": 1.683209663526899e-05, "loss": 0.0342, "step": 125130 }, { "epoch": 0.2257, "grad_norm": 0.06010761111974716, "learning_rate": 1.6828189918061206e-05, "loss": 0.0332, "step": 125140 }, { "epoch": 0.22575, "grad_norm": 0.09686113148927689, "learning_rate": 1.6824283424267617e-05, "loss": 0.0341, "step": 125150 }, { "epoch": 0.2258, "grad_norm": 0.06265415996313095, "learning_rate": 1.6820377153995065e-05, "loss": 0.0329, "step": 125160 }, { "epoch": 0.22585, "grad_norm": 0.06570495665073395, "learning_rate": 1.681647110735032e-05, "loss": 0.0334, "step": 125170 }, { "epoch": 0.2259, "grad_norm": 0.06195457652211189, "learning_rate": 1.681256528444019e-05, "loss": 0.0336, "step": 125180 }, { "epoch": 0.22595, "grad_norm": 0.07338821887969971, "learning_rate": 1.680865968537144e-05, "loss": 0.0344, "step": 125190 }, { "epoch": 0.226, "grad_norm": 0.07165399193763733, "learning_rate": 1.6804754310250858e-05, "loss": 0.0344, "step": 125200 }, { "epoch": 0.22605, "grad_norm": 0.07292645424604416, "learning_rate": 1.6800849159185217e-05, "loss": 0.0343, "step": 125210 }, { "epoch": 0.2261, "grad_norm": 0.06303286552429199, "learning_rate": 1.6796944232281278e-05, "loss": 0.0327, "step": 125220 }, { "epoch": 0.22615, "grad_norm": 0.06282463669776917, "learning_rate": 1.6793039529645806e-05, "loss": 0.0348, "step": 125230 }, { "epoch": 0.2262, "grad_norm": 0.07054274529218674, "learning_rate": 1.678913505138554e-05, "loss": 0.0351, "step": 125240 }, { "epoch": 0.22625, "grad_norm": 0.0869477391242981, "learning_rate": 1.6785230797607252e-05, "loss": 0.034, "step": 125250 }, { "epoch": 0.2263, "grad_norm": 0.07772769033908844, "learning_rate": 1.678132676841765e-05, "loss": 0.0339, "step": 125260 }, { "epoch": 0.22635, "grad_norm": 0.0869736522436142, "learning_rate": 1.6777422963923494e-05, "loss": 0.0337, "step": 125270 }, { "epoch": 0.2264, "grad_norm": 0.09080974757671356, "learning_rate": 1.6773519384231512e-05, "loss": 0.0334, "step": 125280 }, { "epoch": 0.22645, "grad_norm": 0.05822908505797386, "learning_rate": 1.6769616029448415e-05, "loss": 0.0332, "step": 125290 }, { "epoch": 0.2265, "grad_norm": 0.06880011409521103, "learning_rate": 1.6765712899680924e-05, "loss": 0.0357, "step": 125300 }, { "epoch": 0.22655, "grad_norm": 0.0638493001461029, "learning_rate": 1.676180999503575e-05, "loss": 0.0325, "step": 125310 }, { "epoch": 0.2266, "grad_norm": 0.0685199722647667, "learning_rate": 1.6757907315619587e-05, "loss": 0.0349, "step": 125320 }, { "epoch": 0.22665, "grad_norm": 0.056751806288957596, "learning_rate": 1.6754004861539156e-05, "loss": 0.0337, "step": 125330 }, { "epoch": 0.2267, "grad_norm": 0.056441500782966614, "learning_rate": 1.6750102632901117e-05, "loss": 0.0351, "step": 125340 }, { "epoch": 0.22675, "grad_norm": 0.04892340302467346, "learning_rate": 1.674620062981219e-05, "loss": 0.0329, "step": 125350 }, { "epoch": 0.2268, "grad_norm": 0.056955333799123764, "learning_rate": 1.6742298852379025e-05, "loss": 0.0333, "step": 125360 }, { "epoch": 0.22685, "grad_norm": 0.05275273323059082, "learning_rate": 1.6738397300708315e-05, "loss": 0.0347, "step": 125370 }, { "epoch": 0.2269, "grad_norm": 0.05845008045434952, "learning_rate": 1.6734495974906713e-05, "loss": 0.034, "step": 125380 }, { "epoch": 0.22695, "grad_norm": 0.05458725616335869, "learning_rate": 1.6730594875080887e-05, "loss": 0.0333, "step": 125390 }, { "epoch": 0.227, "grad_norm": 0.05155658721923828, "learning_rate": 1.6726694001337496e-05, "loss": 0.0338, "step": 125400 }, { "epoch": 0.22705, "grad_norm": 0.06531024724245071, "learning_rate": 1.6722793353783178e-05, "loss": 0.0361, "step": 125410 }, { "epoch": 0.2271, "grad_norm": 0.05535675212740898, "learning_rate": 1.6718892932524584e-05, "loss": 0.0336, "step": 125420 }, { "epoch": 0.22715, "grad_norm": 0.05717598274350166, "learning_rate": 1.671499273766834e-05, "loss": 0.0318, "step": 125430 }, { "epoch": 0.2272, "grad_norm": 0.06368177384138107, "learning_rate": 1.6711092769321088e-05, "loss": 0.0336, "step": 125440 }, { "epoch": 0.22725, "grad_norm": 0.06778300553560257, "learning_rate": 1.6707193027589434e-05, "loss": 0.0327, "step": 125450 }, { "epoch": 0.2273, "grad_norm": 0.06283332407474518, "learning_rate": 1.6703293512580013e-05, "loss": 0.0318, "step": 125460 }, { "epoch": 0.22735, "grad_norm": 0.060217004269361496, "learning_rate": 1.669939422439944e-05, "loss": 0.0343, "step": 125470 }, { "epoch": 0.2274, "grad_norm": 0.0580129511654377, "learning_rate": 1.66954951631543e-05, "loss": 0.0328, "step": 125480 }, { "epoch": 0.22745, "grad_norm": 0.049048274755477905, "learning_rate": 1.6691596328951212e-05, "loss": 0.0313, "step": 125490 }, { "epoch": 0.2275, "grad_norm": 0.05264052376151085, "learning_rate": 1.668769772189675e-05, "loss": 0.0329, "step": 125500 }, { "epoch": 0.22755, "grad_norm": 0.05675099417567253, "learning_rate": 1.6683799342097517e-05, "loss": 0.0331, "step": 125510 }, { "epoch": 0.2276, "grad_norm": 0.060881178826093674, "learning_rate": 1.667990118966008e-05, "loss": 0.0335, "step": 125520 }, { "epoch": 0.22765, "grad_norm": 0.06495590507984161, "learning_rate": 1.6676003264691015e-05, "loss": 0.0355, "step": 125530 }, { "epoch": 0.2277, "grad_norm": 0.05344432219862938, "learning_rate": 1.6672105567296904e-05, "loss": 0.0327, "step": 125540 }, { "epoch": 0.22775, "grad_norm": 0.0638977512717247, "learning_rate": 1.6668208097584287e-05, "loss": 0.0351, "step": 125550 }, { "epoch": 0.2278, "grad_norm": 0.07072903960943222, "learning_rate": 1.6664310855659747e-05, "loss": 0.0344, "step": 125560 }, { "epoch": 0.22785, "grad_norm": 0.05785040929913521, "learning_rate": 1.6660413841629795e-05, "loss": 0.0349, "step": 125570 }, { "epoch": 0.2279, "grad_norm": 0.061877842992544174, "learning_rate": 1.6656517055601007e-05, "loss": 0.0342, "step": 125580 }, { "epoch": 0.22795, "grad_norm": 0.07650292664766312, "learning_rate": 1.665262049767991e-05, "loss": 0.035, "step": 125590 }, { "epoch": 0.228, "grad_norm": 0.049784474074840546, "learning_rate": 1.6648724167973028e-05, "loss": 0.0326, "step": 125600 }, { "epoch": 0.22805, "grad_norm": 0.0595824271440506, "learning_rate": 1.6644828066586897e-05, "loss": 0.0335, "step": 125610 }, { "epoch": 0.2281, "grad_norm": 0.056988898664712906, "learning_rate": 1.664093219362802e-05, "loss": 0.0327, "step": 125620 }, { "epoch": 0.22815, "grad_norm": 0.05611162260174751, "learning_rate": 1.6637036549202924e-05, "loss": 0.0349, "step": 125630 }, { "epoch": 0.2282, "grad_norm": 0.05163775011897087, "learning_rate": 1.66331411334181e-05, "loss": 0.0338, "step": 125640 }, { "epoch": 0.22825, "grad_norm": 0.04976727068424225, "learning_rate": 1.6629245946380052e-05, "loss": 0.0339, "step": 125650 }, { "epoch": 0.2283, "grad_norm": 0.0533292330801487, "learning_rate": 1.6625350988195282e-05, "loss": 0.0329, "step": 125660 }, { "epoch": 0.22835, "grad_norm": 0.05242196097970009, "learning_rate": 1.6621456258970264e-05, "loss": 0.0338, "step": 125670 }, { "epoch": 0.2284, "grad_norm": 0.06851650774478912, "learning_rate": 1.6617561758811493e-05, "loss": 0.0355, "step": 125680 }, { "epoch": 0.22845, "grad_norm": 0.05392906069755554, "learning_rate": 1.6613667487825427e-05, "loss": 0.0339, "step": 125690 }, { "epoch": 0.2285, "grad_norm": 0.05616411939263344, "learning_rate": 1.660977344611855e-05, "loss": 0.0338, "step": 125700 }, { "epoch": 0.22855, "grad_norm": 0.05417593568563461, "learning_rate": 1.6605879633797304e-05, "loss": 0.0345, "step": 125710 }, { "epoch": 0.2286, "grad_norm": 0.051290225237607956, "learning_rate": 1.6601986050968154e-05, "loss": 0.0343, "step": 125720 }, { "epoch": 0.22865, "grad_norm": 0.05213925987482071, "learning_rate": 1.659809269773756e-05, "loss": 0.0341, "step": 125730 }, { "epoch": 0.2287, "grad_norm": 0.06550920009613037, "learning_rate": 1.6594199574211944e-05, "loss": 0.035, "step": 125740 }, { "epoch": 0.22875, "grad_norm": 0.09685634821653366, "learning_rate": 1.659030668049777e-05, "loss": 0.0359, "step": 125750 }, { "epoch": 0.2288, "grad_norm": 0.07793646305799484, "learning_rate": 1.658641401670144e-05, "loss": 0.0345, "step": 125760 }, { "epoch": 0.22885, "grad_norm": 0.07697339355945587, "learning_rate": 1.658252158292939e-05, "loss": 0.0343, "step": 125770 }, { "epoch": 0.2289, "grad_norm": 0.0588192492723465, "learning_rate": 1.6578629379288042e-05, "loss": 0.0359, "step": 125780 }, { "epoch": 0.22895, "grad_norm": 0.0640011578798294, "learning_rate": 1.65747374058838e-05, "loss": 0.0344, "step": 125790 }, { "epoch": 0.229, "grad_norm": 0.0636245459318161, "learning_rate": 1.6570845662823075e-05, "loss": 0.0354, "step": 125800 }, { "epoch": 0.22905, "grad_norm": 0.06315630674362183, "learning_rate": 1.656695415021226e-05, "loss": 0.0341, "step": 125810 }, { "epoch": 0.2291, "grad_norm": 0.05824105069041252, "learning_rate": 1.6563062868157756e-05, "loss": 0.0384, "step": 125820 }, { "epoch": 0.22915, "grad_norm": 0.06384194642305374, "learning_rate": 1.6559171816765936e-05, "loss": 0.0337, "step": 125830 }, { "epoch": 0.2292, "grad_norm": 0.04800541698932648, "learning_rate": 1.6555280996143186e-05, "loss": 0.0362, "step": 125840 }, { "epoch": 0.22925, "grad_norm": 0.07251149415969849, "learning_rate": 1.6551390406395896e-05, "loss": 0.0389, "step": 125850 }, { "epoch": 0.2293, "grad_norm": 0.06954000890254974, "learning_rate": 1.6547500047630398e-05, "loss": 0.0384, "step": 125860 }, { "epoch": 0.22935, "grad_norm": 0.05920116603374481, "learning_rate": 1.654360991995309e-05, "loss": 0.0346, "step": 125870 }, { "epoch": 0.2294, "grad_norm": 0.06064040586352348, "learning_rate": 1.65397200234703e-05, "loss": 0.0348, "step": 125880 }, { "epoch": 0.22945, "grad_norm": 0.06895332783460617, "learning_rate": 1.653583035828839e-05, "loss": 0.0352, "step": 125890 }, { "epoch": 0.2295, "grad_norm": 0.06560982763767242, "learning_rate": 1.6531940924513697e-05, "loss": 0.0351, "step": 125900 }, { "epoch": 0.22955, "grad_norm": 0.059170350432395935, "learning_rate": 1.6528051722252557e-05, "loss": 0.0345, "step": 125910 }, { "epoch": 0.2296, "grad_norm": 0.07745416462421417, "learning_rate": 1.6524162751611304e-05, "loss": 0.0339, "step": 125920 }, { "epoch": 0.22965, "grad_norm": 0.05844829976558685, "learning_rate": 1.6520274012696252e-05, "loss": 0.0343, "step": 125930 }, { "epoch": 0.2297, "grad_norm": 0.051176246255636215, "learning_rate": 1.6516385505613728e-05, "loss": 0.0339, "step": 125940 }, { "epoch": 0.22975, "grad_norm": 0.05116390809416771, "learning_rate": 1.651249723047003e-05, "loss": 0.0323, "step": 125950 }, { "epoch": 0.2298, "grad_norm": 0.06636460870504379, "learning_rate": 1.650860918737147e-05, "loss": 0.0338, "step": 125960 }, { "epoch": 0.22985, "grad_norm": 0.07166604697704315, "learning_rate": 1.6504721376424354e-05, "loss": 0.0343, "step": 125970 }, { "epoch": 0.2299, "grad_norm": 0.06095893681049347, "learning_rate": 1.6500833797734955e-05, "loss": 0.0338, "step": 125980 }, { "epoch": 0.22995, "grad_norm": 0.05798349529504776, "learning_rate": 1.6496946451409577e-05, "loss": 0.0327, "step": 125990 }, { "epoch": 0.23, "grad_norm": 0.05991566926240921, "learning_rate": 1.649305933755448e-05, "loss": 0.033, "step": 126000 }, { "epoch": 0.23005, "grad_norm": 0.06393137574195862, "learning_rate": 1.648917245627595e-05, "loss": 0.0348, "step": 126010 }, { "epoch": 0.2301, "grad_norm": 0.058520495891571045, "learning_rate": 1.648528580768024e-05, "loss": 0.0335, "step": 126020 }, { "epoch": 0.23015, "grad_norm": 0.05948096886277199, "learning_rate": 1.6481399391873615e-05, "loss": 0.0339, "step": 126030 }, { "epoch": 0.2302, "grad_norm": 0.060224246233701706, "learning_rate": 1.647751320896235e-05, "loss": 0.033, "step": 126040 }, { "epoch": 0.23025, "grad_norm": 0.0713365450501442, "learning_rate": 1.6473627259052648e-05, "loss": 0.0353, "step": 126050 }, { "epoch": 0.2303, "grad_norm": 0.07686593383550644, "learning_rate": 1.6469741542250792e-05, "loss": 0.0345, "step": 126060 }, { "epoch": 0.23035, "grad_norm": 0.059691183269023895, "learning_rate": 1.646585605866299e-05, "loss": 0.034, "step": 126070 }, { "epoch": 0.2304, "grad_norm": 0.06376127898693085, "learning_rate": 1.6461970808395476e-05, "loss": 0.0349, "step": 126080 }, { "epoch": 0.23045, "grad_norm": 0.06100200489163399, "learning_rate": 1.6458085791554474e-05, "loss": 0.0337, "step": 126090 }, { "epoch": 0.2305, "grad_norm": 0.05868079885840416, "learning_rate": 1.6454201008246196e-05, "loss": 0.0332, "step": 126100 }, { "epoch": 0.23055, "grad_norm": 0.07140611112117767, "learning_rate": 1.6450316458576852e-05, "loss": 0.0352, "step": 126110 }, { "epoch": 0.2306, "grad_norm": 0.07169882208108902, "learning_rate": 1.6446432142652647e-05, "loss": 0.0346, "step": 126120 }, { "epoch": 0.23065, "grad_norm": 0.058701857924461365, "learning_rate": 1.6442548060579778e-05, "loss": 0.0332, "step": 126130 }, { "epoch": 0.2307, "grad_norm": 0.06083898991346359, "learning_rate": 1.643866421246442e-05, "loss": 0.0329, "step": 126140 }, { "epoch": 0.23075, "grad_norm": 0.056007515639066696, "learning_rate": 1.6434780598412764e-05, "loss": 0.0342, "step": 126150 }, { "epoch": 0.2308, "grad_norm": 0.05735059827566147, "learning_rate": 1.6430897218530998e-05, "loss": 0.0328, "step": 126160 }, { "epoch": 0.23085, "grad_norm": 0.06010760739445686, "learning_rate": 1.642701407292528e-05, "loss": 0.0334, "step": 126170 }, { "epoch": 0.2309, "grad_norm": 0.0675911083817482, "learning_rate": 1.6423131161701778e-05, "loss": 0.0329, "step": 126180 }, { "epoch": 0.23095, "grad_norm": 0.05605832487344742, "learning_rate": 1.6419248484966642e-05, "loss": 0.0344, "step": 126190 }, { "epoch": 0.231, "grad_norm": 0.05773789808154106, "learning_rate": 1.6415366042826036e-05, "loss": 0.0339, "step": 126200 }, { "epoch": 0.23105, "grad_norm": 0.0519493967294693, "learning_rate": 1.6411483835386092e-05, "loss": 0.0339, "step": 126210 }, { "epoch": 0.2311, "grad_norm": 0.058896906673908234, "learning_rate": 1.640760186275296e-05, "loss": 0.0332, "step": 126220 }, { "epoch": 0.23115, "grad_norm": 0.08163320273160934, "learning_rate": 1.640372012503276e-05, "loss": 0.0341, "step": 126230 }, { "epoch": 0.2312, "grad_norm": 0.09805915504693985, "learning_rate": 1.6399838622331616e-05, "loss": 0.0341, "step": 126240 }, { "epoch": 0.23125, "grad_norm": 0.08248712122440338, "learning_rate": 1.639595735475567e-05, "loss": 0.0342, "step": 126250 }, { "epoch": 0.2313, "grad_norm": 0.08971148729324341, "learning_rate": 1.6392076322411e-05, "loss": 0.0329, "step": 126260 }, { "epoch": 0.23135, "grad_norm": 0.06334342062473297, "learning_rate": 1.6388195525403746e-05, "loss": 0.0323, "step": 126270 }, { "epoch": 0.2314, "grad_norm": 0.06715748459100723, "learning_rate": 1.6384314963839976e-05, "loss": 0.0319, "step": 126280 }, { "epoch": 0.23145, "grad_norm": 0.06183129921555519, "learning_rate": 1.6380434637825804e-05, "loss": 0.033, "step": 126290 }, { "epoch": 0.2315, "grad_norm": 0.07641464471817017, "learning_rate": 1.637655454746731e-05, "loss": 0.0342, "step": 126300 }, { "epoch": 0.23155, "grad_norm": 0.06532736867666245, "learning_rate": 1.6372674692870578e-05, "loss": 0.0331, "step": 126310 }, { "epoch": 0.2316, "grad_norm": 0.06653792411088943, "learning_rate": 1.636879507414168e-05, "loss": 0.0338, "step": 126320 }, { "epoch": 0.23165, "grad_norm": 0.05246128514409065, "learning_rate": 1.6364915691386677e-05, "loss": 0.0323, "step": 126330 }, { "epoch": 0.2317, "grad_norm": 0.07467667013406754, "learning_rate": 1.6361036544711628e-05, "loss": 0.0332, "step": 126340 }, { "epoch": 0.23175, "grad_norm": 0.06301455944776535, "learning_rate": 1.6357157634222613e-05, "loss": 0.0329, "step": 126350 }, { "epoch": 0.2318, "grad_norm": 0.08649516105651855, "learning_rate": 1.6353278960025646e-05, "loss": 0.0346, "step": 126360 }, { "epoch": 0.23185, "grad_norm": 0.06692379713058472, "learning_rate": 1.634940052222679e-05, "loss": 0.033, "step": 126370 }, { "epoch": 0.2319, "grad_norm": 0.06434053182601929, "learning_rate": 1.634552232093207e-05, "loss": 0.0334, "step": 126380 }, { "epoch": 0.23195, "grad_norm": 0.06479530781507492, "learning_rate": 1.6341644356247526e-05, "loss": 0.0341, "step": 126390 }, { "epoch": 0.232, "grad_norm": 0.05729609355330467, "learning_rate": 1.6337766628279165e-05, "loss": 0.0318, "step": 126400 }, { "epoch": 0.23205, "grad_norm": 0.06309373676776886, "learning_rate": 1.6333889137133014e-05, "loss": 0.0323, "step": 126410 }, { "epoch": 0.2321, "grad_norm": 0.06207401677966118, "learning_rate": 1.633001188291508e-05, "loss": 0.0345, "step": 126420 }, { "epoch": 0.23215, "grad_norm": 0.06494017690420151, "learning_rate": 1.632613486573136e-05, "loss": 0.033, "step": 126430 }, { "epoch": 0.2322, "grad_norm": 0.06067967042326927, "learning_rate": 1.632225808568786e-05, "loss": 0.0348, "step": 126440 }, { "epoch": 0.23225, "grad_norm": 0.05891747027635574, "learning_rate": 1.6318381542890552e-05, "loss": 0.0337, "step": 126450 }, { "epoch": 0.2323, "grad_norm": 0.07398000359535217, "learning_rate": 1.6314505237445448e-05, "loss": 0.0352, "step": 126460 }, { "epoch": 0.23235, "grad_norm": 0.14084388315677643, "learning_rate": 1.631062916945849e-05, "loss": 0.0342, "step": 126470 }, { "epoch": 0.2324, "grad_norm": 0.09354683756828308, "learning_rate": 1.6306753339035673e-05, "loss": 0.0333, "step": 126480 }, { "epoch": 0.23245, "grad_norm": 0.09307026118040085, "learning_rate": 1.630287774628296e-05, "loss": 0.0362, "step": 126490 }, { "epoch": 0.2325, "grad_norm": 0.07109540700912476, "learning_rate": 1.6299002391306294e-05, "loss": 0.0351, "step": 126500 }, { "epoch": 0.23255, "grad_norm": 0.0916818305850029, "learning_rate": 1.6295127274211643e-05, "loss": 0.0361, "step": 126510 }, { "epoch": 0.2326, "grad_norm": 0.05944633483886719, "learning_rate": 1.6291252395104935e-05, "loss": 0.0331, "step": 126520 }, { "epoch": 0.23265, "grad_norm": 0.07088175415992737, "learning_rate": 1.6287377754092108e-05, "loss": 0.034, "step": 126530 }, { "epoch": 0.2327, "grad_norm": 0.07546090334653854, "learning_rate": 1.6283503351279118e-05, "loss": 0.0341, "step": 126540 }, { "epoch": 0.23275, "grad_norm": 0.06843554973602295, "learning_rate": 1.627962918677185e-05, "loss": 0.0345, "step": 126550 }, { "epoch": 0.2328, "grad_norm": 0.06755559891462326, "learning_rate": 1.6275755260676268e-05, "loss": 0.034, "step": 126560 }, { "epoch": 0.23285, "grad_norm": 0.05856577306985855, "learning_rate": 1.627188157309824e-05, "loss": 0.0328, "step": 126570 }, { "epoch": 0.2329, "grad_norm": 0.054877739399671555, "learning_rate": 1.6268008124143703e-05, "loss": 0.0335, "step": 126580 }, { "epoch": 0.23295, "grad_norm": 0.0606815367937088, "learning_rate": 1.6264134913918537e-05, "loss": 0.0336, "step": 126590 }, { "epoch": 0.233, "grad_norm": 0.05075656995177269, "learning_rate": 1.626026194252864e-05, "loss": 0.0335, "step": 126600 }, { "epoch": 0.23305, "grad_norm": 0.05685050040483475, "learning_rate": 1.6256389210079904e-05, "loss": 0.0329, "step": 126610 }, { "epoch": 0.2331, "grad_norm": 0.0628228485584259, "learning_rate": 1.6252516716678196e-05, "loss": 0.0325, "step": 126620 }, { "epoch": 0.23315, "grad_norm": 0.06094030663371086, "learning_rate": 1.62486444624294e-05, "loss": 0.0351, "step": 126630 }, { "epoch": 0.2332, "grad_norm": 0.05392427742481232, "learning_rate": 1.624477244743937e-05, "loss": 0.0334, "step": 126640 }, { "epoch": 0.23325, "grad_norm": 0.05599977821111679, "learning_rate": 1.624090067181398e-05, "loss": 0.0319, "step": 126650 }, { "epoch": 0.2333, "grad_norm": 0.051868144422769547, "learning_rate": 1.6237029135659065e-05, "loss": 0.0357, "step": 126660 }, { "epoch": 0.23335, "grad_norm": 0.06395290791988373, "learning_rate": 1.6233157839080485e-05, "loss": 0.0342, "step": 126670 }, { "epoch": 0.2334, "grad_norm": 0.0637071505188942, "learning_rate": 1.6229286782184083e-05, "loss": 0.0335, "step": 126680 }, { "epoch": 0.23345, "grad_norm": 0.057250093668699265, "learning_rate": 1.6225415965075676e-05, "loss": 0.0331, "step": 126690 }, { "epoch": 0.2335, "grad_norm": 0.05252157896757126, "learning_rate": 1.622154538786111e-05, "loss": 0.0368, "step": 126700 }, { "epoch": 0.23355, "grad_norm": 0.057001277804374695, "learning_rate": 1.6217675050646188e-05, "loss": 0.0348, "step": 126710 }, { "epoch": 0.2336, "grad_norm": 0.060486093163490295, "learning_rate": 1.6213804953536727e-05, "loss": 0.0336, "step": 126720 }, { "epoch": 0.23365, "grad_norm": 0.05644745007157326, "learning_rate": 1.6209935096638553e-05, "loss": 0.0329, "step": 126730 }, { "epoch": 0.2337, "grad_norm": 0.06537935882806778, "learning_rate": 1.6206065480057432e-05, "loss": 0.0342, "step": 126740 }, { "epoch": 0.23375, "grad_norm": 0.06667554378509521, "learning_rate": 1.6202196103899197e-05, "loss": 0.0336, "step": 126750 }, { "epoch": 0.2338, "grad_norm": 0.0648476704955101, "learning_rate": 1.6198326968269594e-05, "loss": 0.0338, "step": 126760 }, { "epoch": 0.23385, "grad_norm": 0.05586965009570122, "learning_rate": 1.619445807327445e-05, "loss": 0.0325, "step": 126770 }, { "epoch": 0.2339, "grad_norm": 0.0503368154168129, "learning_rate": 1.619058941901949e-05, "loss": 0.0333, "step": 126780 }, { "epoch": 0.23395, "grad_norm": 0.06135008856654167, "learning_rate": 1.6186721005610515e-05, "loss": 0.0337, "step": 126790 }, { "epoch": 0.234, "grad_norm": 0.061872679740190506, "learning_rate": 1.618285283315328e-05, "loss": 0.0338, "step": 126800 }, { "epoch": 0.23405, "grad_norm": 0.06412120163440704, "learning_rate": 1.6178984901753534e-05, "loss": 0.0335, "step": 126810 }, { "epoch": 0.2341, "grad_norm": 0.048076044768095016, "learning_rate": 1.617511721151703e-05, "loss": 0.0329, "step": 126820 }, { "epoch": 0.23415, "grad_norm": 0.060390032827854156, "learning_rate": 1.61712497625495e-05, "loss": 0.0343, "step": 126830 }, { "epoch": 0.2342, "grad_norm": 0.06160585582256317, "learning_rate": 1.616738255495669e-05, "loss": 0.0326, "step": 126840 }, { "epoch": 0.23425, "grad_norm": 0.06219357252120972, "learning_rate": 1.6163515588844318e-05, "loss": 0.033, "step": 126850 }, { "epoch": 0.2343, "grad_norm": 0.06169546768069267, "learning_rate": 1.6159648864318106e-05, "loss": 0.0341, "step": 126860 }, { "epoch": 0.23435, "grad_norm": 0.06924784928560257, "learning_rate": 1.6155782381483784e-05, "loss": 0.033, "step": 126870 }, { "epoch": 0.2344, "grad_norm": 0.0668037161231041, "learning_rate": 1.6151916140447042e-05, "loss": 0.0333, "step": 126880 }, { "epoch": 0.23445, "grad_norm": 0.06414536386728287, "learning_rate": 1.6148050141313592e-05, "loss": 0.0343, "step": 126890 }, { "epoch": 0.2345, "grad_norm": 0.06171473115682602, "learning_rate": 1.6144184384189127e-05, "loss": 0.0332, "step": 126900 }, { "epoch": 0.23455, "grad_norm": 0.06489849835634232, "learning_rate": 1.6140318869179333e-05, "loss": 0.034, "step": 126910 }, { "epoch": 0.2346, "grad_norm": 0.059951625764369965, "learning_rate": 1.613645359638989e-05, "loss": 0.0328, "step": 126920 }, { "epoch": 0.23465, "grad_norm": 0.05820494145154953, "learning_rate": 1.613258856592647e-05, "loss": 0.033, "step": 126930 }, { "epoch": 0.2347, "grad_norm": 0.05218276381492615, "learning_rate": 1.612872377789476e-05, "loss": 0.0333, "step": 126940 }, { "epoch": 0.23475, "grad_norm": 0.05372566729784012, "learning_rate": 1.6124859232400396e-05, "loss": 0.0325, "step": 126950 }, { "epoch": 0.2348, "grad_norm": 0.05991847440600395, "learning_rate": 1.6120994929549065e-05, "loss": 0.0335, "step": 126960 }, { "epoch": 0.23485, "grad_norm": 0.05379624664783478, "learning_rate": 1.6117130869446378e-05, "loss": 0.0341, "step": 126970 }, { "epoch": 0.2349, "grad_norm": 0.07068346440792084, "learning_rate": 1.6113267052198e-05, "loss": 0.034, "step": 126980 }, { "epoch": 0.23495, "grad_norm": 0.06268243491649628, "learning_rate": 1.6109403477909572e-05, "loss": 0.0355, "step": 126990 }, { "epoch": 0.235, "grad_norm": 0.06141556426882744, "learning_rate": 1.6105540146686706e-05, "loss": 0.0336, "step": 127000 }, { "epoch": 0.23505, "grad_norm": 0.06479032337665558, "learning_rate": 1.6101677058635035e-05, "loss": 0.0336, "step": 127010 }, { "epoch": 0.2351, "grad_norm": 0.07502786815166473, "learning_rate": 1.6097814213860165e-05, "loss": 0.0345, "step": 127020 }, { "epoch": 0.23515, "grad_norm": 0.05763748288154602, "learning_rate": 1.6093951612467713e-05, "loss": 0.0322, "step": 127030 }, { "epoch": 0.2352, "grad_norm": 0.06246546283364296, "learning_rate": 1.6090089254563274e-05, "loss": 0.0349, "step": 127040 }, { "epoch": 0.23525, "grad_norm": 0.06013244017958641, "learning_rate": 1.6086227140252443e-05, "loss": 0.0331, "step": 127050 }, { "epoch": 0.2353, "grad_norm": 0.05250510200858116, "learning_rate": 1.608236526964083e-05, "loss": 0.0337, "step": 127060 }, { "epoch": 0.23535, "grad_norm": 0.07325278222560883, "learning_rate": 1.6078503642833985e-05, "loss": 0.0353, "step": 127070 }, { "epoch": 0.2354, "grad_norm": 0.08081185817718506, "learning_rate": 1.6074642259937507e-05, "loss": 0.0357, "step": 127080 }, { "epoch": 0.23545, "grad_norm": 0.06401636451482773, "learning_rate": 1.6070781121056953e-05, "loss": 0.0347, "step": 127090 }, { "epoch": 0.2355, "grad_norm": 0.05674474313855171, "learning_rate": 1.6066920226297894e-05, "loss": 0.036, "step": 127100 }, { "epoch": 0.23555, "grad_norm": 0.07341116666793823, "learning_rate": 1.6063059575765872e-05, "loss": 0.0341, "step": 127110 }, { "epoch": 0.2356, "grad_norm": 0.07966278493404388, "learning_rate": 1.6059199169566446e-05, "loss": 0.0338, "step": 127120 }, { "epoch": 0.23565, "grad_norm": 0.06760095804929733, "learning_rate": 1.605533900780516e-05, "loss": 0.0345, "step": 127130 }, { "epoch": 0.2357, "grad_norm": 0.06217208877205849, "learning_rate": 1.6051479090587534e-05, "loss": 0.0347, "step": 127140 }, { "epoch": 0.23575, "grad_norm": 0.07714894413948059, "learning_rate": 1.604761941801913e-05, "loss": 0.0352, "step": 127150 }, { "epoch": 0.2358, "grad_norm": 0.06739632785320282, "learning_rate": 1.6043759990205427e-05, "loss": 0.0365, "step": 127160 }, { "epoch": 0.23585, "grad_norm": 0.05616482347249985, "learning_rate": 1.6039900807251962e-05, "loss": 0.0371, "step": 127170 }, { "epoch": 0.2359, "grad_norm": 0.07639485597610474, "learning_rate": 1.6036041869264254e-05, "loss": 0.0337, "step": 127180 }, { "epoch": 0.23595, "grad_norm": 0.06982140243053436, "learning_rate": 1.6032183176347786e-05, "loss": 0.0335, "step": 127190 }, { "epoch": 0.236, "grad_norm": 0.08310776203870773, "learning_rate": 1.6028324728608067e-05, "loss": 0.0341, "step": 127200 }, { "epoch": 0.23605, "grad_norm": 0.06467178463935852, "learning_rate": 1.6024466526150574e-05, "loss": 0.0345, "step": 127210 }, { "epoch": 0.2361, "grad_norm": 0.066603884100914, "learning_rate": 1.6020608569080802e-05, "loss": 0.0376, "step": 127220 }, { "epoch": 0.23615, "grad_norm": 0.058768756687641144, "learning_rate": 1.6016750857504208e-05, "loss": 0.0349, "step": 127230 }, { "epoch": 0.2362, "grad_norm": 0.05835096910595894, "learning_rate": 1.601289339152627e-05, "loss": 0.0328, "step": 127240 }, { "epoch": 0.23625, "grad_norm": 0.06757104396820068, "learning_rate": 1.6009036171252465e-05, "loss": 0.0339, "step": 127250 }, { "epoch": 0.2363, "grad_norm": 0.07333344221115112, "learning_rate": 1.6005179196788217e-05, "loss": 0.0348, "step": 127260 }, { "epoch": 0.23635, "grad_norm": 0.07218988239765167, "learning_rate": 1.6001322468239e-05, "loss": 0.0342, "step": 127270 }, { "epoch": 0.2364, "grad_norm": 0.06297601759433746, "learning_rate": 1.599746598571024e-05, "loss": 0.0342, "step": 127280 }, { "epoch": 0.23645, "grad_norm": 0.05608023330569267, "learning_rate": 1.5993609749307385e-05, "loss": 0.0355, "step": 127290 }, { "epoch": 0.2365, "grad_norm": 0.07831545919179916, "learning_rate": 1.5989753759135853e-05, "loss": 0.0344, "step": 127300 }, { "epoch": 0.23655, "grad_norm": 0.06702210754156113, "learning_rate": 1.5985898015301064e-05, "loss": 0.0341, "step": 127310 }, { "epoch": 0.2366, "grad_norm": 0.05965721979737282, "learning_rate": 1.5982042517908445e-05, "loss": 0.0349, "step": 127320 }, { "epoch": 0.23665, "grad_norm": 0.07125255465507507, "learning_rate": 1.597818726706339e-05, "loss": 0.0391, "step": 127330 }, { "epoch": 0.2367, "grad_norm": 0.06022556126117706, "learning_rate": 1.597433226287131e-05, "loss": 0.034, "step": 127340 }, { "epoch": 0.23675, "grad_norm": 0.053169671446084976, "learning_rate": 1.5970477505437586e-05, "loss": 0.0339, "step": 127350 }, { "epoch": 0.2368, "grad_norm": 0.05481424927711487, "learning_rate": 1.596662299486762e-05, "loss": 0.0348, "step": 127360 }, { "epoch": 0.23685, "grad_norm": 0.06159573793411255, "learning_rate": 1.596276873126679e-05, "loss": 0.0347, "step": 127370 }, { "epoch": 0.2369, "grad_norm": 0.06082068756222725, "learning_rate": 1.5958914714740464e-05, "loss": 0.0341, "step": 127380 }, { "epoch": 0.23695, "grad_norm": 0.07267327606678009, "learning_rate": 1.595506094539402e-05, "loss": 0.0356, "step": 127390 }, { "epoch": 0.237, "grad_norm": 0.06275376677513123, "learning_rate": 1.5951207423332806e-05, "loss": 0.0342, "step": 127400 }, { "epoch": 0.23705, "grad_norm": 0.04976017028093338, "learning_rate": 1.5947354148662187e-05, "loss": 0.0334, "step": 127410 }, { "epoch": 0.2371, "grad_norm": 0.05717877671122551, "learning_rate": 1.5943501121487496e-05, "loss": 0.0356, "step": 127420 }, { "epoch": 0.23715, "grad_norm": 0.057774148881435394, "learning_rate": 1.5939648341914082e-05, "loss": 0.0335, "step": 127430 }, { "epoch": 0.2372, "grad_norm": 0.07764162123203278, "learning_rate": 1.593579581004729e-05, "loss": 0.0352, "step": 127440 }, { "epoch": 0.23725, "grad_norm": 0.07691578567028046, "learning_rate": 1.593194352599242e-05, "loss": 0.0349, "step": 127450 }, { "epoch": 0.2373, "grad_norm": 0.07152264565229416, "learning_rate": 1.5928091489854823e-05, "loss": 0.0344, "step": 127460 }, { "epoch": 0.23735, "grad_norm": 0.0600624606013298, "learning_rate": 1.5924239701739786e-05, "loss": 0.033, "step": 127470 }, { "epoch": 0.2374, "grad_norm": 0.05317433178424835, "learning_rate": 1.5920388161752632e-05, "loss": 0.033, "step": 127480 }, { "epoch": 0.23745, "grad_norm": 0.05922538787126541, "learning_rate": 1.591653686999865e-05, "loss": 0.0325, "step": 127490 }, { "epoch": 0.2375, "grad_norm": 0.06978096812963486, "learning_rate": 1.5912685826583136e-05, "loss": 0.0333, "step": 127500 }, { "epoch": 0.23755, "grad_norm": 0.06716527044773102, "learning_rate": 1.5908835031611386e-05, "loss": 0.034, "step": 127510 }, { "epoch": 0.2376, "grad_norm": 0.06381786614656448, "learning_rate": 1.5904984485188662e-05, "loss": 0.0353, "step": 127520 }, { "epoch": 0.23765, "grad_norm": 0.058379948139190674, "learning_rate": 1.5901134187420252e-05, "loss": 0.0351, "step": 127530 }, { "epoch": 0.2377, "grad_norm": 0.054788608103990555, "learning_rate": 1.589728413841141e-05, "loss": 0.0328, "step": 127540 }, { "epoch": 0.23775, "grad_norm": 0.06037406250834465, "learning_rate": 1.5893434338267394e-05, "loss": 0.0331, "step": 127550 }, { "epoch": 0.2378, "grad_norm": 0.07420245558023453, "learning_rate": 1.588958478709347e-05, "loss": 0.033, "step": 127560 }, { "epoch": 0.23785, "grad_norm": 0.06018362194299698, "learning_rate": 1.5885735484994876e-05, "loss": 0.0331, "step": 127570 }, { "epoch": 0.2379, "grad_norm": 0.05091716721653938, "learning_rate": 1.5881886432076852e-05, "loss": 0.0321, "step": 127580 }, { "epoch": 0.23795, "grad_norm": 0.051424361765384674, "learning_rate": 1.5878037628444624e-05, "loss": 0.0319, "step": 127590 }, { "epoch": 0.238, "grad_norm": 0.06559579819440842, "learning_rate": 1.587418907420342e-05, "loss": 0.0346, "step": 127600 }, { "epoch": 0.23805, "grad_norm": 0.056557316333055496, "learning_rate": 1.5870340769458457e-05, "loss": 0.0345, "step": 127610 }, { "epoch": 0.2381, "grad_norm": 0.06059598922729492, "learning_rate": 1.5866492714314952e-05, "loss": 0.0334, "step": 127620 }, { "epoch": 0.23815, "grad_norm": 0.06248987838625908, "learning_rate": 1.5862644908878106e-05, "loss": 0.0338, "step": 127630 }, { "epoch": 0.2382, "grad_norm": 0.06059509888291359, "learning_rate": 1.58587973532531e-05, "loss": 0.0326, "step": 127640 }, { "epoch": 0.23825, "grad_norm": 0.056693557649850845, "learning_rate": 1.5854950047545165e-05, "loss": 0.0356, "step": 127650 }, { "epoch": 0.2383, "grad_norm": 0.06219499930739403, "learning_rate": 1.5851102991859437e-05, "loss": 0.0327, "step": 127660 }, { "epoch": 0.23835, "grad_norm": 0.05717054754495621, "learning_rate": 1.5847256186301135e-05, "loss": 0.0337, "step": 127670 }, { "epoch": 0.2384, "grad_norm": 0.07006093859672546, "learning_rate": 1.5843409630975394e-05, "loss": 0.0345, "step": 127680 }, { "epoch": 0.23845, "grad_norm": 0.06150520220398903, "learning_rate": 1.58395633259874e-05, "loss": 0.0365, "step": 127690 }, { "epoch": 0.2385, "grad_norm": 0.06333986669778824, "learning_rate": 1.5835717271442307e-05, "loss": 0.0349, "step": 127700 }, { "epoch": 0.23855, "grad_norm": 0.05847545713186264, "learning_rate": 1.583187146744526e-05, "loss": 0.0355, "step": 127710 }, { "epoch": 0.2386, "grad_norm": 0.0587812140583992, "learning_rate": 1.5828025914101402e-05, "loss": 0.0345, "step": 127720 }, { "epoch": 0.23865, "grad_norm": 0.05848672240972519, "learning_rate": 1.5824180611515865e-05, "loss": 0.0333, "step": 127730 }, { "epoch": 0.2387, "grad_norm": 0.05444684997200966, "learning_rate": 1.5820335559793782e-05, "loss": 0.0348, "step": 127740 }, { "epoch": 0.23875, "grad_norm": 0.06529933959245682, "learning_rate": 1.5816490759040288e-05, "loss": 0.0345, "step": 127750 }, { "epoch": 0.2388, "grad_norm": 0.05455807223916054, "learning_rate": 1.581264620936047e-05, "loss": 0.0344, "step": 127760 }, { "epoch": 0.23885, "grad_norm": 0.055863041430711746, "learning_rate": 1.5808801910859468e-05, "loss": 0.0379, "step": 127770 }, { "epoch": 0.2389, "grad_norm": 0.05264187976717949, "learning_rate": 1.580495786364236e-05, "loss": 0.0332, "step": 127780 }, { "epoch": 0.23895, "grad_norm": 0.05669938027858734, "learning_rate": 1.580111406781426e-05, "loss": 0.0332, "step": 127790 }, { "epoch": 0.239, "grad_norm": 0.05038554593920708, "learning_rate": 1.5797270523480236e-05, "loss": 0.0322, "step": 127800 }, { "epoch": 0.23905, "grad_norm": 0.05440721660852432, "learning_rate": 1.579342723074538e-05, "loss": 0.0337, "step": 127810 }, { "epoch": 0.2391, "grad_norm": 0.05540277063846588, "learning_rate": 1.578958418971477e-05, "loss": 0.0337, "step": 127820 }, { "epoch": 0.23915, "grad_norm": 0.050372201949357986, "learning_rate": 1.578574140049346e-05, "loss": 0.0321, "step": 127830 }, { "epoch": 0.2392, "grad_norm": 0.05455819144845009, "learning_rate": 1.5781898863186526e-05, "loss": 0.0325, "step": 127840 }, { "epoch": 0.23925, "grad_norm": 0.04976142942905426, "learning_rate": 1.5778056577899003e-05, "loss": 0.033, "step": 127850 }, { "epoch": 0.2393, "grad_norm": 0.055015236139297485, "learning_rate": 1.5774214544735962e-05, "loss": 0.0366, "step": 127860 }, { "epoch": 0.23935, "grad_norm": 0.05450107902288437, "learning_rate": 1.577037276380242e-05, "loss": 0.0337, "step": 127870 }, { "epoch": 0.2394, "grad_norm": 0.06425601989030838, "learning_rate": 1.5766531235203418e-05, "loss": 0.0337, "step": 127880 }, { "epoch": 0.23945, "grad_norm": 0.05643454194068909, "learning_rate": 1.5762689959043992e-05, "loss": 0.0323, "step": 127890 }, { "epoch": 0.2395, "grad_norm": 0.056932345032691956, "learning_rate": 1.5758848935429147e-05, "loss": 0.0337, "step": 127900 }, { "epoch": 0.23955, "grad_norm": 0.0619305819272995, "learning_rate": 1.5755008164463904e-05, "loss": 0.0333, "step": 127910 }, { "epoch": 0.2396, "grad_norm": 0.08095294237136841, "learning_rate": 1.575116764625326e-05, "loss": 0.035, "step": 127920 }, { "epoch": 0.23965, "grad_norm": 0.09136935323476791, "learning_rate": 1.574732738090221e-05, "loss": 0.0342, "step": 127930 }, { "epoch": 0.2397, "grad_norm": 0.06716940551996231, "learning_rate": 1.5743487368515775e-05, "loss": 0.0343, "step": 127940 }, { "epoch": 0.23975, "grad_norm": 0.05612089857459068, "learning_rate": 1.57396476091989e-05, "loss": 0.0335, "step": 127950 }, { "epoch": 0.2398, "grad_norm": 0.06548678129911423, "learning_rate": 1.5735808103056592e-05, "loss": 0.0354, "step": 127960 }, { "epoch": 0.23985, "grad_norm": 0.06602530926465988, "learning_rate": 1.57319688501938e-05, "loss": 0.0346, "step": 127970 }, { "epoch": 0.2399, "grad_norm": 0.06799127161502838, "learning_rate": 1.5728129850715503e-05, "loss": 0.0346, "step": 127980 }, { "epoch": 0.23995, "grad_norm": 0.06980675458908081, "learning_rate": 1.5724291104726652e-05, "loss": 0.0344, "step": 127990 }, { "epoch": 0.24, "grad_norm": 0.05383969470858574, "learning_rate": 1.572045261233219e-05, "loss": 0.035, "step": 128000 }, { "epoch": 0.24005, "grad_norm": 0.06383443623781204, "learning_rate": 1.5716614373637085e-05, "loss": 0.0333, "step": 128010 }, { "epoch": 0.2401, "grad_norm": 0.0596088282763958, "learning_rate": 1.5712776388746243e-05, "loss": 0.0334, "step": 128020 }, { "epoch": 0.24015, "grad_norm": 0.08484054356813431, "learning_rate": 1.570893865776461e-05, "loss": 0.0351, "step": 128030 }, { "epoch": 0.2402, "grad_norm": 0.06879259645938873, "learning_rate": 1.5705101180797098e-05, "loss": 0.0331, "step": 128040 }, { "epoch": 0.24025, "grad_norm": 0.06585878133773804, "learning_rate": 1.5701263957948636e-05, "loss": 0.0335, "step": 128050 }, { "epoch": 0.2403, "grad_norm": 0.0568404421210289, "learning_rate": 1.569742698932411e-05, "loss": 0.0344, "step": 128060 }, { "epoch": 0.24035, "grad_norm": 0.057048458606004715, "learning_rate": 1.5693590275028445e-05, "loss": 0.0326, "step": 128070 }, { "epoch": 0.2404, "grad_norm": 0.05349932610988617, "learning_rate": 1.5689753815166526e-05, "loss": 0.0325, "step": 128080 }, { "epoch": 0.24045, "grad_norm": 0.050432320684194565, "learning_rate": 1.5685917609843236e-05, "loss": 0.0326, "step": 128090 }, { "epoch": 0.2405, "grad_norm": 0.05716854706406593, "learning_rate": 1.5682081659163467e-05, "loss": 0.0325, "step": 128100 }, { "epoch": 0.24055, "grad_norm": 0.04774298518896103, "learning_rate": 1.567824596323208e-05, "loss": 0.0333, "step": 128110 }, { "epoch": 0.2406, "grad_norm": 0.058883074671030045, "learning_rate": 1.567441052215395e-05, "loss": 0.0349, "step": 128120 }, { "epoch": 0.24065, "grad_norm": 0.05763725936412811, "learning_rate": 1.567057533603393e-05, "loss": 0.0336, "step": 128130 }, { "epoch": 0.2407, "grad_norm": 0.05150453373789787, "learning_rate": 1.5666740404976864e-05, "loss": 0.0331, "step": 128140 }, { "epoch": 0.24075, "grad_norm": 0.08317604660987854, "learning_rate": 1.566290572908763e-05, "loss": 0.0342, "step": 128150 }, { "epoch": 0.2408, "grad_norm": 0.058748021721839905, "learning_rate": 1.565907130847103e-05, "loss": 0.0339, "step": 128160 }, { "epoch": 0.24085, "grad_norm": 0.06249953806400299, "learning_rate": 1.565523714323192e-05, "loss": 0.0343, "step": 128170 }, { "epoch": 0.2409, "grad_norm": 0.07479379326105118, "learning_rate": 1.565140323347511e-05, "loss": 0.0361, "step": 128180 }, { "epoch": 0.24095, "grad_norm": 0.0653737485408783, "learning_rate": 1.564756957930542e-05, "loss": 0.0328, "step": 128190 }, { "epoch": 0.241, "grad_norm": 0.05890415981411934, "learning_rate": 1.5643736180827676e-05, "loss": 0.0342, "step": 128200 }, { "epoch": 0.24105, "grad_norm": 0.06277629733085632, "learning_rate": 1.5639903038146665e-05, "loss": 0.0338, "step": 128210 }, { "epoch": 0.2411, "grad_norm": 0.06385780870914459, "learning_rate": 1.563607015136719e-05, "loss": 0.0348, "step": 128220 }, { "epoch": 0.24115, "grad_norm": 0.05908294767141342, "learning_rate": 1.5632237520594036e-05, "loss": 0.0339, "step": 128230 }, { "epoch": 0.2412, "grad_norm": 0.05373472720384598, "learning_rate": 1.562840514593199e-05, "loss": 0.0335, "step": 128240 }, { "epoch": 0.24125, "grad_norm": 0.050608761608600616, "learning_rate": 1.562457302748582e-05, "loss": 0.0339, "step": 128250 }, { "epoch": 0.2413, "grad_norm": 0.06089496240019798, "learning_rate": 1.5620741165360303e-05, "loss": 0.0331, "step": 128260 }, { "epoch": 0.24135, "grad_norm": 0.05857257544994354, "learning_rate": 1.561690955966021e-05, "loss": 0.0337, "step": 128270 }, { "epoch": 0.2414, "grad_norm": 0.06435168534517288, "learning_rate": 1.5613078210490274e-05, "loss": 0.033, "step": 128280 }, { "epoch": 0.24145, "grad_norm": 0.05921395868062973, "learning_rate": 1.5609247117955262e-05, "loss": 0.0329, "step": 128290 }, { "epoch": 0.2415, "grad_norm": 0.052547890692949295, "learning_rate": 1.5605416282159897e-05, "loss": 0.0327, "step": 128300 }, { "epoch": 0.24155, "grad_norm": 0.08207894116640091, "learning_rate": 1.560158570320893e-05, "loss": 0.0339, "step": 128310 }, { "epoch": 0.2416, "grad_norm": 0.06959673017263412, "learning_rate": 1.5597755381207075e-05, "loss": 0.0338, "step": 128320 }, { "epoch": 0.24165, "grad_norm": 0.06273122131824493, "learning_rate": 1.559392531625905e-05, "loss": 0.0347, "step": 128330 }, { "epoch": 0.2417, "grad_norm": 0.0634051263332367, "learning_rate": 1.5590095508469583e-05, "loss": 0.0335, "step": 128340 }, { "epoch": 0.24175, "grad_norm": 0.06408239901065826, "learning_rate": 1.5586265957943358e-05, "loss": 0.0328, "step": 128350 }, { "epoch": 0.2418, "grad_norm": 0.06315138936042786, "learning_rate": 1.5582436664785098e-05, "loss": 0.0322, "step": 128360 }, { "epoch": 0.24185, "grad_norm": 0.07049424201250076, "learning_rate": 1.557860762909947e-05, "loss": 0.0329, "step": 128370 }, { "epoch": 0.2419, "grad_norm": 0.06051500141620636, "learning_rate": 1.557477885099117e-05, "loss": 0.0325, "step": 128380 }, { "epoch": 0.24195, "grad_norm": 0.05313899368047714, "learning_rate": 1.5570950330564888e-05, "loss": 0.0327, "step": 128390 }, { "epoch": 0.242, "grad_norm": 0.05960242077708244, "learning_rate": 1.5567122067925272e-05, "loss": 0.0336, "step": 128400 }, { "epoch": 0.24205, "grad_norm": 0.07312949746847153, "learning_rate": 1.5563294063177004e-05, "loss": 0.0333, "step": 128410 }, { "epoch": 0.2421, "grad_norm": 0.06439289450645447, "learning_rate": 1.555946631642472e-05, "loss": 0.0344, "step": 128420 }, { "epoch": 0.24215, "grad_norm": 0.06231061741709709, "learning_rate": 1.555563882777309e-05, "loss": 0.0322, "step": 128430 }, { "epoch": 0.2422, "grad_norm": 0.07115618884563446, "learning_rate": 1.555181159732674e-05, "loss": 0.0327, "step": 128440 }, { "epoch": 0.24225, "grad_norm": 0.05875179171562195, "learning_rate": 1.5547984625190303e-05, "loss": 0.032, "step": 128450 }, { "epoch": 0.2423, "grad_norm": 0.07768935710191727, "learning_rate": 1.5544157911468433e-05, "loss": 0.0342, "step": 128460 }, { "epoch": 0.24235, "grad_norm": 0.07130517065525055, "learning_rate": 1.554033145626572e-05, "loss": 0.0339, "step": 128470 }, { "epoch": 0.2424, "grad_norm": 0.07714162021875381, "learning_rate": 1.55365052596868e-05, "loss": 0.0334, "step": 128480 }, { "epoch": 0.24245, "grad_norm": 0.05913243442773819, "learning_rate": 1.5532679321836264e-05, "loss": 0.0319, "step": 128490 }, { "epoch": 0.2425, "grad_norm": 0.06145475059747696, "learning_rate": 1.5528853642818726e-05, "loss": 0.034, "step": 128500 }, { "epoch": 0.24255, "grad_norm": 0.07546162605285645, "learning_rate": 1.5525028222738763e-05, "loss": 0.0333, "step": 128510 }, { "epoch": 0.2426, "grad_norm": 0.06005942448973656, "learning_rate": 1.5521203061700975e-05, "loss": 0.0326, "step": 128520 }, { "epoch": 0.24265, "grad_norm": 0.06266207247972488, "learning_rate": 1.5517378159809935e-05, "loss": 0.0326, "step": 128530 }, { "epoch": 0.2427, "grad_norm": 0.05349930003285408, "learning_rate": 1.551355351717021e-05, "loss": 0.0327, "step": 128540 }, { "epoch": 0.24275, "grad_norm": 0.06674955040216446, "learning_rate": 1.550972913388637e-05, "loss": 0.0333, "step": 128550 }, { "epoch": 0.2428, "grad_norm": 0.06313853710889816, "learning_rate": 1.5505905010062962e-05, "loss": 0.0363, "step": 128560 }, { "epoch": 0.24285, "grad_norm": 0.05368867889046669, "learning_rate": 1.550208114580455e-05, "loss": 0.0326, "step": 128570 }, { "epoch": 0.2429, "grad_norm": 0.06399517506361008, "learning_rate": 1.549825754121568e-05, "loss": 0.0334, "step": 128580 }, { "epoch": 0.24295, "grad_norm": 0.06687916070222855, "learning_rate": 1.5494434196400864e-05, "loss": 0.0369, "step": 128590 }, { "epoch": 0.243, "grad_norm": 0.05454210191965103, "learning_rate": 1.5490611111464657e-05, "loss": 0.0341, "step": 128600 }, { "epoch": 0.24305, "grad_norm": 0.07353127002716064, "learning_rate": 1.5486788286511567e-05, "loss": 0.0357, "step": 128610 }, { "epoch": 0.2431, "grad_norm": 0.058172594755887985, "learning_rate": 1.5482965721646113e-05, "loss": 0.034, "step": 128620 }, { "epoch": 0.24315, "grad_norm": 0.05430532246828079, "learning_rate": 1.5479143416972795e-05, "loss": 0.0334, "step": 128630 }, { "epoch": 0.2432, "grad_norm": 0.04920189082622528, "learning_rate": 1.5475321372596117e-05, "loss": 0.0331, "step": 128640 }, { "epoch": 0.24325, "grad_norm": 0.060522664338350296, "learning_rate": 1.5471499588620593e-05, "loss": 0.0346, "step": 128650 }, { "epoch": 0.2433, "grad_norm": 0.058444004505872726, "learning_rate": 1.5467678065150668e-05, "loss": 0.0344, "step": 128660 }, { "epoch": 0.24335, "grad_norm": 0.07578611373901367, "learning_rate": 1.546385680229086e-05, "loss": 0.0348, "step": 128670 }, { "epoch": 0.2434, "grad_norm": 0.05158187448978424, "learning_rate": 1.546003580014561e-05, "loss": 0.0328, "step": 128680 }, { "epoch": 0.24345, "grad_norm": 0.05328892916440964, "learning_rate": 1.5456215058819412e-05, "loss": 0.034, "step": 128690 }, { "epoch": 0.2435, "grad_norm": 0.04662049934267998, "learning_rate": 1.5452394578416697e-05, "loss": 0.033, "step": 128700 }, { "epoch": 0.24355, "grad_norm": 0.08248545974493027, "learning_rate": 1.5448574359041934e-05, "loss": 0.0334, "step": 128710 }, { "epoch": 0.2436, "grad_norm": 0.07601383328437805, "learning_rate": 1.544475440079956e-05, "loss": 0.0336, "step": 128720 }, { "epoch": 0.24365, "grad_norm": 0.082184799015522, "learning_rate": 1.5440934703794007e-05, "loss": 0.0333, "step": 128730 }, { "epoch": 0.2437, "grad_norm": 0.0712849572300911, "learning_rate": 1.5437115268129715e-05, "loss": 0.0331, "step": 128740 }, { "epoch": 0.24375, "grad_norm": 0.0680183470249176, "learning_rate": 1.543329609391109e-05, "loss": 0.033, "step": 128750 }, { "epoch": 0.2438, "grad_norm": 0.06785959750413895, "learning_rate": 1.5429477181242552e-05, "loss": 0.0335, "step": 128760 }, { "epoch": 0.24385, "grad_norm": 0.06210260093212128, "learning_rate": 1.5425658530228522e-05, "loss": 0.0356, "step": 128770 }, { "epoch": 0.2439, "grad_norm": 0.06129412353038788, "learning_rate": 1.5421840140973385e-05, "loss": 0.0351, "step": 128780 }, { "epoch": 0.24395, "grad_norm": 0.061416443437337875, "learning_rate": 1.541802201358155e-05, "loss": 0.034, "step": 128790 }, { "epoch": 0.244, "grad_norm": 0.07154708355665207, "learning_rate": 1.5414204148157385e-05, "loss": 0.0325, "step": 128800 }, { "epoch": 0.24405, "grad_norm": 0.06849026679992676, "learning_rate": 1.5410386544805282e-05, "loss": 0.0341, "step": 128810 }, { "epoch": 0.2441, "grad_norm": 0.06167871132493019, "learning_rate": 1.5406569203629605e-05, "loss": 0.0333, "step": 128820 }, { "epoch": 0.24415, "grad_norm": 0.06029805541038513, "learning_rate": 1.5402752124734722e-05, "loss": 0.0329, "step": 128830 }, { "epoch": 0.2442, "grad_norm": 0.06149749830365181, "learning_rate": 1.5398935308224995e-05, "loss": 0.0329, "step": 128840 }, { "epoch": 0.24425, "grad_norm": 0.060887884348630905, "learning_rate": 1.539511875420476e-05, "loss": 0.0327, "step": 128850 }, { "epoch": 0.2443, "grad_norm": 0.06271271407604218, "learning_rate": 1.5391302462778384e-05, "loss": 0.0338, "step": 128860 }, { "epoch": 0.24435, "grad_norm": 0.050711505115032196, "learning_rate": 1.5387486434050175e-05, "loss": 0.0328, "step": 128870 }, { "epoch": 0.2444, "grad_norm": 0.05109580606222153, "learning_rate": 1.538367066812449e-05, "loss": 0.0327, "step": 128880 }, { "epoch": 0.24445, "grad_norm": 0.048492636531591415, "learning_rate": 1.537985516510562e-05, "loss": 0.0331, "step": 128890 }, { "epoch": 0.2445, "grad_norm": 0.06411636620759964, "learning_rate": 1.5376039925097902e-05, "loss": 0.0337, "step": 128900 }, { "epoch": 0.24455, "grad_norm": 0.0920451357960701, "learning_rate": 1.537222494820564e-05, "loss": 0.034, "step": 128910 }, { "epoch": 0.2446, "grad_norm": 0.05714166909456253, "learning_rate": 1.5368410234533127e-05, "loss": 0.0331, "step": 128920 }, { "epoch": 0.24465, "grad_norm": 0.059067945927381516, "learning_rate": 1.5364595784184666e-05, "loss": 0.0332, "step": 128930 }, { "epoch": 0.2447, "grad_norm": 0.08221178501844406, "learning_rate": 1.536078159726453e-05, "loss": 0.0346, "step": 128940 }, { "epoch": 0.24475, "grad_norm": 0.07259626686573029, "learning_rate": 1.5356967673877e-05, "loss": 0.0324, "step": 128950 }, { "epoch": 0.2448, "grad_norm": 0.0738881304860115, "learning_rate": 1.5353154014126363e-05, "loss": 0.0327, "step": 128960 }, { "epoch": 0.24485, "grad_norm": 0.05740315094590187, "learning_rate": 1.5349340618116857e-05, "loss": 0.0323, "step": 128970 }, { "epoch": 0.2449, "grad_norm": 0.054173581302165985, "learning_rate": 1.5345527485952768e-05, "loss": 0.0335, "step": 128980 }, { "epoch": 0.24495, "grad_norm": 0.05699336901307106, "learning_rate": 1.5341714617738324e-05, "loss": 0.0338, "step": 128990 }, { "epoch": 0.245, "grad_norm": 0.05243121460080147, "learning_rate": 1.5337902013577775e-05, "loss": 0.0348, "step": 129000 }, { "epoch": 0.24505, "grad_norm": 0.054811857640743256, "learning_rate": 1.533408967357535e-05, "loss": 0.0338, "step": 129010 }, { "epoch": 0.2451, "grad_norm": 0.06116772070527077, "learning_rate": 1.5330277597835287e-05, "loss": 0.0343, "step": 129020 }, { "epoch": 0.24515, "grad_norm": 0.05350358039140701, "learning_rate": 1.53264657864618e-05, "loss": 0.0354, "step": 129030 }, { "epoch": 0.2452, "grad_norm": 0.053333185613155365, "learning_rate": 1.5322654239559104e-05, "loss": 0.0367, "step": 129040 }, { "epoch": 0.24525, "grad_norm": 0.06097253039479256, "learning_rate": 1.531884295723141e-05, "loss": 0.0332, "step": 129050 }, { "epoch": 0.2453, "grad_norm": 0.06263768672943115, "learning_rate": 1.53150319395829e-05, "loss": 0.0347, "step": 129060 }, { "epoch": 0.24535, "grad_norm": 0.07106564193964005, "learning_rate": 1.53112211867178e-05, "loss": 0.0354, "step": 129070 }, { "epoch": 0.2454, "grad_norm": 0.06520446389913559, "learning_rate": 1.530741069874025e-05, "loss": 0.0341, "step": 129080 }, { "epoch": 0.24545, "grad_norm": 0.05361152067780495, "learning_rate": 1.530360047575446e-05, "loss": 0.0379, "step": 129090 }, { "epoch": 0.2455, "grad_norm": 0.0815877839922905, "learning_rate": 1.5299790517864592e-05, "loss": 0.0365, "step": 129100 }, { "epoch": 0.24555, "grad_norm": 0.0634656473994255, "learning_rate": 1.5295980825174804e-05, "loss": 0.0341, "step": 129110 }, { "epoch": 0.2456, "grad_norm": 0.05546913295984268, "learning_rate": 1.529217139778926e-05, "loss": 0.0343, "step": 129120 }, { "epoch": 0.24565, "grad_norm": 0.07225493341684341, "learning_rate": 1.5288362235812096e-05, "loss": 0.0342, "step": 129130 }, { "epoch": 0.2457, "grad_norm": 0.06344477832317352, "learning_rate": 1.5284553339347458e-05, "loss": 0.0344, "step": 129140 }, { "epoch": 0.24575, "grad_norm": 0.08348975330591202, "learning_rate": 1.5280744708499494e-05, "loss": 0.0347, "step": 129150 }, { "epoch": 0.2458, "grad_norm": 0.06194521114230156, "learning_rate": 1.5276936343372304e-05, "loss": 0.0341, "step": 129160 }, { "epoch": 0.24585, "grad_norm": 0.06985343992710114, "learning_rate": 1.5273128244070034e-05, "loss": 0.0355, "step": 129170 }, { "epoch": 0.2459, "grad_norm": 0.06529099494218826, "learning_rate": 1.5269320410696773e-05, "loss": 0.0339, "step": 129180 }, { "epoch": 0.24595, "grad_norm": 0.05931119620800018, "learning_rate": 1.5265512843356646e-05, "loss": 0.0332, "step": 129190 }, { "epoch": 0.246, "grad_norm": 0.06843093782663345, "learning_rate": 1.526170554215373e-05, "loss": 0.0343, "step": 129200 }, { "epoch": 0.24605, "grad_norm": 0.062442880123853683, "learning_rate": 1.525789850719213e-05, "loss": 0.0331, "step": 129210 }, { "epoch": 0.2461, "grad_norm": 0.06340550631284714, "learning_rate": 1.5254091738575932e-05, "loss": 0.0338, "step": 129220 }, { "epoch": 0.24615, "grad_norm": 0.07777202129364014, "learning_rate": 1.5250285236409199e-05, "loss": 0.0349, "step": 129230 }, { "epoch": 0.2462, "grad_norm": 0.07286890596151352, "learning_rate": 1.524647900079601e-05, "loss": 0.0353, "step": 129240 }, { "epoch": 0.24625, "grad_norm": 0.0640026405453682, "learning_rate": 1.5242673031840412e-05, "loss": 0.0332, "step": 129250 }, { "epoch": 0.2463, "grad_norm": 0.06555936485528946, "learning_rate": 1.5238867329646479e-05, "loss": 0.0335, "step": 129260 }, { "epoch": 0.24635, "grad_norm": 0.06518343091011047, "learning_rate": 1.5235061894318229e-05, "loss": 0.0332, "step": 129270 }, { "epoch": 0.2464, "grad_norm": 0.05131986364722252, "learning_rate": 1.5231256725959725e-05, "loss": 0.0333, "step": 129280 }, { "epoch": 0.24645, "grad_norm": 0.062407419085502625, "learning_rate": 1.5227451824674998e-05, "loss": 0.0346, "step": 129290 }, { "epoch": 0.2465, "grad_norm": 0.05796368047595024, "learning_rate": 1.5223647190568064e-05, "loss": 0.0321, "step": 129300 }, { "epoch": 0.24655, "grad_norm": 0.06182119622826576, "learning_rate": 1.5219842823742947e-05, "loss": 0.0336, "step": 129310 }, { "epoch": 0.2466, "grad_norm": 0.05594813823699951, "learning_rate": 1.5216038724303647e-05, "loss": 0.033, "step": 129320 }, { "epoch": 0.24665, "grad_norm": 0.05082995444536209, "learning_rate": 1.5212234892354176e-05, "loss": 0.033, "step": 129330 }, { "epoch": 0.2467, "grad_norm": 0.07302447408437729, "learning_rate": 1.5208431327998523e-05, "loss": 0.0348, "step": 129340 }, { "epoch": 0.24675, "grad_norm": 0.09341346472501755, "learning_rate": 1.5204628031340676e-05, "loss": 0.0346, "step": 129350 }, { "epoch": 0.2468, "grad_norm": 0.0673801451921463, "learning_rate": 1.520082500248463e-05, "loss": 0.0332, "step": 129360 }, { "epoch": 0.24685, "grad_norm": 0.06581735610961914, "learning_rate": 1.5197022241534337e-05, "loss": 0.033, "step": 129370 }, { "epoch": 0.2469, "grad_norm": 0.053973015397787094, "learning_rate": 1.5193219748593784e-05, "loss": 0.0324, "step": 129380 }, { "epoch": 0.24695, "grad_norm": 0.06393645703792572, "learning_rate": 1.5189417523766903e-05, "loss": 0.0344, "step": 129390 }, { "epoch": 0.247, "grad_norm": 0.05542540177702904, "learning_rate": 1.5185615567157668e-05, "loss": 0.0328, "step": 129400 }, { "epoch": 0.24705, "grad_norm": 0.061920009553432465, "learning_rate": 1.5181813878870022e-05, "loss": 0.0389, "step": 129410 }, { "epoch": 0.2471, "grad_norm": 0.05536089465022087, "learning_rate": 1.517801245900789e-05, "loss": 0.0339, "step": 129420 }, { "epoch": 0.24715, "grad_norm": 0.06831495463848114, "learning_rate": 1.5174211307675212e-05, "loss": 0.0331, "step": 129430 }, { "epoch": 0.2472, "grad_norm": 0.07440666854381561, "learning_rate": 1.51704104249759e-05, "loss": 0.0334, "step": 129440 }, { "epoch": 0.24725, "grad_norm": 0.073429174721241, "learning_rate": 1.5166609811013882e-05, "loss": 0.0335, "step": 129450 }, { "epoch": 0.2473, "grad_norm": 0.05541494861245155, "learning_rate": 1.5162809465893052e-05, "loss": 0.0329, "step": 129460 }, { "epoch": 0.24735, "grad_norm": 0.05493704602122307, "learning_rate": 1.5159009389717307e-05, "loss": 0.0325, "step": 129470 }, { "epoch": 0.2474, "grad_norm": 0.052072275429964066, "learning_rate": 1.5155209582590562e-05, "loss": 0.0329, "step": 129480 }, { "epoch": 0.24745, "grad_norm": 0.0666222795844078, "learning_rate": 1.5151410044616682e-05, "loss": 0.0353, "step": 129490 }, { "epoch": 0.2475, "grad_norm": 0.051309410482645035, "learning_rate": 1.5147610775899557e-05, "loss": 0.0331, "step": 129500 }, { "epoch": 0.24755, "grad_norm": 0.05242369323968887, "learning_rate": 1.5143811776543044e-05, "loss": 0.0323, "step": 129510 }, { "epoch": 0.2476, "grad_norm": 0.05651107430458069, "learning_rate": 1.5140013046651022e-05, "loss": 0.0338, "step": 129520 }, { "epoch": 0.24765, "grad_norm": 0.06189333274960518, "learning_rate": 1.5136214586327335e-05, "loss": 0.0341, "step": 129530 }, { "epoch": 0.2477, "grad_norm": 0.0606662854552269, "learning_rate": 1.5132416395675834e-05, "loss": 0.0374, "step": 129540 }, { "epoch": 0.24775, "grad_norm": 0.05297665670514107, "learning_rate": 1.5128618474800365e-05, "loss": 0.0346, "step": 129550 }, { "epoch": 0.2478, "grad_norm": 0.053852248936891556, "learning_rate": 1.5124820823804754e-05, "loss": 0.0333, "step": 129560 }, { "epoch": 0.24785, "grad_norm": 0.06997379660606384, "learning_rate": 1.5121023442792842e-05, "loss": 0.0336, "step": 129570 }, { "epoch": 0.2479, "grad_norm": 0.05877318233251572, "learning_rate": 1.5117226331868423e-05, "loss": 0.034, "step": 129580 }, { "epoch": 0.24795, "grad_norm": 0.050678376108407974, "learning_rate": 1.5113429491135328e-05, "loss": 0.0338, "step": 129590 }, { "epoch": 0.248, "grad_norm": 0.06394968181848526, "learning_rate": 1.5109632920697364e-05, "loss": 0.0338, "step": 129600 }, { "epoch": 0.24805, "grad_norm": 0.06775014102458954, "learning_rate": 1.5105836620658315e-05, "loss": 0.033, "step": 129610 }, { "epoch": 0.2481, "grad_norm": 0.062414124608039856, "learning_rate": 1.510204059112198e-05, "loss": 0.035, "step": 129620 }, { "epoch": 0.24815, "grad_norm": 0.08363818377256393, "learning_rate": 1.509824483219213e-05, "loss": 0.035, "step": 129630 }, { "epoch": 0.2482, "grad_norm": 0.08240839838981628, "learning_rate": 1.5094449343972553e-05, "loss": 0.0342, "step": 129640 }, { "epoch": 0.24825, "grad_norm": 0.06142202019691467, "learning_rate": 1.5090654126567006e-05, "loss": 0.0351, "step": 129650 }, { "epoch": 0.2483, "grad_norm": 0.064139723777771, "learning_rate": 1.5086859180079244e-05, "loss": 0.0345, "step": 129660 }, { "epoch": 0.24835, "grad_norm": 0.06331875175237656, "learning_rate": 1.5083064504613042e-05, "loss": 0.034, "step": 129670 }, { "epoch": 0.2484, "grad_norm": 0.055469829589128494, "learning_rate": 1.5079270100272119e-05, "loss": 0.0349, "step": 129680 }, { "epoch": 0.24845, "grad_norm": 0.07021180540323257, "learning_rate": 1.5075475967160235e-05, "loss": 0.0348, "step": 129690 }, { "epoch": 0.2485, "grad_norm": 0.09408631920814514, "learning_rate": 1.5071682105381101e-05, "loss": 0.0336, "step": 129700 }, { "epoch": 0.24855, "grad_norm": 0.0889461413025856, "learning_rate": 1.5067888515038459e-05, "loss": 0.0333, "step": 129710 }, { "epoch": 0.2486, "grad_norm": 0.05832533910870552, "learning_rate": 1.5064095196236006e-05, "loss": 0.0348, "step": 129720 }, { "epoch": 0.24865, "grad_norm": 0.06746535748243332, "learning_rate": 1.5060302149077454e-05, "loss": 0.0356, "step": 129730 }, { "epoch": 0.2487, "grad_norm": 0.06575343012809753, "learning_rate": 1.5056509373666516e-05, "loss": 0.0351, "step": 129740 }, { "epoch": 0.24875, "grad_norm": 0.056073229759931564, "learning_rate": 1.505271687010687e-05, "loss": 0.0354, "step": 129750 }, { "epoch": 0.2488, "grad_norm": 0.05148507282137871, "learning_rate": 1.5048924638502216e-05, "loss": 0.0323, "step": 129760 }, { "epoch": 0.24885, "grad_norm": 0.05382775515317917, "learning_rate": 1.5045132678956208e-05, "loss": 0.0326, "step": 129770 }, { "epoch": 0.2489, "grad_norm": 0.050694435834884644, "learning_rate": 1.5041340991572542e-05, "loss": 0.0326, "step": 129780 }, { "epoch": 0.24895, "grad_norm": 0.06192651763558388, "learning_rate": 1.5037549576454874e-05, "loss": 0.0333, "step": 129790 }, { "epoch": 0.249, "grad_norm": 0.06613980233669281, "learning_rate": 1.5033758433706858e-05, "loss": 0.034, "step": 129800 }, { "epoch": 0.24905, "grad_norm": 0.0578388012945652, "learning_rate": 1.502996756343214e-05, "loss": 0.0335, "step": 129810 }, { "epoch": 0.2491, "grad_norm": 0.056715864688158035, "learning_rate": 1.5026176965734362e-05, "loss": 0.0334, "step": 129820 }, { "epoch": 0.24915, "grad_norm": 0.05182795599102974, "learning_rate": 1.5022386640717165e-05, "loss": 0.0341, "step": 129830 }, { "epoch": 0.2492, "grad_norm": 0.05253973975777626, "learning_rate": 1.5018596588484163e-05, "loss": 0.0324, "step": 129840 }, { "epoch": 0.24925, "grad_norm": 0.05492791533470154, "learning_rate": 1.5014806809138975e-05, "loss": 0.0351, "step": 129850 }, { "epoch": 0.2493, "grad_norm": 0.05340440198779106, "learning_rate": 1.5011017302785233e-05, "loss": 0.0328, "step": 129860 }, { "epoch": 0.24935, "grad_norm": 0.05057377368211746, "learning_rate": 1.5007228069526508e-05, "loss": 0.033, "step": 129870 }, { "epoch": 0.2494, "grad_norm": 0.05610264837741852, "learning_rate": 1.5003439109466433e-05, "loss": 0.0324, "step": 129880 }, { "epoch": 0.24945, "grad_norm": 0.04865795373916626, "learning_rate": 1.4999650422708558e-05, "loss": 0.033, "step": 129890 }, { "epoch": 0.2495, "grad_norm": 0.0512232780456543, "learning_rate": 1.4995862009356496e-05, "loss": 0.0327, "step": 129900 }, { "epoch": 0.24955, "grad_norm": 0.04978105425834656, "learning_rate": 1.49920738695138e-05, "loss": 0.0321, "step": 129910 }, { "epoch": 0.2496, "grad_norm": 0.056026410311460495, "learning_rate": 1.4988286003284047e-05, "loss": 0.0339, "step": 129920 }, { "epoch": 0.24965, "grad_norm": 0.05365905165672302, "learning_rate": 1.4984498410770801e-05, "loss": 0.0345, "step": 129930 }, { "epoch": 0.2497, "grad_norm": 0.06317032873630524, "learning_rate": 1.4980711092077598e-05, "loss": 0.0334, "step": 129940 }, { "epoch": 0.24975, "grad_norm": 0.04971380531787872, "learning_rate": 1.4976924047307994e-05, "loss": 0.0366, "step": 129950 }, { "epoch": 0.2498, "grad_norm": 0.0540299117565155, "learning_rate": 1.4973137276565519e-05, "loss": 0.0336, "step": 129960 }, { "epoch": 0.24985, "grad_norm": 0.058525823056697845, "learning_rate": 1.4969350779953695e-05, "loss": 0.0365, "step": 129970 }, { "epoch": 0.2499, "grad_norm": 0.07276928424835205, "learning_rate": 1.4965564557576064e-05, "loss": 0.0346, "step": 129980 }, { "epoch": 0.24995, "grad_norm": 0.0586562342941761, "learning_rate": 1.4961778609536123e-05, "loss": 0.0339, "step": 129990 }, { "epoch": 0.25, "grad_norm": 0.0669587254524231, "learning_rate": 1.495799293593739e-05, "loss": 0.035, "step": 130000 }, { "epoch": 0.25005, "grad_norm": 0.06829706579446793, "learning_rate": 1.4954207536883352e-05, "loss": 0.0353, "step": 130010 }, { "epoch": 0.2501, "grad_norm": 0.055767159909009933, "learning_rate": 1.495042241247751e-05, "loss": 0.0363, "step": 130020 }, { "epoch": 0.25015, "grad_norm": 0.059968505054712296, "learning_rate": 1.494663756282334e-05, "loss": 0.0329, "step": 130030 }, { "epoch": 0.2502, "grad_norm": 0.06776098906993866, "learning_rate": 1.494285298802432e-05, "loss": 0.0338, "step": 130040 }, { "epoch": 0.25025, "grad_norm": 0.07966256886720657, "learning_rate": 1.4939068688183927e-05, "loss": 0.0348, "step": 130050 }, { "epoch": 0.2503, "grad_norm": 0.07551705837249756, "learning_rate": 1.4935284663405608e-05, "loss": 0.0371, "step": 130060 }, { "epoch": 0.25035, "grad_norm": 0.06012747809290886, "learning_rate": 1.493150091379284e-05, "loss": 0.0356, "step": 130070 }, { "epoch": 0.2504, "grad_norm": 0.053942855447530746, "learning_rate": 1.4927717439449036e-05, "loss": 0.0353, "step": 130080 }, { "epoch": 0.25045, "grad_norm": 0.06574242562055588, "learning_rate": 1.4923934240477672e-05, "loss": 0.0343, "step": 130090 }, { "epoch": 0.2505, "grad_norm": 0.06035936623811722, "learning_rate": 1.4920151316982146e-05, "loss": 0.0345, "step": 130100 }, { "epoch": 0.25055, "grad_norm": 0.05731990560889244, "learning_rate": 1.4916368669065895e-05, "loss": 0.0341, "step": 130110 }, { "epoch": 0.2506, "grad_norm": 0.05266165733337402, "learning_rate": 1.4912586296832348e-05, "loss": 0.0327, "step": 130120 }, { "epoch": 0.25065, "grad_norm": 0.058082010596990585, "learning_rate": 1.4908804200384893e-05, "loss": 0.034, "step": 130130 }, { "epoch": 0.2507, "grad_norm": 0.06456492096185684, "learning_rate": 1.4905022379826947e-05, "loss": 0.0347, "step": 130140 }, { "epoch": 0.25075, "grad_norm": 0.06144573166966438, "learning_rate": 1.4901240835261893e-05, "loss": 0.0344, "step": 130150 }, { "epoch": 0.2508, "grad_norm": 0.06580778956413269, "learning_rate": 1.4897459566793112e-05, "loss": 0.0347, "step": 130160 }, { "epoch": 0.25085, "grad_norm": 0.06285262852907181, "learning_rate": 1.4893678574524009e-05, "loss": 0.0344, "step": 130170 }, { "epoch": 0.2509, "grad_norm": 0.06998418271541595, "learning_rate": 1.4889897858557921e-05, "loss": 0.0362, "step": 130180 }, { "epoch": 0.25095, "grad_norm": 0.061762843281030655, "learning_rate": 1.4886117418998235e-05, "loss": 0.0335, "step": 130190 }, { "epoch": 0.251, "grad_norm": 0.05425383895635605, "learning_rate": 1.4882337255948297e-05, "loss": 0.035, "step": 130200 }, { "epoch": 0.25105, "grad_norm": 0.10530871897935867, "learning_rate": 1.4878557369511465e-05, "loss": 0.0374, "step": 130210 }, { "epoch": 0.2511, "grad_norm": 0.09325791895389557, "learning_rate": 1.4874777759791065e-05, "loss": 0.0347, "step": 130220 }, { "epoch": 0.25115, "grad_norm": 0.07867307960987091, "learning_rate": 1.4870998426890435e-05, "loss": 0.0335, "step": 130230 }, { "epoch": 0.2512, "grad_norm": 0.06272491812705994, "learning_rate": 1.4867219370912908e-05, "loss": 0.0337, "step": 130240 }, { "epoch": 0.25125, "grad_norm": 0.06531760096549988, "learning_rate": 1.4863440591961791e-05, "loss": 0.033, "step": 130250 }, { "epoch": 0.2513, "grad_norm": 0.05865396186709404, "learning_rate": 1.4859662090140408e-05, "loss": 0.0337, "step": 130260 }, { "epoch": 0.25135, "grad_norm": 0.05396764725446701, "learning_rate": 1.4855883865552042e-05, "loss": 0.0333, "step": 130270 }, { "epoch": 0.2514, "grad_norm": 0.0740916058421135, "learning_rate": 1.4852105918300016e-05, "loss": 0.0356, "step": 130280 }, { "epoch": 0.25145, "grad_norm": 0.06599223613739014, "learning_rate": 1.4848328248487586e-05, "loss": 0.0361, "step": 130290 }, { "epoch": 0.2515, "grad_norm": 0.053400177508592606, "learning_rate": 1.4844550856218054e-05, "loss": 0.0329, "step": 130300 }, { "epoch": 0.25155, "grad_norm": 0.05594543740153313, "learning_rate": 1.484077374159469e-05, "loss": 0.033, "step": 130310 }, { "epoch": 0.2516, "grad_norm": 0.06213982775807381, "learning_rate": 1.483699690472075e-05, "loss": 0.034, "step": 130320 }, { "epoch": 0.25165, "grad_norm": 0.052713699638843536, "learning_rate": 1.4833220345699506e-05, "loss": 0.034, "step": 130330 }, { "epoch": 0.2517, "grad_norm": 0.05229334905743599, "learning_rate": 1.4829444064634187e-05, "loss": 0.0334, "step": 130340 }, { "epoch": 0.25175, "grad_norm": 0.061390116810798645, "learning_rate": 1.4825668061628046e-05, "loss": 0.0337, "step": 130350 }, { "epoch": 0.2518, "grad_norm": 0.06338523328304291, "learning_rate": 1.4821892336784333e-05, "loss": 0.0348, "step": 130360 }, { "epoch": 0.25185, "grad_norm": 0.055219992995262146, "learning_rate": 1.481811689020624e-05, "loss": 0.0332, "step": 130370 }, { "epoch": 0.2519, "grad_norm": 0.06830650568008423, "learning_rate": 1.4814341721997024e-05, "loss": 0.0343, "step": 130380 }, { "epoch": 0.25195, "grad_norm": 0.06185084208846092, "learning_rate": 1.4810566832259865e-05, "loss": 0.0349, "step": 130390 }, { "epoch": 0.252, "grad_norm": 0.05709722265601158, "learning_rate": 1.4806792221097986e-05, "loss": 0.0337, "step": 130400 }, { "epoch": 0.25205, "grad_norm": 0.07840575277805328, "learning_rate": 1.480301788861458e-05, "loss": 0.0341, "step": 130410 }, { "epoch": 0.2521, "grad_norm": 0.06489013135433197, "learning_rate": 1.4799243834912829e-05, "loss": 0.0349, "step": 130420 }, { "epoch": 0.25215, "grad_norm": 0.0650085061788559, "learning_rate": 1.4795470060095928e-05, "loss": 0.0348, "step": 130430 }, { "epoch": 0.2522, "grad_norm": 0.06238774210214615, "learning_rate": 1.4791696564267036e-05, "loss": 0.0328, "step": 130440 }, { "epoch": 0.25225, "grad_norm": 0.05862411484122276, "learning_rate": 1.4787923347529328e-05, "loss": 0.0353, "step": 130450 }, { "epoch": 0.2523, "grad_norm": 0.061414580792188644, "learning_rate": 1.4784150409985952e-05, "loss": 0.034, "step": 130460 }, { "epoch": 0.25235, "grad_norm": 0.06263222545385361, "learning_rate": 1.4780377751740076e-05, "loss": 0.0337, "step": 130470 }, { "epoch": 0.2524, "grad_norm": 0.07081720232963562, "learning_rate": 1.4776605372894819e-05, "loss": 0.0325, "step": 130480 }, { "epoch": 0.25245, "grad_norm": 0.0483192540705204, "learning_rate": 1.4772833273553338e-05, "loss": 0.0347, "step": 130490 }, { "epoch": 0.2525, "grad_norm": 0.045618437230587006, "learning_rate": 1.4769061453818761e-05, "loss": 0.0317, "step": 130500 }, { "epoch": 0.25255, "grad_norm": 0.0476597435772419, "learning_rate": 1.4765289913794189e-05, "loss": 0.0331, "step": 130510 }, { "epoch": 0.2526, "grad_norm": 0.0782722607254982, "learning_rate": 1.4761518653582753e-05, "loss": 0.034, "step": 130520 }, { "epoch": 0.25265, "grad_norm": 0.053563639521598816, "learning_rate": 1.4757747673287548e-05, "loss": 0.0333, "step": 130530 }, { "epoch": 0.2527, "grad_norm": 0.05494308099150658, "learning_rate": 1.4753976973011677e-05, "loss": 0.0323, "step": 130540 }, { "epoch": 0.25275, "grad_norm": 0.05890059843659401, "learning_rate": 1.475020655285822e-05, "loss": 0.0323, "step": 130550 }, { "epoch": 0.2528, "grad_norm": 0.05034961178898811, "learning_rate": 1.4746436412930261e-05, "loss": 0.0339, "step": 130560 }, { "epoch": 0.25285, "grad_norm": 0.05406641960144043, "learning_rate": 1.4742666553330894e-05, "loss": 0.0326, "step": 130570 }, { "epoch": 0.2529, "grad_norm": 0.06735268235206604, "learning_rate": 1.4738896974163154e-05, "loss": 0.0332, "step": 130580 }, { "epoch": 0.25295, "grad_norm": 0.062144964933395386, "learning_rate": 1.473512767553013e-05, "loss": 0.0329, "step": 130590 }, { "epoch": 0.253, "grad_norm": 0.04975804686546326, "learning_rate": 1.473135865753485e-05, "loss": 0.0332, "step": 130600 }, { "epoch": 0.25305, "grad_norm": 0.05743909627199173, "learning_rate": 1.4727589920280366e-05, "loss": 0.0319, "step": 130610 }, { "epoch": 0.2531, "grad_norm": 0.06325464695692062, "learning_rate": 1.472382146386972e-05, "loss": 0.0335, "step": 130620 }, { "epoch": 0.25315, "grad_norm": 0.07747869938611984, "learning_rate": 1.4720053288405928e-05, "loss": 0.0324, "step": 130630 }, { "epoch": 0.2532, "grad_norm": 0.06305349618196487, "learning_rate": 1.4716285393992025e-05, "loss": 0.032, "step": 130640 }, { "epoch": 0.25325, "grad_norm": 0.06549563258886337, "learning_rate": 1.4712517780731011e-05, "loss": 0.0324, "step": 130650 }, { "epoch": 0.2533, "grad_norm": 0.0979926735162735, "learning_rate": 1.4708750448725899e-05, "loss": 0.0346, "step": 130660 }, { "epoch": 0.25335, "grad_norm": 0.0753621831536293, "learning_rate": 1.470498339807968e-05, "loss": 0.034, "step": 130670 }, { "epoch": 0.2534, "grad_norm": 0.06919003278017044, "learning_rate": 1.4701216628895342e-05, "loss": 0.0343, "step": 130680 }, { "epoch": 0.25345, "grad_norm": 0.053296275436878204, "learning_rate": 1.4697450141275881e-05, "loss": 0.034, "step": 130690 }, { "epoch": 0.2535, "grad_norm": 0.06867097318172455, "learning_rate": 1.469368393532426e-05, "loss": 0.0338, "step": 130700 }, { "epoch": 0.25355, "grad_norm": 0.05448020622134209, "learning_rate": 1.468991801114345e-05, "loss": 0.0321, "step": 130710 }, { "epoch": 0.2536, "grad_norm": 0.05940767377614975, "learning_rate": 1.4686152368836406e-05, "loss": 0.0331, "step": 130720 }, { "epoch": 0.25365, "grad_norm": 0.053210772573947906, "learning_rate": 1.4682387008506087e-05, "loss": 0.0347, "step": 130730 }, { "epoch": 0.2537, "grad_norm": 0.053768180310726166, "learning_rate": 1.4678621930255426e-05, "loss": 0.0334, "step": 130740 }, { "epoch": 0.25375, "grad_norm": 0.05573287233710289, "learning_rate": 1.467485713418736e-05, "loss": 0.0328, "step": 130750 }, { "epoch": 0.2538, "grad_norm": 0.05512506514787674, "learning_rate": 1.4671092620404831e-05, "loss": 0.0338, "step": 130760 }, { "epoch": 0.25385, "grad_norm": 0.059196557849645615, "learning_rate": 1.4667328389010738e-05, "loss": 0.0336, "step": 130770 }, { "epoch": 0.2539, "grad_norm": 0.05658409744501114, "learning_rate": 1.4663564440108019e-05, "loss": 0.035, "step": 130780 }, { "epoch": 0.25395, "grad_norm": 0.05933243781328201, "learning_rate": 1.4659800773799547e-05, "loss": 0.0348, "step": 130790 }, { "epoch": 0.254, "grad_norm": 0.06367258727550507, "learning_rate": 1.4656037390188246e-05, "loss": 0.0325, "step": 130800 }, { "epoch": 0.25405, "grad_norm": 0.057351671159267426, "learning_rate": 1.4652274289377e-05, "loss": 0.0343, "step": 130810 }, { "epoch": 0.2541, "grad_norm": 0.061499468982219696, "learning_rate": 1.4648511471468685e-05, "loss": 0.0325, "step": 130820 }, { "epoch": 0.25415, "grad_norm": 0.059906553477048874, "learning_rate": 1.464474893656618e-05, "loss": 0.0341, "step": 130830 }, { "epoch": 0.2542, "grad_norm": 0.05713615193963051, "learning_rate": 1.4640986684772345e-05, "loss": 0.0353, "step": 130840 }, { "epoch": 0.25425, "grad_norm": 0.079178087413311, "learning_rate": 1.4637224716190045e-05, "loss": 0.035, "step": 130850 }, { "epoch": 0.2543, "grad_norm": 0.0858449786901474, "learning_rate": 1.4633463030922129e-05, "loss": 0.0354, "step": 130860 }, { "epoch": 0.25435, "grad_norm": 0.06844604760408401, "learning_rate": 1.462970162907143e-05, "loss": 0.0339, "step": 130870 }, { "epoch": 0.2544, "grad_norm": 0.06488780677318573, "learning_rate": 1.4625940510740807e-05, "loss": 0.0338, "step": 130880 }, { "epoch": 0.25445, "grad_norm": 0.06646611541509628, "learning_rate": 1.462217967603306e-05, "loss": 0.0356, "step": 130890 }, { "epoch": 0.2545, "grad_norm": 0.06702245771884918, "learning_rate": 1.461841912505103e-05, "loss": 0.0338, "step": 130900 }, { "epoch": 0.25455, "grad_norm": 0.06354524940252304, "learning_rate": 1.4614658857897518e-05, "loss": 0.0333, "step": 130910 }, { "epoch": 0.2546, "grad_norm": 0.05211248621344566, "learning_rate": 1.4610898874675333e-05, "loss": 0.0334, "step": 130920 }, { "epoch": 0.25465, "grad_norm": 0.06046607345342636, "learning_rate": 1.4607139175487267e-05, "loss": 0.0332, "step": 130930 }, { "epoch": 0.2547, "grad_norm": 0.06311957538127899, "learning_rate": 1.4603379760436111e-05, "loss": 0.0336, "step": 130940 }, { "epoch": 0.25475, "grad_norm": 0.05592069774866104, "learning_rate": 1.4599620629624655e-05, "loss": 0.0356, "step": 130950 }, { "epoch": 0.2548, "grad_norm": 0.05934546887874603, "learning_rate": 1.4595861783155654e-05, "loss": 0.0335, "step": 130960 }, { "epoch": 0.25485, "grad_norm": 0.06932884454727173, "learning_rate": 1.459210322113189e-05, "loss": 0.0343, "step": 130970 }, { "epoch": 0.2549, "grad_norm": 0.05560052767395973, "learning_rate": 1.4588344943656102e-05, "loss": 0.0349, "step": 130980 }, { "epoch": 0.25495, "grad_norm": 0.07452212274074554, "learning_rate": 1.4584586950831064e-05, "loss": 0.0339, "step": 130990 }, { "epoch": 0.255, "grad_norm": 0.05970346927642822, "learning_rate": 1.4580829242759506e-05, "loss": 0.0349, "step": 131000 }, { "epoch": 0.25505, "grad_norm": 0.05696116015315056, "learning_rate": 1.4577071819544152e-05, "loss": 0.0345, "step": 131010 }, { "epoch": 0.2551, "grad_norm": 0.06986770778894424, "learning_rate": 1.4573314681287747e-05, "loss": 0.0342, "step": 131020 }, { "epoch": 0.25515, "grad_norm": 0.06303253024816513, "learning_rate": 1.4569557828092995e-05, "loss": 0.0343, "step": 131030 }, { "epoch": 0.2552, "grad_norm": 0.055824995040893555, "learning_rate": 1.4565801260062628e-05, "loss": 0.0331, "step": 131040 }, { "epoch": 0.25525, "grad_norm": 0.054569143801927567, "learning_rate": 1.4562044977299322e-05, "loss": 0.0333, "step": 131050 }, { "epoch": 0.2553, "grad_norm": 0.054146453738212585, "learning_rate": 1.4558288979905779e-05, "loss": 0.0336, "step": 131060 }, { "epoch": 0.25535, "grad_norm": 0.058401964604854584, "learning_rate": 1.4554533267984705e-05, "loss": 0.0337, "step": 131070 }, { "epoch": 0.2554, "grad_norm": 0.06326146423816681, "learning_rate": 1.4550777841638765e-05, "loss": 0.0338, "step": 131080 }, { "epoch": 0.25545, "grad_norm": 0.06221143156290054, "learning_rate": 1.4547022700970637e-05, "loss": 0.0344, "step": 131090 }, { "epoch": 0.2555, "grad_norm": 0.061658527702093124, "learning_rate": 1.454326784608297e-05, "loss": 0.0348, "step": 131100 }, { "epoch": 0.25555, "grad_norm": 0.07470380514860153, "learning_rate": 1.4539513277078437e-05, "loss": 0.0343, "step": 131110 }, { "epoch": 0.2556, "grad_norm": 0.061844952404499054, "learning_rate": 1.4535758994059687e-05, "loss": 0.0352, "step": 131120 }, { "epoch": 0.25565, "grad_norm": 0.07039070874452591, "learning_rate": 1.4532004997129342e-05, "loss": 0.0338, "step": 131130 }, { "epoch": 0.2557, "grad_norm": 0.05499129742383957, "learning_rate": 1.452825128639006e-05, "loss": 0.0342, "step": 131140 }, { "epoch": 0.25575, "grad_norm": 0.05033637210726738, "learning_rate": 1.4524497861944442e-05, "loss": 0.0356, "step": 131150 }, { "epoch": 0.2558, "grad_norm": 0.06897959858179092, "learning_rate": 1.4520744723895128e-05, "loss": 0.0369, "step": 131160 }, { "epoch": 0.25585, "grad_norm": 0.06327641010284424, "learning_rate": 1.4516991872344715e-05, "loss": 0.036, "step": 131170 }, { "epoch": 0.2559, "grad_norm": 0.06402651965618134, "learning_rate": 1.4513239307395799e-05, "loss": 0.0334, "step": 131180 }, { "epoch": 0.25595, "grad_norm": 0.06561446934938431, "learning_rate": 1.4509487029150986e-05, "loss": 0.0358, "step": 131190 }, { "epoch": 0.256, "grad_norm": 0.08165391534566879, "learning_rate": 1.450573503771286e-05, "loss": 0.0345, "step": 131200 }, { "epoch": 0.25605, "grad_norm": 0.07116862386465073, "learning_rate": 1.4501983333183994e-05, "loss": 0.0346, "step": 131210 }, { "epoch": 0.2561, "grad_norm": 0.05546938627958298, "learning_rate": 1.4498231915666949e-05, "loss": 0.0333, "step": 131220 }, { "epoch": 0.25615, "grad_norm": 0.047905996441841125, "learning_rate": 1.4494480785264308e-05, "loss": 0.0343, "step": 131230 }, { "epoch": 0.2562, "grad_norm": 0.0557168684899807, "learning_rate": 1.4490729942078607e-05, "loss": 0.034, "step": 131240 }, { "epoch": 0.25625, "grad_norm": 0.06065616011619568, "learning_rate": 1.4486979386212412e-05, "loss": 0.0324, "step": 131250 }, { "epoch": 0.2563, "grad_norm": 0.06020541861653328, "learning_rate": 1.4483229117768249e-05, "loss": 0.034, "step": 131260 }, { "epoch": 0.25635, "grad_norm": 0.051777441054582596, "learning_rate": 1.4479479136848645e-05, "loss": 0.0331, "step": 131270 }, { "epoch": 0.2564, "grad_norm": 0.05713462457060814, "learning_rate": 1.4475729443556135e-05, "loss": 0.0344, "step": 131280 }, { "epoch": 0.25645, "grad_norm": 0.05230337381362915, "learning_rate": 1.4471980037993229e-05, "loss": 0.0316, "step": 131290 }, { "epoch": 0.2565, "grad_norm": 0.06317819654941559, "learning_rate": 1.4468230920262432e-05, "loss": 0.0347, "step": 131300 }, { "epoch": 0.25655, "grad_norm": 0.06475378572940826, "learning_rate": 1.4464482090466238e-05, "loss": 0.034, "step": 131310 }, { "epoch": 0.2566, "grad_norm": 0.05910594016313553, "learning_rate": 1.4460733548707144e-05, "loss": 0.0356, "step": 131320 }, { "epoch": 0.25665, "grad_norm": 0.07089439779520035, "learning_rate": 1.4456985295087657e-05, "loss": 0.0349, "step": 131330 }, { "epoch": 0.2567, "grad_norm": 0.06080051511526108, "learning_rate": 1.4453237329710213e-05, "loss": 0.0334, "step": 131340 }, { "epoch": 0.25675, "grad_norm": 0.06926887482404709, "learning_rate": 1.4449489652677303e-05, "loss": 0.0345, "step": 131350 }, { "epoch": 0.2568, "grad_norm": 0.06788626313209534, "learning_rate": 1.4445742264091374e-05, "loss": 0.0328, "step": 131360 }, { "epoch": 0.25685, "grad_norm": 0.0659489706158638, "learning_rate": 1.4441995164054898e-05, "loss": 0.0357, "step": 131370 }, { "epoch": 0.2569, "grad_norm": 0.05504168942570686, "learning_rate": 1.4438248352670309e-05, "loss": 0.0337, "step": 131380 }, { "epoch": 0.25695, "grad_norm": 0.05858158692717552, "learning_rate": 1.4434501830040026e-05, "loss": 0.034, "step": 131390 }, { "epoch": 0.257, "grad_norm": 0.05563119798898697, "learning_rate": 1.443075559626651e-05, "loss": 0.0342, "step": 131400 }, { "epoch": 0.25705, "grad_norm": 0.06597186625003815, "learning_rate": 1.4427009651452161e-05, "loss": 0.0378, "step": 131410 }, { "epoch": 0.2571, "grad_norm": 0.06534725427627563, "learning_rate": 1.4423263995699398e-05, "loss": 0.0364, "step": 131420 }, { "epoch": 0.25715, "grad_norm": 0.058977410197257996, "learning_rate": 1.4419518629110615e-05, "loss": 0.0372, "step": 131430 }, { "epoch": 0.2572, "grad_norm": 0.056536633521318436, "learning_rate": 1.4415773551788214e-05, "loss": 0.0355, "step": 131440 }, { "epoch": 0.25725, "grad_norm": 0.056408364325761795, "learning_rate": 1.4412028763834602e-05, "loss": 0.0351, "step": 131450 }, { "epoch": 0.2573, "grad_norm": 0.06189712509512901, "learning_rate": 1.4408284265352146e-05, "loss": 0.0328, "step": 131460 }, { "epoch": 0.25735, "grad_norm": 0.05862760916352272, "learning_rate": 1.4404540056443216e-05, "loss": 0.0336, "step": 131470 }, { "epoch": 0.2574, "grad_norm": 0.06858477741479874, "learning_rate": 1.4400796137210171e-05, "loss": 0.0338, "step": 131480 }, { "epoch": 0.25745, "grad_norm": 0.05762804299592972, "learning_rate": 1.4397052507755388e-05, "loss": 0.0334, "step": 131490 }, { "epoch": 0.2575, "grad_norm": 0.06049410253763199, "learning_rate": 1.4393309168181207e-05, "loss": 0.0339, "step": 131500 }, { "epoch": 0.25755, "grad_norm": 0.06922336667776108, "learning_rate": 1.438956611858996e-05, "loss": 0.0343, "step": 131510 }, { "epoch": 0.2576, "grad_norm": 0.05931337550282478, "learning_rate": 1.4385823359083994e-05, "loss": 0.0327, "step": 131520 }, { "epoch": 0.25765, "grad_norm": 0.05266860872507095, "learning_rate": 1.4382080889765625e-05, "loss": 0.0327, "step": 131530 }, { "epoch": 0.2577, "grad_norm": 0.052181702107191086, "learning_rate": 1.4378338710737194e-05, "loss": 0.0338, "step": 131540 }, { "epoch": 0.25775, "grad_norm": 0.057044558227062225, "learning_rate": 1.4374596822100972e-05, "loss": 0.0334, "step": 131550 }, { "epoch": 0.2578, "grad_norm": 0.06613699346780777, "learning_rate": 1.4370855223959279e-05, "loss": 0.0331, "step": 131560 }, { "epoch": 0.25785, "grad_norm": 0.04941616579890251, "learning_rate": 1.4367113916414423e-05, "loss": 0.0322, "step": 131570 }, { "epoch": 0.2579, "grad_norm": 0.04932907968759537, "learning_rate": 1.4363372899568677e-05, "loss": 0.0337, "step": 131580 }, { "epoch": 0.25795, "grad_norm": 0.0558835044503212, "learning_rate": 1.4359632173524323e-05, "loss": 0.0353, "step": 131590 }, { "epoch": 0.258, "grad_norm": 0.054416995495557785, "learning_rate": 1.4355891738383614e-05, "loss": 0.0344, "step": 131600 }, { "epoch": 0.25805, "grad_norm": 0.05548791214823723, "learning_rate": 1.435215159424884e-05, "loss": 0.0338, "step": 131610 }, { "epoch": 0.2581, "grad_norm": 0.045173488557338715, "learning_rate": 1.434841174122224e-05, "loss": 0.0323, "step": 131620 }, { "epoch": 0.25815, "grad_norm": 0.06165101379156113, "learning_rate": 1.4344672179406049e-05, "loss": 0.034, "step": 131630 }, { "epoch": 0.2582, "grad_norm": 0.06000905483961105, "learning_rate": 1.4340932908902527e-05, "loss": 0.0339, "step": 131640 }, { "epoch": 0.25825, "grad_norm": 0.07140598446130753, "learning_rate": 1.4337193929813889e-05, "loss": 0.0346, "step": 131650 }, { "epoch": 0.2583, "grad_norm": 0.06754638999700546, "learning_rate": 1.4333455242242371e-05, "loss": 0.034, "step": 131660 }, { "epoch": 0.25835, "grad_norm": 0.05807989090681076, "learning_rate": 1.432971684629018e-05, "loss": 0.0331, "step": 131670 }, { "epoch": 0.2584, "grad_norm": 0.061242345720529556, "learning_rate": 1.432597874205952e-05, "loss": 0.0333, "step": 131680 }, { "epoch": 0.25845, "grad_norm": 0.05244876816868782, "learning_rate": 1.4322240929652581e-05, "loss": 0.0332, "step": 131690 }, { "epoch": 0.2585, "grad_norm": 0.05072590336203575, "learning_rate": 1.4318503409171578e-05, "loss": 0.033, "step": 131700 }, { "epoch": 0.25855, "grad_norm": 0.07088343799114227, "learning_rate": 1.4314766180718675e-05, "loss": 0.0345, "step": 131710 }, { "epoch": 0.2586, "grad_norm": 0.066108338534832, "learning_rate": 1.4311029244396041e-05, "loss": 0.0338, "step": 131720 }, { "epoch": 0.25865, "grad_norm": 0.05816994979977608, "learning_rate": 1.430729260030586e-05, "loss": 0.0339, "step": 131730 }, { "epoch": 0.2587, "grad_norm": 0.07457224279642105, "learning_rate": 1.4303556248550276e-05, "loss": 0.0331, "step": 131740 }, { "epoch": 0.25875, "grad_norm": 0.05543922260403633, "learning_rate": 1.4299820189231456e-05, "loss": 0.0332, "step": 131750 }, { "epoch": 0.2588, "grad_norm": 0.07165886461734772, "learning_rate": 1.4296084422451528e-05, "loss": 0.0353, "step": 131760 }, { "epoch": 0.25885, "grad_norm": 0.06840430945158005, "learning_rate": 1.4292348948312623e-05, "loss": 0.0324, "step": 131770 }, { "epoch": 0.2589, "grad_norm": 0.06386654078960419, "learning_rate": 1.4288613766916883e-05, "loss": 0.0328, "step": 131780 }, { "epoch": 0.25895, "grad_norm": 0.05158427730202675, "learning_rate": 1.428487887836642e-05, "loss": 0.0319, "step": 131790 }, { "epoch": 0.259, "grad_norm": 0.05500097945332527, "learning_rate": 1.4281144282763342e-05, "loss": 0.0351, "step": 131800 }, { "epoch": 0.25905, "grad_norm": 0.06515280157327652, "learning_rate": 1.4277409980209747e-05, "loss": 0.0326, "step": 131810 }, { "epoch": 0.2591, "grad_norm": 0.06642050296068192, "learning_rate": 1.4273675970807732e-05, "loss": 0.0323, "step": 131820 }, { "epoch": 0.25915, "grad_norm": 0.06143985316157341, "learning_rate": 1.4269942254659406e-05, "loss": 0.0341, "step": 131830 }, { "epoch": 0.2592, "grad_norm": 0.05663907155394554, "learning_rate": 1.4266208831866806e-05, "loss": 0.0321, "step": 131840 }, { "epoch": 0.25925, "grad_norm": 0.08131860196590424, "learning_rate": 1.4262475702532036e-05, "loss": 0.0346, "step": 131850 }, { "epoch": 0.2593, "grad_norm": 0.057754553854465485, "learning_rate": 1.4258742866757135e-05, "loss": 0.0327, "step": 131860 }, { "epoch": 0.25935, "grad_norm": 0.05802265927195549, "learning_rate": 1.425501032464418e-05, "loss": 0.0336, "step": 131870 }, { "epoch": 0.2594, "grad_norm": 0.059007786214351654, "learning_rate": 1.4251278076295205e-05, "loss": 0.0332, "step": 131880 }, { "epoch": 0.25945, "grad_norm": 0.05896070599555969, "learning_rate": 1.4247546121812238e-05, "loss": 0.0333, "step": 131890 }, { "epoch": 0.2595, "grad_norm": 0.07986626774072647, "learning_rate": 1.4243814461297334e-05, "loss": 0.0352, "step": 131900 }, { "epoch": 0.25955, "grad_norm": 0.07115952670574188, "learning_rate": 1.4240083094852502e-05, "loss": 0.0326, "step": 131910 }, { "epoch": 0.2596, "grad_norm": 0.061306633055210114, "learning_rate": 1.4236352022579752e-05, "loss": 0.0337, "step": 131920 }, { "epoch": 0.25965, "grad_norm": 0.055750228464603424, "learning_rate": 1.4232621244581088e-05, "loss": 0.0337, "step": 131930 }, { "epoch": 0.2597, "grad_norm": 0.054329417645931244, "learning_rate": 1.4228890760958521e-05, "loss": 0.0341, "step": 131940 }, { "epoch": 0.25975, "grad_norm": 0.06293036788702011, "learning_rate": 1.4225160571814026e-05, "loss": 0.0336, "step": 131950 }, { "epoch": 0.2598, "grad_norm": 0.054844535887241364, "learning_rate": 1.4221430677249604e-05, "loss": 0.0339, "step": 131960 }, { "epoch": 0.25985, "grad_norm": 0.06715855002403259, "learning_rate": 1.4217701077367212e-05, "loss": 0.0336, "step": 131970 }, { "epoch": 0.2599, "grad_norm": 0.057781148701906204, "learning_rate": 1.4213971772268819e-05, "loss": 0.0338, "step": 131980 }, { "epoch": 0.25995, "grad_norm": 0.05987885966897011, "learning_rate": 1.4210242762056391e-05, "loss": 0.033, "step": 131990 }, { "epoch": 0.26, "grad_norm": 0.05983780324459076, "learning_rate": 1.4206514046831876e-05, "loss": 0.0336, "step": 132000 }, { "epoch": 0.26005, "grad_norm": 0.057271696627140045, "learning_rate": 1.4202785626697199e-05, "loss": 0.0339, "step": 132010 }, { "epoch": 0.2601, "grad_norm": 0.06230291724205017, "learning_rate": 1.4199057501754317e-05, "loss": 0.0335, "step": 132020 }, { "epoch": 0.26015, "grad_norm": 0.05530672147870064, "learning_rate": 1.4195329672105135e-05, "loss": 0.0333, "step": 132030 }, { "epoch": 0.2602, "grad_norm": 0.1007639616727829, "learning_rate": 1.41916021378516e-05, "loss": 0.0343, "step": 132040 }, { "epoch": 0.26025, "grad_norm": 0.08219780027866364, "learning_rate": 1.4187874899095587e-05, "loss": 0.0333, "step": 132050 }, { "epoch": 0.2603, "grad_norm": 0.06372511386871338, "learning_rate": 1.4184147955939018e-05, "loss": 0.036, "step": 132060 }, { "epoch": 0.26035, "grad_norm": 0.07079017162322998, "learning_rate": 1.4180421308483771e-05, "loss": 0.0387, "step": 132070 }, { "epoch": 0.2604, "grad_norm": 0.07121309638023376, "learning_rate": 1.417669495683175e-05, "loss": 0.0336, "step": 132080 }, { "epoch": 0.26045, "grad_norm": 0.07378543168306351, "learning_rate": 1.4172968901084827e-05, "loss": 0.0355, "step": 132090 }, { "epoch": 0.2605, "grad_norm": 0.05647118389606476, "learning_rate": 1.4169243141344851e-05, "loss": 0.0326, "step": 132100 }, { "epoch": 0.26055, "grad_norm": 0.07248450815677643, "learning_rate": 1.4165517677713714e-05, "loss": 0.0352, "step": 132110 }, { "epoch": 0.2606, "grad_norm": 0.06483283638954163, "learning_rate": 1.4161792510293248e-05, "loss": 0.0335, "step": 132120 }, { "epoch": 0.26065, "grad_norm": 0.06354866921901703, "learning_rate": 1.4158067639185308e-05, "loss": 0.0341, "step": 132130 }, { "epoch": 0.2607, "grad_norm": 0.06080995872616768, "learning_rate": 1.415434306449171e-05, "loss": 0.0346, "step": 132140 }, { "epoch": 0.26075, "grad_norm": 0.06035395339131355, "learning_rate": 1.41506187863143e-05, "loss": 0.0349, "step": 132150 }, { "epoch": 0.2608, "grad_norm": 0.05553651601076126, "learning_rate": 1.4146894804754912e-05, "loss": 0.0344, "step": 132160 }, { "epoch": 0.26085, "grad_norm": 0.05365028232336044, "learning_rate": 1.4143171119915338e-05, "loss": 0.0341, "step": 132170 }, { "epoch": 0.2609, "grad_norm": 0.049796558916568756, "learning_rate": 1.4139447731897387e-05, "loss": 0.0338, "step": 132180 }, { "epoch": 0.26095, "grad_norm": 0.056830406188964844, "learning_rate": 1.4135724640802844e-05, "loss": 0.0344, "step": 132190 }, { "epoch": 0.261, "grad_norm": 0.05340270698070526, "learning_rate": 1.413200184673352e-05, "loss": 0.0346, "step": 132200 }, { "epoch": 0.26105, "grad_norm": 0.05918886139988899, "learning_rate": 1.4128279349791179e-05, "loss": 0.0338, "step": 132210 }, { "epoch": 0.2611, "grad_norm": 0.06484489142894745, "learning_rate": 1.4124557150077588e-05, "loss": 0.0362, "step": 132220 }, { "epoch": 0.26115, "grad_norm": 0.06631860136985779, "learning_rate": 1.412083524769453e-05, "loss": 0.0346, "step": 132230 }, { "epoch": 0.2612, "grad_norm": 0.05315980687737465, "learning_rate": 1.411711364274374e-05, "loss": 0.0334, "step": 132240 }, { "epoch": 0.26125, "grad_norm": 0.049241699278354645, "learning_rate": 1.4113392335326994e-05, "loss": 0.0328, "step": 132250 }, { "epoch": 0.2613, "grad_norm": 0.04963986948132515, "learning_rate": 1.4109671325545992e-05, "loss": 0.0321, "step": 132260 }, { "epoch": 0.26135, "grad_norm": 0.05320208892226219, "learning_rate": 1.4105950613502482e-05, "loss": 0.0342, "step": 132270 }, { "epoch": 0.2614, "grad_norm": 0.0504610501229763, "learning_rate": 1.4102230199298203e-05, "loss": 0.0328, "step": 132280 }, { "epoch": 0.26145, "grad_norm": 0.06167743355035782, "learning_rate": 1.4098510083034855e-05, "loss": 0.0328, "step": 132290 }, { "epoch": 0.2615, "grad_norm": 0.05889127030968666, "learning_rate": 1.4094790264814145e-05, "loss": 0.0329, "step": 132300 }, { "epoch": 0.26155, "grad_norm": 0.0674830749630928, "learning_rate": 1.4091070744737762e-05, "loss": 0.0329, "step": 132310 }, { "epoch": 0.2616, "grad_norm": 0.0638226568698883, "learning_rate": 1.4087351522907416e-05, "loss": 0.0317, "step": 132320 }, { "epoch": 0.26165, "grad_norm": 0.06432203948497772, "learning_rate": 1.408363259942478e-05, "loss": 0.0333, "step": 132330 }, { "epoch": 0.2617, "grad_norm": 0.06924125552177429, "learning_rate": 1.407991397439152e-05, "loss": 0.0325, "step": 132340 }, { "epoch": 0.26175, "grad_norm": 0.06363802403211594, "learning_rate": 1.4076195647909319e-05, "loss": 0.0342, "step": 132350 }, { "epoch": 0.2618, "grad_norm": 0.05731474980711937, "learning_rate": 1.4072477620079813e-05, "loss": 0.0317, "step": 132360 }, { "epoch": 0.26185, "grad_norm": 0.05997426062822342, "learning_rate": 1.4068759891004673e-05, "loss": 0.0328, "step": 132370 }, { "epoch": 0.2619, "grad_norm": 0.06531939655542374, "learning_rate": 1.4065042460785532e-05, "loss": 0.0354, "step": 132380 }, { "epoch": 0.26195, "grad_norm": 0.05618453025817871, "learning_rate": 1.4061325329524015e-05, "loss": 0.0332, "step": 132390 }, { "epoch": 0.262, "grad_norm": 0.06673513352870941, "learning_rate": 1.4057608497321762e-05, "loss": 0.0328, "step": 132400 }, { "epoch": 0.26205, "grad_norm": 0.05080614238977432, "learning_rate": 1.4053891964280381e-05, "loss": 0.0326, "step": 132410 }, { "epoch": 0.2621, "grad_norm": 0.0496988408267498, "learning_rate": 1.405017573050148e-05, "loss": 0.0334, "step": 132420 }, { "epoch": 0.26215, "grad_norm": 0.0615084283053875, "learning_rate": 1.4046459796086653e-05, "loss": 0.0336, "step": 132430 }, { "epoch": 0.2622, "grad_norm": 0.06069965288043022, "learning_rate": 1.404274416113751e-05, "loss": 0.0337, "step": 132440 }, { "epoch": 0.26225, "grad_norm": 0.0644589215517044, "learning_rate": 1.4039028825755616e-05, "loss": 0.0346, "step": 132450 }, { "epoch": 0.2623, "grad_norm": 0.06995908915996552, "learning_rate": 1.4035313790042565e-05, "loss": 0.0343, "step": 132460 }, { "epoch": 0.26235, "grad_norm": 0.06145939975976944, "learning_rate": 1.4031599054099915e-05, "loss": 0.0334, "step": 132470 }, { "epoch": 0.2624, "grad_norm": 0.061161722987890244, "learning_rate": 1.4027884618029216e-05, "loss": 0.0342, "step": 132480 }, { "epoch": 0.26245, "grad_norm": 0.05761851370334625, "learning_rate": 1.4024170481932041e-05, "loss": 0.034, "step": 132490 }, { "epoch": 0.2625, "grad_norm": 0.06380891054868698, "learning_rate": 1.4020456645909921e-05, "loss": 0.033, "step": 132500 }, { "epoch": 0.26255, "grad_norm": 0.07063276320695877, "learning_rate": 1.401674311006439e-05, "loss": 0.034, "step": 132510 }, { "epoch": 0.2626, "grad_norm": 0.06349416822195053, "learning_rate": 1.401302987449697e-05, "loss": 0.0333, "step": 132520 }, { "epoch": 0.26265, "grad_norm": 0.09622623026371002, "learning_rate": 1.4009316939309183e-05, "loss": 0.0376, "step": 132530 }, { "epoch": 0.2627, "grad_norm": 0.08947856724262238, "learning_rate": 1.4005604304602563e-05, "loss": 0.0359, "step": 132540 }, { "epoch": 0.26275, "grad_norm": 0.0602882094681263, "learning_rate": 1.400189197047857e-05, "loss": 0.0336, "step": 132550 }, { "epoch": 0.2628, "grad_norm": 0.060876134783029556, "learning_rate": 1.3998179937038728e-05, "loss": 0.0349, "step": 132560 }, { "epoch": 0.26285, "grad_norm": 0.07160362601280212, "learning_rate": 1.3994468204384504e-05, "loss": 0.0337, "step": 132570 }, { "epoch": 0.2629, "grad_norm": 0.05875960737466812, "learning_rate": 1.3990756772617394e-05, "loss": 0.0342, "step": 132580 }, { "epoch": 0.26295, "grad_norm": 0.058581165969371796, "learning_rate": 1.3987045641838858e-05, "loss": 0.033, "step": 132590 }, { "epoch": 0.263, "grad_norm": 0.05444763973355293, "learning_rate": 1.398333481215035e-05, "loss": 0.034, "step": 132600 }, { "epoch": 0.26305, "grad_norm": 0.05249619483947754, "learning_rate": 1.3979624283653336e-05, "loss": 0.0329, "step": 132610 }, { "epoch": 0.2631, "grad_norm": 0.05937279388308525, "learning_rate": 1.3975914056449255e-05, "loss": 0.0332, "step": 132620 }, { "epoch": 0.26315, "grad_norm": 0.05373000726103783, "learning_rate": 1.3972204130639544e-05, "loss": 0.033, "step": 132630 }, { "epoch": 0.2632, "grad_norm": 0.06635235249996185, "learning_rate": 1.3968494506325613e-05, "loss": 0.0342, "step": 132640 }, { "epoch": 0.26325, "grad_norm": 0.054282885044813156, "learning_rate": 1.3964785183608902e-05, "loss": 0.0339, "step": 132650 }, { "epoch": 0.2633, "grad_norm": 0.07432691007852554, "learning_rate": 1.396107616259083e-05, "loss": 0.0342, "step": 132660 }, { "epoch": 0.26335, "grad_norm": 0.04971253499388695, "learning_rate": 1.3957367443372787e-05, "loss": 0.0345, "step": 132670 }, { "epoch": 0.2634, "grad_norm": 0.060743436217308044, "learning_rate": 1.3953659026056171e-05, "loss": 0.0362, "step": 132680 }, { "epoch": 0.26345, "grad_norm": 0.05153289809823036, "learning_rate": 1.3949950910742354e-05, "loss": 0.0331, "step": 132690 }, { "epoch": 0.2635, "grad_norm": 0.059696514159440994, "learning_rate": 1.394624309753274e-05, "loss": 0.0332, "step": 132700 }, { "epoch": 0.26355, "grad_norm": 0.054209042340517044, "learning_rate": 1.3942535586528688e-05, "loss": 0.0329, "step": 132710 }, { "epoch": 0.2636, "grad_norm": 0.05692094564437866, "learning_rate": 1.3938828377831548e-05, "loss": 0.0368, "step": 132720 }, { "epoch": 0.26365, "grad_norm": 0.058496423065662384, "learning_rate": 1.3935121471542695e-05, "loss": 0.033, "step": 132730 }, { "epoch": 0.2637, "grad_norm": 0.06355134397745132, "learning_rate": 1.393141486776345e-05, "loss": 0.0345, "step": 132740 }, { "epoch": 0.26375, "grad_norm": 0.06379279494285583, "learning_rate": 1.3927708566595188e-05, "loss": 0.0321, "step": 132750 }, { "epoch": 0.2638, "grad_norm": 0.0828419178724289, "learning_rate": 1.3924002568139194e-05, "loss": 0.0345, "step": 132760 }, { "epoch": 0.26385, "grad_norm": 0.07966157048940659, "learning_rate": 1.3920296872496808e-05, "loss": 0.0338, "step": 132770 }, { "epoch": 0.2639, "grad_norm": 0.06958355009555817, "learning_rate": 1.3916591479769353e-05, "loss": 0.0337, "step": 132780 }, { "epoch": 0.26395, "grad_norm": 0.06073891371488571, "learning_rate": 1.3912886390058125e-05, "loss": 0.0336, "step": 132790 }, { "epoch": 0.264, "grad_norm": 0.047526225447654724, "learning_rate": 1.3909181603464415e-05, "loss": 0.0329, "step": 132800 }, { "epoch": 0.26405, "grad_norm": 0.05731450021266937, "learning_rate": 1.3905477120089505e-05, "loss": 0.033, "step": 132810 }, { "epoch": 0.2641, "grad_norm": 0.057753946632146835, "learning_rate": 1.3901772940034693e-05, "loss": 0.0337, "step": 132820 }, { "epoch": 0.26415, "grad_norm": 0.05305123329162598, "learning_rate": 1.389806906340124e-05, "loss": 0.0326, "step": 132830 }, { "epoch": 0.2642, "grad_norm": 0.06059778109192848, "learning_rate": 1.3894365490290395e-05, "loss": 0.0337, "step": 132840 }, { "epoch": 0.26425, "grad_norm": 0.06575316190719604, "learning_rate": 1.3890662220803437e-05, "loss": 0.033, "step": 132850 }, { "epoch": 0.2643, "grad_norm": 0.05115986987948418, "learning_rate": 1.3886959255041592e-05, "loss": 0.0341, "step": 132860 }, { "epoch": 0.26435, "grad_norm": 0.060626786202192307, "learning_rate": 1.3883256593106115e-05, "loss": 0.0344, "step": 132870 }, { "epoch": 0.2644, "grad_norm": 0.07723161578178406, "learning_rate": 1.3879554235098224e-05, "loss": 0.0348, "step": 132880 }, { "epoch": 0.26445, "grad_norm": 0.05626492574810982, "learning_rate": 1.3875852181119142e-05, "loss": 0.0345, "step": 132890 }, { "epoch": 0.2645, "grad_norm": 0.058993149548769, "learning_rate": 1.3872150431270075e-05, "loss": 0.0347, "step": 132900 }, { "epoch": 0.26455, "grad_norm": 0.06206396594643593, "learning_rate": 1.3868448985652244e-05, "loss": 0.0332, "step": 132910 }, { "epoch": 0.2646, "grad_norm": 0.05647966265678406, "learning_rate": 1.3864747844366838e-05, "loss": 0.0325, "step": 132920 }, { "epoch": 0.26465, "grad_norm": 0.06411507725715637, "learning_rate": 1.3861047007515029e-05, "loss": 0.0336, "step": 132930 }, { "epoch": 0.2647, "grad_norm": 0.07062128931283951, "learning_rate": 1.3857346475198024e-05, "loss": 0.034, "step": 132940 }, { "epoch": 0.26475, "grad_norm": 0.04719579219818115, "learning_rate": 1.3853646247516966e-05, "loss": 0.0325, "step": 132950 }, { "epoch": 0.2648, "grad_norm": 0.05616036430001259, "learning_rate": 1.3849946324573042e-05, "loss": 0.0341, "step": 132960 }, { "epoch": 0.26485, "grad_norm": 0.05677328258752823, "learning_rate": 1.38462467064674e-05, "loss": 0.0325, "step": 132970 }, { "epoch": 0.2649, "grad_norm": 0.05568123236298561, "learning_rate": 1.3842547393301172e-05, "loss": 0.0353, "step": 132980 }, { "epoch": 0.26495, "grad_norm": 0.06068773567676544, "learning_rate": 1.3838848385175515e-05, "loss": 0.0343, "step": 132990 }, { "epoch": 0.265, "grad_norm": 0.05522076413035393, "learning_rate": 1.383514968219155e-05, "loss": 0.034, "step": 133000 }, { "epoch": 0.26505, "grad_norm": 0.05242743715643883, "learning_rate": 1.3831451284450403e-05, "loss": 0.035, "step": 133010 }, { "epoch": 0.2651, "grad_norm": 0.05325956270098686, "learning_rate": 1.3827753192053167e-05, "loss": 0.035, "step": 133020 }, { "epoch": 0.26515, "grad_norm": 0.05957451090216637, "learning_rate": 1.3824055405100961e-05, "loss": 0.0344, "step": 133030 }, { "epoch": 0.2652, "grad_norm": 0.0515049509704113, "learning_rate": 1.3820357923694904e-05, "loss": 0.0341, "step": 133040 }, { "epoch": 0.26525, "grad_norm": 0.059082720428705215, "learning_rate": 1.3816660747936042e-05, "loss": 0.0346, "step": 133050 }, { "epoch": 0.2653, "grad_norm": 0.047861386090517044, "learning_rate": 1.3812963877925482e-05, "loss": 0.034, "step": 133060 }, { "epoch": 0.26535, "grad_norm": 0.047886017709970474, "learning_rate": 1.380926731376428e-05, "loss": 0.0339, "step": 133070 }, { "epoch": 0.2654, "grad_norm": 0.047841113060712814, "learning_rate": 1.3805571055553509e-05, "loss": 0.0329, "step": 133080 }, { "epoch": 0.26545, "grad_norm": 0.05151384696364403, "learning_rate": 1.3801875103394224e-05, "loss": 0.034, "step": 133090 }, { "epoch": 0.2655, "grad_norm": 0.06597917526960373, "learning_rate": 1.3798179457387456e-05, "loss": 0.0341, "step": 133100 }, { "epoch": 0.26555, "grad_norm": 0.052481696009635925, "learning_rate": 1.3794484117634265e-05, "loss": 0.0338, "step": 133110 }, { "epoch": 0.2656, "grad_norm": 0.051191624253988266, "learning_rate": 1.3790789084235667e-05, "loss": 0.0335, "step": 133120 }, { "epoch": 0.26565, "grad_norm": 0.05215437337756157, "learning_rate": 1.3787094357292684e-05, "loss": 0.0343, "step": 133130 }, { "epoch": 0.2657, "grad_norm": 0.05486556515097618, "learning_rate": 1.378339993690632e-05, "loss": 0.0328, "step": 133140 }, { "epoch": 0.26575, "grad_norm": 0.05595598742365837, "learning_rate": 1.3779705823177597e-05, "loss": 0.0334, "step": 133150 }, { "epoch": 0.2658, "grad_norm": 0.05401711165904999, "learning_rate": 1.377601201620749e-05, "loss": 0.033, "step": 133160 }, { "epoch": 0.26585, "grad_norm": 0.05710318684577942, "learning_rate": 1.377231851609701e-05, "loss": 0.0351, "step": 133170 }, { "epoch": 0.2659, "grad_norm": 0.05453859269618988, "learning_rate": 1.3768625322947126e-05, "loss": 0.0333, "step": 133180 }, { "epoch": 0.26595, "grad_norm": 0.05498124659061432, "learning_rate": 1.3764932436858791e-05, "loss": 0.0334, "step": 133190 }, { "epoch": 0.266, "grad_norm": 0.05131393298506737, "learning_rate": 1.3761239857932995e-05, "loss": 0.0331, "step": 133200 }, { "epoch": 0.26605, "grad_norm": 0.07144580781459808, "learning_rate": 1.375754758627068e-05, "loss": 0.037, "step": 133210 }, { "epoch": 0.2661, "grad_norm": 0.06492135673761368, "learning_rate": 1.3753855621972777e-05, "loss": 0.0333, "step": 133220 }, { "epoch": 0.26615, "grad_norm": 0.05913819745182991, "learning_rate": 1.375016396514025e-05, "loss": 0.0319, "step": 133230 }, { "epoch": 0.2662, "grad_norm": 0.05563320964574814, "learning_rate": 1.3746472615874002e-05, "loss": 0.033, "step": 133240 }, { "epoch": 0.26625, "grad_norm": 0.053804319351911545, "learning_rate": 1.3742781574274987e-05, "loss": 0.0339, "step": 133250 }, { "epoch": 0.2663, "grad_norm": 0.05768200010061264, "learning_rate": 1.3739090840444072e-05, "loss": 0.0341, "step": 133260 }, { "epoch": 0.26635, "grad_norm": 0.06974507868289948, "learning_rate": 1.3735400414482194e-05, "loss": 0.0334, "step": 133270 }, { "epoch": 0.2664, "grad_norm": 0.0580982081592083, "learning_rate": 1.3731710296490227e-05, "loss": 0.0339, "step": 133280 }, { "epoch": 0.26645, "grad_norm": 0.07492054253816605, "learning_rate": 1.3728020486569077e-05, "loss": 0.0336, "step": 133290 }, { "epoch": 0.2665, "grad_norm": 0.06081545352935791, "learning_rate": 1.3724330984819611e-05, "loss": 0.0328, "step": 133300 }, { "epoch": 0.26655, "grad_norm": 0.05259373411536217, "learning_rate": 1.3720641791342692e-05, "loss": 0.0333, "step": 133310 }, { "epoch": 0.2666, "grad_norm": 0.053279753774404526, "learning_rate": 1.3716952906239196e-05, "loss": 0.0322, "step": 133320 }, { "epoch": 0.26665, "grad_norm": 0.052714549005031586, "learning_rate": 1.371326432960997e-05, "loss": 0.0336, "step": 133330 }, { "epoch": 0.2667, "grad_norm": 0.04930153861641884, "learning_rate": 1.3709576061555859e-05, "loss": 0.0314, "step": 133340 }, { "epoch": 0.26675, "grad_norm": 0.05692337453365326, "learning_rate": 1.3705888102177685e-05, "loss": 0.035, "step": 133350 }, { "epoch": 0.2668, "grad_norm": 0.048727333545684814, "learning_rate": 1.3702200451576289e-05, "loss": 0.0316, "step": 133360 }, { "epoch": 0.26685, "grad_norm": 0.050843335688114166, "learning_rate": 1.3698513109852496e-05, "loss": 0.0321, "step": 133370 }, { "epoch": 0.2669, "grad_norm": 0.0695711225271225, "learning_rate": 1.369482607710711e-05, "loss": 0.0336, "step": 133380 }, { "epoch": 0.26695, "grad_norm": 0.05299427732825279, "learning_rate": 1.369113935344093e-05, "loss": 0.0322, "step": 133390 }, { "epoch": 0.267, "grad_norm": 0.045001398772001266, "learning_rate": 1.3687452938954746e-05, "loss": 0.0318, "step": 133400 }, { "epoch": 0.26705, "grad_norm": 0.052017249166965485, "learning_rate": 1.3683766833749356e-05, "loss": 0.0313, "step": 133410 }, { "epoch": 0.2671, "grad_norm": 0.04774123430252075, "learning_rate": 1.368008103792553e-05, "loss": 0.0325, "step": 133420 }, { "epoch": 0.26715, "grad_norm": 0.058756422251462936, "learning_rate": 1.3676395551584023e-05, "loss": 0.0353, "step": 133430 }, { "epoch": 0.2672, "grad_norm": 0.08225872367620468, "learning_rate": 1.3672710374825621e-05, "loss": 0.033, "step": 133440 }, { "epoch": 0.26725, "grad_norm": 0.07365263253450394, "learning_rate": 1.3669025507751048e-05, "loss": 0.0322, "step": 133450 }, { "epoch": 0.2673, "grad_norm": 0.0715201273560524, "learning_rate": 1.3665340950461083e-05, "loss": 0.0325, "step": 133460 }, { "epoch": 0.26735, "grad_norm": 0.055179111659526825, "learning_rate": 1.366165670305642e-05, "loss": 0.0332, "step": 133470 }, { "epoch": 0.2674, "grad_norm": 0.04986432567238808, "learning_rate": 1.36579727656378e-05, "loss": 0.0307, "step": 133480 }, { "epoch": 0.26745, "grad_norm": 0.060405880212783813, "learning_rate": 1.3654289138305956e-05, "loss": 0.0328, "step": 133490 }, { "epoch": 0.2675, "grad_norm": 0.06796519458293915, "learning_rate": 1.3650605821161583e-05, "loss": 0.0326, "step": 133500 }, { "epoch": 0.26755, "grad_norm": 0.06855150312185287, "learning_rate": 1.3646922814305385e-05, "loss": 0.032, "step": 133510 }, { "epoch": 0.2676, "grad_norm": 0.060563866049051285, "learning_rate": 1.364324011783804e-05, "loss": 0.0319, "step": 133520 }, { "epoch": 0.26765, "grad_norm": 0.053877994418144226, "learning_rate": 1.3639557731860252e-05, "loss": 0.0319, "step": 133530 }, { "epoch": 0.2677, "grad_norm": 0.05480059236288071, "learning_rate": 1.3635875656472693e-05, "loss": 0.0334, "step": 133540 }, { "epoch": 0.26775, "grad_norm": 0.053848497569561005, "learning_rate": 1.3632193891776013e-05, "loss": 0.0324, "step": 133550 }, { "epoch": 0.2678, "grad_norm": 0.06216348335146904, "learning_rate": 1.3628512437870888e-05, "loss": 0.0318, "step": 133560 }, { "epoch": 0.26785, "grad_norm": 0.06599042564630508, "learning_rate": 1.3624831294857954e-05, "loss": 0.0348, "step": 133570 }, { "epoch": 0.2679, "grad_norm": 0.07977837324142456, "learning_rate": 1.3621150462837867e-05, "loss": 0.0351, "step": 133580 }, { "epoch": 0.26795, "grad_norm": 0.06804382801055908, "learning_rate": 1.3617469941911254e-05, "loss": 0.0328, "step": 133590 }, { "epoch": 0.268, "grad_norm": 0.06853941828012466, "learning_rate": 1.3613789732178722e-05, "loss": 0.0342, "step": 133600 }, { "epoch": 0.26805, "grad_norm": 0.06310584396123886, "learning_rate": 1.3610109833740913e-05, "loss": 0.0334, "step": 133610 }, { "epoch": 0.2681, "grad_norm": 0.05592683330178261, "learning_rate": 1.3606430246698427e-05, "loss": 0.0345, "step": 133620 }, { "epoch": 0.26815, "grad_norm": 0.0685850977897644, "learning_rate": 1.360275097115185e-05, "loss": 0.0333, "step": 133630 }, { "epoch": 0.2682, "grad_norm": 0.060806307941675186, "learning_rate": 1.3599072007201774e-05, "loss": 0.0353, "step": 133640 }, { "epoch": 0.26825, "grad_norm": 0.050386618822813034, "learning_rate": 1.3595393354948797e-05, "loss": 0.0344, "step": 133650 }, { "epoch": 0.2683, "grad_norm": 0.051887303590774536, "learning_rate": 1.3591715014493467e-05, "loss": 0.0365, "step": 133660 }, { "epoch": 0.26835, "grad_norm": 0.05312123894691467, "learning_rate": 1.3588036985936375e-05, "loss": 0.0343, "step": 133670 }, { "epoch": 0.2684, "grad_norm": 0.07142849266529083, "learning_rate": 1.3584359269378066e-05, "loss": 0.0334, "step": 133680 }, { "epoch": 0.26845, "grad_norm": 0.05248669534921646, "learning_rate": 1.3580681864919076e-05, "loss": 0.0338, "step": 133690 }, { "epoch": 0.2685, "grad_norm": 0.06165587157011032, "learning_rate": 1.3577004772659963e-05, "loss": 0.0332, "step": 133700 }, { "epoch": 0.26855, "grad_norm": 0.07028713077306747, "learning_rate": 1.3573327992701245e-05, "loss": 0.0347, "step": 133710 }, { "epoch": 0.2686, "grad_norm": 0.07004103809595108, "learning_rate": 1.356965152514345e-05, "loss": 0.034, "step": 133720 }, { "epoch": 0.26865, "grad_norm": 0.054050881415605545, "learning_rate": 1.356597537008708e-05, "loss": 0.0338, "step": 133730 }, { "epoch": 0.2687, "grad_norm": 0.05609206482768059, "learning_rate": 1.3562299527632643e-05, "loss": 0.0325, "step": 133740 }, { "epoch": 0.26875, "grad_norm": 0.049644213169813156, "learning_rate": 1.3558623997880666e-05, "loss": 0.0332, "step": 133750 }, { "epoch": 0.2688, "grad_norm": 0.07129288464784622, "learning_rate": 1.3554948780931586e-05, "loss": 0.0326, "step": 133760 }, { "epoch": 0.26885, "grad_norm": 0.048589419573545456, "learning_rate": 1.3551273876885917e-05, "loss": 0.0322, "step": 133770 }, { "epoch": 0.2689, "grad_norm": 0.05963992327451706, "learning_rate": 1.3547599285844109e-05, "loss": 0.0331, "step": 133780 }, { "epoch": 0.26895, "grad_norm": 0.07128456979990005, "learning_rate": 1.3543925007906644e-05, "loss": 0.0349, "step": 133790 }, { "epoch": 0.269, "grad_norm": 0.05785830318927765, "learning_rate": 1.3540251043173968e-05, "loss": 0.0342, "step": 133800 }, { "epoch": 0.26905, "grad_norm": 0.06798305362462997, "learning_rate": 1.3536577391746511e-05, "loss": 0.0322, "step": 133810 }, { "epoch": 0.2691, "grad_norm": 0.0627092495560646, "learning_rate": 1.3532904053724731e-05, "loss": 0.0331, "step": 133820 }, { "epoch": 0.26915, "grad_norm": 0.07346533983945847, "learning_rate": 1.352923102920905e-05, "loss": 0.0325, "step": 133830 }, { "epoch": 0.2692, "grad_norm": 0.053694840520620346, "learning_rate": 1.352555831829988e-05, "loss": 0.0317, "step": 133840 }, { "epoch": 0.26925, "grad_norm": 0.054381098598241806, "learning_rate": 1.3521885921097624e-05, "loss": 0.0327, "step": 133850 }, { "epoch": 0.2693, "grad_norm": 0.06548306345939636, "learning_rate": 1.3518213837702697e-05, "loss": 0.0334, "step": 133860 }, { "epoch": 0.26935, "grad_norm": 0.05894751101732254, "learning_rate": 1.3514542068215496e-05, "loss": 0.0337, "step": 133870 }, { "epoch": 0.2694, "grad_norm": 0.05426390469074249, "learning_rate": 1.3510870612736403e-05, "loss": 0.0347, "step": 133880 }, { "epoch": 0.26945, "grad_norm": 0.05677363649010658, "learning_rate": 1.3507199471365794e-05, "loss": 0.0342, "step": 133890 }, { "epoch": 0.2695, "grad_norm": 0.055190473794937134, "learning_rate": 1.3503528644204022e-05, "loss": 0.0348, "step": 133900 }, { "epoch": 0.26955, "grad_norm": 0.06938653439283371, "learning_rate": 1.3499858131351467e-05, "loss": 0.0359, "step": 133910 }, { "epoch": 0.2696, "grad_norm": 0.06496905535459518, "learning_rate": 1.3496187932908472e-05, "loss": 0.0359, "step": 133920 }, { "epoch": 0.26965, "grad_norm": 0.053804852068424225, "learning_rate": 1.3492518048975365e-05, "loss": 0.0338, "step": 133930 }, { "epoch": 0.2697, "grad_norm": 0.050974227488040924, "learning_rate": 1.34888484796525e-05, "loss": 0.034, "step": 133940 }, { "epoch": 0.26975, "grad_norm": 0.05593269690871239, "learning_rate": 1.3485179225040185e-05, "loss": 0.0342, "step": 133950 }, { "epoch": 0.2698, "grad_norm": 0.04842517897486687, "learning_rate": 1.3481510285238764e-05, "loss": 0.0339, "step": 133960 }, { "epoch": 0.26985, "grad_norm": 0.05278436839580536, "learning_rate": 1.3477841660348506e-05, "loss": 0.0353, "step": 133970 }, { "epoch": 0.2699, "grad_norm": 0.06895551085472107, "learning_rate": 1.3474173350469726e-05, "loss": 0.0345, "step": 133980 }, { "epoch": 0.26995, "grad_norm": 0.06069585680961609, "learning_rate": 1.3470505355702729e-05, "loss": 0.033, "step": 133990 }, { "epoch": 0.27, "grad_norm": 0.08514784276485443, "learning_rate": 1.3466837676147781e-05, "loss": 0.0333, "step": 134000 }, { "epoch": 0.27005, "grad_norm": 0.061267901211977005, "learning_rate": 1.346317031190516e-05, "loss": 0.0361, "step": 134010 }, { "epoch": 0.2701, "grad_norm": 0.05103262886404991, "learning_rate": 1.3459503263075118e-05, "loss": 0.0346, "step": 134020 }, { "epoch": 0.27015, "grad_norm": 0.044751379638910294, "learning_rate": 1.3455836529757932e-05, "loss": 0.0325, "step": 134030 }, { "epoch": 0.2702, "grad_norm": 0.0550481379032135, "learning_rate": 1.3452170112053835e-05, "loss": 0.0333, "step": 134040 }, { "epoch": 0.27025, "grad_norm": 0.05319865047931671, "learning_rate": 1.3448504010063062e-05, "loss": 0.0331, "step": 134050 }, { "epoch": 0.2703, "grad_norm": 0.049767591059207916, "learning_rate": 1.3444838223885859e-05, "loss": 0.033, "step": 134060 }, { "epoch": 0.27035, "grad_norm": 0.06488999724388123, "learning_rate": 1.3441172753622428e-05, "loss": 0.0331, "step": 134070 }, { "epoch": 0.2704, "grad_norm": 0.05793136730790138, "learning_rate": 1.3437507599373001e-05, "loss": 0.0333, "step": 134080 }, { "epoch": 0.27045, "grad_norm": 0.07698115706443787, "learning_rate": 1.3433842761237774e-05, "loss": 0.0335, "step": 134090 }, { "epoch": 0.2705, "grad_norm": 0.08233670890331268, "learning_rate": 1.343017823931694e-05, "loss": 0.0333, "step": 134100 }, { "epoch": 0.27055, "grad_norm": 0.08557845652103424, "learning_rate": 1.3426514033710674e-05, "loss": 0.0341, "step": 134110 }, { "epoch": 0.2706, "grad_norm": 0.08992201834917068, "learning_rate": 1.342285014451918e-05, "loss": 0.0336, "step": 134120 }, { "epoch": 0.27065, "grad_norm": 0.06437687575817108, "learning_rate": 1.3419186571842612e-05, "loss": 0.0345, "step": 134130 }, { "epoch": 0.2707, "grad_norm": 0.06617096811532974, "learning_rate": 1.3415523315781123e-05, "loss": 0.034, "step": 134140 }, { "epoch": 0.27075, "grad_norm": 0.061990637332201004, "learning_rate": 1.3411860376434884e-05, "loss": 0.0341, "step": 134150 }, { "epoch": 0.2708, "grad_norm": 0.05145447701215744, "learning_rate": 1.3408197753904018e-05, "loss": 0.0337, "step": 134160 }, { "epoch": 0.27085, "grad_norm": 0.05686485022306442, "learning_rate": 1.3404535448288683e-05, "loss": 0.033, "step": 134170 }, { "epoch": 0.2709, "grad_norm": 0.06566409021615982, "learning_rate": 1.3400873459688989e-05, "loss": 0.0334, "step": 134180 }, { "epoch": 0.27095, "grad_norm": 0.0592525377869606, "learning_rate": 1.339721178820505e-05, "loss": 0.0346, "step": 134190 }, { "epoch": 0.271, "grad_norm": 0.0546598806977272, "learning_rate": 1.3393550433936991e-05, "loss": 0.0325, "step": 134200 }, { "epoch": 0.27105, "grad_norm": 0.053363386541604996, "learning_rate": 1.33898893969849e-05, "loss": 0.0342, "step": 134210 }, { "epoch": 0.2711, "grad_norm": 0.06242294982075691, "learning_rate": 1.3386228677448876e-05, "loss": 0.0353, "step": 134220 }, { "epoch": 0.27115, "grad_norm": 0.05819866061210632, "learning_rate": 1.3382568275428986e-05, "loss": 0.0336, "step": 134230 }, { "epoch": 0.2712, "grad_norm": 0.056766338646411896, "learning_rate": 1.3378908191025313e-05, "loss": 0.0331, "step": 134240 }, { "epoch": 0.27125, "grad_norm": 0.06343390792608261, "learning_rate": 1.3375248424337945e-05, "loss": 0.0338, "step": 134250 }, { "epoch": 0.2713, "grad_norm": 0.05885003134608269, "learning_rate": 1.3371588975466898e-05, "loss": 0.0322, "step": 134260 }, { "epoch": 0.27135, "grad_norm": 0.0626772791147232, "learning_rate": 1.3367929844512247e-05, "loss": 0.0329, "step": 134270 }, { "epoch": 0.2714, "grad_norm": 0.06084499880671501, "learning_rate": 1.3364271031574016e-05, "loss": 0.0375, "step": 134280 }, { "epoch": 0.27145, "grad_norm": 0.05387156084179878, "learning_rate": 1.3360612536752254e-05, "loss": 0.0322, "step": 134290 }, { "epoch": 0.2715, "grad_norm": 0.06268797814846039, "learning_rate": 1.3356954360146973e-05, "loss": 0.0332, "step": 134300 }, { "epoch": 0.27155, "grad_norm": 0.07965473085641861, "learning_rate": 1.3353296501858175e-05, "loss": 0.0344, "step": 134310 }, { "epoch": 0.2716, "grad_norm": 0.057444483041763306, "learning_rate": 1.3349638961985888e-05, "loss": 0.0324, "step": 134320 }, { "epoch": 0.27165, "grad_norm": 0.07856761664152145, "learning_rate": 1.3345981740630092e-05, "loss": 0.0349, "step": 134330 }, { "epoch": 0.2717, "grad_norm": 0.06308524310588837, "learning_rate": 1.3342324837890777e-05, "loss": 0.0336, "step": 134340 }, { "epoch": 0.27175, "grad_norm": 0.05933582782745361, "learning_rate": 1.3338668253867911e-05, "loss": 0.0355, "step": 134350 }, { "epoch": 0.2718, "grad_norm": 0.052174169570207596, "learning_rate": 1.3335011988661486e-05, "loss": 0.0338, "step": 134360 }, { "epoch": 0.27185, "grad_norm": 0.06952715665102005, "learning_rate": 1.333135604237144e-05, "loss": 0.0346, "step": 134370 }, { "epoch": 0.2719, "grad_norm": 0.06507941335439682, "learning_rate": 1.3327700415097743e-05, "loss": 0.0343, "step": 134380 }, { "epoch": 0.27195, "grad_norm": 0.05496826767921448, "learning_rate": 1.3324045106940337e-05, "loss": 0.034, "step": 134390 }, { "epoch": 0.272, "grad_norm": 0.06886152178049088, "learning_rate": 1.3320390117999138e-05, "loss": 0.035, "step": 134400 }, { "epoch": 0.27205, "grad_norm": 0.051142431795597076, "learning_rate": 1.3316735448374095e-05, "loss": 0.0334, "step": 134410 }, { "epoch": 0.2721, "grad_norm": 0.0529659204185009, "learning_rate": 1.3313081098165118e-05, "loss": 0.0336, "step": 134420 }, { "epoch": 0.27215, "grad_norm": 0.05416841432452202, "learning_rate": 1.3309427067472102e-05, "loss": 0.0332, "step": 134430 }, { "epoch": 0.2722, "grad_norm": 0.06381898373365402, "learning_rate": 1.3305773356394969e-05, "loss": 0.0341, "step": 134440 }, { "epoch": 0.27225, "grad_norm": 0.0496458001434803, "learning_rate": 1.3302119965033588e-05, "loss": 0.0329, "step": 134450 }, { "epoch": 0.2723, "grad_norm": 0.055828552693128586, "learning_rate": 1.3298466893487871e-05, "loss": 0.0345, "step": 134460 }, { "epoch": 0.27235, "grad_norm": 0.060162320733070374, "learning_rate": 1.3294814141857653e-05, "loss": 0.0328, "step": 134470 }, { "epoch": 0.2724, "grad_norm": 0.06394720822572708, "learning_rate": 1.3291161710242833e-05, "loss": 0.0328, "step": 134480 }, { "epoch": 0.27245, "grad_norm": 0.05539621040225029, "learning_rate": 1.3287509598743239e-05, "loss": 0.0342, "step": 134490 }, { "epoch": 0.2725, "grad_norm": 0.053097691386938095, "learning_rate": 1.3283857807458744e-05, "loss": 0.0348, "step": 134500 }, { "epoch": 0.27255, "grad_norm": 0.04288434982299805, "learning_rate": 1.3280206336489176e-05, "loss": 0.0338, "step": 134510 }, { "epoch": 0.2726, "grad_norm": 0.055250134319067, "learning_rate": 1.3276555185934353e-05, "loss": 0.0327, "step": 134520 }, { "epoch": 0.27265, "grad_norm": 0.059500452131032944, "learning_rate": 1.3272904355894117e-05, "loss": 0.033, "step": 134530 }, { "epoch": 0.2727, "grad_norm": 0.04469634220004082, "learning_rate": 1.326925384646827e-05, "loss": 0.0316, "step": 134540 }, { "epoch": 0.27275, "grad_norm": 0.05874725431203842, "learning_rate": 1.3265603657756615e-05, "loss": 0.0323, "step": 134550 }, { "epoch": 0.2728, "grad_norm": 0.05500799044966698, "learning_rate": 1.3261953789858939e-05, "loss": 0.0318, "step": 134560 }, { "epoch": 0.27285, "grad_norm": 0.05438421666622162, "learning_rate": 1.3258304242875036e-05, "loss": 0.0337, "step": 134570 }, { "epoch": 0.2729, "grad_norm": 0.060280703008174896, "learning_rate": 1.3254655016904693e-05, "loss": 0.0324, "step": 134580 }, { "epoch": 0.27295, "grad_norm": 0.06587333232164383, "learning_rate": 1.325100611204767e-05, "loss": 0.0352, "step": 134590 }, { "epoch": 0.273, "grad_norm": 0.06733546406030655, "learning_rate": 1.3247357528403725e-05, "loss": 0.0328, "step": 134600 }, { "epoch": 0.27305, "grad_norm": 0.07355872541666031, "learning_rate": 1.3243709266072601e-05, "loss": 0.0329, "step": 134610 }, { "epoch": 0.2731, "grad_norm": 0.11602780967950821, "learning_rate": 1.324006132515406e-05, "loss": 0.0361, "step": 134620 }, { "epoch": 0.27315, "grad_norm": 0.09205681830644608, "learning_rate": 1.3236413705747824e-05, "loss": 0.0362, "step": 134630 }, { "epoch": 0.2732, "grad_norm": 0.10276363044977188, "learning_rate": 1.3232766407953607e-05, "loss": 0.0344, "step": 134640 }, { "epoch": 0.27325, "grad_norm": 0.07067776471376419, "learning_rate": 1.3229119431871145e-05, "loss": 0.0336, "step": 134650 }, { "epoch": 0.2733, "grad_norm": 0.08137717097997665, "learning_rate": 1.322547277760013e-05, "loss": 0.04, "step": 134660 }, { "epoch": 0.27335, "grad_norm": 0.09185691922903061, "learning_rate": 1.3221826445240279e-05, "loss": 0.0343, "step": 134670 }, { "epoch": 0.2734, "grad_norm": 0.07215512543916702, "learning_rate": 1.3218180434891252e-05, "loss": 0.0359, "step": 134680 }, { "epoch": 0.27345, "grad_norm": 0.06042398139834404, "learning_rate": 1.3214534746652746e-05, "loss": 0.0339, "step": 134690 }, { "epoch": 0.2735, "grad_norm": 0.06867025047540665, "learning_rate": 1.3210889380624442e-05, "loss": 0.0366, "step": 134700 }, { "epoch": 0.27355, "grad_norm": 0.07023248821496964, "learning_rate": 1.3207244336905994e-05, "loss": 0.0345, "step": 134710 }, { "epoch": 0.2736, "grad_norm": 0.06673972308635712, "learning_rate": 1.3203599615597052e-05, "loss": 0.0356, "step": 134720 }, { "epoch": 0.27365, "grad_norm": 0.0651073008775711, "learning_rate": 1.3199955216797257e-05, "loss": 0.0339, "step": 134730 }, { "epoch": 0.2737, "grad_norm": 0.05058138817548752, "learning_rate": 1.3196311140606266e-05, "loss": 0.0343, "step": 134740 }, { "epoch": 0.27375, "grad_norm": 0.051638465374708176, "learning_rate": 1.319266738712369e-05, "loss": 0.0334, "step": 134750 }, { "epoch": 0.2738, "grad_norm": 0.06750901788473129, "learning_rate": 1.3189023956449143e-05, "loss": 0.0356, "step": 134760 }, { "epoch": 0.27385, "grad_norm": 0.05553903803229332, "learning_rate": 1.3185380848682255e-05, "loss": 0.0339, "step": 134770 }, { "epoch": 0.2739, "grad_norm": 0.0543627068400383, "learning_rate": 1.3181738063922605e-05, "loss": 0.0332, "step": 134780 }, { "epoch": 0.27395, "grad_norm": 0.05075423792004585, "learning_rate": 1.3178095602269807e-05, "loss": 0.0334, "step": 134790 }, { "epoch": 0.274, "grad_norm": 0.06586143374443054, "learning_rate": 1.3174453463823433e-05, "loss": 0.0337, "step": 134800 }, { "epoch": 0.27405, "grad_norm": 0.05968514084815979, "learning_rate": 1.3170811648683052e-05, "loss": 0.0337, "step": 134810 }, { "epoch": 0.2741, "grad_norm": 0.0509415827691555, "learning_rate": 1.3167170156948242e-05, "loss": 0.0335, "step": 134820 }, { "epoch": 0.27415, "grad_norm": 0.05163281783461571, "learning_rate": 1.3163528988718554e-05, "loss": 0.0338, "step": 134830 }, { "epoch": 0.2742, "grad_norm": 0.06403462588787079, "learning_rate": 1.3159888144093541e-05, "loss": 0.0348, "step": 134840 }, { "epoch": 0.27425, "grad_norm": 0.08456991612911224, "learning_rate": 1.3156247623172727e-05, "loss": 0.035, "step": 134850 }, { "epoch": 0.2743, "grad_norm": 0.06916992366313934, "learning_rate": 1.3152607426055662e-05, "loss": 0.0343, "step": 134860 }, { "epoch": 0.27435, "grad_norm": 0.059461891651153564, "learning_rate": 1.314896755284185e-05, "loss": 0.0329, "step": 134870 }, { "epoch": 0.2744, "grad_norm": 0.05513419583439827, "learning_rate": 1.3145328003630821e-05, "loss": 0.0362, "step": 134880 }, { "epoch": 0.27445, "grad_norm": 0.05778443440794945, "learning_rate": 1.3141688778522072e-05, "loss": 0.0338, "step": 134890 }, { "epoch": 0.2745, "grad_norm": 0.046256016939878464, "learning_rate": 1.3138049877615088e-05, "loss": 0.0349, "step": 134900 }, { "epoch": 0.27455, "grad_norm": 0.05146146938204765, "learning_rate": 1.3134411301009374e-05, "loss": 0.0335, "step": 134910 }, { "epoch": 0.2746, "grad_norm": 0.06435507535934448, "learning_rate": 1.3130773048804396e-05, "loss": 0.0357, "step": 134920 }, { "epoch": 0.27465, "grad_norm": 0.06181373447179794, "learning_rate": 1.3127135121099624e-05, "loss": 0.0351, "step": 134930 }, { "epoch": 0.2747, "grad_norm": 0.05738835781812668, "learning_rate": 1.3123497517994509e-05, "loss": 0.0323, "step": 134940 }, { "epoch": 0.27475, "grad_norm": 0.053311824798583984, "learning_rate": 1.3119860239588507e-05, "loss": 0.0341, "step": 134950 }, { "epoch": 0.2748, "grad_norm": 0.04415374621748924, "learning_rate": 1.3116223285981086e-05, "loss": 0.0331, "step": 134960 }, { "epoch": 0.27485, "grad_norm": 0.050012920051813126, "learning_rate": 1.3112586657271633e-05, "loss": 0.0329, "step": 134970 }, { "epoch": 0.2749, "grad_norm": 0.061650365591049194, "learning_rate": 1.3108950353559607e-05, "loss": 0.0346, "step": 134980 }, { "epoch": 0.27495, "grad_norm": 0.07193632423877716, "learning_rate": 1.3105314374944399e-05, "loss": 0.0337, "step": 134990 }, { "epoch": 0.275, "grad_norm": 0.05671504884958267, "learning_rate": 1.310167872152544e-05, "loss": 0.0343, "step": 135000 }, { "epoch": 0.27505, "grad_norm": 0.06242790445685387, "learning_rate": 1.3098043393402114e-05, "loss": 0.0332, "step": 135010 }, { "epoch": 0.2751, "grad_norm": 0.06047506630420685, "learning_rate": 1.30944083906738e-05, "loss": 0.0327, "step": 135020 }, { "epoch": 0.27515, "grad_norm": 0.06618639826774597, "learning_rate": 1.3090773713439896e-05, "loss": 0.0341, "step": 135030 }, { "epoch": 0.2752, "grad_norm": 0.055529411882162094, "learning_rate": 1.3087139361799766e-05, "loss": 0.0335, "step": 135040 }, { "epoch": 0.27525, "grad_norm": 0.05129161477088928, "learning_rate": 1.3083505335852771e-05, "loss": 0.0351, "step": 135050 }, { "epoch": 0.2753, "grad_norm": 0.05257735773921013, "learning_rate": 1.3079871635698255e-05, "loss": 0.0334, "step": 135060 }, { "epoch": 0.27535, "grad_norm": 0.057310450822114944, "learning_rate": 1.307623826143557e-05, "loss": 0.0341, "step": 135070 }, { "epoch": 0.2754, "grad_norm": 0.06880602985620499, "learning_rate": 1.3072605213164057e-05, "loss": 0.035, "step": 135080 }, { "epoch": 0.27545, "grad_norm": 0.07616132497787476, "learning_rate": 1.3068972490983039e-05, "loss": 0.034, "step": 135090 }, { "epoch": 0.2755, "grad_norm": 0.06144719943404198, "learning_rate": 1.3065340094991832e-05, "loss": 0.0333, "step": 135100 }, { "epoch": 0.27555, "grad_norm": 0.05728575214743614, "learning_rate": 1.3061708025289731e-05, "loss": 0.0336, "step": 135110 }, { "epoch": 0.2756, "grad_norm": 0.07311052083969116, "learning_rate": 1.3058076281976059e-05, "loss": 0.033, "step": 135120 }, { "epoch": 0.27565, "grad_norm": 0.06681732088327408, "learning_rate": 1.3054444865150095e-05, "loss": 0.0325, "step": 135130 }, { "epoch": 0.2757, "grad_norm": 0.06032009422779083, "learning_rate": 1.3050813774911112e-05, "loss": 0.0312, "step": 135140 }, { "epoch": 0.27575, "grad_norm": 0.047618068754673004, "learning_rate": 1.30471830113584e-05, "loss": 0.0314, "step": 135150 }, { "epoch": 0.2758, "grad_norm": 0.05425645038485527, "learning_rate": 1.3043552574591204e-05, "loss": 0.0337, "step": 135160 }, { "epoch": 0.27585, "grad_norm": 0.054841794073581696, "learning_rate": 1.3039922464708806e-05, "loss": 0.0322, "step": 135170 }, { "epoch": 0.2759, "grad_norm": 0.06096262484788895, "learning_rate": 1.303629268181042e-05, "loss": 0.0321, "step": 135180 }, { "epoch": 0.27595, "grad_norm": 0.05185467004776001, "learning_rate": 1.3032663225995292e-05, "loss": 0.0325, "step": 135190 }, { "epoch": 0.276, "grad_norm": 0.05274657532572746, "learning_rate": 1.302903409736267e-05, "loss": 0.0326, "step": 135200 }, { "epoch": 0.27605, "grad_norm": 0.0590011291205883, "learning_rate": 1.3025405296011756e-05, "loss": 0.0318, "step": 135210 }, { "epoch": 0.2761, "grad_norm": 0.05683165043592453, "learning_rate": 1.3021776822041764e-05, "loss": 0.0324, "step": 135220 }, { "epoch": 0.27615, "grad_norm": 0.06452971696853638, "learning_rate": 1.3018148675551884e-05, "loss": 0.0325, "step": 135230 }, { "epoch": 0.2762, "grad_norm": 0.058156803250312805, "learning_rate": 1.3014520856641327e-05, "loss": 0.0322, "step": 135240 }, { "epoch": 0.27625, "grad_norm": 0.05621333420276642, "learning_rate": 1.3010893365409265e-05, "loss": 0.0334, "step": 135250 }, { "epoch": 0.2763, "grad_norm": 0.051411014050245285, "learning_rate": 1.3007266201954866e-05, "loss": 0.0326, "step": 135260 }, { "epoch": 0.27635, "grad_norm": 0.04615700989961624, "learning_rate": 1.3003639366377312e-05, "loss": 0.0341, "step": 135270 }, { "epoch": 0.2764, "grad_norm": 0.0606042705476284, "learning_rate": 1.3000012858775745e-05, "loss": 0.0358, "step": 135280 }, { "epoch": 0.27645, "grad_norm": 0.07736486196517944, "learning_rate": 1.2996386679249325e-05, "loss": 0.0347, "step": 135290 }, { "epoch": 0.2765, "grad_norm": 0.04653066769242287, "learning_rate": 1.2992760827897183e-05, "loss": 0.0337, "step": 135300 }, { "epoch": 0.27655, "grad_norm": 0.0559215284883976, "learning_rate": 1.298913530481845e-05, "loss": 0.0337, "step": 135310 }, { "epoch": 0.2766, "grad_norm": 0.06434296071529388, "learning_rate": 1.2985510110112237e-05, "loss": 0.0349, "step": 135320 }, { "epoch": 0.27665, "grad_norm": 0.04896357282996178, "learning_rate": 1.2981885243877673e-05, "loss": 0.0355, "step": 135330 }, { "epoch": 0.2767, "grad_norm": 0.05654190108180046, "learning_rate": 1.2978260706213852e-05, "loss": 0.0342, "step": 135340 }, { "epoch": 0.27675, "grad_norm": 0.05086608976125717, "learning_rate": 1.2974636497219856e-05, "loss": 0.0333, "step": 135350 }, { "epoch": 0.2768, "grad_norm": 0.05549168214201927, "learning_rate": 1.2971012616994794e-05, "loss": 0.0345, "step": 135360 }, { "epoch": 0.27685, "grad_norm": 0.051113519817590714, "learning_rate": 1.2967389065637713e-05, "loss": 0.0333, "step": 135370 }, { "epoch": 0.2769, "grad_norm": 0.04853764921426773, "learning_rate": 1.2963765843247705e-05, "loss": 0.0335, "step": 135380 }, { "epoch": 0.27695, "grad_norm": 0.05270055681467056, "learning_rate": 1.296014294992382e-05, "loss": 0.0354, "step": 135390 }, { "epoch": 0.277, "grad_norm": 0.056161247193813324, "learning_rate": 1.2956520385765091e-05, "loss": 0.034, "step": 135400 }, { "epoch": 0.27705, "grad_norm": 0.05111818388104439, "learning_rate": 1.295289815087058e-05, "loss": 0.0341, "step": 135410 }, { "epoch": 0.2771, "grad_norm": 0.06116397678852081, "learning_rate": 1.294927624533931e-05, "loss": 0.0353, "step": 135420 }, { "epoch": 0.27715, "grad_norm": 0.06604544073343277, "learning_rate": 1.29456546692703e-05, "loss": 0.0326, "step": 135430 }, { "epoch": 0.2772, "grad_norm": 0.06811371445655823, "learning_rate": 1.2942033422762551e-05, "loss": 0.032, "step": 135440 }, { "epoch": 0.27725, "grad_norm": 0.06051434203982353, "learning_rate": 1.2938412505915079e-05, "loss": 0.0315, "step": 135450 }, { "epoch": 0.2773, "grad_norm": 0.05439218506217003, "learning_rate": 1.2934791918826896e-05, "loss": 0.0318, "step": 135460 }, { "epoch": 0.27735, "grad_norm": 0.05119166523218155, "learning_rate": 1.2931171661596952e-05, "loss": 0.0331, "step": 135470 }, { "epoch": 0.2774, "grad_norm": 0.04811709374189377, "learning_rate": 1.292755173432425e-05, "loss": 0.0326, "step": 135480 }, { "epoch": 0.27745, "grad_norm": 0.06304600089788437, "learning_rate": 1.2923932137107737e-05, "loss": 0.0333, "step": 135490 }, { "epoch": 0.2775, "grad_norm": 0.06395766139030457, "learning_rate": 1.2920312870046394e-05, "loss": 0.0323, "step": 135500 }, { "epoch": 0.27755, "grad_norm": 0.06287093460559845, "learning_rate": 1.2916693933239157e-05, "loss": 0.0332, "step": 135510 }, { "epoch": 0.2776, "grad_norm": 0.05557131767272949, "learning_rate": 1.2913075326784962e-05, "loss": 0.0347, "step": 135520 }, { "epoch": 0.27765, "grad_norm": 0.05720607191324234, "learning_rate": 1.2909457050782752e-05, "loss": 0.0327, "step": 135530 }, { "epoch": 0.2777, "grad_norm": 0.061198148876428604, "learning_rate": 1.2905839105331447e-05, "loss": 0.0343, "step": 135540 }, { "epoch": 0.27775, "grad_norm": 0.059358034282922745, "learning_rate": 1.2902221490529959e-05, "loss": 0.0336, "step": 135550 }, { "epoch": 0.2778, "grad_norm": 0.06045711785554886, "learning_rate": 1.2898604206477178e-05, "loss": 0.0323, "step": 135560 }, { "epoch": 0.27785, "grad_norm": 0.12279672920703888, "learning_rate": 1.2894987253272023e-05, "loss": 0.0349, "step": 135570 }, { "epoch": 0.2779, "grad_norm": 0.09237831830978394, "learning_rate": 1.2891370631013355e-05, "loss": 0.0327, "step": 135580 }, { "epoch": 0.27795, "grad_norm": 0.06324107199907303, "learning_rate": 1.2887754339800079e-05, "loss": 0.0338, "step": 135590 }, { "epoch": 0.278, "grad_norm": 0.0558076836168766, "learning_rate": 1.2884138379731048e-05, "loss": 0.0331, "step": 135600 }, { "epoch": 0.27805, "grad_norm": 0.06076362729072571, "learning_rate": 1.2880522750905111e-05, "loss": 0.0327, "step": 135610 }, { "epoch": 0.2781, "grad_norm": 0.050440818071365356, "learning_rate": 1.2876907453421139e-05, "loss": 0.0328, "step": 135620 }, { "epoch": 0.27815, "grad_norm": 0.06723838299512863, "learning_rate": 1.2873292487377964e-05, "loss": 0.0335, "step": 135630 }, { "epoch": 0.2782, "grad_norm": 0.055715836584568024, "learning_rate": 1.2869677852874407e-05, "loss": 0.0337, "step": 135640 }, { "epoch": 0.27825, "grad_norm": 0.05138610675930977, "learning_rate": 1.286606355000931e-05, "loss": 0.0327, "step": 135650 }, { "epoch": 0.2783, "grad_norm": 0.04878625646233559, "learning_rate": 1.2862449578881466e-05, "loss": 0.0315, "step": 135660 }, { "epoch": 0.27835, "grad_norm": 0.06936074793338776, "learning_rate": 1.2858835939589712e-05, "loss": 0.0337, "step": 135670 }, { "epoch": 0.2784, "grad_norm": 0.059619609266519547, "learning_rate": 1.28552226322328e-05, "loss": 0.0332, "step": 135680 }, { "epoch": 0.27845, "grad_norm": 0.051202233880758286, "learning_rate": 1.2851609656909552e-05, "loss": 0.0323, "step": 135690 }, { "epoch": 0.2785, "grad_norm": 0.048660289496183395, "learning_rate": 1.2847997013718722e-05, "loss": 0.0337, "step": 135700 }, { "epoch": 0.27855, "grad_norm": 0.06189000606536865, "learning_rate": 1.2844384702759094e-05, "loss": 0.0336, "step": 135710 }, { "epoch": 0.2786, "grad_norm": 0.06334463506937027, "learning_rate": 1.2840772724129425e-05, "loss": 0.034, "step": 135720 }, { "epoch": 0.27865, "grad_norm": 0.057740218937397, "learning_rate": 1.283716107792845e-05, "loss": 0.0327, "step": 135730 }, { "epoch": 0.2787, "grad_norm": 0.05583374947309494, "learning_rate": 1.2833549764254932e-05, "loss": 0.0328, "step": 135740 }, { "epoch": 0.27875, "grad_norm": 0.053421925753355026, "learning_rate": 1.2829938783207593e-05, "loss": 0.0331, "step": 135750 }, { "epoch": 0.2788, "grad_norm": 0.05022544041275978, "learning_rate": 1.2826328134885156e-05, "loss": 0.0332, "step": 135760 }, { "epoch": 0.27885, "grad_norm": 0.06130515784025192, "learning_rate": 1.2822717819386324e-05, "loss": 0.0336, "step": 135770 }, { "epoch": 0.2789, "grad_norm": 0.055185750126838684, "learning_rate": 1.2819107836809813e-05, "loss": 0.033, "step": 135780 }, { "epoch": 0.27895, "grad_norm": 0.05438302829861641, "learning_rate": 1.2815498187254327e-05, "loss": 0.0342, "step": 135790 }, { "epoch": 0.279, "grad_norm": 0.05572971701622009, "learning_rate": 1.2811888870818543e-05, "loss": 0.0335, "step": 135800 }, { "epoch": 0.27905, "grad_norm": 0.05455252528190613, "learning_rate": 1.2808279887601138e-05, "loss": 0.0331, "step": 135810 }, { "epoch": 0.2791, "grad_norm": 0.06497219204902649, "learning_rate": 1.280467123770077e-05, "loss": 0.0332, "step": 135820 }, { "epoch": 0.27915, "grad_norm": 0.05847322940826416, "learning_rate": 1.2801062921216111e-05, "loss": 0.0338, "step": 135830 }, { "epoch": 0.2792, "grad_norm": 0.06104997545480728, "learning_rate": 1.2797454938245826e-05, "loss": 0.0348, "step": 135840 }, { "epoch": 0.27925, "grad_norm": 0.055407196283340454, "learning_rate": 1.2793847288888521e-05, "loss": 0.032, "step": 135850 }, { "epoch": 0.2793, "grad_norm": 0.061831653118133545, "learning_rate": 1.2790239973242854e-05, "loss": 0.034, "step": 135860 }, { "epoch": 0.27935, "grad_norm": 0.05441982299089432, "learning_rate": 1.278663299140743e-05, "loss": 0.0317, "step": 135870 }, { "epoch": 0.2794, "grad_norm": 0.05456315353512764, "learning_rate": 1.2783026343480892e-05, "loss": 0.0326, "step": 135880 }, { "epoch": 0.27945, "grad_norm": 0.07653749734163284, "learning_rate": 1.27794200295618e-05, "loss": 0.0341, "step": 135890 }, { "epoch": 0.2795, "grad_norm": 0.0711459368467331, "learning_rate": 1.277581404974878e-05, "loss": 0.0333, "step": 135900 }, { "epoch": 0.27955, "grad_norm": 0.06660649180412292, "learning_rate": 1.2772208404140418e-05, "loss": 0.0335, "step": 135910 }, { "epoch": 0.2796, "grad_norm": 0.06644999980926514, "learning_rate": 1.2768603092835285e-05, "loss": 0.034, "step": 135920 }, { "epoch": 0.27965, "grad_norm": 0.06661864370107651, "learning_rate": 1.2764998115931948e-05, "loss": 0.0347, "step": 135930 }, { "epoch": 0.2797, "grad_norm": 0.05995258688926697, "learning_rate": 1.2761393473528955e-05, "loss": 0.0339, "step": 135940 }, { "epoch": 0.27975, "grad_norm": 0.054902464151382446, "learning_rate": 1.275778916572488e-05, "loss": 0.0334, "step": 135950 }, { "epoch": 0.2798, "grad_norm": 0.0524166114628315, "learning_rate": 1.2754185192618238e-05, "loss": 0.0341, "step": 135960 }, { "epoch": 0.27985, "grad_norm": 0.06843091547489166, "learning_rate": 1.275058155430758e-05, "loss": 0.0378, "step": 135970 }, { "epoch": 0.2799, "grad_norm": 0.06823035329580307, "learning_rate": 1.2746978250891423e-05, "loss": 0.036, "step": 135980 }, { "epoch": 0.27995, "grad_norm": 0.05720892921090126, "learning_rate": 1.2743375282468267e-05, "loss": 0.0338, "step": 135990 }, { "epoch": 0.28, "grad_norm": 0.06375530362129211, "learning_rate": 1.2739772649136636e-05, "loss": 0.0376, "step": 136000 }, { "epoch": 0.28005, "grad_norm": 0.06734709441661835, "learning_rate": 1.2736170350995013e-05, "loss": 0.0342, "step": 136010 }, { "epoch": 0.2801, "grad_norm": 0.07103891670703888, "learning_rate": 1.273256838814188e-05, "loss": 0.0341, "step": 136020 }, { "epoch": 0.28015, "grad_norm": 0.07042978703975677, "learning_rate": 1.2728966760675726e-05, "loss": 0.0342, "step": 136030 }, { "epoch": 0.2802, "grad_norm": 0.059580136090517044, "learning_rate": 1.2725365468695e-05, "loss": 0.0339, "step": 136040 }, { "epoch": 0.28025, "grad_norm": 0.05698537081480026, "learning_rate": 1.272176451229819e-05, "loss": 0.035, "step": 136050 }, { "epoch": 0.2803, "grad_norm": 0.05858423933386803, "learning_rate": 1.2718163891583706e-05, "loss": 0.0332, "step": 136060 }, { "epoch": 0.28035, "grad_norm": 0.05761784315109253, "learning_rate": 1.271456360665002e-05, "loss": 0.035, "step": 136070 }, { "epoch": 0.2804, "grad_norm": 0.09265638142824173, "learning_rate": 1.2710963657595538e-05, "loss": 0.0362, "step": 136080 }, { "epoch": 0.28045, "grad_norm": 0.0590679869055748, "learning_rate": 1.27073640445187e-05, "loss": 0.0348, "step": 136090 }, { "epoch": 0.2805, "grad_norm": 0.05699898675084114, "learning_rate": 1.2703764767517914e-05, "loss": 0.0337, "step": 136100 }, { "epoch": 0.28055, "grad_norm": 0.07581443339586258, "learning_rate": 1.2700165826691568e-05, "loss": 0.0351, "step": 136110 }, { "epoch": 0.2806, "grad_norm": 0.0731777548789978, "learning_rate": 1.2696567222138078e-05, "loss": 0.0336, "step": 136120 }, { "epoch": 0.28065, "grad_norm": 0.08558888733386993, "learning_rate": 1.269296895395582e-05, "loss": 0.0341, "step": 136130 }, { "epoch": 0.2807, "grad_norm": 0.07193329185247421, "learning_rate": 1.2689371022243166e-05, "loss": 0.0345, "step": 136140 }, { "epoch": 0.28075, "grad_norm": 0.06691916286945343, "learning_rate": 1.2685773427098474e-05, "loss": 0.035, "step": 136150 }, { "epoch": 0.2808, "grad_norm": 0.05623365938663483, "learning_rate": 1.2682176168620107e-05, "loss": 0.0335, "step": 136160 }, { "epoch": 0.28085, "grad_norm": 0.05890646204352379, "learning_rate": 1.267857924690643e-05, "loss": 0.0332, "step": 136170 }, { "epoch": 0.2809, "grad_norm": 0.053852204233407974, "learning_rate": 1.2674982662055765e-05, "loss": 0.0349, "step": 136180 }, { "epoch": 0.28095, "grad_norm": 0.0638689175248146, "learning_rate": 1.2671386414166445e-05, "loss": 0.0339, "step": 136190 }, { "epoch": 0.281, "grad_norm": 0.0775456428527832, "learning_rate": 1.266779050333678e-05, "loss": 0.0354, "step": 136200 }, { "epoch": 0.28105, "grad_norm": 0.0630122721195221, "learning_rate": 1.2664194929665096e-05, "loss": 0.0343, "step": 136210 }, { "epoch": 0.2811, "grad_norm": 0.0455157645046711, "learning_rate": 1.2660599693249688e-05, "loss": 0.0338, "step": 136220 }, { "epoch": 0.28115, "grad_norm": 0.0488881878554821, "learning_rate": 1.2657004794188842e-05, "loss": 0.033, "step": 136230 }, { "epoch": 0.2812, "grad_norm": 0.04977099597454071, "learning_rate": 1.2653410232580857e-05, "loss": 0.0338, "step": 136240 }, { "epoch": 0.28125, "grad_norm": 0.07122388482093811, "learning_rate": 1.2649816008523988e-05, "loss": 0.0341, "step": 136250 }, { "epoch": 0.2813, "grad_norm": 0.0568055734038353, "learning_rate": 1.264622212211653e-05, "loss": 0.0325, "step": 136260 }, { "epoch": 0.28135, "grad_norm": 0.05512743070721626, "learning_rate": 1.2642628573456694e-05, "loss": 0.0346, "step": 136270 }, { "epoch": 0.2814, "grad_norm": 0.07318541407585144, "learning_rate": 1.2639035362642755e-05, "loss": 0.0361, "step": 136280 }, { "epoch": 0.28145, "grad_norm": 0.09015218913555145, "learning_rate": 1.2635442489772954e-05, "loss": 0.0336, "step": 136290 }, { "epoch": 0.2815, "grad_norm": 0.060024112462997437, "learning_rate": 1.263184995494551e-05, "loss": 0.0334, "step": 136300 }, { "epoch": 0.28155, "grad_norm": 0.05561055243015289, "learning_rate": 1.2628257758258644e-05, "loss": 0.0335, "step": 136310 }, { "epoch": 0.2816, "grad_norm": 0.063319131731987, "learning_rate": 1.2624665899810551e-05, "loss": 0.0342, "step": 136320 }, { "epoch": 0.28165, "grad_norm": 0.052624449133872986, "learning_rate": 1.262107437969945e-05, "loss": 0.0334, "step": 136330 }, { "epoch": 0.2817, "grad_norm": 0.05468215420842171, "learning_rate": 1.2617483198023527e-05, "loss": 0.033, "step": 136340 }, { "epoch": 0.28175, "grad_norm": 0.055670272558927536, "learning_rate": 1.2613892354880955e-05, "loss": 0.034, "step": 136350 }, { "epoch": 0.2818, "grad_norm": 0.053894419223070145, "learning_rate": 1.2610301850369921e-05, "loss": 0.0342, "step": 136360 }, { "epoch": 0.28185, "grad_norm": 0.06553252786397934, "learning_rate": 1.2606711684588568e-05, "loss": 0.0336, "step": 136370 }, { "epoch": 0.2819, "grad_norm": 0.05735878273844719, "learning_rate": 1.2603121857635073e-05, "loss": 0.0347, "step": 136380 }, { "epoch": 0.28195, "grad_norm": 0.04910058155655861, "learning_rate": 1.2599532369607566e-05, "loss": 0.0338, "step": 136390 }, { "epoch": 0.282, "grad_norm": 0.06366072595119476, "learning_rate": 1.2595943220604178e-05, "loss": 0.0357, "step": 136400 }, { "epoch": 0.28205, "grad_norm": 0.07096876204013824, "learning_rate": 1.2592354410723053e-05, "loss": 0.0349, "step": 136410 }, { "epoch": 0.2821, "grad_norm": 0.0628928542137146, "learning_rate": 1.2588765940062298e-05, "loss": 0.0346, "step": 136420 }, { "epoch": 0.28215, "grad_norm": 0.05668439716100693, "learning_rate": 1.2585177808720017e-05, "loss": 0.0348, "step": 136430 }, { "epoch": 0.2822, "grad_norm": 0.06567050516605377, "learning_rate": 1.2581590016794303e-05, "loss": 0.0347, "step": 136440 }, { "epoch": 0.28225, "grad_norm": 0.0673997700214386, "learning_rate": 1.2578002564383263e-05, "loss": 0.0358, "step": 136450 }, { "epoch": 0.2823, "grad_norm": 0.06713567674160004, "learning_rate": 1.2574415451584954e-05, "loss": 0.0384, "step": 136460 }, { "epoch": 0.28235, "grad_norm": 0.04711003229022026, "learning_rate": 1.2570828678497465e-05, "loss": 0.0342, "step": 136470 }, { "epoch": 0.2824, "grad_norm": 0.05185955390334129, "learning_rate": 1.2567242245218858e-05, "loss": 0.0329, "step": 136480 }, { "epoch": 0.28245, "grad_norm": 0.05066725239157677, "learning_rate": 1.2563656151847162e-05, "loss": 0.0338, "step": 136490 }, { "epoch": 0.2825, "grad_norm": 0.05633728951215744, "learning_rate": 1.2560070398480445e-05, "loss": 0.0345, "step": 136500 }, { "epoch": 0.28255, "grad_norm": 0.0669638141989708, "learning_rate": 1.2556484985216732e-05, "loss": 0.0343, "step": 136510 }, { "epoch": 0.2826, "grad_norm": 0.05856877937912941, "learning_rate": 1.2552899912154042e-05, "loss": 0.0337, "step": 136520 }, { "epoch": 0.28265, "grad_norm": 0.05611797422170639, "learning_rate": 1.2549315179390387e-05, "loss": 0.0344, "step": 136530 }, { "epoch": 0.2827, "grad_norm": 0.06687197834253311, "learning_rate": 1.2545730787023775e-05, "loss": 0.0379, "step": 136540 }, { "epoch": 0.28275, "grad_norm": 0.051813483238220215, "learning_rate": 1.2542146735152222e-05, "loss": 0.0333, "step": 136550 }, { "epoch": 0.2828, "grad_norm": 0.0661899670958519, "learning_rate": 1.2538563023873679e-05, "loss": 0.0342, "step": 136560 }, { "epoch": 0.28285, "grad_norm": 0.06310340762138367, "learning_rate": 1.2534979653286153e-05, "loss": 0.0328, "step": 136570 }, { "epoch": 0.2829, "grad_norm": 0.0555579736828804, "learning_rate": 1.253139662348759e-05, "loss": 0.0343, "step": 136580 }, { "epoch": 0.28295, "grad_norm": 0.05000456050038338, "learning_rate": 1.2527813934575967e-05, "loss": 0.033, "step": 136590 }, { "epoch": 0.283, "grad_norm": 0.059967171400785446, "learning_rate": 1.2524231586649227e-05, "loss": 0.0331, "step": 136600 }, { "epoch": 0.28305, "grad_norm": 0.05584190413355827, "learning_rate": 1.2520649579805297e-05, "loss": 0.0341, "step": 136610 }, { "epoch": 0.2831, "grad_norm": 0.05312740430235863, "learning_rate": 1.2517067914142128e-05, "loss": 0.0333, "step": 136620 }, { "epoch": 0.28315, "grad_norm": 0.05799239128828049, "learning_rate": 1.2513486589757636e-05, "loss": 0.0327, "step": 136630 }, { "epoch": 0.2832, "grad_norm": 0.06061725690960884, "learning_rate": 1.2509905606749728e-05, "loss": 0.0332, "step": 136640 }, { "epoch": 0.28325, "grad_norm": 0.06276709586381912, "learning_rate": 1.2506324965216298e-05, "loss": 0.0347, "step": 136650 }, { "epoch": 0.2833, "grad_norm": 0.05817681550979614, "learning_rate": 1.250274466525525e-05, "loss": 0.0326, "step": 136660 }, { "epoch": 0.28335, "grad_norm": 0.05032667890191078, "learning_rate": 1.2499164706964481e-05, "loss": 0.0327, "step": 136670 }, { "epoch": 0.2834, "grad_norm": 0.05516530200839043, "learning_rate": 1.249558509044185e-05, "loss": 0.033, "step": 136680 }, { "epoch": 0.28345, "grad_norm": 0.05928047373890877, "learning_rate": 1.2492005815785225e-05, "loss": 0.0359, "step": 136690 }, { "epoch": 0.2835, "grad_norm": 0.05928775295615196, "learning_rate": 1.2488426883092453e-05, "loss": 0.0331, "step": 136700 }, { "epoch": 0.28355, "grad_norm": 0.07499240338802338, "learning_rate": 1.2484848292461396e-05, "loss": 0.0354, "step": 136710 }, { "epoch": 0.2836, "grad_norm": 0.048563044518232346, "learning_rate": 1.2481270043989887e-05, "loss": 0.034, "step": 136720 }, { "epoch": 0.28365, "grad_norm": 0.051543883979320526, "learning_rate": 1.2477692137775742e-05, "loss": 0.033, "step": 136730 }, { "epoch": 0.2837, "grad_norm": 0.05584711208939552, "learning_rate": 1.24741145739168e-05, "loss": 0.0364, "step": 136740 }, { "epoch": 0.28375, "grad_norm": 0.060850005596876144, "learning_rate": 1.2470537352510853e-05, "loss": 0.0353, "step": 136750 }, { "epoch": 0.2838, "grad_norm": 0.058700792491436005, "learning_rate": 1.2466960473655723e-05, "loss": 0.0323, "step": 136760 }, { "epoch": 0.28385, "grad_norm": 0.0639333575963974, "learning_rate": 1.2463383937449166e-05, "loss": 0.0343, "step": 136770 }, { "epoch": 0.2839, "grad_norm": 0.05942584574222565, "learning_rate": 1.2459807743988993e-05, "loss": 0.0338, "step": 136780 }, { "epoch": 0.28395, "grad_norm": 0.05773216113448143, "learning_rate": 1.2456231893372955e-05, "loss": 0.0338, "step": 136790 }, { "epoch": 0.284, "grad_norm": 0.05086157098412514, "learning_rate": 1.2452656385698836e-05, "loss": 0.0327, "step": 136800 }, { "epoch": 0.28405, "grad_norm": 0.056690383702516556, "learning_rate": 1.2449081221064377e-05, "loss": 0.0331, "step": 136810 }, { "epoch": 0.2841, "grad_norm": 0.06598389148712158, "learning_rate": 1.2445506399567311e-05, "loss": 0.0321, "step": 136820 }, { "epoch": 0.28415, "grad_norm": 0.06285768747329712, "learning_rate": 1.2441931921305394e-05, "loss": 0.0323, "step": 136830 }, { "epoch": 0.2842, "grad_norm": 0.05682032182812691, "learning_rate": 1.243835778637634e-05, "loss": 0.0315, "step": 136840 }, { "epoch": 0.28425, "grad_norm": 0.062499918043613434, "learning_rate": 1.2434783994877856e-05, "loss": 0.0344, "step": 136850 }, { "epoch": 0.2843, "grad_norm": 0.07428952306509018, "learning_rate": 1.2431210546907666e-05, "loss": 0.0324, "step": 136860 }, { "epoch": 0.28435, "grad_norm": 0.06071348115801811, "learning_rate": 1.2427637442563447e-05, "loss": 0.0335, "step": 136870 }, { "epoch": 0.2844, "grad_norm": 0.048171207308769226, "learning_rate": 1.2424064681942909e-05, "loss": 0.0313, "step": 136880 }, { "epoch": 0.28445, "grad_norm": 0.056041669100522995, "learning_rate": 1.2420492265143719e-05, "loss": 0.034, "step": 136890 }, { "epoch": 0.2845, "grad_norm": 0.0565711185336113, "learning_rate": 1.2416920192263542e-05, "loss": 0.0316, "step": 136900 }, { "epoch": 0.28455, "grad_norm": 0.05370723828673363, "learning_rate": 1.241334846340003e-05, "loss": 0.0328, "step": 136910 }, { "epoch": 0.2846, "grad_norm": 0.05528026446700096, "learning_rate": 1.2409777078650853e-05, "loss": 0.0311, "step": 136920 }, { "epoch": 0.28465, "grad_norm": 0.06202398240566254, "learning_rate": 1.2406206038113641e-05, "loss": 0.0326, "step": 136930 }, { "epoch": 0.2847, "grad_norm": 0.06248015537858009, "learning_rate": 1.2402635341886016e-05, "loss": 0.0326, "step": 136940 }, { "epoch": 0.28475, "grad_norm": 0.052109282463788986, "learning_rate": 1.2399064990065615e-05, "loss": 0.0326, "step": 136950 }, { "epoch": 0.2848, "grad_norm": 0.051801566034555435, "learning_rate": 1.2395494982750037e-05, "loss": 0.0324, "step": 136960 }, { "epoch": 0.28485, "grad_norm": 0.07044409215450287, "learning_rate": 1.2391925320036907e-05, "loss": 0.0347, "step": 136970 }, { "epoch": 0.2849, "grad_norm": 0.05475321784615517, "learning_rate": 1.2388356002023785e-05, "loss": 0.0332, "step": 136980 }, { "epoch": 0.28495, "grad_norm": 0.06268125027418137, "learning_rate": 1.238478702880827e-05, "loss": 0.0346, "step": 136990 }, { "epoch": 0.285, "grad_norm": 0.058081746101379395, "learning_rate": 1.2381218400487949e-05, "loss": 0.0325, "step": 137000 }, { "epoch": 0.28505, "grad_norm": 0.05382139980792999, "learning_rate": 1.2377650117160374e-05, "loss": 0.0333, "step": 137010 }, { "epoch": 0.2851, "grad_norm": 0.06895588338375092, "learning_rate": 1.2374082178923108e-05, "loss": 0.0329, "step": 137020 }, { "epoch": 0.28515, "grad_norm": 0.05296281725168228, "learning_rate": 1.2370514585873677e-05, "loss": 0.0332, "step": 137030 }, { "epoch": 0.2852, "grad_norm": 0.058857038617134094, "learning_rate": 1.2366947338109635e-05, "loss": 0.0327, "step": 137040 }, { "epoch": 0.28525, "grad_norm": 0.05026755481958389, "learning_rate": 1.2363380435728528e-05, "loss": 0.0315, "step": 137050 }, { "epoch": 0.2853, "grad_norm": 0.06389336287975311, "learning_rate": 1.235981387882783e-05, "loss": 0.0341, "step": 137060 }, { "epoch": 0.28535, "grad_norm": 0.05075691267848015, "learning_rate": 1.2356247667505084e-05, "loss": 0.0324, "step": 137070 }, { "epoch": 0.2854, "grad_norm": 0.05633467063307762, "learning_rate": 1.2352681801857766e-05, "loss": 0.0337, "step": 137080 }, { "epoch": 0.28545, "grad_norm": 0.05485008284449577, "learning_rate": 1.2349116281983383e-05, "loss": 0.0343, "step": 137090 }, { "epoch": 0.2855, "grad_norm": 0.06466303020715714, "learning_rate": 1.2345551107979411e-05, "loss": 0.0337, "step": 137100 }, { "epoch": 0.28555, "grad_norm": 0.04850131645798683, "learning_rate": 1.2341986279943308e-05, "loss": 0.0337, "step": 137110 }, { "epoch": 0.2856, "grad_norm": 0.05085092410445213, "learning_rate": 1.2338421797972554e-05, "loss": 0.0343, "step": 137120 }, { "epoch": 0.28565, "grad_norm": 0.059733033180236816, "learning_rate": 1.2334857662164593e-05, "loss": 0.0335, "step": 137130 }, { "epoch": 0.2857, "grad_norm": 0.05192878842353821, "learning_rate": 1.2331293872616862e-05, "loss": 0.0329, "step": 137140 }, { "epoch": 0.28575, "grad_norm": 0.06230088695883751, "learning_rate": 1.232773042942679e-05, "loss": 0.0347, "step": 137150 }, { "epoch": 0.2858, "grad_norm": 0.06391609460115433, "learning_rate": 1.2324167332691817e-05, "loss": 0.0352, "step": 137160 }, { "epoch": 0.28585, "grad_norm": 0.06976404041051865, "learning_rate": 1.2320604582509335e-05, "loss": 0.0336, "step": 137170 }, { "epoch": 0.2859, "grad_norm": 0.05778951942920685, "learning_rate": 1.2317042178976773e-05, "loss": 0.0332, "step": 137180 }, { "epoch": 0.28595, "grad_norm": 0.05799651890993118, "learning_rate": 1.2313480122191511e-05, "loss": 0.034, "step": 137190 }, { "epoch": 0.286, "grad_norm": 0.0600292906165123, "learning_rate": 1.2309918412250927e-05, "loss": 0.0352, "step": 137200 }, { "epoch": 0.28605, "grad_norm": 0.0597301684319973, "learning_rate": 1.2306357049252415e-05, "loss": 0.0337, "step": 137210 }, { "epoch": 0.2861, "grad_norm": 0.06079234182834625, "learning_rate": 1.2302796033293334e-05, "loss": 0.0339, "step": 137220 }, { "epoch": 0.28615, "grad_norm": 0.06248587369918823, "learning_rate": 1.229923536447103e-05, "loss": 0.0337, "step": 137230 }, { "epoch": 0.2862, "grad_norm": 0.05634044110774994, "learning_rate": 1.2295675042882867e-05, "loss": 0.0335, "step": 137240 }, { "epoch": 0.28625, "grad_norm": 0.05484062433242798, "learning_rate": 1.229211506862617e-05, "loss": 0.0334, "step": 137250 }, { "epoch": 0.2863, "grad_norm": 0.05344822630286217, "learning_rate": 1.2288555441798289e-05, "loss": 0.0349, "step": 137260 }, { "epoch": 0.28635, "grad_norm": 0.055607929825782776, "learning_rate": 1.2284996162496507e-05, "loss": 0.0331, "step": 137270 }, { "epoch": 0.2864, "grad_norm": 0.06643947213888168, "learning_rate": 1.2281437230818166e-05, "loss": 0.034, "step": 137280 }, { "epoch": 0.28645, "grad_norm": 0.07382363826036453, "learning_rate": 1.2277878646860542e-05, "loss": 0.0349, "step": 137290 }, { "epoch": 0.2865, "grad_norm": 0.08346658945083618, "learning_rate": 1.2274320410720943e-05, "loss": 0.0334, "step": 137300 }, { "epoch": 0.28655, "grad_norm": 0.04902971163392067, "learning_rate": 1.2270762522496645e-05, "loss": 0.0328, "step": 137310 }, { "epoch": 0.2866, "grad_norm": 0.07123240828514099, "learning_rate": 1.2267204982284908e-05, "loss": 0.0334, "step": 137320 }, { "epoch": 0.28665, "grad_norm": 0.06636399775743484, "learning_rate": 1.2263647790183014e-05, "loss": 0.033, "step": 137330 }, { "epoch": 0.2867, "grad_norm": 0.059328123927116394, "learning_rate": 1.2260090946288203e-05, "loss": 0.0338, "step": 137340 }, { "epoch": 0.28675, "grad_norm": 0.04988183453679085, "learning_rate": 1.225653445069772e-05, "loss": 0.0336, "step": 137350 }, { "epoch": 0.2868, "grad_norm": 0.06137142330408096, "learning_rate": 1.2252978303508789e-05, "loss": 0.0337, "step": 137360 }, { "epoch": 0.28685, "grad_norm": 0.06146937236189842, "learning_rate": 1.2249422504818642e-05, "loss": 0.0348, "step": 137370 }, { "epoch": 0.2869, "grad_norm": 0.056695591658353806, "learning_rate": 1.2245867054724502e-05, "loss": 0.0365, "step": 137380 }, { "epoch": 0.28695, "grad_norm": 0.061989981681108475, "learning_rate": 1.2242311953323566e-05, "loss": 0.034, "step": 137390 }, { "epoch": 0.287, "grad_norm": 0.059609219431877136, "learning_rate": 1.2238757200713027e-05, "loss": 0.033, "step": 137400 }, { "epoch": 0.28705, "grad_norm": 0.05324563756585121, "learning_rate": 1.2235202796990064e-05, "loss": 0.0341, "step": 137410 }, { "epoch": 0.2871, "grad_norm": 0.05062618479132652, "learning_rate": 1.223164874225187e-05, "loss": 0.0344, "step": 137420 }, { "epoch": 0.28715, "grad_norm": 0.042948681861162186, "learning_rate": 1.2228095036595601e-05, "loss": 0.0329, "step": 137430 }, { "epoch": 0.2872, "grad_norm": 0.05113916099071503, "learning_rate": 1.2224541680118408e-05, "loss": 0.033, "step": 137440 }, { "epoch": 0.28725, "grad_norm": 0.07776989787817001, "learning_rate": 1.2220988672917457e-05, "loss": 0.0358, "step": 137450 }, { "epoch": 0.2873, "grad_norm": 0.06589024513959885, "learning_rate": 1.2217436015089864e-05, "loss": 0.0342, "step": 137460 }, { "epoch": 0.28735, "grad_norm": 0.06623056530952454, "learning_rate": 1.2213883706732785e-05, "loss": 0.0339, "step": 137470 }, { "epoch": 0.2874, "grad_norm": 0.05212220549583435, "learning_rate": 1.2210331747943305e-05, "loss": 0.0335, "step": 137480 }, { "epoch": 0.28745, "grad_norm": 0.0750085711479187, "learning_rate": 1.220678013881855e-05, "loss": 0.036, "step": 137490 }, { "epoch": 0.2875, "grad_norm": 0.052169449627399445, "learning_rate": 1.2203228879455627e-05, "loss": 0.0335, "step": 137500 }, { "epoch": 0.28755, "grad_norm": 0.05590628832578659, "learning_rate": 1.2199677969951622e-05, "loss": 0.0331, "step": 137510 }, { "epoch": 0.2876, "grad_norm": 0.06221117451786995, "learning_rate": 1.2196127410403613e-05, "loss": 0.032, "step": 137520 }, { "epoch": 0.28765, "grad_norm": 0.05918494984507561, "learning_rate": 1.2192577200908659e-05, "loss": 0.0332, "step": 137530 }, { "epoch": 0.2877, "grad_norm": 0.048740535974502563, "learning_rate": 1.218902734156384e-05, "loss": 0.0336, "step": 137540 }, { "epoch": 0.28775, "grad_norm": 0.05140310525894165, "learning_rate": 1.2185477832466206e-05, "loss": 0.0329, "step": 137550 }, { "epoch": 0.2878, "grad_norm": 0.051502104848623276, "learning_rate": 1.218192867371278e-05, "loss": 0.0334, "step": 137560 }, { "epoch": 0.28785, "grad_norm": 0.06754551827907562, "learning_rate": 1.2178379865400622e-05, "loss": 0.0344, "step": 137570 }, { "epoch": 0.2879, "grad_norm": 0.05078601837158203, "learning_rate": 1.2174831407626727e-05, "loss": 0.0363, "step": 137580 }, { "epoch": 0.28795, "grad_norm": 0.0703398659825325, "learning_rate": 1.2171283300488136e-05, "loss": 0.0344, "step": 137590 }, { "epoch": 0.288, "grad_norm": 0.06634822487831116, "learning_rate": 1.216773554408184e-05, "loss": 0.0349, "step": 137600 }, { "epoch": 0.28805, "grad_norm": 0.05775686725974083, "learning_rate": 1.2164188138504823e-05, "loss": 0.0326, "step": 137610 }, { "epoch": 0.2881, "grad_norm": 0.057913362979888916, "learning_rate": 1.216064108385409e-05, "loss": 0.0339, "step": 137620 }, { "epoch": 0.28815, "grad_norm": 0.05447633937001228, "learning_rate": 1.2157094380226605e-05, "loss": 0.0336, "step": 137630 }, { "epoch": 0.2882, "grad_norm": 0.06147785484790802, "learning_rate": 1.2153548027719336e-05, "loss": 0.0338, "step": 137640 }, { "epoch": 0.28825, "grad_norm": 0.0486493855714798, "learning_rate": 1.215000202642923e-05, "loss": 0.0334, "step": 137650 }, { "epoch": 0.2883, "grad_norm": 0.04371405765414238, "learning_rate": 1.2146456376453247e-05, "loss": 0.0338, "step": 137660 }, { "epoch": 0.28835, "grad_norm": 0.04846899211406708, "learning_rate": 1.214291107788831e-05, "loss": 0.0335, "step": 137670 }, { "epoch": 0.2884, "grad_norm": 0.053997546434402466, "learning_rate": 1.2139366130831364e-05, "loss": 0.0348, "step": 137680 }, { "epoch": 0.28845, "grad_norm": 0.060016434639692307, "learning_rate": 1.2135821535379316e-05, "loss": 0.0333, "step": 137690 }, { "epoch": 0.2885, "grad_norm": 0.0512930192053318, "learning_rate": 1.2132277291629066e-05, "loss": 0.0325, "step": 137700 }, { "epoch": 0.28855, "grad_norm": 0.047423042356967926, "learning_rate": 1.2128733399677527e-05, "loss": 0.0331, "step": 137710 }, { "epoch": 0.2886, "grad_norm": 0.05766147002577782, "learning_rate": 1.2125189859621583e-05, "loss": 0.0347, "step": 137720 }, { "epoch": 0.28865, "grad_norm": 0.07152627408504486, "learning_rate": 1.2121646671558112e-05, "loss": 0.0341, "step": 137730 }, { "epoch": 0.2887, "grad_norm": 0.05913759395480156, "learning_rate": 1.2118103835583974e-05, "loss": 0.0325, "step": 137740 }, { "epoch": 0.28875, "grad_norm": 0.09551774710416794, "learning_rate": 1.2114561351796037e-05, "loss": 0.0354, "step": 137750 }, { "epoch": 0.2888, "grad_norm": 0.07812321931123734, "learning_rate": 1.211101922029117e-05, "loss": 0.035, "step": 137760 }, { "epoch": 0.28885, "grad_norm": 0.07728440314531326, "learning_rate": 1.2107477441166176e-05, "loss": 0.0343, "step": 137770 }, { "epoch": 0.2889, "grad_norm": 0.0633360967040062, "learning_rate": 1.2103936014517917e-05, "loss": 0.0351, "step": 137780 }, { "epoch": 0.28895, "grad_norm": 0.05036721006035805, "learning_rate": 1.210039494044319e-05, "loss": 0.0332, "step": 137790 }, { "epoch": 0.289, "grad_norm": 0.053151726722717285, "learning_rate": 1.209685421903883e-05, "loss": 0.0341, "step": 137800 }, { "epoch": 0.28905, "grad_norm": 0.05260220915079117, "learning_rate": 1.209331385040163e-05, "loss": 0.0332, "step": 137810 }, { "epoch": 0.2891, "grad_norm": 0.07106596976518631, "learning_rate": 1.208977383462837e-05, "loss": 0.0346, "step": 137820 }, { "epoch": 0.28915, "grad_norm": 0.05681164190173149, "learning_rate": 1.2086234171815852e-05, "loss": 0.0331, "step": 137830 }, { "epoch": 0.2892, "grad_norm": 0.06736712902784348, "learning_rate": 1.2082694862060839e-05, "loss": 0.0334, "step": 137840 }, { "epoch": 0.28925, "grad_norm": 0.0499461404979229, "learning_rate": 1.2079155905460099e-05, "loss": 0.0329, "step": 137850 }, { "epoch": 0.2893, "grad_norm": 0.04850183427333832, "learning_rate": 1.207561730211037e-05, "loss": 0.0334, "step": 137860 }, { "epoch": 0.28935, "grad_norm": 0.06345546245574951, "learning_rate": 1.207207905210841e-05, "loss": 0.0332, "step": 137870 }, { "epoch": 0.2894, "grad_norm": 0.05960962548851967, "learning_rate": 1.206854115555096e-05, "loss": 0.0353, "step": 137880 }, { "epoch": 0.28945, "grad_norm": 0.05867240950465202, "learning_rate": 1.206500361253474e-05, "loss": 0.034, "step": 137890 }, { "epoch": 0.2895, "grad_norm": 0.05325555056333542, "learning_rate": 1.206146642315646e-05, "loss": 0.0353, "step": 137900 }, { "epoch": 0.28955, "grad_norm": 0.05943364277482033, "learning_rate": 1.2057929587512814e-05, "loss": 0.0336, "step": 137910 }, { "epoch": 0.2896, "grad_norm": 0.05827326700091362, "learning_rate": 1.2054393105700523e-05, "loss": 0.0331, "step": 137920 }, { "epoch": 0.28965, "grad_norm": 0.046118587255477905, "learning_rate": 1.2050856977816264e-05, "loss": 0.0334, "step": 137930 }, { "epoch": 0.2897, "grad_norm": 0.04549780115485191, "learning_rate": 1.2047321203956699e-05, "loss": 0.0326, "step": 137940 }, { "epoch": 0.28975, "grad_norm": 0.0508422777056694, "learning_rate": 1.2043785784218514e-05, "loss": 0.033, "step": 137950 }, { "epoch": 0.2898, "grad_norm": 0.07315465062856674, "learning_rate": 1.204025071869835e-05, "loss": 0.0373, "step": 137960 }, { "epoch": 0.28985, "grad_norm": 0.06244395673274994, "learning_rate": 1.2036716007492882e-05, "loss": 0.0355, "step": 137970 }, { "epoch": 0.2899, "grad_norm": 0.061203405261039734, "learning_rate": 1.2033181650698708e-05, "loss": 0.0332, "step": 137980 }, { "epoch": 0.28995, "grad_norm": 0.054324351251125336, "learning_rate": 1.2029647648412479e-05, "loss": 0.0349, "step": 137990 }, { "epoch": 0.29, "grad_norm": 0.051710255444049835, "learning_rate": 1.2026114000730818e-05, "loss": 0.0326, "step": 138000 }, { "epoch": 0.29005, "grad_norm": 0.05411173775792122, "learning_rate": 1.2022580707750325e-05, "loss": 0.0334, "step": 138010 }, { "epoch": 0.2901, "grad_norm": 0.051477011293172836, "learning_rate": 1.2019047769567601e-05, "loss": 0.0371, "step": 138020 }, { "epoch": 0.29015, "grad_norm": 0.06500280648469925, "learning_rate": 1.2015515186279225e-05, "loss": 0.0333, "step": 138030 }, { "epoch": 0.2902, "grad_norm": 0.06152309849858284, "learning_rate": 1.2011982957981795e-05, "loss": 0.0339, "step": 138040 }, { "epoch": 0.29025, "grad_norm": 0.046150896698236465, "learning_rate": 1.2008451084771873e-05, "loss": 0.0337, "step": 138050 }, { "epoch": 0.2903, "grad_norm": 0.07280026376247406, "learning_rate": 1.2004919566746009e-05, "loss": 0.034, "step": 138060 }, { "epoch": 0.29035, "grad_norm": 0.06372442096471786, "learning_rate": 1.2001388404000769e-05, "loss": 0.0314, "step": 138070 }, { "epoch": 0.2904, "grad_norm": 0.053509101271629333, "learning_rate": 1.1997857596632678e-05, "loss": 0.0327, "step": 138080 }, { "epoch": 0.29045, "grad_norm": 0.0671224296092987, "learning_rate": 1.1994327144738285e-05, "loss": 0.0334, "step": 138090 }, { "epoch": 0.2905, "grad_norm": 0.055848028510808945, "learning_rate": 1.1990797048414102e-05, "loss": 0.0325, "step": 138100 }, { "epoch": 0.29055, "grad_norm": 0.07784879952669144, "learning_rate": 1.1987267307756639e-05, "loss": 0.033, "step": 138110 }, { "epoch": 0.2906, "grad_norm": 0.05662766471505165, "learning_rate": 1.1983737922862392e-05, "loss": 0.033, "step": 138120 }, { "epoch": 0.29065, "grad_norm": 0.05109269171953201, "learning_rate": 1.1980208893827868e-05, "loss": 0.0322, "step": 138130 }, { "epoch": 0.2907, "grad_norm": 0.045039620250463486, "learning_rate": 1.1976680220749543e-05, "loss": 0.0328, "step": 138140 }, { "epoch": 0.29075, "grad_norm": 0.044065602123737335, "learning_rate": 1.1973151903723875e-05, "loss": 0.0347, "step": 138150 }, { "epoch": 0.2908, "grad_norm": 0.05793699622154236, "learning_rate": 1.1969623942847355e-05, "loss": 0.0349, "step": 138160 }, { "epoch": 0.29085, "grad_norm": 0.058388493955135345, "learning_rate": 1.1966096338216406e-05, "loss": 0.0336, "step": 138170 }, { "epoch": 0.2909, "grad_norm": 0.06332320719957352, "learning_rate": 1.1962569089927511e-05, "loss": 0.0358, "step": 138180 }, { "epoch": 0.29095, "grad_norm": 0.06636647880077362, "learning_rate": 1.1959042198077056e-05, "loss": 0.0356, "step": 138190 }, { "epoch": 0.291, "grad_norm": 0.05746784806251526, "learning_rate": 1.195551566276149e-05, "loss": 0.0349, "step": 138200 }, { "epoch": 0.29105, "grad_norm": 0.05651381239295006, "learning_rate": 1.1951989484077234e-05, "loss": 0.0339, "step": 138210 }, { "epoch": 0.2911, "grad_norm": 0.05743367224931717, "learning_rate": 1.1948463662120684e-05, "loss": 0.0352, "step": 138220 }, { "epoch": 0.29115, "grad_norm": 0.06609483063220978, "learning_rate": 1.1944938196988234e-05, "loss": 0.0343, "step": 138230 }, { "epoch": 0.2912, "grad_norm": 0.06054598465561867, "learning_rate": 1.194141308877626e-05, "loss": 0.033, "step": 138240 }, { "epoch": 0.29125, "grad_norm": 0.04975799098610878, "learning_rate": 1.1937888337581146e-05, "loss": 0.0338, "step": 138250 }, { "epoch": 0.2913, "grad_norm": 0.053071774542331696, "learning_rate": 1.1934363943499277e-05, "loss": 0.0345, "step": 138260 }, { "epoch": 0.29135, "grad_norm": 0.04644394293427467, "learning_rate": 1.193083990662697e-05, "loss": 0.034, "step": 138270 }, { "epoch": 0.2914, "grad_norm": 0.05511296167969704, "learning_rate": 1.19273162270606e-05, "loss": 0.0347, "step": 138280 }, { "epoch": 0.29145, "grad_norm": 0.05640757456421852, "learning_rate": 1.1923792904896482e-05, "loss": 0.0342, "step": 138290 }, { "epoch": 0.2915, "grad_norm": 0.06079962104558945, "learning_rate": 1.1920269940230963e-05, "loss": 0.0337, "step": 138300 }, { "epoch": 0.29155, "grad_norm": 0.0591752864420414, "learning_rate": 1.1916747333160353e-05, "loss": 0.0336, "step": 138310 }, { "epoch": 0.2916, "grad_norm": 0.060306381434202194, "learning_rate": 1.1913225083780943e-05, "loss": 0.0346, "step": 138320 }, { "epoch": 0.29165, "grad_norm": 0.08270762115716934, "learning_rate": 1.1909703192189054e-05, "loss": 0.0338, "step": 138330 }, { "epoch": 0.2917, "grad_norm": 0.047715380787849426, "learning_rate": 1.1906181658480961e-05, "loss": 0.0327, "step": 138340 }, { "epoch": 0.29175, "grad_norm": 0.04644634574651718, "learning_rate": 1.1902660482752945e-05, "loss": 0.0334, "step": 138350 }, { "epoch": 0.2918, "grad_norm": 0.04178638383746147, "learning_rate": 1.1899139665101259e-05, "loss": 0.0325, "step": 138360 }, { "epoch": 0.29185, "grad_norm": 0.05462385714054108, "learning_rate": 1.1895619205622183e-05, "loss": 0.0339, "step": 138370 }, { "epoch": 0.2919, "grad_norm": 0.05796770751476288, "learning_rate": 1.1892099104411944e-05, "loss": 0.0327, "step": 138380 }, { "epoch": 0.29195, "grad_norm": 0.05922938510775566, "learning_rate": 1.18885793615668e-05, "loss": 0.0346, "step": 138390 }, { "epoch": 0.292, "grad_norm": 0.06663983315229416, "learning_rate": 1.1885059977182975e-05, "loss": 0.0348, "step": 138400 }, { "epoch": 0.29205, "grad_norm": 0.04856368899345398, "learning_rate": 1.188154095135667e-05, "loss": 0.033, "step": 138410 }, { "epoch": 0.2921, "grad_norm": 0.04834192246198654, "learning_rate": 1.1878022284184118e-05, "loss": 0.0338, "step": 138420 }, { "epoch": 0.29215, "grad_norm": 0.04955565929412842, "learning_rate": 1.1874503975761506e-05, "loss": 0.034, "step": 138430 }, { "epoch": 0.2922, "grad_norm": 0.04974037781357765, "learning_rate": 1.1870986026185013e-05, "loss": 0.0332, "step": 138440 }, { "epoch": 0.29225, "grad_norm": 0.06445062160491943, "learning_rate": 1.1867468435550844e-05, "loss": 0.0332, "step": 138450 }, { "epoch": 0.2923, "grad_norm": 0.056080423295497894, "learning_rate": 1.186395120395514e-05, "loss": 0.0329, "step": 138460 }, { "epoch": 0.29235, "grad_norm": 0.0647350624203682, "learning_rate": 1.1860434331494096e-05, "loss": 0.0356, "step": 138470 }, { "epoch": 0.2924, "grad_norm": 0.06235770508646965, "learning_rate": 1.1856917818263824e-05, "loss": 0.033, "step": 138480 }, { "epoch": 0.29245, "grad_norm": 0.058304354548454285, "learning_rate": 1.1853401664360489e-05, "loss": 0.0322, "step": 138490 }, { "epoch": 0.2925, "grad_norm": 0.049607373774051666, "learning_rate": 1.1849885869880203e-05, "loss": 0.0345, "step": 138500 }, { "epoch": 0.29255, "grad_norm": 0.0553838312625885, "learning_rate": 1.1846370434919108e-05, "loss": 0.0332, "step": 138510 }, { "epoch": 0.2926, "grad_norm": 0.04994002357125282, "learning_rate": 1.1842855359573304e-05, "loss": 0.0329, "step": 138520 }, { "epoch": 0.29265, "grad_norm": 0.09766009449958801, "learning_rate": 1.1839340643938881e-05, "loss": 0.0326, "step": 138530 }, { "epoch": 0.2927, "grad_norm": 0.057872503995895386, "learning_rate": 1.183582628811195e-05, "loss": 0.033, "step": 138540 }, { "epoch": 0.29275, "grad_norm": 0.05053986236453056, "learning_rate": 1.1832312292188582e-05, "loss": 0.0326, "step": 138550 }, { "epoch": 0.2928, "grad_norm": 0.048096589744091034, "learning_rate": 1.1828798656264853e-05, "loss": 0.0325, "step": 138560 }, { "epoch": 0.29285, "grad_norm": 0.051040418446063995, "learning_rate": 1.1825285380436807e-05, "loss": 0.0327, "step": 138570 }, { "epoch": 0.2929, "grad_norm": 0.04403144493699074, "learning_rate": 1.182177246480051e-05, "loss": 0.0313, "step": 138580 }, { "epoch": 0.29295, "grad_norm": 0.048360489308834076, "learning_rate": 1.1818259909452014e-05, "loss": 0.0322, "step": 138590 }, { "epoch": 0.293, "grad_norm": 0.049854207783937454, "learning_rate": 1.1814747714487337e-05, "loss": 0.0327, "step": 138600 }, { "epoch": 0.29305, "grad_norm": 0.059948597103357315, "learning_rate": 1.1811235880002507e-05, "loss": 0.0337, "step": 138610 }, { "epoch": 0.2931, "grad_norm": 0.04575120285153389, "learning_rate": 1.1807724406093524e-05, "loss": 0.0328, "step": 138620 }, { "epoch": 0.29315, "grad_norm": 0.0619422048330307, "learning_rate": 1.1804213292856405e-05, "loss": 0.034, "step": 138630 }, { "epoch": 0.2932, "grad_norm": 0.055875059217214584, "learning_rate": 1.1800702540387143e-05, "loss": 0.0336, "step": 138640 }, { "epoch": 0.29325, "grad_norm": 0.06045487895607948, "learning_rate": 1.1797192148781702e-05, "loss": 0.0337, "step": 138650 }, { "epoch": 0.2933, "grad_norm": 0.056943390518426895, "learning_rate": 1.1793682118136076e-05, "loss": 0.0333, "step": 138660 }, { "epoch": 0.29335, "grad_norm": 0.05551246553659439, "learning_rate": 1.179017244854621e-05, "loss": 0.0342, "step": 138670 }, { "epoch": 0.2934, "grad_norm": 0.05962681025266647, "learning_rate": 1.178666314010809e-05, "loss": 0.0336, "step": 138680 }, { "epoch": 0.29345, "grad_norm": 0.05065612867474556, "learning_rate": 1.1783154192917612e-05, "loss": 0.0341, "step": 138690 }, { "epoch": 0.2935, "grad_norm": 0.05642010271549225, "learning_rate": 1.1779645607070736e-05, "loss": 0.0339, "step": 138700 }, { "epoch": 0.29355, "grad_norm": 0.05656618997454643, "learning_rate": 1.1776137382663389e-05, "loss": 0.0356, "step": 138710 }, { "epoch": 0.2936, "grad_norm": 0.056820448487997055, "learning_rate": 1.1772629519791481e-05, "loss": 0.0339, "step": 138720 }, { "epoch": 0.29365, "grad_norm": 0.05296377092599869, "learning_rate": 1.176912201855091e-05, "loss": 0.0331, "step": 138730 }, { "epoch": 0.2937, "grad_norm": 0.05938102677464485, "learning_rate": 1.1765614879037565e-05, "loss": 0.0348, "step": 138740 }, { "epoch": 0.29375, "grad_norm": 0.0567857027053833, "learning_rate": 1.1762108101347344e-05, "loss": 0.034, "step": 138750 }, { "epoch": 0.2938, "grad_norm": 0.05834837257862091, "learning_rate": 1.1758601685576118e-05, "loss": 0.0345, "step": 138760 }, { "epoch": 0.29385, "grad_norm": 0.05760972946882248, "learning_rate": 1.1755095631819734e-05, "loss": 0.0346, "step": 138770 }, { "epoch": 0.2939, "grad_norm": 0.057558294385671616, "learning_rate": 1.1751589940174074e-05, "loss": 0.0319, "step": 138780 }, { "epoch": 0.29395, "grad_norm": 0.059915948659181595, "learning_rate": 1.1748084610734954e-05, "loss": 0.0322, "step": 138790 }, { "epoch": 0.294, "grad_norm": 0.06128979101777077, "learning_rate": 1.1744579643598232e-05, "loss": 0.0326, "step": 138800 }, { "epoch": 0.29405, "grad_norm": 0.06308026611804962, "learning_rate": 1.1741075038859725e-05, "loss": 0.0327, "step": 138810 }, { "epoch": 0.2941, "grad_norm": 0.0532807894051075, "learning_rate": 1.1737570796615236e-05, "loss": 0.0325, "step": 138820 }, { "epoch": 0.29415, "grad_norm": 0.046811792999506, "learning_rate": 1.1734066916960584e-05, "loss": 0.0324, "step": 138830 }, { "epoch": 0.2942, "grad_norm": 0.04966428130865097, "learning_rate": 1.1730563399991563e-05, "loss": 0.0316, "step": 138840 }, { "epoch": 0.29425, "grad_norm": 0.04455842822790146, "learning_rate": 1.1727060245803952e-05, "loss": 0.032, "step": 138850 }, { "epoch": 0.2943, "grad_norm": 0.05419816076755524, "learning_rate": 1.172355745449352e-05, "loss": 0.0331, "step": 138860 }, { "epoch": 0.29435, "grad_norm": 0.05135956034064293, "learning_rate": 1.1720055026156045e-05, "loss": 0.0324, "step": 138870 }, { "epoch": 0.2944, "grad_norm": 0.05116800218820572, "learning_rate": 1.171655296088727e-05, "loss": 0.0332, "step": 138880 }, { "epoch": 0.29445, "grad_norm": 0.045259881764650345, "learning_rate": 1.1713051258782955e-05, "loss": 0.032, "step": 138890 }, { "epoch": 0.2945, "grad_norm": 0.05142403393983841, "learning_rate": 1.1709549919938827e-05, "loss": 0.0328, "step": 138900 }, { "epoch": 0.29455, "grad_norm": 0.05417029187083244, "learning_rate": 1.1706048944450604e-05, "loss": 0.0325, "step": 138910 }, { "epoch": 0.2946, "grad_norm": 0.05212869867682457, "learning_rate": 1.1702548332414014e-05, "loss": 0.0328, "step": 138920 }, { "epoch": 0.29465, "grad_norm": 0.044010862708091736, "learning_rate": 1.169904808392476e-05, "loss": 0.0329, "step": 138930 }, { "epoch": 0.2947, "grad_norm": 0.05676640197634697, "learning_rate": 1.1695548199078534e-05, "loss": 0.0331, "step": 138940 }, { "epoch": 0.29475, "grad_norm": 0.05736074596643448, "learning_rate": 1.1692048677971013e-05, "loss": 0.0332, "step": 138950 }, { "epoch": 0.2948, "grad_norm": 0.054421745240688324, "learning_rate": 1.168854952069788e-05, "loss": 0.0336, "step": 138960 }, { "epoch": 0.29485, "grad_norm": 0.04825567454099655, "learning_rate": 1.1685050727354821e-05, "loss": 0.032, "step": 138970 }, { "epoch": 0.2949, "grad_norm": 0.056606877595186234, "learning_rate": 1.1681552298037457e-05, "loss": 0.0321, "step": 138980 }, { "epoch": 0.29495, "grad_norm": 0.05282709375023842, "learning_rate": 1.1678054232841456e-05, "loss": 0.0333, "step": 138990 }, { "epoch": 0.295, "grad_norm": 0.06271221488714218, "learning_rate": 1.1674556531862438e-05, "loss": 0.035, "step": 139000 }, { "epoch": 0.29505, "grad_norm": 0.059332944452762604, "learning_rate": 1.167105919519605e-05, "loss": 0.0342, "step": 139010 }, { "epoch": 0.2951, "grad_norm": 0.05737001448869705, "learning_rate": 1.1667562222937895e-05, "loss": 0.0331, "step": 139020 }, { "epoch": 0.29515, "grad_norm": 0.05790884792804718, "learning_rate": 1.166406561518357e-05, "loss": 0.0344, "step": 139030 }, { "epoch": 0.2952, "grad_norm": 0.06033563241362572, "learning_rate": 1.166056937202869e-05, "loss": 0.0341, "step": 139040 }, { "epoch": 0.29525, "grad_norm": 0.056599847972393036, "learning_rate": 1.1657073493568834e-05, "loss": 0.0334, "step": 139050 }, { "epoch": 0.2953, "grad_norm": 0.06243237853050232, "learning_rate": 1.1653577979899574e-05, "loss": 0.0333, "step": 139060 }, { "epoch": 0.29535, "grad_norm": 0.0513954721391201, "learning_rate": 1.1650082831116471e-05, "loss": 0.0326, "step": 139070 }, { "epoch": 0.2954, "grad_norm": 0.04541872814297676, "learning_rate": 1.1646588047315084e-05, "loss": 0.0329, "step": 139080 }, { "epoch": 0.29545, "grad_norm": 0.04447149485349655, "learning_rate": 1.1643093628590976e-05, "loss": 0.033, "step": 139090 }, { "epoch": 0.2955, "grad_norm": 0.05670711025595665, "learning_rate": 1.163959957503967e-05, "loss": 0.0354, "step": 139100 }, { "epoch": 0.29555, "grad_norm": 0.04876406863331795, "learning_rate": 1.1636105886756692e-05, "loss": 0.0331, "step": 139110 }, { "epoch": 0.2956, "grad_norm": 0.05532897636294365, "learning_rate": 1.163261256383755e-05, "loss": 0.0329, "step": 139120 }, { "epoch": 0.29565, "grad_norm": 0.061281319707632065, "learning_rate": 1.1629119606377764e-05, "loss": 0.0342, "step": 139130 }, { "epoch": 0.2957, "grad_norm": 0.06245240569114685, "learning_rate": 1.1625627014472828e-05, "loss": 0.0351, "step": 139140 }, { "epoch": 0.29575, "grad_norm": 0.057373180985450745, "learning_rate": 1.1622134788218217e-05, "loss": 0.0323, "step": 139150 }, { "epoch": 0.2958, "grad_norm": 0.05703739821910858, "learning_rate": 1.1618642927709423e-05, "loss": 0.0334, "step": 139160 }, { "epoch": 0.29585, "grad_norm": 0.05991936847567558, "learning_rate": 1.1615151433041894e-05, "loss": 0.0335, "step": 139170 }, { "epoch": 0.2959, "grad_norm": 0.07308800518512726, "learning_rate": 1.1611660304311114e-05, "loss": 0.0339, "step": 139180 }, { "epoch": 0.29595, "grad_norm": 0.06480929255485535, "learning_rate": 1.1608169541612493e-05, "loss": 0.0351, "step": 139190 }, { "epoch": 0.296, "grad_norm": 0.05141975358128548, "learning_rate": 1.1604679145041489e-05, "loss": 0.034, "step": 139200 }, { "epoch": 0.29605, "grad_norm": 0.04355557635426521, "learning_rate": 1.1601189114693531e-05, "loss": 0.0326, "step": 139210 }, { "epoch": 0.2961, "grad_norm": 0.054277196526527405, "learning_rate": 1.1597699450664028e-05, "loss": 0.0321, "step": 139220 }, { "epoch": 0.29615, "grad_norm": 0.058067500591278076, "learning_rate": 1.159421015304839e-05, "loss": 0.0333, "step": 139230 }, { "epoch": 0.2962, "grad_norm": 0.05233805626630783, "learning_rate": 1.1590721221942e-05, "loss": 0.0328, "step": 139240 }, { "epoch": 0.29625, "grad_norm": 0.05005316063761711, "learning_rate": 1.1587232657440264e-05, "loss": 0.033, "step": 139250 }, { "epoch": 0.2963, "grad_norm": 0.06589703261852264, "learning_rate": 1.1583744459638545e-05, "loss": 0.0327, "step": 139260 }, { "epoch": 0.29635, "grad_norm": 0.05952505022287369, "learning_rate": 1.1580256628632208e-05, "loss": 0.0337, "step": 139270 }, { "epoch": 0.2964, "grad_norm": 0.0663764625787735, "learning_rate": 1.1576769164516618e-05, "loss": 0.034, "step": 139280 }, { "epoch": 0.29645, "grad_norm": 0.06101415678858757, "learning_rate": 1.157328206738711e-05, "loss": 0.0343, "step": 139290 }, { "epoch": 0.2965, "grad_norm": 0.060911625623703, "learning_rate": 1.1569795337339035e-05, "loss": 0.0331, "step": 139300 }, { "epoch": 0.29655, "grad_norm": 0.05092538520693779, "learning_rate": 1.1566308974467707e-05, "loss": 0.0335, "step": 139310 }, { "epoch": 0.2966, "grad_norm": 0.056884728372097015, "learning_rate": 1.1562822978868449e-05, "loss": 0.0323, "step": 139320 }, { "epoch": 0.29665, "grad_norm": 0.06848115473985672, "learning_rate": 1.1559337350636552e-05, "loss": 0.0338, "step": 139330 }, { "epoch": 0.2967, "grad_norm": 0.05323183164000511, "learning_rate": 1.1555852089867329e-05, "loss": 0.0321, "step": 139340 }, { "epoch": 0.29675, "grad_norm": 0.05719597637653351, "learning_rate": 1.155236719665606e-05, "loss": 0.0328, "step": 139350 }, { "epoch": 0.2968, "grad_norm": 0.06035812571644783, "learning_rate": 1.1548882671098014e-05, "loss": 0.0357, "step": 139360 }, { "epoch": 0.29685, "grad_norm": 0.061590977013111115, "learning_rate": 1.1545398513288469e-05, "loss": 0.0325, "step": 139370 }, { "epoch": 0.2969, "grad_norm": 0.06258758902549744, "learning_rate": 1.1541914723322664e-05, "loss": 0.0324, "step": 139380 }, { "epoch": 0.29695, "grad_norm": 0.060489144176244736, "learning_rate": 1.1538431301295873e-05, "loss": 0.0333, "step": 139390 }, { "epoch": 0.297, "grad_norm": 0.10881593078374863, "learning_rate": 1.1534948247303295e-05, "loss": 0.0337, "step": 139400 }, { "epoch": 0.29705, "grad_norm": 0.05411689355969429, "learning_rate": 1.1531465561440174e-05, "loss": 0.0333, "step": 139410 }, { "epoch": 0.2971, "grad_norm": 0.0653669610619545, "learning_rate": 1.1527983243801734e-05, "loss": 0.0333, "step": 139420 }, { "epoch": 0.29715, "grad_norm": 0.06009123474359512, "learning_rate": 1.1524501294483173e-05, "loss": 0.0326, "step": 139430 }, { "epoch": 0.2972, "grad_norm": 0.05364300683140755, "learning_rate": 1.1521019713579682e-05, "loss": 0.0328, "step": 139440 }, { "epoch": 0.29725, "grad_norm": 0.058414604514837265, "learning_rate": 1.1517538501186437e-05, "loss": 0.0319, "step": 139450 }, { "epoch": 0.2973, "grad_norm": 0.06450974941253662, "learning_rate": 1.1514057657398624e-05, "loss": 0.0333, "step": 139460 }, { "epoch": 0.29735, "grad_norm": 0.06400681287050247, "learning_rate": 1.151057718231143e-05, "loss": 0.0328, "step": 139470 }, { "epoch": 0.2974, "grad_norm": 0.053194813430309296, "learning_rate": 1.1507097076019967e-05, "loss": 0.0318, "step": 139480 }, { "epoch": 0.29745, "grad_norm": 0.06602397561073303, "learning_rate": 1.1503617338619413e-05, "loss": 0.033, "step": 139490 }, { "epoch": 0.2975, "grad_norm": 0.07012414187192917, "learning_rate": 1.150013797020488e-05, "loss": 0.0345, "step": 139500 }, { "epoch": 0.29755, "grad_norm": 0.059558626264333725, "learning_rate": 1.1496658970871513e-05, "loss": 0.0328, "step": 139510 }, { "epoch": 0.2976, "grad_norm": 0.05656155198812485, "learning_rate": 1.1493180340714416e-05, "loss": 0.0327, "step": 139520 }, { "epoch": 0.29765, "grad_norm": 0.059396471828222275, "learning_rate": 1.1489702079828684e-05, "loss": 0.0345, "step": 139530 }, { "epoch": 0.2977, "grad_norm": 0.04921974241733551, "learning_rate": 1.148622418830943e-05, "loss": 0.0358, "step": 139540 }, { "epoch": 0.29775, "grad_norm": 0.05693204700946808, "learning_rate": 1.1482746666251734e-05, "loss": 0.0336, "step": 139550 }, { "epoch": 0.2978, "grad_norm": 0.055195748805999756, "learning_rate": 1.1479269513750662e-05, "loss": 0.035, "step": 139560 }, { "epoch": 0.29785, "grad_norm": 0.04758436605334282, "learning_rate": 1.1475792730901275e-05, "loss": 0.0343, "step": 139570 }, { "epoch": 0.2979, "grad_norm": 0.06030312925577164, "learning_rate": 1.1472316317798643e-05, "loss": 0.035, "step": 139580 }, { "epoch": 0.29795, "grad_norm": 0.052978452295064926, "learning_rate": 1.146884027453779e-05, "loss": 0.0358, "step": 139590 }, { "epoch": 0.298, "grad_norm": 0.054513029754161835, "learning_rate": 1.1465364601213771e-05, "loss": 0.0328, "step": 139600 }, { "epoch": 0.29805, "grad_norm": 0.05603921785950661, "learning_rate": 1.1461889297921599e-05, "loss": 0.0343, "step": 139610 }, { "epoch": 0.2981, "grad_norm": 0.057258982211351395, "learning_rate": 1.1458414364756275e-05, "loss": 0.0338, "step": 139620 }, { "epoch": 0.29815, "grad_norm": 0.05240438133478165, "learning_rate": 1.145493980181283e-05, "loss": 0.0329, "step": 139630 }, { "epoch": 0.2982, "grad_norm": 0.04709576070308685, "learning_rate": 1.1451465609186238e-05, "loss": 0.0323, "step": 139640 }, { "epoch": 0.29825, "grad_norm": 0.044889263808727264, "learning_rate": 1.1447991786971479e-05, "loss": 0.0321, "step": 139650 }, { "epoch": 0.2983, "grad_norm": 0.0566890612244606, "learning_rate": 1.1444518335263543e-05, "loss": 0.0319, "step": 139660 }, { "epoch": 0.29835, "grad_norm": 0.050611212849617004, "learning_rate": 1.1441045254157373e-05, "loss": 0.0325, "step": 139670 }, { "epoch": 0.2984, "grad_norm": 0.05062335729598999, "learning_rate": 1.143757254374795e-05, "loss": 0.033, "step": 139680 }, { "epoch": 0.29845, "grad_norm": 0.0611569844186306, "learning_rate": 1.143410020413018e-05, "loss": 0.0323, "step": 139690 }, { "epoch": 0.2985, "grad_norm": 0.056232936680316925, "learning_rate": 1.1430628235399025e-05, "loss": 0.0329, "step": 139700 }, { "epoch": 0.29855, "grad_norm": 0.0519404299557209, "learning_rate": 1.1427156637649384e-05, "loss": 0.0345, "step": 139710 }, { "epoch": 0.2986, "grad_norm": 0.07048199325799942, "learning_rate": 1.1423685410976193e-05, "loss": 0.033, "step": 139720 }, { "epoch": 0.29865, "grad_norm": 0.06612701714038849, "learning_rate": 1.142021455547434e-05, "loss": 0.0351, "step": 139730 }, { "epoch": 0.2987, "grad_norm": 0.07865101844072342, "learning_rate": 1.141674407123871e-05, "loss": 0.0331, "step": 139740 }, { "epoch": 0.29875, "grad_norm": 0.07165993750095367, "learning_rate": 1.1413273958364207e-05, "loss": 0.0313, "step": 139750 }, { "epoch": 0.2988, "grad_norm": 0.05868015065789223, "learning_rate": 1.1409804216945688e-05, "loss": 0.0324, "step": 139760 }, { "epoch": 0.29885, "grad_norm": 0.052018482238054276, "learning_rate": 1.1406334847078015e-05, "loss": 0.032, "step": 139770 }, { "epoch": 0.2989, "grad_norm": 0.05405759438872337, "learning_rate": 1.1402865848856031e-05, "loss": 0.032, "step": 139780 }, { "epoch": 0.29895, "grad_norm": 0.046009961515665054, "learning_rate": 1.1399397222374588e-05, "loss": 0.0312, "step": 139790 }, { "epoch": 0.299, "grad_norm": 0.0600450374186039, "learning_rate": 1.1395928967728526e-05, "loss": 0.0327, "step": 139800 }, { "epoch": 0.29905, "grad_norm": 0.053973618894815445, "learning_rate": 1.1392461085012655e-05, "loss": 0.032, "step": 139810 }, { "epoch": 0.2991, "grad_norm": 0.05563103035092354, "learning_rate": 1.1388993574321782e-05, "loss": 0.0308, "step": 139820 }, { "epoch": 0.29915, "grad_norm": 0.05406641215085983, "learning_rate": 1.1385526435750705e-05, "loss": 0.0336, "step": 139830 }, { "epoch": 0.2992, "grad_norm": 0.05502977594733238, "learning_rate": 1.138205966939423e-05, "loss": 0.0314, "step": 139840 }, { "epoch": 0.29925, "grad_norm": 0.06398873031139374, "learning_rate": 1.1378593275347123e-05, "loss": 0.033, "step": 139850 }, { "epoch": 0.2993, "grad_norm": 0.06901369243860245, "learning_rate": 1.1375127253704155e-05, "loss": 0.0348, "step": 139860 }, { "epoch": 0.29935, "grad_norm": 0.08909549564123154, "learning_rate": 1.1371661604560096e-05, "loss": 0.0333, "step": 139870 }, { "epoch": 0.2994, "grad_norm": 0.06168742850422859, "learning_rate": 1.1368196328009682e-05, "loss": 0.032, "step": 139880 }, { "epoch": 0.29945, "grad_norm": 0.05655650794506073, "learning_rate": 1.1364731424147674e-05, "loss": 0.0357, "step": 139890 }, { "epoch": 0.2995, "grad_norm": 0.06828747689723969, "learning_rate": 1.136126689306877e-05, "loss": 0.0348, "step": 139900 }, { "epoch": 0.29955, "grad_norm": 0.05395118519663811, "learning_rate": 1.1357802734867703e-05, "loss": 0.0331, "step": 139910 }, { "epoch": 0.2996, "grad_norm": 0.05931980162858963, "learning_rate": 1.1354338949639196e-05, "loss": 0.0378, "step": 139920 }, { "epoch": 0.29965, "grad_norm": 0.05989324674010277, "learning_rate": 1.1350875537477935e-05, "loss": 0.0328, "step": 139930 }, { "epoch": 0.2997, "grad_norm": 0.04802591726183891, "learning_rate": 1.134741249847861e-05, "loss": 0.0321, "step": 139940 }, { "epoch": 0.29975, "grad_norm": 0.05060574784874916, "learning_rate": 1.1343949832735887e-05, "loss": 0.0326, "step": 139950 }, { "epoch": 0.2998, "grad_norm": 0.05629558488726616, "learning_rate": 1.1340487540344455e-05, "loss": 0.0331, "step": 139960 }, { "epoch": 0.29985, "grad_norm": 0.04951554164290428, "learning_rate": 1.133702562139896e-05, "loss": 0.032, "step": 139970 }, { "epoch": 0.2999, "grad_norm": 0.06243675947189331, "learning_rate": 1.1333564075994047e-05, "loss": 0.0333, "step": 139980 }, { "epoch": 0.29995, "grad_norm": 0.055823612958192825, "learning_rate": 1.1330102904224365e-05, "loss": 0.0327, "step": 139990 }, { "epoch": 0.3, "grad_norm": 0.05091789364814758, "learning_rate": 1.1326642106184524e-05, "loss": 0.0323, "step": 140000 }, { "epoch": 0.30005, "grad_norm": 0.04933144152164459, "learning_rate": 1.1323181681969162e-05, "loss": 0.0329, "step": 140010 }, { "epoch": 0.3001, "grad_norm": 0.05192696675658226, "learning_rate": 1.1319721631672872e-05, "loss": 0.0332, "step": 140020 }, { "epoch": 0.30015, "grad_norm": 0.04468753933906555, "learning_rate": 1.1316261955390246e-05, "loss": 0.032, "step": 140030 }, { "epoch": 0.3002, "grad_norm": 0.04795127734541893, "learning_rate": 1.1312802653215886e-05, "loss": 0.0326, "step": 140040 }, { "epoch": 0.30025, "grad_norm": 0.05080784112215042, "learning_rate": 1.130934372524436e-05, "loss": 0.0331, "step": 140050 }, { "epoch": 0.3003, "grad_norm": 0.0521916002035141, "learning_rate": 1.1305885171570232e-05, "loss": 0.0332, "step": 140060 }, { "epoch": 0.30035, "grad_norm": 0.05924517661333084, "learning_rate": 1.130242699228805e-05, "loss": 0.0341, "step": 140070 }, { "epoch": 0.3004, "grad_norm": 0.07030452787876129, "learning_rate": 1.1298969187492378e-05, "loss": 0.0325, "step": 140080 }, { "epoch": 0.30045, "grad_norm": 0.05708425119519234, "learning_rate": 1.1295511757277732e-05, "loss": 0.0325, "step": 140090 }, { "epoch": 0.3005, "grad_norm": 0.06188962981104851, "learning_rate": 1.1292054701738656e-05, "loss": 0.034, "step": 140100 }, { "epoch": 0.30055, "grad_norm": 0.06431712955236435, "learning_rate": 1.1288598020969651e-05, "loss": 0.0336, "step": 140110 }, { "epoch": 0.3006, "grad_norm": 0.06019863858819008, "learning_rate": 1.128514171506522e-05, "loss": 0.0342, "step": 140120 }, { "epoch": 0.30065, "grad_norm": 0.05877012386918068, "learning_rate": 1.128168578411987e-05, "loss": 0.0336, "step": 140130 }, { "epoch": 0.3007, "grad_norm": 0.05326351523399353, "learning_rate": 1.1278230228228076e-05, "loss": 0.034, "step": 140140 }, { "epoch": 0.30075, "grad_norm": 0.056856438517570496, "learning_rate": 1.1274775047484312e-05, "loss": 0.0333, "step": 140150 }, { "epoch": 0.3008, "grad_norm": 0.05016437917947769, "learning_rate": 1.1271320241983033e-05, "loss": 0.034, "step": 140160 }, { "epoch": 0.30085, "grad_norm": 0.05082874372601509, "learning_rate": 1.1267865811818701e-05, "loss": 0.0347, "step": 140170 }, { "epoch": 0.3009, "grad_norm": 0.057839758694171906, "learning_rate": 1.126441175708578e-05, "loss": 0.0344, "step": 140180 }, { "epoch": 0.30095, "grad_norm": 0.04288686066865921, "learning_rate": 1.1260958077878658e-05, "loss": 0.0341, "step": 140190 }, { "epoch": 0.301, "grad_norm": 0.05189139395952225, "learning_rate": 1.1257504774291793e-05, "loss": 0.0334, "step": 140200 }, { "epoch": 0.30105, "grad_norm": 0.051603466272354126, "learning_rate": 1.1254051846419576e-05, "loss": 0.0351, "step": 140210 }, { "epoch": 0.3011, "grad_norm": 0.054475102573633194, "learning_rate": 1.1250599294356425e-05, "loss": 0.0322, "step": 140220 }, { "epoch": 0.30115, "grad_norm": 0.04744827747344971, "learning_rate": 1.1247147118196724e-05, "loss": 0.0329, "step": 140230 }, { "epoch": 0.3012, "grad_norm": 0.07402417808771133, "learning_rate": 1.1243695318034848e-05, "loss": 0.0352, "step": 140240 }, { "epoch": 0.30125, "grad_norm": 0.05516153201460838, "learning_rate": 1.124024389396518e-05, "loss": 0.034, "step": 140250 }, { "epoch": 0.3013, "grad_norm": 0.04782964661717415, "learning_rate": 1.1236792846082072e-05, "loss": 0.034, "step": 140260 }, { "epoch": 0.30135, "grad_norm": 0.058844517916440964, "learning_rate": 1.1233342174479883e-05, "loss": 0.0338, "step": 140270 }, { "epoch": 0.3014, "grad_norm": 0.05295547470450401, "learning_rate": 1.1229891879252935e-05, "loss": 0.0329, "step": 140280 }, { "epoch": 0.30145, "grad_norm": 0.05227420851588249, "learning_rate": 1.1226441960495567e-05, "loss": 0.033, "step": 140290 }, { "epoch": 0.3015, "grad_norm": 0.048177167773246765, "learning_rate": 1.1222992418302114e-05, "loss": 0.033, "step": 140300 }, { "epoch": 0.30155, "grad_norm": 0.057371679693460464, "learning_rate": 1.1219543252766874e-05, "loss": 0.0367, "step": 140310 }, { "epoch": 0.3016, "grad_norm": 0.04433856159448624, "learning_rate": 1.1216094463984141e-05, "loss": 0.0329, "step": 140320 }, { "epoch": 0.30165, "grad_norm": 0.07420659810304642, "learning_rate": 1.1212646052048198e-05, "loss": 0.0339, "step": 140330 }, { "epoch": 0.3017, "grad_norm": 0.05632463097572327, "learning_rate": 1.1209198017053344e-05, "loss": 0.0329, "step": 140340 }, { "epoch": 0.30175, "grad_norm": 0.05505290627479553, "learning_rate": 1.1205750359093833e-05, "loss": 0.0328, "step": 140350 }, { "epoch": 0.3018, "grad_norm": 0.048898130655288696, "learning_rate": 1.1202303078263917e-05, "loss": 0.0328, "step": 140360 }, { "epoch": 0.30185, "grad_norm": 0.05216413736343384, "learning_rate": 1.119885617465786e-05, "loss": 0.0339, "step": 140370 }, { "epoch": 0.3019, "grad_norm": 0.058374982327222824, "learning_rate": 1.1195409648369881e-05, "loss": 0.0329, "step": 140380 }, { "epoch": 0.30195, "grad_norm": 0.04453812912106514, "learning_rate": 1.1191963499494234e-05, "loss": 0.033, "step": 140390 }, { "epoch": 0.302, "grad_norm": 0.051837850362062454, "learning_rate": 1.11885177281251e-05, "loss": 0.0331, "step": 140400 }, { "epoch": 0.30205, "grad_norm": 0.05339343100786209, "learning_rate": 1.1185072334356702e-05, "loss": 0.0338, "step": 140410 }, { "epoch": 0.3021, "grad_norm": 0.05379300191998482, "learning_rate": 1.1181627318283247e-05, "loss": 0.0328, "step": 140420 }, { "epoch": 0.30215, "grad_norm": 0.05971743166446686, "learning_rate": 1.1178182679998909e-05, "loss": 0.0342, "step": 140430 }, { "epoch": 0.3022, "grad_norm": 0.07061241567134857, "learning_rate": 1.1174738419597863e-05, "loss": 0.0326, "step": 140440 }, { "epoch": 0.30225, "grad_norm": 0.06061761453747749, "learning_rate": 1.1171294537174264e-05, "loss": 0.0336, "step": 140450 }, { "epoch": 0.3023, "grad_norm": 0.06953947991132736, "learning_rate": 1.116785103282229e-05, "loss": 0.0344, "step": 140460 }, { "epoch": 0.30235, "grad_norm": 0.05975327268242836, "learning_rate": 1.116440790663607e-05, "loss": 0.0324, "step": 140470 }, { "epoch": 0.3024, "grad_norm": 0.05084272101521492, "learning_rate": 1.1160965158709732e-05, "loss": 0.0338, "step": 140480 }, { "epoch": 0.30245, "grad_norm": 0.051011331379413605, "learning_rate": 1.1157522789137415e-05, "loss": 0.0328, "step": 140490 }, { "epoch": 0.3025, "grad_norm": 0.05884523317217827, "learning_rate": 1.1154080798013217e-05, "loss": 0.0331, "step": 140500 }, { "epoch": 0.30255, "grad_norm": 0.0666908249258995, "learning_rate": 1.1150639185431258e-05, "loss": 0.0338, "step": 140510 }, { "epoch": 0.3026, "grad_norm": 0.07132396847009659, "learning_rate": 1.1147197951485619e-05, "loss": 0.033, "step": 140520 }, { "epoch": 0.30265, "grad_norm": 0.06169174239039421, "learning_rate": 1.1143757096270386e-05, "loss": 0.0331, "step": 140530 }, { "epoch": 0.3027, "grad_norm": 0.11681129783391953, "learning_rate": 1.1140316619879615e-05, "loss": 0.034, "step": 140540 }, { "epoch": 0.30275, "grad_norm": 0.06157153099775314, "learning_rate": 1.1136876522407393e-05, "loss": 0.0337, "step": 140550 }, { "epoch": 0.3028, "grad_norm": 0.06677278131246567, "learning_rate": 1.1133436803947758e-05, "loss": 0.0335, "step": 140560 }, { "epoch": 0.30285, "grad_norm": 0.0701901838183403, "learning_rate": 1.1129997464594743e-05, "loss": 0.0346, "step": 140570 }, { "epoch": 0.3029, "grad_norm": 0.07152388244867325, "learning_rate": 1.1126558504442397e-05, "loss": 0.0336, "step": 140580 }, { "epoch": 0.30295, "grad_norm": 0.0734538659453392, "learning_rate": 1.1123119923584718e-05, "loss": 0.035, "step": 140590 }, { "epoch": 0.303, "grad_norm": 0.07059818506240845, "learning_rate": 1.1119681722115746e-05, "loss": 0.036, "step": 140600 }, { "epoch": 0.30305, "grad_norm": 0.0695488229393959, "learning_rate": 1.1116243900129441e-05, "loss": 0.0341, "step": 140610 }, { "epoch": 0.3031, "grad_norm": 0.04940784350037575, "learning_rate": 1.1112806457719816e-05, "loss": 0.0337, "step": 140620 }, { "epoch": 0.30315, "grad_norm": 0.05950392410159111, "learning_rate": 1.1109369394980851e-05, "loss": 0.0339, "step": 140630 }, { "epoch": 0.3032, "grad_norm": 0.04839714616537094, "learning_rate": 1.110593271200651e-05, "loss": 0.0344, "step": 140640 }, { "epoch": 0.30325, "grad_norm": 0.04441726580262184, "learning_rate": 1.1102496408890747e-05, "loss": 0.0338, "step": 140650 }, { "epoch": 0.3033, "grad_norm": 0.04511004686355591, "learning_rate": 1.1099060485727502e-05, "loss": 0.0345, "step": 140660 }, { "epoch": 0.30335, "grad_norm": 0.07027915120124817, "learning_rate": 1.1095624942610725e-05, "loss": 0.0347, "step": 140670 }, { "epoch": 0.3034, "grad_norm": 0.05691104754805565, "learning_rate": 1.1092189779634355e-05, "loss": 0.0336, "step": 140680 }, { "epoch": 0.30345, "grad_norm": 0.05413910001516342, "learning_rate": 1.108875499689227e-05, "loss": 0.0342, "step": 140690 }, { "epoch": 0.3035, "grad_norm": 0.05556439980864525, "learning_rate": 1.108532059447841e-05, "loss": 0.0346, "step": 140700 }, { "epoch": 0.30355, "grad_norm": 0.05685647204518318, "learning_rate": 1.1081886572486646e-05, "loss": 0.0355, "step": 140710 }, { "epoch": 0.3036, "grad_norm": 0.061345312744379044, "learning_rate": 1.1078452931010883e-05, "loss": 0.0332, "step": 140720 }, { "epoch": 0.30365, "grad_norm": 0.05181057006120682, "learning_rate": 1.107501967014499e-05, "loss": 0.0339, "step": 140730 }, { "epoch": 0.3037, "grad_norm": 0.05965716391801834, "learning_rate": 1.1071586789982816e-05, "loss": 0.0364, "step": 140740 }, { "epoch": 0.30375, "grad_norm": 0.06506678462028503, "learning_rate": 1.1068154290618235e-05, "loss": 0.0343, "step": 140750 }, { "epoch": 0.3038, "grad_norm": 0.060299888253211975, "learning_rate": 1.1064722172145084e-05, "loss": 0.0332, "step": 140760 }, { "epoch": 0.30385, "grad_norm": 0.05024135485291481, "learning_rate": 1.1061290434657193e-05, "loss": 0.0323, "step": 140770 }, { "epoch": 0.3039, "grad_norm": 0.053525201976299286, "learning_rate": 1.1057859078248376e-05, "loss": 0.035, "step": 140780 }, { "epoch": 0.30395, "grad_norm": 0.05458205193281174, "learning_rate": 1.1054428103012463e-05, "loss": 0.0346, "step": 140790 }, { "epoch": 0.304, "grad_norm": 0.07601752132177353, "learning_rate": 1.1050997509043237e-05, "loss": 0.0326, "step": 140800 }, { "epoch": 0.30405, "grad_norm": 0.06448765844106674, "learning_rate": 1.1047567296434508e-05, "loss": 0.0341, "step": 140810 }, { "epoch": 0.3041, "grad_norm": 0.044361311942338943, "learning_rate": 1.1044137465280047e-05, "loss": 0.034, "step": 140820 }, { "epoch": 0.30415, "grad_norm": 0.06594926118850708, "learning_rate": 1.1040708015673616e-05, "loss": 0.0331, "step": 140830 }, { "epoch": 0.3042, "grad_norm": 0.05299568921327591, "learning_rate": 1.1037278947708993e-05, "loss": 0.0343, "step": 140840 }, { "epoch": 0.30425, "grad_norm": 0.05918257683515549, "learning_rate": 1.1033850261479917e-05, "loss": 0.0329, "step": 140850 }, { "epoch": 0.3043, "grad_norm": 0.05580601096153259, "learning_rate": 1.103042195708012e-05, "loss": 0.0324, "step": 140860 }, { "epoch": 0.30435, "grad_norm": 0.051413439214229584, "learning_rate": 1.1026994034603347e-05, "loss": 0.0326, "step": 140870 }, { "epoch": 0.3044, "grad_norm": 0.05384482815861702, "learning_rate": 1.1023566494143298e-05, "loss": 0.0333, "step": 140880 }, { "epoch": 0.30445, "grad_norm": 0.04275573790073395, "learning_rate": 1.1020139335793711e-05, "loss": 0.0325, "step": 140890 }, { "epoch": 0.3045, "grad_norm": 0.047934915870428085, "learning_rate": 1.101671255964824e-05, "loss": 0.0338, "step": 140900 }, { "epoch": 0.30455, "grad_norm": 0.04720642417669296, "learning_rate": 1.1013286165800608e-05, "loss": 0.0321, "step": 140910 }, { "epoch": 0.3046, "grad_norm": 0.051188092678785324, "learning_rate": 1.1009860154344467e-05, "loss": 0.0328, "step": 140920 }, { "epoch": 0.30465, "grad_norm": 0.04356950521469116, "learning_rate": 1.1006434525373502e-05, "loss": 0.0309, "step": 140930 }, { "epoch": 0.3047, "grad_norm": 0.06769690662622452, "learning_rate": 1.1003009278981361e-05, "loss": 0.0337, "step": 140940 }, { "epoch": 0.30475, "grad_norm": 0.05538792163133621, "learning_rate": 1.0999584415261677e-05, "loss": 0.0326, "step": 140950 }, { "epoch": 0.3048, "grad_norm": 0.05651751160621643, "learning_rate": 1.0996159934308106e-05, "loss": 0.0339, "step": 140960 }, { "epoch": 0.30485, "grad_norm": 0.04621144384145737, "learning_rate": 1.0992735836214261e-05, "loss": 0.0335, "step": 140970 }, { "epoch": 0.3049, "grad_norm": 0.05354321002960205, "learning_rate": 1.0989312121073756e-05, "loss": 0.0365, "step": 140980 }, { "epoch": 0.30495, "grad_norm": 0.06050272285938263, "learning_rate": 1.0985888788980184e-05, "loss": 0.0332, "step": 140990 }, { "epoch": 0.305, "grad_norm": 0.06105947494506836, "learning_rate": 1.0982465840027147e-05, "loss": 0.0331, "step": 141000 }, { "epoch": 0.30505, "grad_norm": 0.06364921480417252, "learning_rate": 1.097904327430824e-05, "loss": 0.0343, "step": 141010 }, { "epoch": 0.3051, "grad_norm": 0.06211453303694725, "learning_rate": 1.0975621091917022e-05, "loss": 0.0326, "step": 141020 }, { "epoch": 0.30515, "grad_norm": 0.05600810796022415, "learning_rate": 1.0972199292947052e-05, "loss": 0.0324, "step": 141030 }, { "epoch": 0.3052, "grad_norm": 0.049081169068813324, "learning_rate": 1.0968777877491875e-05, "loss": 0.0318, "step": 141040 }, { "epoch": 0.30525, "grad_norm": 0.0588548518717289, "learning_rate": 1.096535684564505e-05, "loss": 0.0337, "step": 141050 }, { "epoch": 0.3053, "grad_norm": 0.05001838132739067, "learning_rate": 1.0961936197500097e-05, "loss": 0.0334, "step": 141060 }, { "epoch": 0.30535, "grad_norm": 0.05359674617648125, "learning_rate": 1.0958515933150524e-05, "loss": 0.0327, "step": 141070 }, { "epoch": 0.3054, "grad_norm": 0.05704985931515694, "learning_rate": 1.095509605268986e-05, "loss": 0.0337, "step": 141080 }, { "epoch": 0.30545, "grad_norm": 0.05883404240012169, "learning_rate": 1.0951676556211583e-05, "loss": 0.0324, "step": 141090 }, { "epoch": 0.3055, "grad_norm": 0.058890387415885925, "learning_rate": 1.094825744380921e-05, "loss": 0.0329, "step": 141100 }, { "epoch": 0.30555, "grad_norm": 0.07341659069061279, "learning_rate": 1.0944838715576181e-05, "loss": 0.0344, "step": 141110 }, { "epoch": 0.3056, "grad_norm": 0.058523885905742645, "learning_rate": 1.0941420371605981e-05, "loss": 0.0339, "step": 141120 }, { "epoch": 0.30565, "grad_norm": 0.045745741575956345, "learning_rate": 1.0938002411992077e-05, "loss": 0.0339, "step": 141130 }, { "epoch": 0.3057, "grad_norm": 0.049728427082300186, "learning_rate": 1.0934584836827904e-05, "loss": 0.0338, "step": 141140 }, { "epoch": 0.30575, "grad_norm": 0.07706481218338013, "learning_rate": 1.0931167646206896e-05, "loss": 0.0339, "step": 141150 }, { "epoch": 0.3058, "grad_norm": 0.05314839258790016, "learning_rate": 1.0927750840222473e-05, "loss": 0.0366, "step": 141160 }, { "epoch": 0.30585, "grad_norm": 0.046150967478752136, "learning_rate": 1.0924334418968064e-05, "loss": 0.0338, "step": 141170 }, { "epoch": 0.3059, "grad_norm": 0.05311834439635277, "learning_rate": 1.092091838253706e-05, "loss": 0.036, "step": 141180 }, { "epoch": 0.30595, "grad_norm": 0.05920974910259247, "learning_rate": 1.0917502731022853e-05, "loss": 0.035, "step": 141190 }, { "epoch": 0.306, "grad_norm": 0.04775369539856911, "learning_rate": 1.0914087464518839e-05, "loss": 0.0347, "step": 141200 }, { "epoch": 0.30605, "grad_norm": 0.05612358823418617, "learning_rate": 1.091067258311837e-05, "loss": 0.0338, "step": 141210 }, { "epoch": 0.3061, "grad_norm": 0.06597428023815155, "learning_rate": 1.0907258086914832e-05, "loss": 0.035, "step": 141220 }, { "epoch": 0.30615, "grad_norm": 0.05252828449010849, "learning_rate": 1.0903843976001562e-05, "loss": 0.0327, "step": 141230 }, { "epoch": 0.3062, "grad_norm": 0.05416768416762352, "learning_rate": 1.0900430250471893e-05, "loss": 0.0335, "step": 141240 }, { "epoch": 0.30625, "grad_norm": 0.062133170664310455, "learning_rate": 1.0897016910419172e-05, "loss": 0.0327, "step": 141250 }, { "epoch": 0.3063, "grad_norm": 0.05661081522703171, "learning_rate": 1.0893603955936712e-05, "loss": 0.0336, "step": 141260 }, { "epoch": 0.30635, "grad_norm": 0.05456758290529251, "learning_rate": 1.0890191387117821e-05, "loss": 0.0334, "step": 141270 }, { "epoch": 0.3064, "grad_norm": 0.060421284288167953, "learning_rate": 1.0886779204055786e-05, "loss": 0.0364, "step": 141280 }, { "epoch": 0.30645, "grad_norm": 0.06277312338352203, "learning_rate": 1.0883367406843914e-05, "loss": 0.0325, "step": 141290 }, { "epoch": 0.3065, "grad_norm": 0.051737893372774124, "learning_rate": 1.0879955995575466e-05, "loss": 0.0319, "step": 141300 }, { "epoch": 0.30655, "grad_norm": 0.04213999956846237, "learning_rate": 1.0876544970343728e-05, "loss": 0.0316, "step": 141310 }, { "epoch": 0.3066, "grad_norm": 0.048217009752988815, "learning_rate": 1.0873134331241942e-05, "loss": 0.032, "step": 141320 }, { "epoch": 0.30665, "grad_norm": 0.04515504091978073, "learning_rate": 1.0869724078363344e-05, "loss": 0.0329, "step": 141330 }, { "epoch": 0.3067, "grad_norm": 0.058983009308576584, "learning_rate": 1.0866314211801193e-05, "loss": 0.0322, "step": 141340 }, { "epoch": 0.30675, "grad_norm": 0.06672867387533188, "learning_rate": 1.0862904731648705e-05, "loss": 0.0337, "step": 141350 }, { "epoch": 0.3068, "grad_norm": 0.05784101039171219, "learning_rate": 1.0859495637999086e-05, "loss": 0.0339, "step": 141360 }, { "epoch": 0.30685, "grad_norm": 0.05413747578859329, "learning_rate": 1.0856086930945536e-05, "loss": 0.0344, "step": 141370 }, { "epoch": 0.3069, "grad_norm": 0.0900455191731453, "learning_rate": 1.0852678610581257e-05, "loss": 0.0333, "step": 141380 }, { "epoch": 0.30695, "grad_norm": 0.06914813816547394, "learning_rate": 1.0849270676999446e-05, "loss": 0.0332, "step": 141390 }, { "epoch": 0.307, "grad_norm": 0.07659492641687393, "learning_rate": 1.084586313029324e-05, "loss": 0.0329, "step": 141400 }, { "epoch": 0.30705, "grad_norm": 0.06035493686795235, "learning_rate": 1.0842455970555832e-05, "loss": 0.0333, "step": 141410 }, { "epoch": 0.3071, "grad_norm": 0.07281692326068878, "learning_rate": 1.0839049197880347e-05, "loss": 0.0338, "step": 141420 }, { "epoch": 0.30715, "grad_norm": 0.0637575164437294, "learning_rate": 1.0835642812359945e-05, "loss": 0.0331, "step": 141430 }, { "epoch": 0.3072, "grad_norm": 0.06447659432888031, "learning_rate": 1.0832236814087748e-05, "loss": 0.0332, "step": 141440 }, { "epoch": 0.30725, "grad_norm": 0.06269050389528275, "learning_rate": 1.0828831203156865e-05, "loss": 0.034, "step": 141450 }, { "epoch": 0.3073, "grad_norm": 0.04924452304840088, "learning_rate": 1.0825425979660422e-05, "loss": 0.0324, "step": 141460 }, { "epoch": 0.30735, "grad_norm": 0.0564417727291584, "learning_rate": 1.0822021143691508e-05, "loss": 0.034, "step": 141470 }, { "epoch": 0.3074, "grad_norm": 0.04732852429151535, "learning_rate": 1.081861669534321e-05, "loss": 0.0321, "step": 141480 }, { "epoch": 0.30745, "grad_norm": 0.048619452863931656, "learning_rate": 1.0815212634708593e-05, "loss": 0.0333, "step": 141490 }, { "epoch": 0.3075, "grad_norm": 0.05323924869298935, "learning_rate": 1.0811808961880734e-05, "loss": 0.0338, "step": 141500 }, { "epoch": 0.30755, "grad_norm": 0.12367910891771317, "learning_rate": 1.0808405676952699e-05, "loss": 0.034, "step": 141510 }, { "epoch": 0.3076, "grad_norm": 0.05822361260652542, "learning_rate": 1.0805002780017518e-05, "loss": 0.0338, "step": 141520 }, { "epoch": 0.30765, "grad_norm": 0.05467531830072403, "learning_rate": 1.080160027116823e-05, "loss": 0.0335, "step": 141530 }, { "epoch": 0.3077, "grad_norm": 0.047766610980033875, "learning_rate": 1.0798198150497848e-05, "loss": 0.0337, "step": 141540 }, { "epoch": 0.30775, "grad_norm": 0.06274426728487015, "learning_rate": 1.0794796418099401e-05, "loss": 0.0371, "step": 141550 }, { "epoch": 0.3078, "grad_norm": 0.05022158846259117, "learning_rate": 1.0791395074065883e-05, "loss": 0.0354, "step": 141560 }, { "epoch": 0.30785, "grad_norm": 0.07846273481845856, "learning_rate": 1.0787994118490278e-05, "loss": 0.0356, "step": 141570 }, { "epoch": 0.3079, "grad_norm": 0.05049419030547142, "learning_rate": 1.0784593551465582e-05, "loss": 0.0344, "step": 141580 }, { "epoch": 0.30795, "grad_norm": 0.06500475108623505, "learning_rate": 1.078119337308475e-05, "loss": 0.034, "step": 141590 }, { "epoch": 0.308, "grad_norm": 0.05722379684448242, "learning_rate": 1.0777793583440768e-05, "loss": 0.0334, "step": 141600 }, { "epoch": 0.30805, "grad_norm": 0.050137463957071304, "learning_rate": 1.077439418262655e-05, "loss": 0.0338, "step": 141610 }, { "epoch": 0.3081, "grad_norm": 0.05086752027273178, "learning_rate": 1.0770995170735046e-05, "loss": 0.0328, "step": 141620 }, { "epoch": 0.30815, "grad_norm": 0.04671629145741463, "learning_rate": 1.0767596547859202e-05, "loss": 0.0343, "step": 141630 }, { "epoch": 0.3082, "grad_norm": 0.06504970788955688, "learning_rate": 1.076419831409192e-05, "loss": 0.0336, "step": 141640 }, { "epoch": 0.30825, "grad_norm": 0.04972356930375099, "learning_rate": 1.0760800469526106e-05, "loss": 0.0334, "step": 141650 }, { "epoch": 0.3083, "grad_norm": 0.061417948454618454, "learning_rate": 1.075740301425465e-05, "loss": 0.0329, "step": 141660 }, { "epoch": 0.30835, "grad_norm": 0.0457824282348156, "learning_rate": 1.0754005948370454e-05, "loss": 0.0337, "step": 141670 }, { "epoch": 0.3084, "grad_norm": 0.046346407383680344, "learning_rate": 1.0750609271966384e-05, "loss": 0.0332, "step": 141680 }, { "epoch": 0.30845, "grad_norm": 0.05328997224569321, "learning_rate": 1.0747212985135293e-05, "loss": 0.034, "step": 141690 }, { "epoch": 0.3085, "grad_norm": 0.054923467338085175, "learning_rate": 1.0743817087970054e-05, "loss": 0.0314, "step": 141700 }, { "epoch": 0.30855, "grad_norm": 0.0563756600022316, "learning_rate": 1.0740421580563493e-05, "loss": 0.0343, "step": 141710 }, { "epoch": 0.3086, "grad_norm": 0.06679999828338623, "learning_rate": 1.0737026463008453e-05, "loss": 0.0339, "step": 141720 }, { "epoch": 0.30865, "grad_norm": 0.059505343437194824, "learning_rate": 1.0733631735397755e-05, "loss": 0.0327, "step": 141730 }, { "epoch": 0.3087, "grad_norm": 0.05167857185006142, "learning_rate": 1.0730237397824205e-05, "loss": 0.0327, "step": 141740 }, { "epoch": 0.30875, "grad_norm": 0.06931980699300766, "learning_rate": 1.0726843450380594e-05, "loss": 0.0324, "step": 141750 }, { "epoch": 0.3088, "grad_norm": 0.05788620561361313, "learning_rate": 1.0723449893159731e-05, "loss": 0.0325, "step": 141760 }, { "epoch": 0.30885, "grad_norm": 0.06421661376953125, "learning_rate": 1.0720056726254384e-05, "loss": 0.0333, "step": 141770 }, { "epoch": 0.3089, "grad_norm": 0.04746703803539276, "learning_rate": 1.0716663949757314e-05, "loss": 0.0321, "step": 141780 }, { "epoch": 0.30895, "grad_norm": 0.05642994865775108, "learning_rate": 1.0713271563761293e-05, "loss": 0.0332, "step": 141790 }, { "epoch": 0.309, "grad_norm": 0.04913509637117386, "learning_rate": 1.070987956835905e-05, "loss": 0.0332, "step": 141800 }, { "epoch": 0.30905, "grad_norm": 0.05893111974000931, "learning_rate": 1.0706487963643349e-05, "loss": 0.0348, "step": 141810 }, { "epoch": 0.3091, "grad_norm": 0.059092987328767776, "learning_rate": 1.0703096749706881e-05, "loss": 0.034, "step": 141820 }, { "epoch": 0.30915, "grad_norm": 0.05287367105484009, "learning_rate": 1.0699705926642378e-05, "loss": 0.0351, "step": 141830 }, { "epoch": 0.3092, "grad_norm": 0.05104648694396019, "learning_rate": 1.069631549454255e-05, "loss": 0.0351, "step": 141840 }, { "epoch": 0.30925, "grad_norm": 0.048764459788799286, "learning_rate": 1.0692925453500082e-05, "loss": 0.034, "step": 141850 }, { "epoch": 0.3093, "grad_norm": 0.05129481479525566, "learning_rate": 1.068953580360766e-05, "loss": 0.0344, "step": 141860 }, { "epoch": 0.30935, "grad_norm": 0.046603891998529434, "learning_rate": 1.0686146544957939e-05, "loss": 0.0322, "step": 141870 }, { "epoch": 0.3094, "grad_norm": 0.04391618072986603, "learning_rate": 1.0682757677643596e-05, "loss": 0.033, "step": 141880 }, { "epoch": 0.30945, "grad_norm": 0.06269185245037079, "learning_rate": 1.06793692017573e-05, "loss": 0.0345, "step": 141890 }, { "epoch": 0.3095, "grad_norm": 0.04640522226691246, "learning_rate": 1.0675981117391648e-05, "loss": 0.0324, "step": 141900 }, { "epoch": 0.30955, "grad_norm": 0.05266639217734337, "learning_rate": 1.0672593424639301e-05, "loss": 0.0348, "step": 141910 }, { "epoch": 0.3096, "grad_norm": 0.052203577011823654, "learning_rate": 1.0669206123592862e-05, "loss": 0.0317, "step": 141920 }, { "epoch": 0.30965, "grad_norm": 0.05076339840888977, "learning_rate": 1.0665819214344949e-05, "loss": 0.0339, "step": 141930 }, { "epoch": 0.3097, "grad_norm": 0.04586315155029297, "learning_rate": 1.0662432696988153e-05, "loss": 0.0329, "step": 141940 }, { "epoch": 0.30975, "grad_norm": 0.04728645458817482, "learning_rate": 1.0659046571615055e-05, "loss": 0.0346, "step": 141950 }, { "epoch": 0.3098, "grad_norm": 0.04644077643752098, "learning_rate": 1.0655660838318243e-05, "loss": 0.0334, "step": 141960 }, { "epoch": 0.30985, "grad_norm": 0.049580082297325134, "learning_rate": 1.0652275497190276e-05, "loss": 0.0337, "step": 141970 }, { "epoch": 0.3099, "grad_norm": 0.053173281252384186, "learning_rate": 1.0648890548323705e-05, "loss": 0.0342, "step": 141980 }, { "epoch": 0.30995, "grad_norm": 0.04500650241971016, "learning_rate": 1.0645505991811066e-05, "loss": 0.0348, "step": 141990 }, { "epoch": 0.31, "grad_norm": 0.049792829900979996, "learning_rate": 1.0642121827744911e-05, "loss": 0.0342, "step": 142000 }, { "epoch": 0.31005, "grad_norm": 0.04503854736685753, "learning_rate": 1.0638738056217742e-05, "loss": 0.0337, "step": 142010 }, { "epoch": 0.3101, "grad_norm": 0.040192991495132446, "learning_rate": 1.0635354677322087e-05, "loss": 0.0331, "step": 142020 }, { "epoch": 0.31015, "grad_norm": 0.05078494921326637, "learning_rate": 1.0631971691150438e-05, "loss": 0.0329, "step": 142030 }, { "epoch": 0.3102, "grad_norm": 0.04828430712223053, "learning_rate": 1.0628589097795277e-05, "loss": 0.0342, "step": 142040 }, { "epoch": 0.31025, "grad_norm": 0.04082063212990761, "learning_rate": 1.0625206897349102e-05, "loss": 0.0334, "step": 142050 }, { "epoch": 0.3103, "grad_norm": 0.0471375398337841, "learning_rate": 1.0621825089904369e-05, "loss": 0.0343, "step": 142060 }, { "epoch": 0.31035, "grad_norm": 0.05987219512462616, "learning_rate": 1.0618443675553527e-05, "loss": 0.037, "step": 142070 }, { "epoch": 0.3104, "grad_norm": 0.11808999627828598, "learning_rate": 1.0615062654389041e-05, "loss": 0.0344, "step": 142080 }, { "epoch": 0.31045, "grad_norm": 0.05503125488758087, "learning_rate": 1.0611682026503328e-05, "loss": 0.0347, "step": 142090 }, { "epoch": 0.3105, "grad_norm": 0.0595247745513916, "learning_rate": 1.0608301791988842e-05, "loss": 0.0359, "step": 142100 }, { "epoch": 0.31055, "grad_norm": 0.05358587205410004, "learning_rate": 1.060492195093796e-05, "loss": 0.0326, "step": 142110 }, { "epoch": 0.3106, "grad_norm": 0.06559716165065765, "learning_rate": 1.0601542503443112e-05, "loss": 0.0333, "step": 142120 }, { "epoch": 0.31065, "grad_norm": 0.05155010148882866, "learning_rate": 1.0598163449596676e-05, "loss": 0.0327, "step": 142130 }, { "epoch": 0.3107, "grad_norm": 0.05211557820439339, "learning_rate": 1.0594784789491047e-05, "loss": 0.0329, "step": 142140 }, { "epoch": 0.31075, "grad_norm": 0.05778401345014572, "learning_rate": 1.0591406523218592e-05, "loss": 0.0335, "step": 142150 }, { "epoch": 0.3108, "grad_norm": 0.05356336012482643, "learning_rate": 1.058802865087166e-05, "loss": 0.0326, "step": 142160 }, { "epoch": 0.31085, "grad_norm": 0.053282007575035095, "learning_rate": 1.058465117254262e-05, "loss": 0.0342, "step": 142170 }, { "epoch": 0.3109, "grad_norm": 0.08436008542776108, "learning_rate": 1.05812740883238e-05, "loss": 0.0336, "step": 142180 }, { "epoch": 0.31095, "grad_norm": 0.08286771178245544, "learning_rate": 1.0577897398307529e-05, "loss": 0.0328, "step": 142190 }, { "epoch": 0.311, "grad_norm": 0.0891069695353508, "learning_rate": 1.0574521102586118e-05, "loss": 0.0328, "step": 142200 }, { "epoch": 0.31105, "grad_norm": 0.06336149573326111, "learning_rate": 1.0571145201251882e-05, "loss": 0.0315, "step": 142210 }, { "epoch": 0.3111, "grad_norm": 0.06335395574569702, "learning_rate": 1.056776969439712e-05, "loss": 0.0327, "step": 142220 }, { "epoch": 0.31115, "grad_norm": 0.05544976890087128, "learning_rate": 1.0564394582114115e-05, "loss": 0.0329, "step": 142230 }, { "epoch": 0.3112, "grad_norm": 0.04723066836595535, "learning_rate": 1.056101986449514e-05, "loss": 0.0323, "step": 142240 }, { "epoch": 0.31125, "grad_norm": 0.045306917279958725, "learning_rate": 1.0557645541632447e-05, "loss": 0.0331, "step": 142250 }, { "epoch": 0.3113, "grad_norm": 0.059146132320165634, "learning_rate": 1.0554271613618308e-05, "loss": 0.035, "step": 142260 }, { "epoch": 0.31135, "grad_norm": 0.05136910825967789, "learning_rate": 1.0550898080544958e-05, "loss": 0.0329, "step": 142270 }, { "epoch": 0.3114, "grad_norm": 0.05314375460147858, "learning_rate": 1.0547524942504617e-05, "loss": 0.0335, "step": 142280 }, { "epoch": 0.31145, "grad_norm": 0.04912213236093521, "learning_rate": 1.0544152199589521e-05, "loss": 0.0319, "step": 142290 }, { "epoch": 0.3115, "grad_norm": 0.04902366176247597, "learning_rate": 1.0540779851891865e-05, "loss": 0.0322, "step": 142300 }, { "epoch": 0.31155, "grad_norm": 0.05074556544423103, "learning_rate": 1.0537407899503876e-05, "loss": 0.0314, "step": 142310 }, { "epoch": 0.3116, "grad_norm": 0.06239822879433632, "learning_rate": 1.05340363425177e-05, "loss": 0.0322, "step": 142320 }, { "epoch": 0.31165, "grad_norm": 0.058451853692531586, "learning_rate": 1.053066518102554e-05, "loss": 0.0344, "step": 142330 }, { "epoch": 0.3117, "grad_norm": 0.0508386604487896, "learning_rate": 1.0527294415119562e-05, "loss": 0.0311, "step": 142340 }, { "epoch": 0.31175, "grad_norm": 0.05742720514535904, "learning_rate": 1.0523924044891923e-05, "loss": 0.0331, "step": 142350 }, { "epoch": 0.3118, "grad_norm": 0.05304501950740814, "learning_rate": 1.0520554070434757e-05, "loss": 0.0333, "step": 142360 }, { "epoch": 0.31185, "grad_norm": 0.04958457872271538, "learning_rate": 1.0517184491840199e-05, "loss": 0.0318, "step": 142370 }, { "epoch": 0.3119, "grad_norm": 0.05456435680389404, "learning_rate": 1.051381530920038e-05, "loss": 0.0328, "step": 142380 }, { "epoch": 0.31195, "grad_norm": 0.05465429276227951, "learning_rate": 1.0510446522607412e-05, "loss": 0.0337, "step": 142390 }, { "epoch": 0.312, "grad_norm": 0.059179335832595825, "learning_rate": 1.0507078132153384e-05, "loss": 0.0353, "step": 142400 }, { "epoch": 0.31205, "grad_norm": 0.055974896997213364, "learning_rate": 1.0503710137930401e-05, "loss": 0.037, "step": 142410 }, { "epoch": 0.3121, "grad_norm": 0.06301740556955338, "learning_rate": 1.0500342540030531e-05, "loss": 0.0362, "step": 142420 }, { "epoch": 0.31215, "grad_norm": 0.06001967191696167, "learning_rate": 1.0496975338545857e-05, "loss": 0.0337, "step": 142430 }, { "epoch": 0.3122, "grad_norm": 0.05626402050256729, "learning_rate": 1.0493608533568424e-05, "loss": 0.0344, "step": 142440 }, { "epoch": 0.31225, "grad_norm": 0.049546778202056885, "learning_rate": 1.049024212519028e-05, "loss": 0.0325, "step": 142450 }, { "epoch": 0.3123, "grad_norm": 0.05693064257502556, "learning_rate": 1.0486876113503474e-05, "loss": 0.0337, "step": 142460 }, { "epoch": 0.31235, "grad_norm": 0.05737036466598511, "learning_rate": 1.0483510498600021e-05, "loss": 0.0346, "step": 142470 }, { "epoch": 0.3124, "grad_norm": 0.046279098838567734, "learning_rate": 1.0480145280571937e-05, "loss": 0.0338, "step": 142480 }, { "epoch": 0.31245, "grad_norm": 0.05355663597583771, "learning_rate": 1.0476780459511218e-05, "loss": 0.0343, "step": 142490 }, { "epoch": 0.3125, "grad_norm": 0.0570637546479702, "learning_rate": 1.0473416035509875e-05, "loss": 0.0346, "step": 142500 }, { "epoch": 0.31255, "grad_norm": 0.051324110478162766, "learning_rate": 1.047005200865987e-05, "loss": 0.0344, "step": 142510 }, { "epoch": 0.3126, "grad_norm": 0.04968645051121712, "learning_rate": 1.0466688379053193e-05, "loss": 0.0336, "step": 142520 }, { "epoch": 0.31265, "grad_norm": 0.05262758210301399, "learning_rate": 1.04633251467818e-05, "loss": 0.0354, "step": 142530 }, { "epoch": 0.3127, "grad_norm": 0.055360324680805206, "learning_rate": 1.0459962311937624e-05, "loss": 0.0366, "step": 142540 }, { "epoch": 0.31275, "grad_norm": 0.0528523214161396, "learning_rate": 1.0456599874612624e-05, "loss": 0.0337, "step": 142550 }, { "epoch": 0.3128, "grad_norm": 0.05057939141988754, "learning_rate": 1.0453237834898722e-05, "loss": 0.0344, "step": 142560 }, { "epoch": 0.31285, "grad_norm": 0.05711035802960396, "learning_rate": 1.0449876192887831e-05, "loss": 0.0347, "step": 142570 }, { "epoch": 0.3129, "grad_norm": 0.05266747996211052, "learning_rate": 1.0446514948671848e-05, "loss": 0.0338, "step": 142580 }, { "epoch": 0.31295, "grad_norm": 0.06965702772140503, "learning_rate": 1.044315410234268e-05, "loss": 0.0354, "step": 142590 }, { "epoch": 0.313, "grad_norm": 0.05463392287492752, "learning_rate": 1.043979365399223e-05, "loss": 0.0344, "step": 142600 }, { "epoch": 0.31305, "grad_norm": 0.043276332318782806, "learning_rate": 1.0436433603712334e-05, "loss": 0.0332, "step": 142610 }, { "epoch": 0.3131, "grad_norm": 0.06321835517883301, "learning_rate": 1.043307395159488e-05, "loss": 0.0336, "step": 142620 }, { "epoch": 0.31315, "grad_norm": 0.052459266036748886, "learning_rate": 1.0429714697731702e-05, "loss": 0.0348, "step": 142630 }, { "epoch": 0.3132, "grad_norm": 0.04685082286596298, "learning_rate": 1.0426355842214657e-05, "loss": 0.0341, "step": 142640 }, { "epoch": 0.31325, "grad_norm": 0.05648628994822502, "learning_rate": 1.0422997385135571e-05, "loss": 0.0355, "step": 142650 }, { "epoch": 0.3133, "grad_norm": 0.05795169994235039, "learning_rate": 1.0419639326586253e-05, "loss": 0.0333, "step": 142660 }, { "epoch": 0.31335, "grad_norm": 0.06540055572986603, "learning_rate": 1.0416281666658523e-05, "loss": 0.0328, "step": 142670 }, { "epoch": 0.3134, "grad_norm": 0.05244702100753784, "learning_rate": 1.0412924405444177e-05, "loss": 0.0325, "step": 142680 }, { "epoch": 0.31345, "grad_norm": 0.05591924488544464, "learning_rate": 1.0409567543034995e-05, "loss": 0.0355, "step": 142690 }, { "epoch": 0.3135, "grad_norm": 0.062114112079143524, "learning_rate": 1.0406211079522749e-05, "loss": 0.0327, "step": 142700 }, { "epoch": 0.31355, "grad_norm": 0.04614487290382385, "learning_rate": 1.0402855014999205e-05, "loss": 0.0316, "step": 142710 }, { "epoch": 0.3136, "grad_norm": 0.050484973937273026, "learning_rate": 1.0399499349556133e-05, "loss": 0.0326, "step": 142720 }, { "epoch": 0.31365, "grad_norm": 0.05026634410023689, "learning_rate": 1.0396144083285264e-05, "loss": 0.0327, "step": 142730 }, { "epoch": 0.3137, "grad_norm": 0.05540686473250389, "learning_rate": 1.0392789216278328e-05, "loss": 0.0326, "step": 142740 }, { "epoch": 0.31375, "grad_norm": 0.06008157134056091, "learning_rate": 1.0389434748627038e-05, "loss": 0.033, "step": 142750 }, { "epoch": 0.3138, "grad_norm": 0.06368424743413925, "learning_rate": 1.0386080680423124e-05, "loss": 0.0332, "step": 142760 }, { "epoch": 0.31385, "grad_norm": 0.062105316668748856, "learning_rate": 1.0382727011758273e-05, "loss": 0.033, "step": 142770 }, { "epoch": 0.3139, "grad_norm": 0.0804833397269249, "learning_rate": 1.0379373742724164e-05, "loss": 0.0323, "step": 142780 }, { "epoch": 0.31395, "grad_norm": 0.06516914814710617, "learning_rate": 1.0376020873412492e-05, "loss": 0.0331, "step": 142790 }, { "epoch": 0.314, "grad_norm": 0.059499580413103104, "learning_rate": 1.0372668403914909e-05, "loss": 0.0329, "step": 142800 }, { "epoch": 0.31405, "grad_norm": 0.057656656950712204, "learning_rate": 1.0369316334323096e-05, "loss": 0.0331, "step": 142810 }, { "epoch": 0.3141, "grad_norm": 0.06045324355363846, "learning_rate": 1.0365964664728655e-05, "loss": 0.0343, "step": 142820 }, { "epoch": 0.31415, "grad_norm": 0.054806213825941086, "learning_rate": 1.0362613395223247e-05, "loss": 0.0314, "step": 142830 }, { "epoch": 0.3142, "grad_norm": 0.053731124848127365, "learning_rate": 1.0359262525898497e-05, "loss": 0.0331, "step": 142840 }, { "epoch": 0.31425, "grad_norm": 0.05551392585039139, "learning_rate": 1.0355912056846009e-05, "loss": 0.0328, "step": 142850 }, { "epoch": 0.3143, "grad_norm": 0.049054939299821854, "learning_rate": 1.0352561988157382e-05, "loss": 0.0314, "step": 142860 }, { "epoch": 0.31435, "grad_norm": 0.05738339200615883, "learning_rate": 1.03492123199242e-05, "loss": 0.0332, "step": 142870 }, { "epoch": 0.3144, "grad_norm": 0.04844846576452255, "learning_rate": 1.0345863052238061e-05, "loss": 0.0329, "step": 142880 }, { "epoch": 0.31445, "grad_norm": 0.06852875649929047, "learning_rate": 1.034251418519052e-05, "loss": 0.0336, "step": 142890 }, { "epoch": 0.3145, "grad_norm": 0.060325898230075836, "learning_rate": 1.0339165718873122e-05, "loss": 0.0325, "step": 142900 }, { "epoch": 0.31455, "grad_norm": 0.05586904659867287, "learning_rate": 1.0335817653377436e-05, "loss": 0.0334, "step": 142910 }, { "epoch": 0.3146, "grad_norm": 0.04665564000606537, "learning_rate": 1.0332469988794977e-05, "loss": 0.0335, "step": 142920 }, { "epoch": 0.31465, "grad_norm": 0.05273238942027092, "learning_rate": 1.0329122725217288e-05, "loss": 0.0333, "step": 142930 }, { "epoch": 0.3147, "grad_norm": 0.05633804574608803, "learning_rate": 1.0325775862735873e-05, "loss": 0.032, "step": 142940 }, { "epoch": 0.31475, "grad_norm": 0.044203322380781174, "learning_rate": 1.0322429401442232e-05, "loss": 0.033, "step": 142950 }, { "epoch": 0.3148, "grad_norm": 0.05241499841213226, "learning_rate": 1.0319083341427849e-05, "loss": 0.0342, "step": 142960 }, { "epoch": 0.31485, "grad_norm": 0.04896755516529083, "learning_rate": 1.0315737682784219e-05, "loss": 0.0334, "step": 142970 }, { "epoch": 0.3149, "grad_norm": 0.04177512973546982, "learning_rate": 1.0312392425602805e-05, "loss": 0.0323, "step": 142980 }, { "epoch": 0.31495, "grad_norm": 0.05134735628962517, "learning_rate": 1.0309047569975056e-05, "loss": 0.0331, "step": 142990 }, { "epoch": 0.315, "grad_norm": 0.04518589749932289, "learning_rate": 1.0305703115992434e-05, "loss": 0.0325, "step": 143000 }, { "epoch": 0.31505, "grad_norm": 0.06465231627225876, "learning_rate": 1.0302359063746364e-05, "loss": 0.0339, "step": 143010 }, { "epoch": 0.3151, "grad_norm": 0.05051697790622711, "learning_rate": 1.0299015413328289e-05, "loss": 0.0325, "step": 143020 }, { "epoch": 0.31515, "grad_norm": 0.05085299164056778, "learning_rate": 1.0295672164829595e-05, "loss": 0.0327, "step": 143030 }, { "epoch": 0.3152, "grad_norm": 0.06277789920568466, "learning_rate": 1.0292329318341698e-05, "loss": 0.0337, "step": 143040 }, { "epoch": 0.31525, "grad_norm": 0.05738025903701782, "learning_rate": 1.0288986873955999e-05, "loss": 0.0344, "step": 143050 }, { "epoch": 0.3153, "grad_norm": 0.04553530365228653, "learning_rate": 1.0285644831763876e-05, "loss": 0.0347, "step": 143060 }, { "epoch": 0.31535, "grad_norm": 0.06229349598288536, "learning_rate": 1.0282303191856696e-05, "loss": 0.0327, "step": 143070 }, { "epoch": 0.3154, "grad_norm": 0.055931445211172104, "learning_rate": 1.0278961954325805e-05, "loss": 0.0332, "step": 143080 }, { "epoch": 0.31545, "grad_norm": 0.05285884067416191, "learning_rate": 1.0275621119262565e-05, "loss": 0.0326, "step": 143090 }, { "epoch": 0.3155, "grad_norm": 0.055675894021987915, "learning_rate": 1.0272280686758332e-05, "loss": 0.0326, "step": 143100 }, { "epoch": 0.31555, "grad_norm": 0.054981812834739685, "learning_rate": 1.0268940656904392e-05, "loss": 0.0329, "step": 143110 }, { "epoch": 0.3156, "grad_norm": 0.05291634425520897, "learning_rate": 1.0265601029792088e-05, "loss": 0.0324, "step": 143120 }, { "epoch": 0.31565, "grad_norm": 0.05016390606760979, "learning_rate": 1.026226180551271e-05, "loss": 0.036, "step": 143130 }, { "epoch": 0.3157, "grad_norm": 0.05711352080106735, "learning_rate": 1.0258922984157566e-05, "loss": 0.0338, "step": 143140 }, { "epoch": 0.31575, "grad_norm": 0.04906103014945984, "learning_rate": 1.0255584565817928e-05, "loss": 0.0339, "step": 143150 }, { "epoch": 0.3158, "grad_norm": 0.049137938767671585, "learning_rate": 1.0252246550585059e-05, "loss": 0.0336, "step": 143160 }, { "epoch": 0.31585, "grad_norm": 0.0545496866106987, "learning_rate": 1.0248908938550242e-05, "loss": 0.0343, "step": 143170 }, { "epoch": 0.3159, "grad_norm": 0.05843012407422066, "learning_rate": 1.024557172980471e-05, "loss": 0.0325, "step": 143180 }, { "epoch": 0.31595, "grad_norm": 0.058799244463443756, "learning_rate": 1.0242234924439703e-05, "loss": 0.0346, "step": 143190 }, { "epoch": 0.316, "grad_norm": 0.057011742144823074, "learning_rate": 1.0238898522546442e-05, "loss": 0.0341, "step": 143200 }, { "epoch": 0.31605, "grad_norm": 0.05914317071437836, "learning_rate": 1.0235562524216158e-05, "loss": 0.0356, "step": 143210 }, { "epoch": 0.3161, "grad_norm": 0.05741539224982262, "learning_rate": 1.023222692954004e-05, "loss": 0.0342, "step": 143220 }, { "epoch": 0.31615, "grad_norm": 0.06138933077454567, "learning_rate": 1.0228891738609298e-05, "loss": 0.035, "step": 143230 }, { "epoch": 0.3162, "grad_norm": 0.057428572326898575, "learning_rate": 1.0225556951515106e-05, "loss": 0.034, "step": 143240 }, { "epoch": 0.31625, "grad_norm": 0.05353344604372978, "learning_rate": 1.0222222568348627e-05, "loss": 0.0354, "step": 143250 }, { "epoch": 0.3163, "grad_norm": 0.05361321568489075, "learning_rate": 1.0218888589201043e-05, "loss": 0.0338, "step": 143260 }, { "epoch": 0.31635, "grad_norm": 0.04561053216457367, "learning_rate": 1.0215555014163488e-05, "loss": 0.0341, "step": 143270 }, { "epoch": 0.3164, "grad_norm": 0.05260631814599037, "learning_rate": 1.02122218433271e-05, "loss": 0.034, "step": 143280 }, { "epoch": 0.31645, "grad_norm": 0.0508575513958931, "learning_rate": 1.0208889076783015e-05, "loss": 0.0338, "step": 143290 }, { "epoch": 0.3165, "grad_norm": 0.05368515104055405, "learning_rate": 1.0205556714622342e-05, "loss": 0.0344, "step": 143300 }, { "epoch": 0.31655, "grad_norm": 0.04952288419008255, "learning_rate": 1.0202224756936205e-05, "loss": 0.0349, "step": 143310 }, { "epoch": 0.3166, "grad_norm": 0.051482073962688446, "learning_rate": 1.0198893203815669e-05, "loss": 0.0343, "step": 143320 }, { "epoch": 0.31665, "grad_norm": 0.08468296378850937, "learning_rate": 1.0195562055351837e-05, "loss": 0.0346, "step": 143330 }, { "epoch": 0.3167, "grad_norm": 0.059160616248846054, "learning_rate": 1.0192231311635771e-05, "loss": 0.0341, "step": 143340 }, { "epoch": 0.31675, "grad_norm": 0.05456196144223213, "learning_rate": 1.0188900972758547e-05, "loss": 0.0349, "step": 143350 }, { "epoch": 0.3168, "grad_norm": 0.05162947624921799, "learning_rate": 1.0185571038811204e-05, "loss": 0.0328, "step": 143360 }, { "epoch": 0.31685, "grad_norm": 0.058702047914266586, "learning_rate": 1.0182241509884777e-05, "loss": 0.0333, "step": 143370 }, { "epoch": 0.3169, "grad_norm": 0.0639418363571167, "learning_rate": 1.0178912386070307e-05, "loss": 0.0339, "step": 143380 }, { "epoch": 0.31695, "grad_norm": 0.06946256756782532, "learning_rate": 1.0175583667458804e-05, "loss": 0.0348, "step": 143390 }, { "epoch": 0.317, "grad_norm": 0.05914067104458809, "learning_rate": 1.0172255354141278e-05, "loss": 0.0336, "step": 143400 }, { "epoch": 0.31705, "grad_norm": 0.05531926080584526, "learning_rate": 1.0168927446208707e-05, "loss": 0.0333, "step": 143410 }, { "epoch": 0.3171, "grad_norm": 0.047997161746025085, "learning_rate": 1.016559994375209e-05, "loss": 0.0332, "step": 143420 }, { "epoch": 0.31715, "grad_norm": 0.05001961439847946, "learning_rate": 1.0162272846862405e-05, "loss": 0.0317, "step": 143430 }, { "epoch": 0.3172, "grad_norm": 0.056277453899383545, "learning_rate": 1.0158946155630608e-05, "loss": 0.0338, "step": 143440 }, { "epoch": 0.31725, "grad_norm": 0.057324331253767014, "learning_rate": 1.0155619870147645e-05, "loss": 0.0327, "step": 143450 }, { "epoch": 0.3173, "grad_norm": 0.05637969449162483, "learning_rate": 1.0152293990504452e-05, "loss": 0.0325, "step": 143460 }, { "epoch": 0.31735, "grad_norm": 0.05383256450295448, "learning_rate": 1.014896851679197e-05, "loss": 0.0339, "step": 143470 }, { "epoch": 0.3174, "grad_norm": 0.05018642917275429, "learning_rate": 1.0145643449101111e-05, "loss": 0.0322, "step": 143480 }, { "epoch": 0.31745, "grad_norm": 0.04998883977532387, "learning_rate": 1.014231878752277e-05, "loss": 0.0312, "step": 143490 }, { "epoch": 0.3175, "grad_norm": 0.04939829930663109, "learning_rate": 1.013899453214786e-05, "loss": 0.0317, "step": 143500 }, { "epoch": 0.31755, "grad_norm": 0.07261301577091217, "learning_rate": 1.0135670683067247e-05, "loss": 0.0336, "step": 143510 }, { "epoch": 0.3176, "grad_norm": 0.06238599866628647, "learning_rate": 1.0132347240371835e-05, "loss": 0.0334, "step": 143520 }, { "epoch": 0.31765, "grad_norm": 0.052265483886003494, "learning_rate": 1.012902420415244e-05, "loss": 0.0343, "step": 143530 }, { "epoch": 0.3177, "grad_norm": 0.057238973677158356, "learning_rate": 1.012570157449994e-05, "loss": 0.032, "step": 143540 }, { "epoch": 0.31775, "grad_norm": 0.06168012320995331, "learning_rate": 1.0122379351505179e-05, "loss": 0.0331, "step": 143550 }, { "epoch": 0.3178, "grad_norm": 0.048543546348810196, "learning_rate": 1.011905753525898e-05, "loss": 0.0331, "step": 143560 }, { "epoch": 0.31785, "grad_norm": 0.05441056936979294, "learning_rate": 1.0115736125852154e-05, "loss": 0.0339, "step": 143570 }, { "epoch": 0.3179, "grad_norm": 0.05617294833064079, "learning_rate": 1.0112415123375505e-05, "loss": 0.0325, "step": 143580 }, { "epoch": 0.31795, "grad_norm": 0.05055122449994087, "learning_rate": 1.0109094527919838e-05, "loss": 0.0324, "step": 143590 }, { "epoch": 0.318, "grad_norm": 0.05427250638604164, "learning_rate": 1.0105774339575935e-05, "loss": 0.0322, "step": 143600 }, { "epoch": 0.31805, "grad_norm": 0.045203424990177155, "learning_rate": 1.0102454558434558e-05, "loss": 0.0317, "step": 143610 }, { "epoch": 0.3181, "grad_norm": 0.05851052328944206, "learning_rate": 1.0099135184586484e-05, "loss": 0.034, "step": 143620 }, { "epoch": 0.31815, "grad_norm": 0.05924437940120697, "learning_rate": 1.0095816218122447e-05, "loss": 0.0374, "step": 143630 }, { "epoch": 0.3182, "grad_norm": 0.05943276733160019, "learning_rate": 1.0092497659133205e-05, "loss": 0.0329, "step": 143640 }, { "epoch": 0.31825, "grad_norm": 0.05054665356874466, "learning_rate": 1.0089179507709476e-05, "loss": 0.0314, "step": 143650 }, { "epoch": 0.3183, "grad_norm": 0.0640970915555954, "learning_rate": 1.008586176394197e-05, "loss": 0.0339, "step": 143660 }, { "epoch": 0.31835, "grad_norm": 0.04974190518260002, "learning_rate": 1.0082544427921407e-05, "loss": 0.0319, "step": 143670 }, { "epoch": 0.3184, "grad_norm": 0.057464223355054855, "learning_rate": 1.0079227499738475e-05, "loss": 0.032, "step": 143680 }, { "epoch": 0.31845, "grad_norm": 0.05096564441919327, "learning_rate": 1.007591097948386e-05, "loss": 0.0332, "step": 143690 }, { "epoch": 0.3185, "grad_norm": 0.06046581640839577, "learning_rate": 1.0072594867248223e-05, "loss": 0.0327, "step": 143700 }, { "epoch": 0.31855, "grad_norm": 0.05026427283883095, "learning_rate": 1.0069279163122241e-05, "loss": 0.0323, "step": 143710 }, { "epoch": 0.3186, "grad_norm": 0.05407608672976494, "learning_rate": 1.0065963867196552e-05, "loss": 0.0348, "step": 143720 }, { "epoch": 0.31865, "grad_norm": 0.046073343604803085, "learning_rate": 1.0062648979561806e-05, "loss": 0.0326, "step": 143730 }, { "epoch": 0.3187, "grad_norm": 0.055437296628952026, "learning_rate": 1.0059334500308626e-05, "loss": 0.0349, "step": 143740 }, { "epoch": 0.31875, "grad_norm": 0.06322011351585388, "learning_rate": 1.005602042952762e-05, "loss": 0.0345, "step": 143750 }, { "epoch": 0.3188, "grad_norm": 0.05501188337802887, "learning_rate": 1.0052706767309411e-05, "loss": 0.0333, "step": 143760 }, { "epoch": 0.31885, "grad_norm": 0.04927259311079979, "learning_rate": 1.0049393513744581e-05, "loss": 0.0338, "step": 143770 }, { "epoch": 0.3189, "grad_norm": 0.05020604282617569, "learning_rate": 1.0046080668923717e-05, "loss": 0.035, "step": 143780 }, { "epoch": 0.31895, "grad_norm": 0.054454952478408813, "learning_rate": 1.004276823293738e-05, "loss": 0.0348, "step": 143790 }, { "epoch": 0.319, "grad_norm": 0.05578719824552536, "learning_rate": 1.003945620587614e-05, "loss": 0.0346, "step": 143800 }, { "epoch": 0.31905, "grad_norm": 0.05327814072370529, "learning_rate": 1.0036144587830568e-05, "loss": 0.0354, "step": 143810 }, { "epoch": 0.3191, "grad_norm": 0.059739600867033005, "learning_rate": 1.003283337889116e-05, "loss": 0.0355, "step": 143820 }, { "epoch": 0.31915, "grad_norm": 0.05959959328174591, "learning_rate": 1.0029522579148474e-05, "loss": 0.0344, "step": 143830 }, { "epoch": 0.3192, "grad_norm": 0.057411860674619675, "learning_rate": 1.0026212188693006e-05, "loss": 0.0336, "step": 143840 }, { "epoch": 0.31925, "grad_norm": 0.06283937394618988, "learning_rate": 1.0022902207615284e-05, "loss": 0.036, "step": 143850 }, { "epoch": 0.3193, "grad_norm": 0.05707542225718498, "learning_rate": 1.0019592636005787e-05, "loss": 0.0334, "step": 143860 }, { "epoch": 0.31935, "grad_norm": 0.058691900223493576, "learning_rate": 1.0016283473954993e-05, "loss": 0.034, "step": 143870 }, { "epoch": 0.3194, "grad_norm": 0.06506255269050598, "learning_rate": 1.0012974721553386e-05, "loss": 0.034, "step": 143880 }, { "epoch": 0.31945, "grad_norm": 0.052746180444955826, "learning_rate": 1.0009666378891419e-05, "loss": 0.033, "step": 143890 }, { "epoch": 0.3195, "grad_norm": 0.0566536970436573, "learning_rate": 1.0006358446059544e-05, "loss": 0.0337, "step": 143900 }, { "epoch": 0.31955, "grad_norm": 0.07616173475980759, "learning_rate": 1.0003050923148186e-05, "loss": 0.0343, "step": 143910 }, { "epoch": 0.3196, "grad_norm": 0.08153429627418518, "learning_rate": 9.999743810247783e-06, "loss": 0.0344, "step": 143920 }, { "epoch": 0.31965, "grad_norm": 0.057439543306827545, "learning_rate": 9.996437107448756e-06, "loss": 0.0338, "step": 143930 }, { "epoch": 0.3197, "grad_norm": 0.057741016149520874, "learning_rate": 9.993130814841504e-06, "loss": 0.0329, "step": 143940 }, { "epoch": 0.31975, "grad_norm": 0.04677318036556244, "learning_rate": 9.989824932516415e-06, "loss": 0.0347, "step": 143950 }, { "epoch": 0.3198, "grad_norm": 0.05746036767959595, "learning_rate": 9.986519460563864e-06, "loss": 0.0326, "step": 143960 }, { "epoch": 0.31985, "grad_norm": 0.04599759727716446, "learning_rate": 9.983214399074241e-06, "loss": 0.0318, "step": 143970 }, { "epoch": 0.3199, "grad_norm": 0.0466180220246315, "learning_rate": 9.979909748137897e-06, "loss": 0.0328, "step": 143980 }, { "epoch": 0.31995, "grad_norm": 0.05364130064845085, "learning_rate": 9.976605507845165e-06, "loss": 0.0329, "step": 143990 }, { "epoch": 0.32, "grad_norm": 0.050435539335012436, "learning_rate": 9.973301678286406e-06, "loss": 0.034, "step": 144000 }, { "epoch": 0.32005, "grad_norm": 0.05588700622320175, "learning_rate": 9.969998259551924e-06, "loss": 0.0341, "step": 144010 }, { "epoch": 0.3201, "grad_norm": 0.05198369920253754, "learning_rate": 9.966695251732061e-06, "loss": 0.0336, "step": 144020 }, { "epoch": 0.32015, "grad_norm": 0.05904610827565193, "learning_rate": 9.963392654917084e-06, "loss": 0.0356, "step": 144030 }, { "epoch": 0.3202, "grad_norm": 0.0924912765622139, "learning_rate": 9.960090469197303e-06, "loss": 0.0347, "step": 144040 }, { "epoch": 0.32025, "grad_norm": 0.06023856997489929, "learning_rate": 9.956788694663007e-06, "loss": 0.0333, "step": 144050 }, { "epoch": 0.3203, "grad_norm": 0.06063982471823692, "learning_rate": 9.953487331404456e-06, "loss": 0.0341, "step": 144060 }, { "epoch": 0.32035, "grad_norm": 0.06193577125668526, "learning_rate": 9.950186379511911e-06, "loss": 0.0334, "step": 144070 }, { "epoch": 0.3204, "grad_norm": 0.060056522488594055, "learning_rate": 9.946885839075607e-06, "loss": 0.0346, "step": 144080 }, { "epoch": 0.32045, "grad_norm": 0.05272936448454857, "learning_rate": 9.943585710185796e-06, "loss": 0.0339, "step": 144090 }, { "epoch": 0.3205, "grad_norm": 0.05106258764863014, "learning_rate": 9.940285992932696e-06, "loss": 0.0342, "step": 144100 }, { "epoch": 0.32055, "grad_norm": 0.05771468207240105, "learning_rate": 9.93698668740651e-06, "loss": 0.0339, "step": 144110 }, { "epoch": 0.3206, "grad_norm": 0.060430724173784256, "learning_rate": 9.93368779369746e-06, "loss": 0.0346, "step": 144120 }, { "epoch": 0.32065, "grad_norm": 0.05532369762659073, "learning_rate": 9.930389311895716e-06, "loss": 0.033, "step": 144130 }, { "epoch": 0.3207, "grad_norm": 0.050530027598142624, "learning_rate": 9.927091242091475e-06, "loss": 0.0333, "step": 144140 }, { "epoch": 0.32075, "grad_norm": 0.046072810888290405, "learning_rate": 9.923793584374897e-06, "loss": 0.0323, "step": 144150 }, { "epoch": 0.3208, "grad_norm": 0.05474727600812912, "learning_rate": 9.920496338836135e-06, "loss": 0.0338, "step": 144160 }, { "epoch": 0.32085, "grad_norm": 0.07041291892528534, "learning_rate": 9.917199505565333e-06, "loss": 0.0327, "step": 144170 }, { "epoch": 0.3209, "grad_norm": 0.05222819000482559, "learning_rate": 9.91390308465264e-06, "loss": 0.0336, "step": 144180 }, { "epoch": 0.32095, "grad_norm": 0.05012571066617966, "learning_rate": 9.910607076188166e-06, "loss": 0.0333, "step": 144190 }, { "epoch": 0.321, "grad_norm": 0.05344976484775543, "learning_rate": 9.907311480262019e-06, "loss": 0.0331, "step": 144200 }, { "epoch": 0.32105, "grad_norm": 0.06622076779603958, "learning_rate": 9.904016296964314e-06, "loss": 0.0343, "step": 144210 }, { "epoch": 0.3211, "grad_norm": 0.06994125247001648, "learning_rate": 9.900721526385122e-06, "loss": 0.0333, "step": 144220 }, { "epoch": 0.32115, "grad_norm": 0.06810478121042252, "learning_rate": 9.897427168614542e-06, "loss": 0.0336, "step": 144230 }, { "epoch": 0.3212, "grad_norm": 0.06874731183052063, "learning_rate": 9.894133223742629e-06, "loss": 0.035, "step": 144240 }, { "epoch": 0.32125, "grad_norm": 0.08079435676336288, "learning_rate": 9.89083969185943e-06, "loss": 0.0346, "step": 144250 }, { "epoch": 0.3213, "grad_norm": 0.055063385516405106, "learning_rate": 9.887546573055006e-06, "loss": 0.0331, "step": 144260 }, { "epoch": 0.32135, "grad_norm": 0.04513436555862427, "learning_rate": 9.884253867419383e-06, "loss": 0.0326, "step": 144270 }, { "epoch": 0.3214, "grad_norm": 0.0472201369702816, "learning_rate": 9.880961575042578e-06, "loss": 0.0323, "step": 144280 }, { "epoch": 0.32145, "grad_norm": 0.04506751522421837, "learning_rate": 9.877669696014593e-06, "loss": 0.0338, "step": 144290 }, { "epoch": 0.3215, "grad_norm": 0.046989090740680695, "learning_rate": 9.87437823042544e-06, "loss": 0.034, "step": 144300 }, { "epoch": 0.32155, "grad_norm": 0.05014864727854729, "learning_rate": 9.871087178365124e-06, "loss": 0.0323, "step": 144310 }, { "epoch": 0.3216, "grad_norm": 0.05255356431007385, "learning_rate": 9.86779653992358e-06, "loss": 0.0344, "step": 144320 }, { "epoch": 0.32165, "grad_norm": 0.05546373128890991, "learning_rate": 9.864506315190802e-06, "loss": 0.0338, "step": 144330 }, { "epoch": 0.3217, "grad_norm": 0.059057652950286865, "learning_rate": 9.861216504256728e-06, "loss": 0.0321, "step": 144340 }, { "epoch": 0.32175, "grad_norm": 0.0549306720495224, "learning_rate": 9.857927107211315e-06, "loss": 0.0324, "step": 144350 }, { "epoch": 0.3218, "grad_norm": 0.05145009234547615, "learning_rate": 9.854638124144489e-06, "loss": 0.0326, "step": 144360 }, { "epoch": 0.32185, "grad_norm": 0.04620020091533661, "learning_rate": 9.851349555146153e-06, "loss": 0.0336, "step": 144370 }, { "epoch": 0.3219, "grad_norm": 0.06289909034967422, "learning_rate": 9.848061400306241e-06, "loss": 0.0349, "step": 144380 }, { "epoch": 0.32195, "grad_norm": 0.060509249567985535, "learning_rate": 9.844773659714637e-06, "loss": 0.0338, "step": 144390 }, { "epoch": 0.322, "grad_norm": 0.05424680933356285, "learning_rate": 9.84148633346123e-06, "loss": 0.0343, "step": 144400 }, { "epoch": 0.32205, "grad_norm": 0.04965484142303467, "learning_rate": 9.838199421635883e-06, "loss": 0.0335, "step": 144410 }, { "epoch": 0.3221, "grad_norm": 0.05325651541352272, "learning_rate": 9.834912924328474e-06, "loss": 0.034, "step": 144420 }, { "epoch": 0.32215, "grad_norm": 0.05906248837709427, "learning_rate": 9.831626841628842e-06, "loss": 0.0342, "step": 144430 }, { "epoch": 0.3222, "grad_norm": 0.05295049399137497, "learning_rate": 9.82834117362684e-06, "loss": 0.033, "step": 144440 }, { "epoch": 0.32225, "grad_norm": 0.048718854784965515, "learning_rate": 9.825055920412291e-06, "loss": 0.0327, "step": 144450 }, { "epoch": 0.3223, "grad_norm": 0.04378309100866318, "learning_rate": 9.821771082075004e-06, "loss": 0.0326, "step": 144460 }, { "epoch": 0.32235, "grad_norm": 0.0534726157784462, "learning_rate": 9.818486658704801e-06, "loss": 0.0336, "step": 144470 }, { "epoch": 0.3224, "grad_norm": 0.04605276510119438, "learning_rate": 9.815202650391473e-06, "loss": 0.0318, "step": 144480 }, { "epoch": 0.32245, "grad_norm": 0.057808876037597656, "learning_rate": 9.811919057224786e-06, "loss": 0.0329, "step": 144490 }, { "epoch": 0.3225, "grad_norm": 0.052570734173059464, "learning_rate": 9.808635879294539e-06, "loss": 0.0345, "step": 144500 }, { "epoch": 0.32255, "grad_norm": 0.04239342734217644, "learning_rate": 9.805353116690468e-06, "loss": 0.0327, "step": 144510 }, { "epoch": 0.3226, "grad_norm": 0.06058270484209061, "learning_rate": 9.802070769502355e-06, "loss": 0.0329, "step": 144520 }, { "epoch": 0.32265, "grad_norm": 0.05767593905329704, "learning_rate": 9.798788837819898e-06, "loss": 0.0316, "step": 144530 }, { "epoch": 0.3227, "grad_norm": 0.06306783109903336, "learning_rate": 9.795507321732853e-06, "loss": 0.0346, "step": 144540 }, { "epoch": 0.32275, "grad_norm": 0.053637877106666565, "learning_rate": 9.792226221330916e-06, "loss": 0.0321, "step": 144550 }, { "epoch": 0.3228, "grad_norm": 0.0671757236123085, "learning_rate": 9.788945536703811e-06, "loss": 0.0334, "step": 144560 }, { "epoch": 0.32285, "grad_norm": 0.04795819893479347, "learning_rate": 9.785665267941218e-06, "loss": 0.0319, "step": 144570 }, { "epoch": 0.3229, "grad_norm": 0.06958995759487152, "learning_rate": 9.782385415132816e-06, "loss": 0.0355, "step": 144580 }, { "epoch": 0.32295, "grad_norm": 0.056665509939193726, "learning_rate": 9.779105978368285e-06, "loss": 0.0321, "step": 144590 }, { "epoch": 0.323, "grad_norm": 0.05132095888257027, "learning_rate": 9.77582695773728e-06, "loss": 0.0332, "step": 144600 }, { "epoch": 0.32305, "grad_norm": 0.06006599962711334, "learning_rate": 9.772548353329446e-06, "loss": 0.0334, "step": 144610 }, { "epoch": 0.3231, "grad_norm": 0.05475315824151039, "learning_rate": 9.769270165234413e-06, "loss": 0.0342, "step": 144620 }, { "epoch": 0.32315, "grad_norm": 0.0589086152613163, "learning_rate": 9.76599239354181e-06, "loss": 0.033, "step": 144630 }, { "epoch": 0.3232, "grad_norm": 0.05416763201355934, "learning_rate": 9.762715038341258e-06, "loss": 0.0355, "step": 144640 }, { "epoch": 0.32325, "grad_norm": 0.04965779185295105, "learning_rate": 9.759438099722353e-06, "loss": 0.0329, "step": 144650 }, { "epoch": 0.3233, "grad_norm": 0.04819157347083092, "learning_rate": 9.756161577774688e-06, "loss": 0.0324, "step": 144660 }, { "epoch": 0.32335, "grad_norm": 0.04757603630423546, "learning_rate": 9.752885472587828e-06, "loss": 0.032, "step": 144670 }, { "epoch": 0.3234, "grad_norm": 0.05028046295046806, "learning_rate": 9.74960978425136e-06, "loss": 0.0323, "step": 144680 }, { "epoch": 0.32345, "grad_norm": 0.05156344920396805, "learning_rate": 9.746334512854832e-06, "loss": 0.037, "step": 144690 }, { "epoch": 0.3235, "grad_norm": 0.049711503088474274, "learning_rate": 9.743059658487777e-06, "loss": 0.0326, "step": 144700 }, { "epoch": 0.32355, "grad_norm": 0.06333158165216446, "learning_rate": 9.73978522123975e-06, "loss": 0.0336, "step": 144710 }, { "epoch": 0.3236, "grad_norm": 0.06581209599971771, "learning_rate": 9.736511201200251e-06, "loss": 0.0347, "step": 144720 }, { "epoch": 0.32365, "grad_norm": 0.06127002835273743, "learning_rate": 9.733237598458821e-06, "loss": 0.033, "step": 144730 }, { "epoch": 0.3237, "grad_norm": 0.05636242777109146, "learning_rate": 9.729964413104922e-06, "loss": 0.033, "step": 144740 }, { "epoch": 0.32375, "grad_norm": 0.06685664504766464, "learning_rate": 9.72669164522806e-06, "loss": 0.0327, "step": 144750 }, { "epoch": 0.3238, "grad_norm": 0.051527686417102814, "learning_rate": 9.723419294917719e-06, "loss": 0.0315, "step": 144760 }, { "epoch": 0.32385, "grad_norm": 0.04837772250175476, "learning_rate": 9.720147362263354e-06, "loss": 0.0321, "step": 144770 }, { "epoch": 0.3239, "grad_norm": 0.05207151919603348, "learning_rate": 9.71687584735442e-06, "loss": 0.0316, "step": 144780 }, { "epoch": 0.32395, "grad_norm": 0.045554231852293015, "learning_rate": 9.71360475028035e-06, "loss": 0.0314, "step": 144790 }, { "epoch": 0.324, "grad_norm": 0.04364077001810074, "learning_rate": 9.710334071130595e-06, "loss": 0.0338, "step": 144800 }, { "epoch": 0.32405, "grad_norm": 0.04708629101514816, "learning_rate": 9.707063809994557e-06, "loss": 0.0355, "step": 144810 }, { "epoch": 0.3241, "grad_norm": 0.04998410493135452, "learning_rate": 9.703793966961645e-06, "loss": 0.0324, "step": 144820 }, { "epoch": 0.32415, "grad_norm": 0.05050116032361984, "learning_rate": 9.700524542121266e-06, "loss": 0.0326, "step": 144830 }, { "epoch": 0.3242, "grad_norm": 0.047319792211055756, "learning_rate": 9.697255535562791e-06, "loss": 0.0326, "step": 144840 }, { "epoch": 0.32425, "grad_norm": 0.05137254297733307, "learning_rate": 9.693986947375608e-06, "loss": 0.0317, "step": 144850 }, { "epoch": 0.3243, "grad_norm": 0.05128312483429909, "learning_rate": 9.690718777649071e-06, "loss": 0.0326, "step": 144860 }, { "epoch": 0.32435, "grad_norm": 0.05278492346405983, "learning_rate": 9.687451026472525e-06, "loss": 0.0318, "step": 144870 }, { "epoch": 0.3244, "grad_norm": 0.05346215143799782, "learning_rate": 9.68418369393532e-06, "loss": 0.0324, "step": 144880 }, { "epoch": 0.32445, "grad_norm": 0.04384399205446243, "learning_rate": 9.680916780126783e-06, "loss": 0.0333, "step": 144890 }, { "epoch": 0.3245, "grad_norm": 0.050685182213783264, "learning_rate": 9.677650285136225e-06, "loss": 0.0343, "step": 144900 }, { "epoch": 0.32455, "grad_norm": 0.0474894642829895, "learning_rate": 9.674384209052942e-06, "loss": 0.0332, "step": 144910 }, { "epoch": 0.3246, "grad_norm": 0.04734433814883232, "learning_rate": 9.671118551966246e-06, "loss": 0.0334, "step": 144920 }, { "epoch": 0.32465, "grad_norm": 0.04418141022324562, "learning_rate": 9.667853313965402e-06, "loss": 0.0322, "step": 144930 }, { "epoch": 0.3247, "grad_norm": 0.0511958971619606, "learning_rate": 9.664588495139695e-06, "loss": 0.0316, "step": 144940 }, { "epoch": 0.32475, "grad_norm": 0.05163023620843887, "learning_rate": 9.66132409557838e-06, "loss": 0.0329, "step": 144950 }, { "epoch": 0.3248, "grad_norm": 0.05425497889518738, "learning_rate": 9.658060115370692e-06, "loss": 0.0341, "step": 144960 }, { "epoch": 0.32485, "grad_norm": 0.0633278489112854, "learning_rate": 9.654796554605886e-06, "loss": 0.0336, "step": 144970 }, { "epoch": 0.3249, "grad_norm": 0.056204576045274734, "learning_rate": 9.651533413373179e-06, "loss": 0.035, "step": 144980 }, { "epoch": 0.32495, "grad_norm": 0.05580423027276993, "learning_rate": 9.64827069176178e-06, "loss": 0.0334, "step": 144990 }, { "epoch": 0.325, "grad_norm": 0.04533779248595238, "learning_rate": 9.645008389860882e-06, "loss": 0.0347, "step": 145000 }, { "epoch": 0.32505, "grad_norm": 0.04800092428922653, "learning_rate": 9.64174650775969e-06, "loss": 0.0349, "step": 145010 }, { "epoch": 0.3251, "grad_norm": 0.051888130605220795, "learning_rate": 9.638485045547394e-06, "loss": 0.0325, "step": 145020 }, { "epoch": 0.32515, "grad_norm": 0.0473293699324131, "learning_rate": 9.635224003313128e-06, "loss": 0.0327, "step": 145030 }, { "epoch": 0.3252, "grad_norm": 0.05297275632619858, "learning_rate": 9.631963381146075e-06, "loss": 0.0326, "step": 145040 }, { "epoch": 0.32525, "grad_norm": 0.06034578010439873, "learning_rate": 9.628703179135362e-06, "loss": 0.0339, "step": 145050 }, { "epoch": 0.3253, "grad_norm": 0.05310709774494171, "learning_rate": 9.625443397370137e-06, "loss": 0.0335, "step": 145060 }, { "epoch": 0.32535, "grad_norm": 0.05877617746591568, "learning_rate": 9.622184035939513e-06, "loss": 0.0342, "step": 145070 }, { "epoch": 0.3254, "grad_norm": 0.06464612483978271, "learning_rate": 9.618925094932594e-06, "loss": 0.0332, "step": 145080 }, { "epoch": 0.32545, "grad_norm": 0.048762571066617966, "learning_rate": 9.615666574438492e-06, "loss": 0.0334, "step": 145090 }, { "epoch": 0.3255, "grad_norm": 0.04977282136678696, "learning_rate": 9.612408474546287e-06, "loss": 0.0337, "step": 145100 }, { "epoch": 0.32555, "grad_norm": 0.048385247588157654, "learning_rate": 9.609150795345051e-06, "loss": 0.0325, "step": 145110 }, { "epoch": 0.3256, "grad_norm": 0.04673107713460922, "learning_rate": 9.605893536923841e-06, "loss": 0.0329, "step": 145120 }, { "epoch": 0.32565, "grad_norm": 0.04676985740661621, "learning_rate": 9.60263669937172e-06, "loss": 0.033, "step": 145130 }, { "epoch": 0.3257, "grad_norm": 0.048509273678064346, "learning_rate": 9.599380282777737e-06, "loss": 0.0325, "step": 145140 }, { "epoch": 0.32575, "grad_norm": 0.07794582843780518, "learning_rate": 9.596124287230909e-06, "loss": 0.0328, "step": 145150 }, { "epoch": 0.3258, "grad_norm": 0.06084432825446129, "learning_rate": 9.592868712820258e-06, "loss": 0.034, "step": 145160 }, { "epoch": 0.32585, "grad_norm": 0.050786975771188736, "learning_rate": 9.589613559634778e-06, "loss": 0.0334, "step": 145170 }, { "epoch": 0.3259, "grad_norm": 0.050088442862033844, "learning_rate": 9.586358827763483e-06, "loss": 0.0336, "step": 145180 }, { "epoch": 0.32595, "grad_norm": 0.058238424360752106, "learning_rate": 9.58310451729535e-06, "loss": 0.0325, "step": 145190 }, { "epoch": 0.326, "grad_norm": 0.04916360601782799, "learning_rate": 9.579850628319334e-06, "loss": 0.0336, "step": 145200 }, { "epoch": 0.32605, "grad_norm": 0.07092466205358505, "learning_rate": 9.57659716092442e-06, "loss": 0.0329, "step": 145210 }, { "epoch": 0.3261, "grad_norm": 0.056777022778987885, "learning_rate": 9.573344115199538e-06, "loss": 0.0327, "step": 145220 }, { "epoch": 0.32615, "grad_norm": 0.05091371014714241, "learning_rate": 9.570091491233646e-06, "loss": 0.0328, "step": 145230 }, { "epoch": 0.3262, "grad_norm": 0.05490479990839958, "learning_rate": 9.566839289115643e-06, "loss": 0.0329, "step": 145240 }, { "epoch": 0.32625, "grad_norm": 0.06113282963633537, "learning_rate": 9.563587508934451e-06, "loss": 0.033, "step": 145250 }, { "epoch": 0.3263, "grad_norm": 0.049865081906318665, "learning_rate": 9.560336150778989e-06, "loss": 0.0334, "step": 145260 }, { "epoch": 0.32635, "grad_norm": 0.06664572656154633, "learning_rate": 9.557085214738135e-06, "loss": 0.0341, "step": 145270 }, { "epoch": 0.3264, "grad_norm": 0.06306932121515274, "learning_rate": 9.553834700900771e-06, "loss": 0.0332, "step": 145280 }, { "epoch": 0.32645, "grad_norm": 0.06288663297891617, "learning_rate": 9.550584609355754e-06, "loss": 0.034, "step": 145290 }, { "epoch": 0.3265, "grad_norm": 0.07462607324123383, "learning_rate": 9.547334940191957e-06, "loss": 0.0357, "step": 145300 }, { "epoch": 0.32655, "grad_norm": 0.05369102209806442, "learning_rate": 9.54408569349822e-06, "loss": 0.0319, "step": 145310 }, { "epoch": 0.3266, "grad_norm": 0.04382164403796196, "learning_rate": 9.540836869363365e-06, "loss": 0.0321, "step": 145320 }, { "epoch": 0.32665, "grad_norm": 0.04398827627301216, "learning_rate": 9.53758846787623e-06, "loss": 0.0315, "step": 145330 }, { "epoch": 0.3267, "grad_norm": 0.046040408313274384, "learning_rate": 9.534340489125607e-06, "loss": 0.0324, "step": 145340 }, { "epoch": 0.32675, "grad_norm": 0.048852741718292236, "learning_rate": 9.531092933200314e-06, "loss": 0.0322, "step": 145350 }, { "epoch": 0.3268, "grad_norm": 0.05130552500486374, "learning_rate": 9.52784580018913e-06, "loss": 0.0319, "step": 145360 }, { "epoch": 0.32685, "grad_norm": 0.055272601544857025, "learning_rate": 9.52459909018083e-06, "loss": 0.0336, "step": 145370 }, { "epoch": 0.3269, "grad_norm": 0.049018390476703644, "learning_rate": 9.521352803264167e-06, "loss": 0.0313, "step": 145380 }, { "epoch": 0.32695, "grad_norm": 0.045241452753543854, "learning_rate": 9.518106939527913e-06, "loss": 0.0313, "step": 145390 }, { "epoch": 0.327, "grad_norm": 0.04454249516129494, "learning_rate": 9.514861499060795e-06, "loss": 0.0319, "step": 145400 }, { "epoch": 0.32705, "grad_norm": 0.08072825521230698, "learning_rate": 9.511616481951543e-06, "loss": 0.0325, "step": 145410 }, { "epoch": 0.3271, "grad_norm": 0.06192736327648163, "learning_rate": 9.508371888288884e-06, "loss": 0.034, "step": 145420 }, { "epoch": 0.32715, "grad_norm": 0.05790010839700699, "learning_rate": 9.505127718161508e-06, "loss": 0.0338, "step": 145430 }, { "epoch": 0.3272, "grad_norm": 0.06184834986925125, "learning_rate": 9.50188397165813e-06, "loss": 0.0339, "step": 145440 }, { "epoch": 0.32725, "grad_norm": 0.05743928998708725, "learning_rate": 9.498640648867418e-06, "loss": 0.033, "step": 145450 }, { "epoch": 0.3273, "grad_norm": 0.059182487428188324, "learning_rate": 9.495397749878044e-06, "loss": 0.0349, "step": 145460 }, { "epoch": 0.32735, "grad_norm": 0.05375044420361519, "learning_rate": 9.492155274778675e-06, "loss": 0.0318, "step": 145470 }, { "epoch": 0.3274, "grad_norm": 0.04980049282312393, "learning_rate": 9.488913223657956e-06, "loss": 0.0317, "step": 145480 }, { "epoch": 0.32745, "grad_norm": 0.04385017976164818, "learning_rate": 9.485671596604523e-06, "loss": 0.0327, "step": 145490 }, { "epoch": 0.3275, "grad_norm": 0.05428243428468704, "learning_rate": 9.48243039370699e-06, "loss": 0.0341, "step": 145500 }, { "epoch": 0.32755, "grad_norm": 0.04685281217098236, "learning_rate": 9.47918961505398e-06, "loss": 0.0318, "step": 145510 }, { "epoch": 0.3276, "grad_norm": 0.04714469239115715, "learning_rate": 9.475949260734115e-06, "loss": 0.0328, "step": 145520 }, { "epoch": 0.32765, "grad_norm": 0.04670100286602974, "learning_rate": 9.472709330835946e-06, "loss": 0.033, "step": 145530 }, { "epoch": 0.3277, "grad_norm": 0.06322925537824631, "learning_rate": 9.46946982544808e-06, "loss": 0.0334, "step": 145540 }, { "epoch": 0.32775, "grad_norm": 0.05059336498379707, "learning_rate": 9.466230744659063e-06, "loss": 0.0334, "step": 145550 }, { "epoch": 0.3278, "grad_norm": 0.04755368083715439, "learning_rate": 9.462992088557473e-06, "loss": 0.0327, "step": 145560 }, { "epoch": 0.32785, "grad_norm": 0.04885302484035492, "learning_rate": 9.459753857231843e-06, "loss": 0.0325, "step": 145570 }, { "epoch": 0.3279, "grad_norm": 0.0527208149433136, "learning_rate": 9.456516050770695e-06, "loss": 0.0328, "step": 145580 }, { "epoch": 0.32795, "grad_norm": 0.0456591434776783, "learning_rate": 9.453278669262566e-06, "loss": 0.0332, "step": 145590 }, { "epoch": 0.328, "grad_norm": 0.04801170900464058, "learning_rate": 9.450041712795957e-06, "loss": 0.0322, "step": 145600 }, { "epoch": 0.32805, "grad_norm": 0.04043445363640785, "learning_rate": 9.446805181459368e-06, "loss": 0.034, "step": 145610 }, { "epoch": 0.3281, "grad_norm": 0.05507500097155571, "learning_rate": 9.443569075341274e-06, "loss": 0.0347, "step": 145620 }, { "epoch": 0.32815, "grad_norm": 0.05315954610705376, "learning_rate": 9.440333394530163e-06, "loss": 0.0345, "step": 145630 }, { "epoch": 0.3282, "grad_norm": 0.05816284567117691, "learning_rate": 9.437098139114487e-06, "loss": 0.0332, "step": 145640 }, { "epoch": 0.32825, "grad_norm": 0.0643046498298645, "learning_rate": 9.433863309182706e-06, "loss": 0.0351, "step": 145650 }, { "epoch": 0.3283, "grad_norm": 0.05089773237705231, "learning_rate": 9.430628904823255e-06, "loss": 0.0337, "step": 145660 }, { "epoch": 0.32835, "grad_norm": 0.05307083949446678, "learning_rate": 9.427394926124553e-06, "loss": 0.0379, "step": 145670 }, { "epoch": 0.3284, "grad_norm": 0.056122709065675735, "learning_rate": 9.42416137317503e-06, "loss": 0.0353, "step": 145680 }, { "epoch": 0.32845, "grad_norm": 0.0508899986743927, "learning_rate": 9.420928246063085e-06, "loss": 0.0341, "step": 145690 }, { "epoch": 0.3285, "grad_norm": 0.052487220615148544, "learning_rate": 9.4176955448771e-06, "loss": 0.0333, "step": 145700 }, { "epoch": 0.32855, "grad_norm": 0.05465936288237572, "learning_rate": 9.414463269705475e-06, "loss": 0.0342, "step": 145710 }, { "epoch": 0.3286, "grad_norm": 0.04915636032819748, "learning_rate": 9.41123142063656e-06, "loss": 0.0332, "step": 145720 }, { "epoch": 0.32865, "grad_norm": 0.05120699107646942, "learning_rate": 9.407999997758738e-06, "loss": 0.0321, "step": 145730 }, { "epoch": 0.3287, "grad_norm": 0.047415148466825485, "learning_rate": 9.404769001160322e-06, "loss": 0.0325, "step": 145740 }, { "epoch": 0.32875, "grad_norm": 0.06523957848548889, "learning_rate": 9.401538430929669e-06, "loss": 0.0351, "step": 145750 }, { "epoch": 0.3288, "grad_norm": 0.05517665669322014, "learning_rate": 9.398308287155085e-06, "loss": 0.032, "step": 145760 }, { "epoch": 0.32885, "grad_norm": 0.05780434235930443, "learning_rate": 9.395078569924906e-06, "loss": 0.0354, "step": 145770 }, { "epoch": 0.3289, "grad_norm": 0.05136639624834061, "learning_rate": 9.391849279327411e-06, "loss": 0.0347, "step": 145780 }, { "epoch": 0.32895, "grad_norm": 0.07062689960002899, "learning_rate": 9.388620415450889e-06, "loss": 0.0334, "step": 145790 }, { "epoch": 0.329, "grad_norm": 0.0605822429060936, "learning_rate": 9.385391978383626e-06, "loss": 0.0329, "step": 145800 }, { "epoch": 0.32905, "grad_norm": 0.0673321783542633, "learning_rate": 9.382163968213879e-06, "loss": 0.0335, "step": 145810 }, { "epoch": 0.3291, "grad_norm": 0.05488608032464981, "learning_rate": 9.378936385029901e-06, "loss": 0.0339, "step": 145820 }, { "epoch": 0.32915, "grad_norm": 0.05616806447505951, "learning_rate": 9.37570922891993e-06, "loss": 0.0333, "step": 145830 }, { "epoch": 0.3292, "grad_norm": 0.039877749979496, "learning_rate": 9.372482499972196e-06, "loss": 0.0326, "step": 145840 }, { "epoch": 0.32925, "grad_norm": 0.05098028853535652, "learning_rate": 9.369256198274926e-06, "loss": 0.0351, "step": 145850 }, { "epoch": 0.3293, "grad_norm": 0.04647049680352211, "learning_rate": 9.366030323916323e-06, "loss": 0.0342, "step": 145860 }, { "epoch": 0.32935, "grad_norm": 0.05865845829248428, "learning_rate": 9.362804876984573e-06, "loss": 0.0327, "step": 145870 }, { "epoch": 0.3294, "grad_norm": 0.05891422927379608, "learning_rate": 9.35957985756786e-06, "loss": 0.0333, "step": 145880 }, { "epoch": 0.32945, "grad_norm": 0.06232016533613205, "learning_rate": 9.356355265754362e-06, "loss": 0.0314, "step": 145890 }, { "epoch": 0.3295, "grad_norm": 0.048157431185245514, "learning_rate": 9.353131101632238e-06, "loss": 0.0317, "step": 145900 }, { "epoch": 0.32955, "grad_norm": 0.045827120542526245, "learning_rate": 9.34990736528962e-06, "loss": 0.0317, "step": 145910 }, { "epoch": 0.3296, "grad_norm": 0.05135310813784599, "learning_rate": 9.346684056814665e-06, "loss": 0.0344, "step": 145920 }, { "epoch": 0.32965, "grad_norm": 0.05288746580481529, "learning_rate": 9.343461176295476e-06, "loss": 0.0323, "step": 145930 }, { "epoch": 0.3297, "grad_norm": 0.05296698585152626, "learning_rate": 9.340238723820197e-06, "loss": 0.0326, "step": 145940 }, { "epoch": 0.32975, "grad_norm": 0.054329708218574524, "learning_rate": 9.337016699476888e-06, "loss": 0.0318, "step": 145950 }, { "epoch": 0.3298, "grad_norm": 0.041853830218315125, "learning_rate": 9.333795103353659e-06, "loss": 0.0319, "step": 145960 }, { "epoch": 0.32985, "grad_norm": 0.0503230020403862, "learning_rate": 9.330573935538598e-06, "loss": 0.0333, "step": 145970 }, { "epoch": 0.3299, "grad_norm": 0.0444575697183609, "learning_rate": 9.327353196119757e-06, "loss": 0.0324, "step": 145980 }, { "epoch": 0.32995, "grad_norm": 0.042210306972265244, "learning_rate": 9.324132885185192e-06, "loss": 0.0323, "step": 145990 }, { "epoch": 0.33, "grad_norm": 0.04618427902460098, "learning_rate": 9.320913002822934e-06, "loss": 0.0314, "step": 146000 }, { "epoch": 5e-05, "grad_norm": 0.045645494014024734, "learning_rate": 9.317693549121034e-06, "loss": 0.0326, "step": 146010 }, { "epoch": 0.0001, "grad_norm": 0.044545263051986694, "learning_rate": 9.314474524167502e-06, "loss": 0.0339, "step": 146020 }, { "epoch": 0.00015, "grad_norm": 0.05003022029995918, "learning_rate": 9.311255928050333e-06, "loss": 0.0341, "step": 146030 }, { "epoch": 0.0002, "grad_norm": 0.04548483341932297, "learning_rate": 9.308037760857544e-06, "loss": 0.0333, "step": 146040 }, { "epoch": 0.00025, "grad_norm": 0.04014163464307785, "learning_rate": 9.304820022677097e-06, "loss": 0.0327, "step": 146050 }, { "epoch": 0.0003, "grad_norm": 0.0486493743956089, "learning_rate": 9.301602713596982e-06, "loss": 0.0332, "step": 146060 }, { "epoch": 0.00035, "grad_norm": 0.056167442351579666, "learning_rate": 9.298385833705153e-06, "loss": 0.0326, "step": 146070 }, { "epoch": 0.0004, "grad_norm": 0.045112937688827515, "learning_rate": 9.295169383089547e-06, "loss": 0.0334, "step": 146080 }, { "epoch": 0.00045, "grad_norm": 0.05014557018876076, "learning_rate": 9.291953361838121e-06, "loss": 0.0329, "step": 146090 }, { "epoch": 0.0005, "grad_norm": 0.05027300864458084, "learning_rate": 9.288737770038785e-06, "loss": 0.032, "step": 146100 }, { "epoch": 0.00055, "grad_norm": 0.05353415012359619, "learning_rate": 9.28552260777946e-06, "loss": 0.0334, "step": 146110 }, { "epoch": 0.0006, "grad_norm": 0.05639102682471275, "learning_rate": 9.28230787514803e-06, "loss": 0.0334, "step": 146120 }, { "epoch": 0.00065, "grad_norm": 0.04886871203780174, "learning_rate": 9.279093572232411e-06, "loss": 0.0372, "step": 146130 }, { "epoch": 0.0007, "grad_norm": 0.0415855348110199, "learning_rate": 9.275879699120457e-06, "loss": 0.0322, "step": 146140 }, { "epoch": 0.00075, "grad_norm": 0.04586352780461311, "learning_rate": 9.272666255900053e-06, "loss": 0.0334, "step": 146150 }, { "epoch": 0.0008, "grad_norm": 0.047350455075502396, "learning_rate": 9.269453242659045e-06, "loss": 0.0343, "step": 146160 }, { "epoch": 0.00085, "grad_norm": 0.04921210557222366, "learning_rate": 9.266240659485267e-06, "loss": 0.0346, "step": 146170 }, { "epoch": 0.0009, "grad_norm": 0.044031646102666855, "learning_rate": 9.263028506466567e-06, "loss": 0.0344, "step": 146180 }, { "epoch": 0.00095, "grad_norm": 0.04523542895913124, "learning_rate": 9.259816783690756e-06, "loss": 0.0344, "step": 146190 }, { "epoch": 0.001, "grad_norm": 0.048036910593509674, "learning_rate": 9.256605491245642e-06, "loss": 0.0341, "step": 146200 }, { "epoch": 0.00105, "grad_norm": 0.05942752957344055, "learning_rate": 9.253394629219008e-06, "loss": 0.0356, "step": 146210 }, { "epoch": 0.0011, "grad_norm": 0.05776863545179367, "learning_rate": 9.250184197698653e-06, "loss": 0.0336, "step": 146220 }, { "epoch": 0.00115, "grad_norm": 0.05581040307879448, "learning_rate": 9.246974196772359e-06, "loss": 0.0339, "step": 146230 }, { "epoch": 0.0012, "grad_norm": 0.04974944889545441, "learning_rate": 9.243764626527856e-06, "loss": 0.0341, "step": 146240 }, { "epoch": 0.00125, "grad_norm": 0.054227203130722046, "learning_rate": 9.240555487052918e-06, "loss": 0.0351, "step": 146250 }, { "epoch": 0.0013, "grad_norm": 0.04993123561143875, "learning_rate": 9.237346778435264e-06, "loss": 0.0331, "step": 146260 }, { "epoch": 0.00135, "grad_norm": 0.04671796038746834, "learning_rate": 9.234138500762635e-06, "loss": 0.0325, "step": 146270 }, { "epoch": 0.0014, "grad_norm": 0.06007940322160721, "learning_rate": 9.230930654122736e-06, "loss": 0.0346, "step": 146280 }, { "epoch": 0.00145, "grad_norm": 0.059051770716905594, "learning_rate": 9.227723238603262e-06, "loss": 0.0332, "step": 146290 }, { "epoch": 0.0015, "grad_norm": 0.05878492444753647, "learning_rate": 9.224516254291915e-06, "loss": 0.0331, "step": 146300 }, { "epoch": 0.00155, "grad_norm": 0.05282504856586456, "learning_rate": 9.22130970127637e-06, "loss": 0.0324, "step": 146310 }, { "epoch": 0.0016, "grad_norm": 0.04789440706372261, "learning_rate": 9.218103579644289e-06, "loss": 0.0331, "step": 146320 }, { "epoch": 0.00165, "grad_norm": 0.06352198868989944, "learning_rate": 9.214897889483317e-06, "loss": 0.0337, "step": 146330 }, { "epoch": 0.0017, "grad_norm": 0.06606133282184601, "learning_rate": 9.211692630881108e-06, "loss": 0.0331, "step": 146340 }, { "epoch": 0.00175, "grad_norm": 0.05790630355477333, "learning_rate": 9.2084878039253e-06, "loss": 0.0339, "step": 146350 }, { "epoch": 0.0018, "grad_norm": 0.10246631503105164, "learning_rate": 9.205283408703502e-06, "loss": 0.0348, "step": 146360 }, { "epoch": 0.00185, "grad_norm": 0.1105370968580246, "learning_rate": 9.202079445303322e-06, "loss": 0.0325, "step": 146370 }, { "epoch": 0.0019, "grad_norm": 0.06171422451734543, "learning_rate": 9.198875913812346e-06, "loss": 0.0344, "step": 146380 }, { "epoch": 0.00195, "grad_norm": 0.059385087341070175, "learning_rate": 9.195672814318176e-06, "loss": 0.0336, "step": 146390 }, { "epoch": 0.002, "grad_norm": 0.06413570791482925, "learning_rate": 9.192470146908375e-06, "loss": 0.035, "step": 146400 }, { "epoch": 0.00205, "grad_norm": 0.0679282695055008, "learning_rate": 9.18926791167049e-06, "loss": 0.0338, "step": 146410 }, { "epoch": 0.0021, "grad_norm": 0.0589129775762558, "learning_rate": 9.18606610869209e-06, "loss": 0.034, "step": 146420 }, { "epoch": 0.00215, "grad_norm": 0.05278779938817024, "learning_rate": 9.182864738060693e-06, "loss": 0.0327, "step": 146430 }, { "epoch": 0.0022, "grad_norm": 0.04730449244379997, "learning_rate": 9.179663799863849e-06, "loss": 0.0318, "step": 146440 }, { "epoch": 0.00225, "grad_norm": 0.058954108506441116, "learning_rate": 9.176463294189037e-06, "loss": 0.0347, "step": 146450 }, { "epoch": 0.0023, "grad_norm": 0.050698429346084595, "learning_rate": 9.173263221123773e-06, "loss": 0.034, "step": 146460 }, { "epoch": 0.00235, "grad_norm": 0.05391349270939827, "learning_rate": 9.170063580755556e-06, "loss": 0.0334, "step": 146470 }, { "epoch": 0.0024, "grad_norm": 0.05194341391324997, "learning_rate": 9.166864373171854e-06, "loss": 0.0326, "step": 146480 }, { "epoch": 0.00245, "grad_norm": 0.05862382799386978, "learning_rate": 9.16366559846013e-06, "loss": 0.0333, "step": 146490 }, { "epoch": 0.0025, "grad_norm": 0.05081118643283844, "learning_rate": 9.160467256707834e-06, "loss": 0.0336, "step": 146500 }, { "epoch": 0.00255, "grad_norm": 0.051504164934158325, "learning_rate": 9.157269348002417e-06, "loss": 0.0342, "step": 146510 }, { "epoch": 0.0026, "grad_norm": 0.06455394625663757, "learning_rate": 9.154071872431305e-06, "loss": 0.0358, "step": 146520 }, { "epoch": 0.00265, "grad_norm": 0.06332694739103317, "learning_rate": 9.15087483008191e-06, "loss": 0.033, "step": 146530 }, { "epoch": 0.0027, "grad_norm": 0.053663916885852814, "learning_rate": 9.147678221041647e-06, "loss": 0.0329, "step": 146540 }, { "epoch": 0.00275, "grad_norm": 0.0548773817718029, "learning_rate": 9.1444820453979e-06, "loss": 0.033, "step": 146550 }, { "epoch": 0.0028, "grad_norm": 0.05587553232908249, "learning_rate": 9.141286303238065e-06, "loss": 0.0321, "step": 146560 }, { "epoch": 0.00285, "grad_norm": 0.046682775020599365, "learning_rate": 9.138090994649503e-06, "loss": 0.0332, "step": 146570 }, { "epoch": 0.0029, "grad_norm": 0.05073460564017296, "learning_rate": 9.134896119719578e-06, "loss": 0.0319, "step": 146580 }, { "epoch": 0.00295, "grad_norm": 0.09987479448318481, "learning_rate": 9.131701678535621e-06, "loss": 0.0333, "step": 146590 }, { "epoch": 0.003, "grad_norm": 0.058221518993377686, "learning_rate": 9.128507671184989e-06, "loss": 0.0319, "step": 146600 }, { "epoch": 0.00305, "grad_norm": 0.05034530907869339, "learning_rate": 9.125314097754994e-06, "loss": 0.0336, "step": 146610 }, { "epoch": 0.0031, "grad_norm": 0.04986129701137543, "learning_rate": 9.12212095833294e-06, "loss": 0.0335, "step": 146620 }, { "epoch": 0.00315, "grad_norm": 0.056024134159088135, "learning_rate": 9.11892825300614e-06, "loss": 0.0345, "step": 146630 }, { "epoch": 0.0032, "grad_norm": 0.041960589587688446, "learning_rate": 9.11573598186187e-06, "loss": 0.0333, "step": 146640 }, { "epoch": 0.00325, "grad_norm": 0.04810328409075737, "learning_rate": 9.112544144987417e-06, "loss": 0.0338, "step": 146650 }, { "epoch": 0.0033, "grad_norm": 0.052023597061634064, "learning_rate": 9.10935274247004e-06, "loss": 0.0326, "step": 146660 }, { "epoch": 0.00335, "grad_norm": 0.05089326947927475, "learning_rate": 9.10616177439698e-06, "loss": 0.0323, "step": 146670 }, { "epoch": 0.0034, "grad_norm": 0.046460434794425964, "learning_rate": 9.102971240855494e-06, "loss": 0.0333, "step": 146680 }, { "epoch": 0.00345, "grad_norm": 0.04194452613592148, "learning_rate": 9.099781141932803e-06, "loss": 0.0326, "step": 146690 }, { "epoch": 0.0035, "grad_norm": 0.03830978274345398, "learning_rate": 9.09659147771612e-06, "loss": 0.0323, "step": 146700 }, { "epoch": 0.00355, "grad_norm": 0.05357314646244049, "learning_rate": 9.093402248292645e-06, "loss": 0.0339, "step": 146710 }, { "epoch": 0.0036, "grad_norm": 0.040867824107408524, "learning_rate": 9.090213453749577e-06, "loss": 0.0322, "step": 146720 }, { "epoch": 0.00365, "grad_norm": 0.03825710713863373, "learning_rate": 9.087025094174112e-06, "loss": 0.032, "step": 146730 }, { "epoch": 0.0037, "grad_norm": 0.047729622572660446, "learning_rate": 9.083837169653387e-06, "loss": 0.0337, "step": 146740 }, { "epoch": 0.00375, "grad_norm": 0.05512430518865585, "learning_rate": 9.08064968027458e-06, "loss": 0.0336, "step": 146750 }, { "epoch": 0.0038, "grad_norm": 0.04865284264087677, "learning_rate": 9.077462626124825e-06, "loss": 0.0337, "step": 146760 }, { "epoch": 0.00385, "grad_norm": 0.052600521594285965, "learning_rate": 9.074276007291266e-06, "loss": 0.0332, "step": 146770 }, { "epoch": 0.0039, "grad_norm": 0.07956542819738388, "learning_rate": 9.071089823861021e-06, "loss": 0.0351, "step": 146780 }, { "epoch": 0.00395, "grad_norm": 0.06006759777665138, "learning_rate": 9.067904075921182e-06, "loss": 0.0329, "step": 146790 }, { "epoch": 0.004, "grad_norm": 0.05700773000717163, "learning_rate": 9.064718763558874e-06, "loss": 0.0335, "step": 146800 }, { "epoch": 0.00405, "grad_norm": 0.05110654979944229, "learning_rate": 9.061533886861168e-06, "loss": 0.032, "step": 146810 }, { "epoch": 0.0041, "grad_norm": 0.04639684036374092, "learning_rate": 9.058349445915135e-06, "loss": 0.0324, "step": 146820 }, { "epoch": 0.00415, "grad_norm": 0.053005319088697433, "learning_rate": 9.05516544080783e-06, "loss": 0.0336, "step": 146830 }, { "epoch": 0.0042, "grad_norm": 0.08806686848402023, "learning_rate": 9.051981871626326e-06, "loss": 0.0348, "step": 146840 }, { "epoch": 0.00425, "grad_norm": 0.052222974598407745, "learning_rate": 9.048798738457632e-06, "loss": 0.0333, "step": 146850 }, { "epoch": 0.0043, "grad_norm": 0.049556005746126175, "learning_rate": 9.045616041388799e-06, "loss": 0.0341, "step": 146860 }, { "epoch": 0.00435, "grad_norm": 0.04521545395255089, "learning_rate": 9.042433780506829e-06, "loss": 0.0336, "step": 146870 }, { "epoch": 0.0044, "grad_norm": 0.0510425828397274, "learning_rate": 9.039251955898715e-06, "loss": 0.0335, "step": 146880 }, { "epoch": 0.00445, "grad_norm": 0.05148714408278465, "learning_rate": 9.036070567651463e-06, "loss": 0.0352, "step": 146890 }, { "epoch": 0.0045, "grad_norm": 0.06311694532632828, "learning_rate": 9.03288961585205e-06, "loss": 0.035, "step": 146900 }, { "epoch": 0.00455, "grad_norm": 0.04238202050328255, "learning_rate": 9.029709100587425e-06, "loss": 0.0329, "step": 146910 }, { "epoch": 0.0046, "grad_norm": 0.047373171895742416, "learning_rate": 9.02652902194456e-06, "loss": 0.0343, "step": 146920 }, { "epoch": 0.00465, "grad_norm": 0.04213150963187218, "learning_rate": 9.023349380010384e-06, "loss": 0.0329, "step": 146930 }, { "epoch": 0.0047, "grad_norm": 0.04656418040394783, "learning_rate": 9.020170174871851e-06, "loss": 0.0329, "step": 146940 }, { "epoch": 0.00475, "grad_norm": 0.05302516743540764, "learning_rate": 9.016991406615843e-06, "loss": 0.0325, "step": 146950 }, { "epoch": 0.0048, "grad_norm": 0.04817183315753937, "learning_rate": 9.013813075329297e-06, "loss": 0.0334, "step": 146960 }, { "epoch": 0.00485, "grad_norm": 0.06088642403483391, "learning_rate": 9.010635181099087e-06, "loss": 0.0333, "step": 146970 }, { "epoch": 0.0049, "grad_norm": 0.042321301996707916, "learning_rate": 9.007457724012112e-06, "loss": 0.0339, "step": 146980 }, { "epoch": 0.00495, "grad_norm": 0.05898347496986389, "learning_rate": 9.004280704155233e-06, "loss": 0.0336, "step": 146990 }, { "epoch": 0.005, "grad_norm": 0.04829243943095207, "learning_rate": 9.001104121615303e-06, "loss": 0.0327, "step": 147000 }, { "epoch": 0.00505, "grad_norm": 0.05537727102637291, "learning_rate": 8.997927976479185e-06, "loss": 0.0333, "step": 147010 }, { "epoch": 0.0051, "grad_norm": 0.04421060532331467, "learning_rate": 8.994752268833703e-06, "loss": 0.0317, "step": 147020 }, { "epoch": 0.00515, "grad_norm": 0.05640105530619621, "learning_rate": 8.991576998765682e-06, "loss": 0.0318, "step": 147030 }, { "epoch": 0.0052, "grad_norm": 0.05159401521086693, "learning_rate": 8.988402166361923e-06, "loss": 0.0324, "step": 147040 }, { "epoch": 0.00525, "grad_norm": 0.040277011692523956, "learning_rate": 8.985227771709233e-06, "loss": 0.0323, "step": 147050 }, { "epoch": 0.0053, "grad_norm": 0.04992297664284706, "learning_rate": 8.98205381489441e-06, "loss": 0.0329, "step": 147060 }, { "epoch": 0.00535, "grad_norm": 0.04317447543144226, "learning_rate": 8.978880296004217e-06, "loss": 0.033, "step": 147070 }, { "epoch": 0.0054, "grad_norm": 0.044240791350603104, "learning_rate": 8.975707215125417e-06, "loss": 0.0323, "step": 147080 }, { "epoch": 0.00545, "grad_norm": 0.055999599397182465, "learning_rate": 8.97253457234475e-06, "loss": 0.0322, "step": 147090 }, { "epoch": 0.0055, "grad_norm": 0.053453151136636734, "learning_rate": 8.969362367748982e-06, "loss": 0.0328, "step": 147100 }, { "epoch": 0.00555, "grad_norm": 0.04140254482626915, "learning_rate": 8.96619060142482e-06, "loss": 0.0318, "step": 147110 }, { "epoch": 0.0056, "grad_norm": 0.04652618616819382, "learning_rate": 8.963019273458975e-06, "loss": 0.0325, "step": 147120 }, { "epoch": 0.00565, "grad_norm": 0.049115218222141266, "learning_rate": 8.959848383938168e-06, "loss": 0.0326, "step": 147130 }, { "epoch": 0.0057, "grad_norm": 0.063838429749012, "learning_rate": 8.95667793294907e-06, "loss": 0.0348, "step": 147140 }, { "epoch": 0.00575, "grad_norm": 0.055249787867069244, "learning_rate": 8.953507920578391e-06, "loss": 0.0324, "step": 147150 }, { "epoch": 0.0058, "grad_norm": 0.06927811354398727, "learning_rate": 8.950338346912757e-06, "loss": 0.0333, "step": 147160 }, { "epoch": 0.00585, "grad_norm": 0.05433012172579765, "learning_rate": 8.947169212038842e-06, "loss": 0.0341, "step": 147170 }, { "epoch": 0.0059, "grad_norm": 0.0582391656935215, "learning_rate": 8.9440005160433e-06, "loss": 0.033, "step": 147180 }, { "epoch": 0.00595, "grad_norm": 0.04783938080072403, "learning_rate": 8.940832259012751e-06, "loss": 0.0327, "step": 147190 }, { "epoch": 0.006, "grad_norm": 0.05929884314537048, "learning_rate": 8.937664441033817e-06, "loss": 0.0318, "step": 147200 }, { "epoch": 0.00605, "grad_norm": 0.06661206483840942, "learning_rate": 8.934497062193092e-06, "loss": 0.0329, "step": 147210 }, { "epoch": 0.0061, "grad_norm": 0.05749279633164406, "learning_rate": 8.931330122577191e-06, "loss": 0.032, "step": 147220 }, { "epoch": 0.00615, "grad_norm": 0.05884834751486778, "learning_rate": 8.92816362227269e-06, "loss": 0.0334, "step": 147230 }, { "epoch": 0.0062, "grad_norm": 0.05903388932347298, "learning_rate": 8.924997561366146e-06, "loss": 0.0331, "step": 147240 }, { "epoch": 0.00625, "grad_norm": 0.06084084510803223, "learning_rate": 8.92183193994414e-06, "loss": 0.0342, "step": 147250 }, { "epoch": 0.0063, "grad_norm": 0.0606980137526989, "learning_rate": 8.9186667580932e-06, "loss": 0.0335, "step": 147260 }, { "epoch": 0.00635, "grad_norm": 0.057444553822278976, "learning_rate": 8.915502015899876e-06, "loss": 0.0333, "step": 147270 }, { "epoch": 0.0064, "grad_norm": 0.07764684408903122, "learning_rate": 8.912337713450685e-06, "loss": 0.0337, "step": 147280 }, { "epoch": 0.00645, "grad_norm": 0.05538986250758171, "learning_rate": 8.909173850832131e-06, "loss": 0.0336, "step": 147290 }, { "epoch": 0.0065, "grad_norm": 0.07165313512086868, "learning_rate": 8.906010428130723e-06, "loss": 0.0342, "step": 147300 }, { "epoch": 0.00655, "grad_norm": 0.060678672045469284, "learning_rate": 8.902847445432947e-06, "loss": 0.0336, "step": 147310 }, { "epoch": 0.0066, "grad_norm": 0.054712288081645966, "learning_rate": 8.899684902825273e-06, "loss": 0.0333, "step": 147320 }, { "epoch": 0.00665, "grad_norm": 0.059218235313892365, "learning_rate": 8.896522800394158e-06, "loss": 0.0347, "step": 147330 }, { "epoch": 0.0067, "grad_norm": 0.050863150507211685, "learning_rate": 8.893361138226067e-06, "loss": 0.0336, "step": 147340 }, { "epoch": 0.00675, "grad_norm": 0.047979872673749924, "learning_rate": 8.890199916407425e-06, "loss": 0.0343, "step": 147350 }, { "epoch": 0.0068, "grad_norm": 0.050607260316610336, "learning_rate": 8.88703913502467e-06, "loss": 0.0328, "step": 147360 }, { "epoch": 0.00685, "grad_norm": 0.04506528750061989, "learning_rate": 8.883878794164213e-06, "loss": 0.0334, "step": 147370 }, { "epoch": 0.0069, "grad_norm": 0.04153824225068092, "learning_rate": 8.880718893912449e-06, "loss": 0.0335, "step": 147380 }, { "epoch": 0.00695, "grad_norm": 0.044839829206466675, "learning_rate": 8.87755943435578e-06, "loss": 0.0342, "step": 147390 }, { "epoch": 0.007, "grad_norm": 0.046069853007793427, "learning_rate": 8.87440041558058e-06, "loss": 0.0323, "step": 147400 }, { "epoch": 0.00705, "grad_norm": 0.04640653729438782, "learning_rate": 8.871241837673216e-06, "loss": 0.0315, "step": 147410 }, { "epoch": 0.0071, "grad_norm": 0.054289527237415314, "learning_rate": 8.868083700720034e-06, "loss": 0.0316, "step": 147420 }, { "epoch": 0.00715, "grad_norm": 0.045878611505031586, "learning_rate": 8.864926004807381e-06, "loss": 0.0325, "step": 147430 }, { "epoch": 0.0072, "grad_norm": 0.05043681710958481, "learning_rate": 8.861768750021604e-06, "loss": 0.0326, "step": 147440 }, { "epoch": 0.00725, "grad_norm": 0.0385061539709568, "learning_rate": 8.858611936448993e-06, "loss": 0.032, "step": 147450 }, { "epoch": 0.0073, "grad_norm": 0.04848558083176613, "learning_rate": 8.855455564175875e-06, "loss": 0.0324, "step": 147460 }, { "epoch": 0.00735, "grad_norm": 0.04834940657019615, "learning_rate": 8.852299633288527e-06, "loss": 0.033, "step": 147470 }, { "epoch": 0.0074, "grad_norm": 0.05229799076914787, "learning_rate": 8.84914414387325e-06, "loss": 0.0323, "step": 147480 }, { "epoch": 0.00745, "grad_norm": 0.06077688932418823, "learning_rate": 8.845989096016305e-06, "loss": 0.0329, "step": 147490 }, { "epoch": 0.0075, "grad_norm": 0.04313872382044792, "learning_rate": 8.84283448980394e-06, "loss": 0.0317, "step": 147500 }, { "epoch": 0.00755, "grad_norm": 0.049466077238321304, "learning_rate": 8.839680325322419e-06, "loss": 0.032, "step": 147510 }, { "epoch": 0.0076, "grad_norm": 0.060395073145627975, "learning_rate": 8.836526602657966e-06, "loss": 0.0328, "step": 147520 }, { "epoch": 0.00765, "grad_norm": 0.04946276545524597, "learning_rate": 8.833373321896805e-06, "loss": 0.0304, "step": 147530 }, { "epoch": 0.0077, "grad_norm": 0.0486181303858757, "learning_rate": 8.830220483125135e-06, "loss": 0.0307, "step": 147540 }, { "epoch": 0.00775, "grad_norm": 0.0499403178691864, "learning_rate": 8.827068086429161e-06, "loss": 0.0309, "step": 147550 }, { "epoch": 0.0078, "grad_norm": 0.042571984231472015, "learning_rate": 8.823916131895079e-06, "loss": 0.0319, "step": 147560 }, { "epoch": 0.00785, "grad_norm": 0.06789781153202057, "learning_rate": 8.820764619609053e-06, "loss": 0.032, "step": 147570 }, { "epoch": 0.0079, "grad_norm": 0.05580562725663185, "learning_rate": 8.817613549657244e-06, "loss": 0.0321, "step": 147580 }, { "epoch": 0.00795, "grad_norm": 0.04523137956857681, "learning_rate": 8.81446292212579e-06, "loss": 0.0302, "step": 147590 }, { "epoch": 0.008, "grad_norm": 0.04392389580607414, "learning_rate": 8.811312737100852e-06, "loss": 0.031, "step": 147600 }, { "epoch": 0.00805, "grad_norm": 0.045671090483665466, "learning_rate": 8.80816299466854e-06, "loss": 0.0317, "step": 147610 }, { "epoch": 0.0081, "grad_norm": 0.04727155715227127, "learning_rate": 8.80501369491496e-06, "loss": 0.032, "step": 147620 }, { "epoch": 0.00815, "grad_norm": 0.05655229836702347, "learning_rate": 8.80186483792623e-06, "loss": 0.0312, "step": 147630 }, { "epoch": 0.0082, "grad_norm": 0.04339873418211937, "learning_rate": 8.798716423788422e-06, "loss": 0.0321, "step": 147640 }, { "epoch": 0.00825, "grad_norm": 0.05079496279358864, "learning_rate": 8.795568452587639e-06, "loss": 0.033, "step": 147650 }, { "epoch": 0.0083, "grad_norm": 0.05334087833762169, "learning_rate": 8.792420924409905e-06, "loss": 0.0337, "step": 147660 }, { "epoch": 0.00835, "grad_norm": 0.04979987442493439, "learning_rate": 8.789273839341297e-06, "loss": 0.0343, "step": 147670 }, { "epoch": 0.0084, "grad_norm": 0.05215590074658394, "learning_rate": 8.78612719746786e-06, "loss": 0.0324, "step": 147680 }, { "epoch": 0.00845, "grad_norm": 0.04747818037867546, "learning_rate": 8.782980998875615e-06, "loss": 0.032, "step": 147690 }, { "epoch": 0.0085, "grad_norm": 0.04803787171840668, "learning_rate": 8.779835243650578e-06, "loss": 0.032, "step": 147700 }, { "epoch": 0.00855, "grad_norm": 0.04493388906121254, "learning_rate": 8.776689931878744e-06, "loss": 0.0323, "step": 147710 }, { "epoch": 0.0086, "grad_norm": 0.06509993970394135, "learning_rate": 8.773545063646116e-06, "loss": 0.0327, "step": 147720 }, { "epoch": 0.00865, "grad_norm": 0.046299874782562256, "learning_rate": 8.770400639038672e-06, "loss": 0.0326, "step": 147730 }, { "epoch": 0.0087, "grad_norm": 0.052435941994190216, "learning_rate": 8.76725665814237e-06, "loss": 0.0318, "step": 147740 }, { "epoch": 0.00875, "grad_norm": 0.055246591567993164, "learning_rate": 8.764113121043182e-06, "loss": 0.0333, "step": 147750 }, { "epoch": 0.0088, "grad_norm": 0.04831727594137192, "learning_rate": 8.76097002782703e-06, "loss": 0.0324, "step": 147760 }, { "epoch": 0.00885, "grad_norm": 0.04088793694972992, "learning_rate": 8.75782737857987e-06, "loss": 0.0322, "step": 147770 }, { "epoch": 0.0089, "grad_norm": 0.05812996253371239, "learning_rate": 8.754685173387604e-06, "loss": 0.0331, "step": 147780 }, { "epoch": 0.00895, "grad_norm": 0.06011068448424339, "learning_rate": 8.751543412336144e-06, "loss": 0.0329, "step": 147790 }, { "epoch": 0.009, "grad_norm": 0.058193374425172806, "learning_rate": 8.748402095511374e-06, "loss": 0.034, "step": 147800 }, { "epoch": 0.00905, "grad_norm": 0.05291305109858513, "learning_rate": 8.745261222999181e-06, "loss": 0.0333, "step": 147810 }, { "epoch": 0.0091, "grad_norm": 0.0512242391705513, "learning_rate": 8.742120794885464e-06, "loss": 0.0337, "step": 147820 }, { "epoch": 0.00915, "grad_norm": 0.04407033696770668, "learning_rate": 8.738980811256033e-06, "loss": 0.0331, "step": 147830 }, { "epoch": 0.0092, "grad_norm": 0.047220148146152496, "learning_rate": 8.735841272196771e-06, "loss": 0.0338, "step": 147840 }, { "epoch": 0.00925, "grad_norm": 0.04800218343734741, "learning_rate": 8.732702177793487e-06, "loss": 0.0337, "step": 147850 }, { "epoch": 0.0093, "grad_norm": 0.044987574219703674, "learning_rate": 8.729563528132023e-06, "loss": 0.0332, "step": 147860 }, { "epoch": 0.00935, "grad_norm": 0.05002535879611969, "learning_rate": 8.726425323298181e-06, "loss": 0.0325, "step": 147870 }, { "epoch": 0.0094, "grad_norm": 0.05462311580777168, "learning_rate": 8.723287563377749e-06, "loss": 0.0334, "step": 147880 }, { "epoch": 0.00945, "grad_norm": 0.048291947692632675, "learning_rate": 8.720150248456527e-06, "loss": 0.0332, "step": 147890 }, { "epoch": 0.0095, "grad_norm": 0.06289687007665634, "learning_rate": 8.717013378620282e-06, "loss": 0.0339, "step": 147900 }, { "epoch": 0.00955, "grad_norm": 0.04649645835161209, "learning_rate": 8.713876953954772e-06, "loss": 0.0323, "step": 147910 }, { "epoch": 0.0096, "grad_norm": 0.047846417874097824, "learning_rate": 8.710740974545742e-06, "loss": 0.0318, "step": 147920 }, { "epoch": 0.00965, "grad_norm": 0.04475260153412819, "learning_rate": 8.707605440478933e-06, "loss": 0.0315, "step": 147930 }, { "epoch": 0.0097, "grad_norm": 0.04678916186094284, "learning_rate": 8.704470351840077e-06, "loss": 0.0333, "step": 147940 }, { "epoch": 0.00975, "grad_norm": 0.05469123646616936, "learning_rate": 8.701335708714883e-06, "loss": 0.0325, "step": 147950 }, { "epoch": 0.0098, "grad_norm": 0.05012594163417816, "learning_rate": 8.698201511189048e-06, "loss": 0.0325, "step": 147960 }, { "epoch": 0.00985, "grad_norm": 0.05331389605998993, "learning_rate": 8.695067759348246e-06, "loss": 0.0362, "step": 147970 }, { "epoch": 0.0099, "grad_norm": 0.041243359446525574, "learning_rate": 8.691934453278178e-06, "loss": 0.0322, "step": 147980 }, { "epoch": 0.00995, "grad_norm": 0.04781245440244675, "learning_rate": 8.688801593064494e-06, "loss": 0.0338, "step": 147990 }, { "epoch": 0.01, "grad_norm": 0.056681521236896515, "learning_rate": 8.68566917879284e-06, "loss": 0.0333, "step": 148000 }, { "epoch": 0.01005, "grad_norm": 0.056346699595451355, "learning_rate": 8.682537210548868e-06, "loss": 0.0327, "step": 148010 }, { "epoch": 0.0101, "grad_norm": 0.050917401909828186, "learning_rate": 8.679405688418193e-06, "loss": 0.0323, "step": 148020 }, { "epoch": 0.01015, "grad_norm": 0.04824027791619301, "learning_rate": 8.676274612486449e-06, "loss": 0.032, "step": 148030 }, { "epoch": 0.0102, "grad_norm": 0.04922124743461609, "learning_rate": 8.67314398283921e-06, "loss": 0.0334, "step": 148040 }, { "epoch": 0.01025, "grad_norm": 0.05176009237766266, "learning_rate": 8.670013799562087e-06, "loss": 0.035, "step": 148050 }, { "epoch": 0.0103, "grad_norm": 0.0421922504901886, "learning_rate": 8.666884062740646e-06, "loss": 0.0328, "step": 148060 }, { "epoch": 0.01035, "grad_norm": 0.04595519229769707, "learning_rate": 8.663754772460464e-06, "loss": 0.0324, "step": 148070 }, { "epoch": 0.0104, "grad_norm": 0.06706260144710541, "learning_rate": 8.660625928807092e-06, "loss": 0.0337, "step": 148080 }, { "epoch": 0.01045, "grad_norm": 0.044981446117162704, "learning_rate": 8.657497531866057e-06, "loss": 0.0327, "step": 148090 }, { "epoch": 0.0105, "grad_norm": 0.06317142397165298, "learning_rate": 8.654369581722912e-06, "loss": 0.0332, "step": 148100 }, { "epoch": 0.01055, "grad_norm": 0.057659924030303955, "learning_rate": 8.65124207846316e-06, "loss": 0.0344, "step": 148110 }, { "epoch": 0.0106, "grad_norm": 0.05683725327253342, "learning_rate": 8.648115022172299e-06, "loss": 0.0333, "step": 148120 }, { "epoch": 0.01065, "grad_norm": 0.04943874478340149, "learning_rate": 8.64498841293584e-06, "loss": 0.0341, "step": 148130 }, { "epoch": 0.0107, "grad_norm": 0.05976469814777374, "learning_rate": 8.641862250839245e-06, "loss": 0.032, "step": 148140 }, { "epoch": 0.01075, "grad_norm": 0.052283961325883865, "learning_rate": 8.638736535967998e-06, "loss": 0.0317, "step": 148150 }, { "epoch": 0.0108, "grad_norm": 0.05344764515757561, "learning_rate": 8.635611268407545e-06, "loss": 0.0339, "step": 148160 }, { "epoch": 0.01085, "grad_norm": 0.04558238387107849, "learning_rate": 8.632486448243335e-06, "loss": 0.0327, "step": 148170 }, { "epoch": 0.0109, "grad_norm": 0.05175260826945305, "learning_rate": 8.62936207556079e-06, "loss": 0.0326, "step": 148180 }, { "epoch": 0.01095, "grad_norm": 0.04138151556253433, "learning_rate": 8.62623815044534e-06, "loss": 0.0312, "step": 148190 }, { "epoch": 0.011, "grad_norm": 0.054857030510902405, "learning_rate": 8.62311467298239e-06, "loss": 0.032, "step": 148200 }, { "epoch": 0.01105, "grad_norm": 0.04743338003754616, "learning_rate": 8.619991643257324e-06, "loss": 0.0316, "step": 148210 }, { "epoch": 0.0111, "grad_norm": 0.04671315848827362, "learning_rate": 8.61686906135554e-06, "loss": 0.0323, "step": 148220 }, { "epoch": 0.01115, "grad_norm": 0.04476676881313324, "learning_rate": 8.613746927362392e-06, "loss": 0.0322, "step": 148230 }, { "epoch": 0.0112, "grad_norm": 0.04818776994943619, "learning_rate": 8.610625241363265e-06, "loss": 0.0317, "step": 148240 }, { "epoch": 0.01125, "grad_norm": 0.048856962472200394, "learning_rate": 8.607504003443473e-06, "loss": 0.0315, "step": 148250 }, { "epoch": 0.0113, "grad_norm": 0.048947639763355255, "learning_rate": 8.604383213688358e-06, "loss": 0.035, "step": 148260 }, { "epoch": 0.01135, "grad_norm": 0.059466104954481125, "learning_rate": 8.601262872183257e-06, "loss": 0.0344, "step": 148270 }, { "epoch": 0.0114, "grad_norm": 0.04785541072487831, "learning_rate": 8.598142979013469e-06, "loss": 0.0339, "step": 148280 }, { "epoch": 0.01145, "grad_norm": 0.05641579627990723, "learning_rate": 8.595023534264291e-06, "loss": 0.0336, "step": 148290 }, { "epoch": 0.0115, "grad_norm": 0.05350305512547493, "learning_rate": 8.591904538020995e-06, "loss": 0.0333, "step": 148300 }, { "epoch": 0.01155, "grad_norm": 0.057271573692560196, "learning_rate": 8.588785990368866e-06, "loss": 0.0335, "step": 148310 }, { "epoch": 0.0116, "grad_norm": 0.06025751307606697, "learning_rate": 8.585667891393179e-06, "loss": 0.0352, "step": 148320 }, { "epoch": 0.01165, "grad_norm": 0.061855290085077286, "learning_rate": 8.58255024117915e-06, "loss": 0.0346, "step": 148330 }, { "epoch": 0.0117, "grad_norm": 0.05323246493935585, "learning_rate": 8.579433039812037e-06, "loss": 0.0347, "step": 148340 }, { "epoch": 0.01175, "grad_norm": 0.09866755455732346, "learning_rate": 8.576316287377047e-06, "loss": 0.0343, "step": 148350 }, { "epoch": 0.0118, "grad_norm": 0.09101955592632294, "learning_rate": 8.57319998395941e-06, "loss": 0.0334, "step": 148360 }, { "epoch": 0.01185, "grad_norm": 0.09026189893484116, "learning_rate": 8.570084129644312e-06, "loss": 0.0334, "step": 148370 }, { "epoch": 0.0119, "grad_norm": 0.05150333791971207, "learning_rate": 8.566968724516933e-06, "loss": 0.0345, "step": 148380 }, { "epoch": 0.01195, "grad_norm": 0.051717933267354965, "learning_rate": 8.563853768662463e-06, "loss": 0.0339, "step": 148390 }, { "epoch": 0.012, "grad_norm": 0.04366070404648781, "learning_rate": 8.56073926216606e-06, "loss": 0.0327, "step": 148400 }, { "epoch": 0.01205, "grad_norm": 0.04918728768825531, "learning_rate": 8.557625205112864e-06, "loss": 0.0336, "step": 148410 }, { "epoch": 0.0121, "grad_norm": 0.04548975080251694, "learning_rate": 8.554511597588017e-06, "loss": 0.032, "step": 148420 }, { "epoch": 0.01215, "grad_norm": 0.048331983387470245, "learning_rate": 8.551398439676648e-06, "loss": 0.0334, "step": 148430 }, { "epoch": 0.0122, "grad_norm": 0.05829129368066788, "learning_rate": 8.548285731463856e-06, "loss": 0.0335, "step": 148440 }, { "epoch": 0.01225, "grad_norm": 0.05428208038210869, "learning_rate": 8.545173473034765e-06, "loss": 0.0335, "step": 148450 }, { "epoch": 0.0123, "grad_norm": 0.044460829347372055, "learning_rate": 8.542061664474446e-06, "loss": 0.0346, "step": 148460 }, { "epoch": 0.01235, "grad_norm": 0.04665042832493782, "learning_rate": 8.53895030586797e-06, "loss": 0.0335, "step": 148470 }, { "epoch": 0.0124, "grad_norm": 0.04890317842364311, "learning_rate": 8.535839397300418e-06, "loss": 0.0342, "step": 148480 }, { "epoch": 0.01245, "grad_norm": 0.04238392040133476, "learning_rate": 8.532728938856832e-06, "loss": 0.0349, "step": 148490 }, { "epoch": 0.0125, "grad_norm": 0.03974468261003494, "learning_rate": 8.529618930622241e-06, "loss": 0.033, "step": 148500 }, { "epoch": 0.01255, "grad_norm": 0.05076686665415764, "learning_rate": 8.52650937268169e-06, "loss": 0.036, "step": 148510 }, { "epoch": 0.0126, "grad_norm": 0.043793726712465286, "learning_rate": 8.523400265120174e-06, "loss": 0.0347, "step": 148520 }, { "epoch": 0.01265, "grad_norm": 0.04453187808394432, "learning_rate": 8.520291608022724e-06, "loss": 0.0344, "step": 148530 }, { "epoch": 0.0127, "grad_norm": 0.0442209355533123, "learning_rate": 8.51718340147429e-06, "loss": 0.034, "step": 148540 }, { "epoch": 0.01275, "grad_norm": 0.05199456587433815, "learning_rate": 8.51407564555988e-06, "loss": 0.0331, "step": 148550 }, { "epoch": 0.0128, "grad_norm": 0.05228140205144882, "learning_rate": 8.51096834036444e-06, "loss": 0.0335, "step": 148560 }, { "epoch": 0.01285, "grad_norm": 0.0460548959672451, "learning_rate": 8.50786148597294e-06, "loss": 0.0337, "step": 148570 }, { "epoch": 0.0129, "grad_norm": 0.05205879732966423, "learning_rate": 8.504755082470308e-06, "loss": 0.0336, "step": 148580 }, { "epoch": 0.01295, "grad_norm": 0.053257234394550323, "learning_rate": 8.501649129941472e-06, "loss": 0.0329, "step": 148590 }, { "epoch": 0.013, "grad_norm": 0.06522348523139954, "learning_rate": 8.498543628471353e-06, "loss": 0.0351, "step": 148600 }, { "epoch": 0.01305, "grad_norm": 0.054856497794389725, "learning_rate": 8.495438578144856e-06, "loss": 0.0328, "step": 148610 }, { "epoch": 0.0131, "grad_norm": 0.04815627261996269, "learning_rate": 8.492333979046868e-06, "loss": 0.0327, "step": 148620 }, { "epoch": 0.01315, "grad_norm": 0.04261309280991554, "learning_rate": 8.489229831262257e-06, "loss": 0.0315, "step": 148630 }, { "epoch": 0.0132, "grad_norm": 0.0457463413476944, "learning_rate": 8.4861261348759e-06, "loss": 0.0338, "step": 148640 }, { "epoch": 0.01325, "grad_norm": 0.044380463659763336, "learning_rate": 8.483022889972658e-06, "loss": 0.0332, "step": 148650 }, { "epoch": 0.0133, "grad_norm": 0.05065517500042915, "learning_rate": 8.479920096637367e-06, "loss": 0.0334, "step": 148660 }, { "epoch": 0.01335, "grad_norm": 0.04468074440956116, "learning_rate": 8.476817754954855e-06, "loss": 0.0324, "step": 148670 }, { "epoch": 0.0134, "grad_norm": 0.05222933366894722, "learning_rate": 8.473715865009927e-06, "loss": 0.0337, "step": 148680 }, { "epoch": 0.01345, "grad_norm": 0.04466511681675911, "learning_rate": 8.470614426887407e-06, "loss": 0.033, "step": 148690 }, { "epoch": 0.0135, "grad_norm": 0.04284243658185005, "learning_rate": 8.467513440672081e-06, "loss": 0.0323, "step": 148700 }, { "epoch": 0.01355, "grad_norm": 0.04450090974569321, "learning_rate": 8.464412906448718e-06, "loss": 0.0326, "step": 148710 }, { "epoch": 0.0136, "grad_norm": 0.0457986555993557, "learning_rate": 8.4613128243021e-06, "loss": 0.0338, "step": 148720 }, { "epoch": 0.01365, "grad_norm": 0.047325409948825836, "learning_rate": 8.458213194316972e-06, "loss": 0.0325, "step": 148730 }, { "epoch": 0.0137, "grad_norm": 0.05123257637023926, "learning_rate": 8.455114016578095e-06, "loss": 0.037, "step": 148740 }, { "epoch": 0.01375, "grad_norm": 0.061940617859363556, "learning_rate": 8.452015291170168e-06, "loss": 0.0332, "step": 148750 }, { "epoch": 0.0138, "grad_norm": 0.04933027923107147, "learning_rate": 8.448917018177923e-06, "loss": 0.033, "step": 148760 }, { "epoch": 0.01385, "grad_norm": 0.0544595904648304, "learning_rate": 8.445819197686084e-06, "loss": 0.0326, "step": 148770 }, { "epoch": 0.0139, "grad_norm": 0.04772160202264786, "learning_rate": 8.442721829779324e-06, "loss": 0.0338, "step": 148780 }, { "epoch": 0.01395, "grad_norm": 0.047685492783784866, "learning_rate": 8.439624914542329e-06, "loss": 0.0354, "step": 148790 }, { "epoch": 0.014, "grad_norm": 0.047835707664489746, "learning_rate": 8.43652845205976e-06, "loss": 0.034, "step": 148800 }, { "epoch": 0.01405, "grad_norm": 0.0465179979801178, "learning_rate": 8.433432442416291e-06, "loss": 0.0326, "step": 148810 }, { "epoch": 0.0141, "grad_norm": 0.0501941442489624, "learning_rate": 8.430336885696555e-06, "loss": 0.0331, "step": 148820 }, { "epoch": 0.01415, "grad_norm": 0.053617771714925766, "learning_rate": 8.427241781985174e-06, "loss": 0.0323, "step": 148830 }, { "epoch": 0.0142, "grad_norm": 0.056008417159318924, "learning_rate": 8.424147131366783e-06, "loss": 0.0335, "step": 148840 }, { "epoch": 0.01425, "grad_norm": 0.05336494371294975, "learning_rate": 8.421052933925976e-06, "loss": 0.0336, "step": 148850 }, { "epoch": 0.0143, "grad_norm": 0.04270797222852707, "learning_rate": 8.417959189747363e-06, "loss": 0.033, "step": 148860 }, { "epoch": 0.01435, "grad_norm": 0.04849123954772949, "learning_rate": 8.414865898915513e-06, "loss": 0.0339, "step": 148870 }, { "epoch": 0.0144, "grad_norm": 0.057978514581918716, "learning_rate": 8.411773061514991e-06, "loss": 0.0347, "step": 148880 }, { "epoch": 0.01445, "grad_norm": 0.05593280866742134, "learning_rate": 8.408680677630371e-06, "loss": 0.0344, "step": 148890 }, { "epoch": 0.0145, "grad_norm": 0.05761462450027466, "learning_rate": 8.405588747346189e-06, "loss": 0.0352, "step": 148900 }, { "epoch": 0.01455, "grad_norm": 0.04773632436990738, "learning_rate": 8.402497270746976e-06, "loss": 0.033, "step": 148910 }, { "epoch": 0.0146, "grad_norm": 0.055619977414608, "learning_rate": 8.399406247917241e-06, "loss": 0.0347, "step": 148920 }, { "epoch": 0.01465, "grad_norm": 0.047743767499923706, "learning_rate": 8.396315678941513e-06, "loss": 0.0345, "step": 148930 }, { "epoch": 0.0147, "grad_norm": 0.05026392266154289, "learning_rate": 8.39322556390427e-06, "loss": 0.0345, "step": 148940 }, { "epoch": 0.01475, "grad_norm": 0.044830456376075745, "learning_rate": 8.390135902890007e-06, "loss": 0.0335, "step": 148950 }, { "epoch": 0.0148, "grad_norm": 0.05488893389701843, "learning_rate": 8.387046695983188e-06, "loss": 0.0355, "step": 148960 }, { "epoch": 0.01485, "grad_norm": 0.04811578243970871, "learning_rate": 8.383957943268267e-06, "loss": 0.035, "step": 148970 }, { "epoch": 0.0149, "grad_norm": 0.04665228724479675, "learning_rate": 8.380869644829698e-06, "loss": 0.0332, "step": 148980 }, { "epoch": 0.01495, "grad_norm": 0.04551544412970543, "learning_rate": 8.37778180075191e-06, "loss": 0.033, "step": 148990 }, { "epoch": 0.015, "grad_norm": 0.047599222511053085, "learning_rate": 8.374694411119325e-06, "loss": 0.0334, "step": 149000 }, { "epoch": 0.01505, "grad_norm": 0.04838322475552559, "learning_rate": 8.371607476016338e-06, "loss": 0.0335, "step": 149010 }, { "epoch": 0.0151, "grad_norm": 0.051446493715047836, "learning_rate": 8.368520995527356e-06, "loss": 0.0324, "step": 149020 }, { "epoch": 0.01515, "grad_norm": 0.060208600014448166, "learning_rate": 8.36543496973678e-06, "loss": 0.0327, "step": 149030 }, { "epoch": 0.0152, "grad_norm": 0.06569566577672958, "learning_rate": 8.362349398728944e-06, "loss": 0.0333, "step": 149040 }, { "epoch": 0.01525, "grad_norm": 0.050883982330560684, "learning_rate": 8.359264282588233e-06, "loss": 0.0338, "step": 149050 }, { "epoch": 0.0153, "grad_norm": 0.062197163701057434, "learning_rate": 8.35617962139898e-06, "loss": 0.0329, "step": 149060 }, { "epoch": 0.01535, "grad_norm": 0.06242142245173454, "learning_rate": 8.353095415245527e-06, "loss": 0.0318, "step": 149070 }, { "epoch": 0.0154, "grad_norm": 0.06413523107767105, "learning_rate": 8.350011664212195e-06, "loss": 0.0336, "step": 149080 }, { "epoch": 0.01545, "grad_norm": 0.058268480002880096, "learning_rate": 8.34692836838328e-06, "loss": 0.0322, "step": 149090 }, { "epoch": 0.0155, "grad_norm": 0.06240219622850418, "learning_rate": 8.343845527843094e-06, "loss": 0.0314, "step": 149100 }, { "epoch": 0.01555, "grad_norm": 0.05119459331035614, "learning_rate": 8.340763142675914e-06, "loss": 0.0331, "step": 149110 }, { "epoch": 0.0156, "grad_norm": 0.04056113213300705, "learning_rate": 8.337681212966014e-06, "loss": 0.0317, "step": 149120 }, { "epoch": 0.01565, "grad_norm": 0.06105110049247742, "learning_rate": 8.334599738797638e-06, "loss": 0.0317, "step": 149130 }, { "epoch": 0.0157, "grad_norm": 0.056628599762916565, "learning_rate": 8.331518720255047e-06, "loss": 0.0325, "step": 149140 }, { "epoch": 0.01575, "grad_norm": 0.050194285809993744, "learning_rate": 8.328438157422478e-06, "loss": 0.0313, "step": 149150 }, { "epoch": 0.0158, "grad_norm": 0.04470765218138695, "learning_rate": 8.325358050384149e-06, "loss": 0.0309, "step": 149160 }, { "epoch": 0.01585, "grad_norm": 0.0473356693983078, "learning_rate": 8.322278399224265e-06, "loss": 0.0321, "step": 149170 }, { "epoch": 0.0159, "grad_norm": 0.04666948691010475, "learning_rate": 8.319199204027015e-06, "loss": 0.0318, "step": 149180 }, { "epoch": 0.01595, "grad_norm": 0.05364212766289711, "learning_rate": 8.3161204648766e-06, "loss": 0.0313, "step": 149190 }, { "epoch": 0.016, "grad_norm": 0.04709921032190323, "learning_rate": 8.313042181857186e-06, "loss": 0.0319, "step": 149200 }, { "epoch": 0.01605, "grad_norm": 0.04465370252728462, "learning_rate": 8.309964355052921e-06, "loss": 0.0311, "step": 149210 }, { "epoch": 0.0161, "grad_norm": 0.04493457078933716, "learning_rate": 8.306886984547969e-06, "loss": 0.0321, "step": 149220 }, { "epoch": 0.01615, "grad_norm": 0.049775637686252594, "learning_rate": 8.303810070426447e-06, "loss": 0.0327, "step": 149230 }, { "epoch": 0.0162, "grad_norm": 0.04455522075295448, "learning_rate": 8.3007336127725e-06, "loss": 0.0324, "step": 149240 }, { "epoch": 0.01625, "grad_norm": 0.04167736694216728, "learning_rate": 8.297657611670207e-06, "loss": 0.0311, "step": 149250 }, { "epoch": 0.0163, "grad_norm": 0.05651344731450081, "learning_rate": 8.294582067203688e-06, "loss": 0.033, "step": 149260 }, { "epoch": 0.01635, "grad_norm": 0.043863266706466675, "learning_rate": 8.291506979457011e-06, "loss": 0.0323, "step": 149270 }, { "epoch": 0.0164, "grad_norm": 0.04590892791748047, "learning_rate": 8.288432348514267e-06, "loss": 0.0321, "step": 149280 }, { "epoch": 0.01645, "grad_norm": 0.05240105465054512, "learning_rate": 8.2853581744595e-06, "loss": 0.032, "step": 149290 }, { "epoch": 0.0165, "grad_norm": 0.04963365197181702, "learning_rate": 8.282284457376758e-06, "loss": 0.0325, "step": 149300 }, { "epoch": 0.01655, "grad_norm": 0.05475384742021561, "learning_rate": 8.279211197350081e-06, "loss": 0.0316, "step": 149310 }, { "epoch": 0.0166, "grad_norm": 0.06097935512661934, "learning_rate": 8.276138394463492e-06, "loss": 0.0341, "step": 149320 }, { "epoch": 0.01665, "grad_norm": 0.05405963957309723, "learning_rate": 8.273066048800988e-06, "loss": 0.0325, "step": 149330 }, { "epoch": 0.0167, "grad_norm": 0.054827336221933365, "learning_rate": 8.269994160446579e-06, "loss": 0.0325, "step": 149340 }, { "epoch": 0.01675, "grad_norm": 0.04705236107110977, "learning_rate": 8.26692272948424e-06, "loss": 0.0326, "step": 149350 }, { "epoch": 0.0168, "grad_norm": 0.05563652142882347, "learning_rate": 8.263851755997954e-06, "loss": 0.0342, "step": 149360 }, { "epoch": 0.01685, "grad_norm": 0.06205492094159126, "learning_rate": 8.260781240071674e-06, "loss": 0.0338, "step": 149370 }, { "epoch": 0.0169, "grad_norm": 0.05830933526158333, "learning_rate": 8.257711181789346e-06, "loss": 0.0321, "step": 149380 }, { "epoch": 0.01695, "grad_norm": 0.04731369391083717, "learning_rate": 8.254641581234895e-06, "loss": 0.033, "step": 149390 }, { "epoch": 0.017, "grad_norm": 0.04741034284234047, "learning_rate": 8.251572438492261e-06, "loss": 0.034, "step": 149400 }, { "epoch": 0.01705, "grad_norm": 0.05141216516494751, "learning_rate": 8.248503753645345e-06, "loss": 0.0335, "step": 149410 }, { "epoch": 0.0171, "grad_norm": 0.04521205648779869, "learning_rate": 8.245435526778036e-06, "loss": 0.034, "step": 149420 }, { "epoch": 0.01715, "grad_norm": 0.05473974347114563, "learning_rate": 8.242367757974233e-06, "loss": 0.0335, "step": 149430 }, { "epoch": 0.0172, "grad_norm": 0.05049604922533035, "learning_rate": 8.23930044731779e-06, "loss": 0.0345, "step": 149440 }, { "epoch": 0.01725, "grad_norm": 0.04677336663007736, "learning_rate": 8.236233594892595e-06, "loss": 0.0337, "step": 149450 }, { "epoch": 0.0173, "grad_norm": 0.05898798257112503, "learning_rate": 8.233167200782458e-06, "loss": 0.0338, "step": 149460 }, { "epoch": 0.01735, "grad_norm": 0.05927513912320137, "learning_rate": 8.23010126507123e-06, "loss": 0.0333, "step": 149470 }, { "epoch": 0.0174, "grad_norm": 0.04218801110982895, "learning_rate": 8.227035787842744e-06, "loss": 0.0334, "step": 149480 }, { "epoch": 0.01745, "grad_norm": 0.0435371994972229, "learning_rate": 8.223970769180796e-06, "loss": 0.0335, "step": 149490 }, { "epoch": 0.0175, "grad_norm": 0.04960029572248459, "learning_rate": 8.220906209169185e-06, "loss": 0.0341, "step": 149500 }, { "epoch": 0.01755, "grad_norm": 0.0509033203125, "learning_rate": 8.217842107891688e-06, "loss": 0.0318, "step": 149510 }, { "epoch": 0.0176, "grad_norm": 0.046687591820955276, "learning_rate": 8.21477846543208e-06, "loss": 0.0324, "step": 149520 }, { "epoch": 0.01765, "grad_norm": 0.07635864615440369, "learning_rate": 8.211715281874141e-06, "loss": 0.0337, "step": 149530 }, { "epoch": 0.0177, "grad_norm": 0.0632171630859375, "learning_rate": 8.208652557301582e-06, "loss": 0.0332, "step": 149540 }, { "epoch": 0.01775, "grad_norm": 0.12133687734603882, "learning_rate": 8.205590291798162e-06, "loss": 0.0332, "step": 149550 }, { "epoch": 0.0178, "grad_norm": 0.057598382234573364, "learning_rate": 8.202528485447589e-06, "loss": 0.0358, "step": 149560 }, { "epoch": 0.01785, "grad_norm": 0.07385782152414322, "learning_rate": 8.19946713833358e-06, "loss": 0.0325, "step": 149570 }, { "epoch": 0.0179, "grad_norm": 0.064969003200531, "learning_rate": 8.196406250539831e-06, "loss": 0.0349, "step": 149580 }, { "epoch": 0.01795, "grad_norm": 0.05651523545384407, "learning_rate": 8.193345822150014e-06, "loss": 0.0344, "step": 149590 }, { "epoch": 0.018, "grad_norm": 0.0455944761633873, "learning_rate": 8.190285853247815e-06, "loss": 0.0324, "step": 149600 }, { "epoch": 0.01805, "grad_norm": 0.05375504493713379, "learning_rate": 8.187226343916887e-06, "loss": 0.0334, "step": 149610 }, { "epoch": 0.0181, "grad_norm": 0.053510844707489014, "learning_rate": 8.184167294240874e-06, "loss": 0.0339, "step": 149620 }, { "epoch": 0.01815, "grad_norm": 0.050734080374240875, "learning_rate": 8.1811087043034e-06, "loss": 0.0332, "step": 149630 }, { "epoch": 0.0182, "grad_norm": 0.05367788299918175, "learning_rate": 8.178050574188106e-06, "loss": 0.0343, "step": 149640 }, { "epoch": 0.01825, "grad_norm": 0.07095736265182495, "learning_rate": 8.174992903978581e-06, "loss": 0.0353, "step": 149650 }, { "epoch": 0.0183, "grad_norm": 0.06505496054887772, "learning_rate": 8.171935693758437e-06, "loss": 0.0341, "step": 149660 }, { "epoch": 0.01835, "grad_norm": 0.05861425772309303, "learning_rate": 8.16887894361125e-06, "loss": 0.0348, "step": 149670 }, { "epoch": 0.0184, "grad_norm": 0.05692286789417267, "learning_rate": 8.165822653620578e-06, "loss": 0.0341, "step": 149680 }, { "epoch": 0.01845, "grad_norm": 0.04605856165289879, "learning_rate": 8.162766823870002e-06, "loss": 0.0325, "step": 149690 }, { "epoch": 0.0185, "grad_norm": 0.042736127972602844, "learning_rate": 8.159711454443054e-06, "loss": 0.0323, "step": 149700 }, { "epoch": 0.01855, "grad_norm": 0.04387698322534561, "learning_rate": 8.15665654542326e-06, "loss": 0.0346, "step": 149710 }, { "epoch": 0.0186, "grad_norm": 0.04287761077284813, "learning_rate": 8.153602096894159e-06, "loss": 0.0331, "step": 149720 }, { "epoch": 0.01865, "grad_norm": 0.048668548464775085, "learning_rate": 8.150548108939236e-06, "loss": 0.0342, "step": 149730 }, { "epoch": 0.0187, "grad_norm": 0.045768819749355316, "learning_rate": 8.147494581642015e-06, "loss": 0.0338, "step": 149740 }, { "epoch": 0.01875, "grad_norm": 0.04629748314619064, "learning_rate": 8.144441515085946e-06, "loss": 0.0317, "step": 149750 }, { "epoch": 0.0188, "grad_norm": 0.050622209906578064, "learning_rate": 8.14138890935452e-06, "loss": 0.0338, "step": 149760 }, { "epoch": 0.01885, "grad_norm": 0.05139302834868431, "learning_rate": 8.138336764531182e-06, "loss": 0.0323, "step": 149770 }, { "epoch": 0.0189, "grad_norm": 0.04795796051621437, "learning_rate": 8.135285080699387e-06, "loss": 0.032, "step": 149780 }, { "epoch": 0.01895, "grad_norm": 0.04131333902478218, "learning_rate": 8.132233857942564e-06, "loss": 0.0322, "step": 149790 }, { "epoch": 0.019, "grad_norm": 0.056618932634592056, "learning_rate": 8.129183096344123e-06, "loss": 0.0338, "step": 149800 }, { "epoch": 0.01905, "grad_norm": 0.05299549549818039, "learning_rate": 8.126132795987485e-06, "loss": 0.0322, "step": 149810 }, { "epoch": 0.0191, "grad_norm": 0.05240718647837639, "learning_rate": 8.123082956956037e-06, "loss": 0.0358, "step": 149820 }, { "epoch": 0.01915, "grad_norm": 0.04959771782159805, "learning_rate": 8.120033579333162e-06, "loss": 0.0336, "step": 149830 }, { "epoch": 0.0192, "grad_norm": 0.05134819075465202, "learning_rate": 8.116984663202218e-06, "loss": 0.0323, "step": 149840 }, { "epoch": 0.01925, "grad_norm": 0.04623432829976082, "learning_rate": 8.113936208646572e-06, "loss": 0.0337, "step": 149850 }, { "epoch": 0.0193, "grad_norm": 0.04831528291106224, "learning_rate": 8.110888215749574e-06, "loss": 0.0338, "step": 149860 }, { "epoch": 0.01935, "grad_norm": 0.03964311257004738, "learning_rate": 8.107840684594547e-06, "loss": 0.0327, "step": 149870 }, { "epoch": 0.0194, "grad_norm": 0.07050962001085281, "learning_rate": 8.104793615264807e-06, "loss": 0.0331, "step": 149880 }, { "epoch": 0.01945, "grad_norm": 0.07801316678524017, "learning_rate": 8.101747007843658e-06, "loss": 0.0337, "step": 149890 }, { "epoch": 0.0195, "grad_norm": 0.061165932565927505, "learning_rate": 8.098700862414404e-06, "loss": 0.0323, "step": 149900 }, { "epoch": 0.01955, "grad_norm": 0.0446687787771225, "learning_rate": 8.095655179060318e-06, "loss": 0.0332, "step": 149910 }, { "epoch": 0.0196, "grad_norm": 0.05215480923652649, "learning_rate": 8.092609957864663e-06, "loss": 0.0329, "step": 149920 }, { "epoch": 0.01965, "grad_norm": 0.04582972824573517, "learning_rate": 8.089565198910706e-06, "loss": 0.032, "step": 149930 }, { "epoch": 0.0197, "grad_norm": 0.05249819532036781, "learning_rate": 8.086520902281677e-06, "loss": 0.033, "step": 149940 }, { "epoch": 0.01975, "grad_norm": 0.04421620070934296, "learning_rate": 8.083477068060827e-06, "loss": 0.0328, "step": 149950 }, { "epoch": 0.0198, "grad_norm": 0.043347086757421494, "learning_rate": 8.080433696331344e-06, "loss": 0.0355, "step": 149960 }, { "epoch": 0.01985, "grad_norm": 0.04572188854217529, "learning_rate": 8.077390787176447e-06, "loss": 0.0329, "step": 149970 }, { "epoch": 0.0199, "grad_norm": 0.04811607301235199, "learning_rate": 8.074348340679336e-06, "loss": 0.0338, "step": 149980 }, { "epoch": 0.01995, "grad_norm": 0.045632511377334595, "learning_rate": 8.071306356923184e-06, "loss": 0.0343, "step": 149990 }, { "epoch": 0.02, "grad_norm": 0.047678910195827484, "learning_rate": 8.068264835991155e-06, "loss": 0.0326, "step": 150000 }, { "epoch": 0.02005, "grad_norm": 0.046015266329050064, "learning_rate": 8.065223777966394e-06, "loss": 0.0344, "step": 150010 }, { "epoch": 0.0201, "grad_norm": 0.04278041422367096, "learning_rate": 8.062183182932065e-06, "loss": 0.0341, "step": 150020 }, { "epoch": 0.02015, "grad_norm": 0.04753004014492035, "learning_rate": 8.059143050971283e-06, "loss": 0.0357, "step": 150030 }, { "epoch": 0.0202, "grad_norm": 0.04136762022972107, "learning_rate": 8.056103382167156e-06, "loss": 0.0331, "step": 150040 }, { "epoch": 0.02025, "grad_norm": 0.04396098479628563, "learning_rate": 8.053064176602806e-06, "loss": 0.0351, "step": 150050 }, { "epoch": 0.0203, "grad_norm": 0.047903914004564285, "learning_rate": 8.050025434361308e-06, "loss": 0.0329, "step": 150060 }, { "epoch": 0.02035, "grad_norm": 0.046780530363321304, "learning_rate": 8.046987155525754e-06, "loss": 0.0335, "step": 150070 }, { "epoch": 0.0204, "grad_norm": 0.05217145010828972, "learning_rate": 8.043949340179203e-06, "loss": 0.0341, "step": 150080 }, { "epoch": 0.02045, "grad_norm": 0.045180562883615494, "learning_rate": 8.040911988404697e-06, "loss": 0.033, "step": 150090 }, { "epoch": 0.0205, "grad_norm": 0.05050816759467125, "learning_rate": 8.037875100285297e-06, "loss": 0.0332, "step": 150100 }, { "epoch": 0.02055, "grad_norm": 0.052226655185222626, "learning_rate": 8.034838675904017e-06, "loss": 0.0337, "step": 150110 }, { "epoch": 0.0206, "grad_norm": 0.052902113646268845, "learning_rate": 8.031802715343875e-06, "loss": 0.0327, "step": 150120 }, { "epoch": 0.02065, "grad_norm": 0.04634511470794678, "learning_rate": 8.028767218687864e-06, "loss": 0.0336, "step": 150130 }, { "epoch": 0.0207, "grad_norm": 0.050081513822078705, "learning_rate": 8.025732186018989e-06, "loss": 0.0324, "step": 150140 }, { "epoch": 0.02075, "grad_norm": 0.05681074783205986, "learning_rate": 8.02269761742021e-06, "loss": 0.0349, "step": 150150 }, { "epoch": 0.0208, "grad_norm": 0.057326339185237885, "learning_rate": 8.019663512974509e-06, "loss": 0.034, "step": 150160 }, { "epoch": 0.02085, "grad_norm": 0.05329889431595802, "learning_rate": 8.01662987276483e-06, "loss": 0.0344, "step": 150170 }, { "epoch": 0.0209, "grad_norm": 0.048233937472105026, "learning_rate": 8.0135966968741e-06, "loss": 0.0335, "step": 150180 }, { "epoch": 0.02095, "grad_norm": 0.0592784509062767, "learning_rate": 8.010563985385264e-06, "loss": 0.0342, "step": 150190 }, { "epoch": 0.021, "grad_norm": 0.05796947330236435, "learning_rate": 8.007531738381225e-06, "loss": 0.0366, "step": 150200 }, { "epoch": 0.02105, "grad_norm": 0.05200716853141785, "learning_rate": 8.004499955944886e-06, "loss": 0.0342, "step": 150210 }, { "epoch": 0.0211, "grad_norm": 0.0459669791162014, "learning_rate": 8.001468638159124e-06, "loss": 0.0353, "step": 150220 }, { "epoch": 0.02115, "grad_norm": 0.04280899465084076, "learning_rate": 7.998437785106825e-06, "loss": 0.0348, "step": 150230 }, { "epoch": 0.0212, "grad_norm": 0.05121898651123047, "learning_rate": 7.995407396870862e-06, "loss": 0.0344, "step": 150240 }, { "epoch": 0.02125, "grad_norm": 0.07777711749076843, "learning_rate": 7.992377473534061e-06, "loss": 0.036, "step": 150250 }, { "epoch": 0.0213, "grad_norm": 0.04437794163823128, "learning_rate": 7.989348015179274e-06, "loss": 0.0342, "step": 150260 }, { "epoch": 0.02135, "grad_norm": 0.049214769154787064, "learning_rate": 7.986319021889316e-06, "loss": 0.0354, "step": 150270 }, { "epoch": 0.0214, "grad_norm": 0.049523212015628815, "learning_rate": 7.983290493747012e-06, "loss": 0.036, "step": 150280 }, { "epoch": 0.02145, "grad_norm": 0.05269530043005943, "learning_rate": 7.980262430835153e-06, "loss": 0.0352, "step": 150290 }, { "epoch": 0.0215, "grad_norm": 0.049670156091451645, "learning_rate": 7.977234833236519e-06, "loss": 0.0346, "step": 150300 }, { "epoch": 0.02155, "grad_norm": 0.06403318047523499, "learning_rate": 7.974207701033895e-06, "loss": 0.0348, "step": 150310 }, { "epoch": 0.0216, "grad_norm": 0.05004489794373512, "learning_rate": 7.971181034310037e-06, "loss": 0.0346, "step": 150320 }, { "epoch": 0.02165, "grad_norm": 0.05563574284315109, "learning_rate": 7.968154833147692e-06, "loss": 0.0337, "step": 150330 }, { "epoch": 0.0217, "grad_norm": 0.04923945292830467, "learning_rate": 7.965129097629587e-06, "loss": 0.0338, "step": 150340 }, { "epoch": 0.02175, "grad_norm": 0.054012149572372437, "learning_rate": 7.962103827838455e-06, "loss": 0.0337, "step": 150350 }, { "epoch": 0.0218, "grad_norm": 0.06014170125126839, "learning_rate": 7.959079023857007e-06, "loss": 0.0352, "step": 150360 }, { "epoch": 0.02185, "grad_norm": 0.05869707465171814, "learning_rate": 7.956054685767941e-06, "loss": 0.0339, "step": 150370 }, { "epoch": 0.0219, "grad_norm": 0.04561547935009003, "learning_rate": 7.953030813653934e-06, "loss": 0.0336, "step": 150380 }, { "epoch": 0.02195, "grad_norm": 0.053486719727516174, "learning_rate": 7.950007407597654e-06, "loss": 0.0328, "step": 150390 }, { "epoch": 0.022, "grad_norm": 0.047152649611234665, "learning_rate": 7.946984467681773e-06, "loss": 0.0328, "step": 150400 }, { "epoch": 0.02205, "grad_norm": 0.05373353883624077, "learning_rate": 7.94396199398893e-06, "loss": 0.0321, "step": 150410 }, { "epoch": 0.0221, "grad_norm": 0.04691655933856964, "learning_rate": 7.94093998660175e-06, "loss": 0.033, "step": 150420 }, { "epoch": 0.02215, "grad_norm": 0.047290802001953125, "learning_rate": 7.937918445602871e-06, "loss": 0.0329, "step": 150430 }, { "epoch": 0.0222, "grad_norm": 0.05117656663060188, "learning_rate": 7.934897371074884e-06, "loss": 0.0354, "step": 150440 }, { "epoch": 0.02225, "grad_norm": 0.04447808116674423, "learning_rate": 7.931876763100407e-06, "loss": 0.0323, "step": 150450 }, { "epoch": 0.0223, "grad_norm": 0.048859499394893646, "learning_rate": 7.928856621761993e-06, "loss": 0.0339, "step": 150460 }, { "epoch": 0.02235, "grad_norm": 0.06283494830131531, "learning_rate": 7.925836947142223e-06, "loss": 0.033, "step": 150470 }, { "epoch": 0.0224, "grad_norm": 0.05105786770582199, "learning_rate": 7.922817739323665e-06, "loss": 0.0327, "step": 150480 }, { "epoch": 0.02245, "grad_norm": 0.045580655336380005, "learning_rate": 7.919798998388856e-06, "loss": 0.0335, "step": 150490 }, { "epoch": 0.0225, "grad_norm": 0.060102153569459915, "learning_rate": 7.916780724420326e-06, "loss": 0.0322, "step": 150500 }, { "epoch": 0.02255, "grad_norm": 0.042073022574186325, "learning_rate": 7.91376291750058e-06, "loss": 0.0317, "step": 150510 }, { "epoch": 0.0226, "grad_norm": 0.04824558645486832, "learning_rate": 7.91074557771215e-06, "loss": 0.0324, "step": 150520 }, { "epoch": 0.02265, "grad_norm": 0.04602494835853577, "learning_rate": 7.907728705137516e-06, "loss": 0.0317, "step": 150530 }, { "epoch": 0.0227, "grad_norm": 0.04034003987908363, "learning_rate": 7.904712299859145e-06, "loss": 0.0313, "step": 150540 }, { "epoch": 0.02275, "grad_norm": 0.0565539188683033, "learning_rate": 7.901696361959532e-06, "loss": 0.0337, "step": 150550 }, { "epoch": 0.0228, "grad_norm": 0.04368201643228531, "learning_rate": 7.898680891521105e-06, "loss": 0.0324, "step": 150560 }, { "epoch": 0.02285, "grad_norm": 0.046120114624500275, "learning_rate": 7.895665888626325e-06, "loss": 0.0312, "step": 150570 }, { "epoch": 0.0229, "grad_norm": 0.049742378294467926, "learning_rate": 7.892651353357616e-06, "loss": 0.0337, "step": 150580 }, { "epoch": 0.02295, "grad_norm": 0.050557803362607956, "learning_rate": 7.889637285797391e-06, "loss": 0.0341, "step": 150590 }, { "epoch": 0.023, "grad_norm": 0.05375564098358154, "learning_rate": 7.886623686028047e-06, "loss": 0.0331, "step": 150600 }, { "epoch": 0.02305, "grad_norm": 0.05111253634095192, "learning_rate": 7.883610554131989e-06, "loss": 0.0335, "step": 150610 }, { "epoch": 0.0231, "grad_norm": 0.04323218762874603, "learning_rate": 7.880597890191587e-06, "loss": 0.0328, "step": 150620 }, { "epoch": 0.02315, "grad_norm": 0.05458337441086769, "learning_rate": 7.877585694289203e-06, "loss": 0.0339, "step": 150630 }, { "epoch": 0.0232, "grad_norm": 0.051620274782180786, "learning_rate": 7.8745739665072e-06, "loss": 0.0337, "step": 150640 }, { "epoch": 0.02325, "grad_norm": 0.04495498538017273, "learning_rate": 7.871562706927904e-06, "loss": 0.0325, "step": 150650 }, { "epoch": 0.0233, "grad_norm": 0.03468136861920357, "learning_rate": 7.868551915633662e-06, "loss": 0.0323, "step": 150660 }, { "epoch": 0.02335, "grad_norm": 0.053658053278923035, "learning_rate": 7.86554159270676e-06, "loss": 0.0334, "step": 150670 }, { "epoch": 0.0234, "grad_norm": 0.04565673694014549, "learning_rate": 7.862531738229515e-06, "loss": 0.0335, "step": 150680 }, { "epoch": 0.02345, "grad_norm": 0.053873706609010696, "learning_rate": 7.859522352284222e-06, "loss": 0.0338, "step": 150690 }, { "epoch": 0.0235, "grad_norm": 0.047571588307619095, "learning_rate": 7.856513434953147e-06, "loss": 0.033, "step": 150700 }, { "epoch": 0.02355, "grad_norm": 0.05546896904706955, "learning_rate": 7.853504986318555e-06, "loss": 0.0329, "step": 150710 }, { "epoch": 0.0236, "grad_norm": 0.05743944272398949, "learning_rate": 7.850497006462684e-06, "loss": 0.0333, "step": 150720 }, { "epoch": 0.02365, "grad_norm": 0.056781135499477386, "learning_rate": 7.847489495467786e-06, "loss": 0.0347, "step": 150730 }, { "epoch": 0.0237, "grad_norm": 0.08583921939134598, "learning_rate": 7.844482453416096e-06, "loss": 0.0334, "step": 150740 }, { "epoch": 0.02375, "grad_norm": 0.058244261890649796, "learning_rate": 7.841475880389795e-06, "loss": 0.033, "step": 150750 }, { "epoch": 0.0238, "grad_norm": 0.045430902391672134, "learning_rate": 7.838469776471105e-06, "loss": 0.0336, "step": 150760 }, { "epoch": 0.02385, "grad_norm": 0.04519680514931679, "learning_rate": 7.835464141742197e-06, "loss": 0.0345, "step": 150770 }, { "epoch": 0.0239, "grad_norm": 0.049604613333940506, "learning_rate": 7.832458976285256e-06, "loss": 0.034, "step": 150780 }, { "epoch": 0.02395, "grad_norm": 0.04675721377134323, "learning_rate": 7.829454280182442e-06, "loss": 0.034, "step": 150790 }, { "epoch": 0.024, "grad_norm": 0.05836396664381027, "learning_rate": 7.826450053515886e-06, "loss": 0.0334, "step": 150800 }, { "epoch": 0.02405, "grad_norm": 0.047985147684812546, "learning_rate": 7.823446296367739e-06, "loss": 0.0339, "step": 150810 }, { "epoch": 0.0241, "grad_norm": 0.05170302093029022, "learning_rate": 7.820443008820122e-06, "loss": 0.0331, "step": 150820 }, { "epoch": 0.02415, "grad_norm": 0.04719945415854454, "learning_rate": 7.817440190955137e-06, "loss": 0.0354, "step": 150830 }, { "epoch": 0.0242, "grad_norm": 0.06532011181116104, "learning_rate": 7.814437842854875e-06, "loss": 0.0369, "step": 150840 }, { "epoch": 0.02425, "grad_norm": 0.04977961257100105, "learning_rate": 7.811435964601432e-06, "loss": 0.0324, "step": 150850 }, { "epoch": 0.0243, "grad_norm": 0.0519782118499279, "learning_rate": 7.808434556276866e-06, "loss": 0.0347, "step": 150860 }, { "epoch": 0.02435, "grad_norm": 0.07689446210861206, "learning_rate": 7.805433617963251e-06, "loss": 0.0338, "step": 150870 }, { "epoch": 0.0244, "grad_norm": 0.06940881907939911, "learning_rate": 7.802433149742617e-06, "loss": 0.0349, "step": 150880 }, { "epoch": 0.02445, "grad_norm": 0.06524743884801865, "learning_rate": 7.799433151696995e-06, "loss": 0.0339, "step": 150890 }, { "epoch": 0.0245, "grad_norm": 0.04952292516827583, "learning_rate": 7.796433623908413e-06, "loss": 0.034, "step": 150900 }, { "epoch": 0.02455, "grad_norm": 0.05044098570942879, "learning_rate": 7.793434566458876e-06, "loss": 0.0331, "step": 150910 }, { "epoch": 0.0246, "grad_norm": 0.04742221534252167, "learning_rate": 7.790435979430363e-06, "loss": 0.0331, "step": 150920 }, { "epoch": 0.02465, "grad_norm": 0.04695028066635132, "learning_rate": 7.787437862904875e-06, "loss": 0.0332, "step": 150930 }, { "epoch": 0.0247, "grad_norm": 0.052516527473926544, "learning_rate": 7.784440216964361e-06, "loss": 0.0336, "step": 150940 }, { "epoch": 0.02475, "grad_norm": 0.048497218638658524, "learning_rate": 7.7814430416908e-06, "loss": 0.0322, "step": 150950 }, { "epoch": 0.0248, "grad_norm": 0.06365183740854263, "learning_rate": 7.7784463371661e-06, "loss": 0.0351, "step": 150960 }, { "epoch": 0.02485, "grad_norm": 0.05415652319788933, "learning_rate": 7.775450103472217e-06, "loss": 0.0334, "step": 150970 }, { "epoch": 0.0249, "grad_norm": 0.06245667114853859, "learning_rate": 7.772454340691052e-06, "loss": 0.037, "step": 150980 }, { "epoch": 0.02495, "grad_norm": 0.06386829167604446, "learning_rate": 7.769459048904518e-06, "loss": 0.0342, "step": 150990 }, { "epoch": 0.025, "grad_norm": 0.043057940900325775, "learning_rate": 7.7664642281945e-06, "loss": 0.0335, "step": 151000 }, { "epoch": 0.02505, "grad_norm": 0.05063904449343681, "learning_rate": 7.763469878642868e-06, "loss": 0.0336, "step": 151010 }, { "epoch": 0.0251, "grad_norm": 0.04828261956572533, "learning_rate": 7.7604760003315e-06, "loss": 0.0342, "step": 151020 }, { "epoch": 0.02515, "grad_norm": 0.05346516892313957, "learning_rate": 7.757482593342243e-06, "loss": 0.034, "step": 151030 }, { "epoch": 0.0252, "grad_norm": 0.049246352165937424, "learning_rate": 7.754489657756938e-06, "loss": 0.0339, "step": 151040 }, { "epoch": 0.02525, "grad_norm": 0.06279134750366211, "learning_rate": 7.751497193657396e-06, "loss": 0.0335, "step": 151050 }, { "epoch": 0.0253, "grad_norm": 0.04387279972434044, "learning_rate": 7.748505201125438e-06, "loss": 0.0324, "step": 151060 }, { "epoch": 0.02535, "grad_norm": 0.05643988773226738, "learning_rate": 7.74551368024288e-06, "loss": 0.0357, "step": 151070 }, { "epoch": 0.0254, "grad_norm": 0.0686245784163475, "learning_rate": 7.742522631091492e-06, "loss": 0.0348, "step": 151080 }, { "epoch": 0.02545, "grad_norm": 0.05688413232564926, "learning_rate": 7.739532053753055e-06, "loss": 0.0325, "step": 151090 }, { "epoch": 0.0255, "grad_norm": 0.04766567051410675, "learning_rate": 7.736541948309314e-06, "loss": 0.0336, "step": 151100 }, { "epoch": 0.02555, "grad_norm": 0.06140110269188881, "learning_rate": 7.733552314842043e-06, "loss": 0.0345, "step": 151110 }, { "epoch": 0.0256, "grad_norm": 0.09793264418840408, "learning_rate": 7.730563153432965e-06, "loss": 0.0324, "step": 151120 }, { "epoch": 0.02565, "grad_norm": 0.05823006108403206, "learning_rate": 7.727574464163792e-06, "loss": 0.0318, "step": 151130 }, { "epoch": 0.0257, "grad_norm": 0.04784746095538139, "learning_rate": 7.724586247116256e-06, "loss": 0.0326, "step": 151140 }, { "epoch": 0.02575, "grad_norm": 0.0745970755815506, "learning_rate": 7.72159850237203e-06, "loss": 0.0349, "step": 151150 }, { "epoch": 0.0258, "grad_norm": 0.053788185119628906, "learning_rate": 7.718611230012826e-06, "loss": 0.0333, "step": 151160 }, { "epoch": 0.02585, "grad_norm": 0.05982402712106705, "learning_rate": 7.715624430120286e-06, "loss": 0.0347, "step": 151170 }, { "epoch": 0.0259, "grad_norm": 0.04564383253455162, "learning_rate": 7.712638102776076e-06, "loss": 0.0315, "step": 151180 }, { "epoch": 0.02595, "grad_norm": 0.040683455765247345, "learning_rate": 7.709652248061858e-06, "loss": 0.0322, "step": 151190 }, { "epoch": 0.026, "grad_norm": 0.03503594174981117, "learning_rate": 7.706666866059251e-06, "loss": 0.0322, "step": 151200 }, { "epoch": 0.02605, "grad_norm": 0.038955919444561005, "learning_rate": 7.703681956849873e-06, "loss": 0.0325, "step": 151210 }, { "epoch": 0.0261, "grad_norm": 0.0518600158393383, "learning_rate": 7.700697520515327e-06, "loss": 0.0325, "step": 151220 }, { "epoch": 0.02615, "grad_norm": 0.03950066119432449, "learning_rate": 7.697713557137218e-06, "loss": 0.0322, "step": 151230 }, { "epoch": 0.0262, "grad_norm": 0.041293129324913025, "learning_rate": 7.694730066797121e-06, "loss": 0.0313, "step": 151240 }, { "epoch": 0.02625, "grad_norm": 0.04547581821680069, "learning_rate": 7.691747049576593e-06, "loss": 0.0315, "step": 151250 }, { "epoch": 0.0263, "grad_norm": 0.04586722329258919, "learning_rate": 7.688764505557208e-06, "loss": 0.0317, "step": 151260 }, { "epoch": 0.02635, "grad_norm": 0.05034118890762329, "learning_rate": 7.685782434820488e-06, "loss": 0.0324, "step": 151270 }, { "epoch": 0.0264, "grad_norm": 0.04623064771294594, "learning_rate": 7.682800837447982e-06, "loss": 0.0326, "step": 151280 }, { "epoch": 0.02645, "grad_norm": 0.04324917495250702, "learning_rate": 7.679819713521194e-06, "loss": 0.0325, "step": 151290 }, { "epoch": 0.0265, "grad_norm": 0.04792845994234085, "learning_rate": 7.676839063121621e-06, "loss": 0.0324, "step": 151300 }, { "epoch": 0.02655, "grad_norm": 0.04959246516227722, "learning_rate": 7.673858886330768e-06, "loss": 0.0358, "step": 151310 }, { "epoch": 0.0266, "grad_norm": 0.04334568977355957, "learning_rate": 7.6708791832301e-06, "loss": 0.0334, "step": 151320 }, { "epoch": 0.02665, "grad_norm": 0.04518502950668335, "learning_rate": 7.667899953901089e-06, "loss": 0.0335, "step": 151330 }, { "epoch": 0.0267, "grad_norm": 0.04649922251701355, "learning_rate": 7.664921198425173e-06, "loss": 0.0338, "step": 151340 }, { "epoch": 0.02675, "grad_norm": 0.054697297513484955, "learning_rate": 7.661942916883807e-06, "loss": 0.0337, "step": 151350 }, { "epoch": 0.0268, "grad_norm": 0.05046942085027695, "learning_rate": 7.658965109358401e-06, "loss": 0.0348, "step": 151360 }, { "epoch": 0.02685, "grad_norm": 0.050941936671733856, "learning_rate": 7.655987775930381e-06, "loss": 0.034, "step": 151370 }, { "epoch": 0.0269, "grad_norm": 0.04394199326634407, "learning_rate": 7.653010916681141e-06, "loss": 0.0353, "step": 151380 }, { "epoch": 0.02695, "grad_norm": 0.049377329647541046, "learning_rate": 7.650034531692055e-06, "loss": 0.0354, "step": 151390 }, { "epoch": 0.027, "grad_norm": 0.05509459972381592, "learning_rate": 7.647058621044516e-06, "loss": 0.0375, "step": 151400 }, { "epoch": 0.02705, "grad_norm": 0.04569574445486069, "learning_rate": 7.644083184819876e-06, "loss": 0.0347, "step": 151410 }, { "epoch": 0.0271, "grad_norm": 0.04505246505141258, "learning_rate": 7.64110822309948e-06, "loss": 0.0338, "step": 151420 }, { "epoch": 0.02715, "grad_norm": 0.04855041578412056, "learning_rate": 7.638133735964655e-06, "loss": 0.0341, "step": 151430 }, { "epoch": 0.0272, "grad_norm": 0.04252030327916145, "learning_rate": 7.635159723496735e-06, "loss": 0.0342, "step": 151440 }, { "epoch": 0.02725, "grad_norm": 0.04364844784140587, "learning_rate": 7.632186185777037e-06, "loss": 0.0335, "step": 151450 }, { "epoch": 0.0273, "grad_norm": 0.041523970663547516, "learning_rate": 7.6292131228868305e-06, "loss": 0.0361, "step": 151460 }, { "epoch": 0.02735, "grad_norm": 0.03853632137179375, "learning_rate": 7.626240534907417e-06, "loss": 0.0336, "step": 151470 }, { "epoch": 0.0274, "grad_norm": 0.045850805938243866, "learning_rate": 7.6232684219200515e-06, "loss": 0.0333, "step": 151480 }, { "epoch": 0.02745, "grad_norm": 0.050423864275217056, "learning_rate": 7.620296784006009e-06, "loss": 0.035, "step": 151490 }, { "epoch": 0.0275, "grad_norm": 0.059781789779663086, "learning_rate": 7.617325621246523e-06, "loss": 0.0349, "step": 151500 }, { "epoch": 0.02755, "grad_norm": 0.054336484521627426, "learning_rate": 7.6143549337228175e-06, "loss": 0.0334, "step": 151510 }, { "epoch": 0.0276, "grad_norm": 0.05191996321082115, "learning_rate": 7.611384721516121e-06, "loss": 0.0344, "step": 151520 }, { "epoch": 0.02765, "grad_norm": 0.051657989621162415, "learning_rate": 7.608414984707635e-06, "loss": 0.0324, "step": 151530 }, { "epoch": 0.0277, "grad_norm": 0.045168012380599976, "learning_rate": 7.605445723378552e-06, "loss": 0.0315, "step": 151540 }, { "epoch": 0.02775, "grad_norm": 0.0468955934047699, "learning_rate": 7.602476937610037e-06, "loss": 0.0339, "step": 151550 }, { "epoch": 0.0278, "grad_norm": 0.040408361703157425, "learning_rate": 7.599508627483268e-06, "loss": 0.0334, "step": 151560 }, { "epoch": 0.02785, "grad_norm": 0.04691183194518089, "learning_rate": 7.596540793079404e-06, "loss": 0.0326, "step": 151570 }, { "epoch": 0.0279, "grad_norm": 0.0571589358150959, "learning_rate": 7.593573434479579e-06, "loss": 0.0334, "step": 151580 }, { "epoch": 0.02795, "grad_norm": 0.06184757128357887, "learning_rate": 7.590606551764912e-06, "loss": 0.0346, "step": 151590 }, { "epoch": 0.028, "grad_norm": 0.047520771622657776, "learning_rate": 7.5876401450165165e-06, "loss": 0.0328, "step": 151600 }, { "epoch": 0.02805, "grad_norm": 0.05671709403395653, "learning_rate": 7.584674214315507e-06, "loss": 0.0347, "step": 151610 }, { "epoch": 0.0281, "grad_norm": 0.051435671746730804, "learning_rate": 7.581708759742959e-06, "loss": 0.0331, "step": 151620 }, { "epoch": 0.02815, "grad_norm": 0.05739184468984604, "learning_rate": 7.578743781379944e-06, "loss": 0.0343, "step": 151630 }, { "epoch": 0.0282, "grad_norm": 0.05141328275203705, "learning_rate": 7.575779279307535e-06, "loss": 0.0324, "step": 151640 }, { "epoch": 0.02825, "grad_norm": 0.049890533089637756, "learning_rate": 7.5728152536067686e-06, "loss": 0.0337, "step": 151650 }, { "epoch": 0.0283, "grad_norm": 0.043297071009874344, "learning_rate": 7.569851704358699e-06, "loss": 0.0326, "step": 151660 }, { "epoch": 0.02835, "grad_norm": 0.04433068633079529, "learning_rate": 7.566888631644323e-06, "loss": 0.0331, "step": 151670 }, { "epoch": 0.0284, "grad_norm": 0.054903291165828705, "learning_rate": 7.56392603554466e-06, "loss": 0.0335, "step": 151680 }, { "epoch": 0.02845, "grad_norm": 0.04608643427491188, "learning_rate": 7.56096391614072e-06, "loss": 0.0324, "step": 151690 }, { "epoch": 0.0285, "grad_norm": 0.041108906269073486, "learning_rate": 7.5580022735134735e-06, "loss": 0.0325, "step": 151700 }, { "epoch": 0.02855, "grad_norm": 0.04551394283771515, "learning_rate": 7.55504110774389e-06, "loss": 0.0336, "step": 151710 }, { "epoch": 0.0286, "grad_norm": 0.04845035448670387, "learning_rate": 7.5520804189129245e-06, "loss": 0.0335, "step": 151720 }, { "epoch": 0.02865, "grad_norm": 0.04496077448129654, "learning_rate": 7.549120207101532e-06, "loss": 0.0333, "step": 151730 }, { "epoch": 0.0287, "grad_norm": 0.05059627443552017, "learning_rate": 7.546160472390634e-06, "loss": 0.0339, "step": 151740 }, { "epoch": 0.02875, "grad_norm": 0.048855770379304886, "learning_rate": 7.543201214861148e-06, "loss": 0.0336, "step": 151750 }, { "epoch": 0.0288, "grad_norm": 0.06655757129192352, "learning_rate": 7.5402424345939884e-06, "loss": 0.0371, "step": 151760 }, { "epoch": 0.02885, "grad_norm": 0.0655401349067688, "learning_rate": 7.5372841316700335e-06, "loss": 0.0346, "step": 151770 }, { "epoch": 0.0289, "grad_norm": 0.05282272398471832, "learning_rate": 7.534326306170178e-06, "loss": 0.0344, "step": 151780 }, { "epoch": 0.02895, "grad_norm": 0.04365290328860283, "learning_rate": 7.531368958175281e-06, "loss": 0.0329, "step": 151790 }, { "epoch": 0.029, "grad_norm": 0.03897915780544281, "learning_rate": 7.528412087766193e-06, "loss": 0.0328, "step": 151800 }, { "epoch": 0.02905, "grad_norm": 0.058162543922662735, "learning_rate": 7.525455695023745e-06, "loss": 0.0331, "step": 151810 }, { "epoch": 0.0291, "grad_norm": 0.04760391265153885, "learning_rate": 7.522499780028783e-06, "loss": 0.0331, "step": 151820 }, { "epoch": 0.02915, "grad_norm": 0.04888239502906799, "learning_rate": 7.519544342862112e-06, "loss": 0.0324, "step": 151830 }, { "epoch": 0.0292, "grad_norm": 0.05259674787521362, "learning_rate": 7.516589383604522e-06, "loss": 0.0347, "step": 151840 }, { "epoch": 0.02925, "grad_norm": 0.04942569509148598, "learning_rate": 7.513634902336819e-06, "loss": 0.0325, "step": 151850 }, { "epoch": 0.0293, "grad_norm": 0.04997270554304123, "learning_rate": 7.510680899139761e-06, "loss": 0.0339, "step": 151860 }, { "epoch": 0.02935, "grad_norm": 0.04699475318193436, "learning_rate": 7.507727374094131e-06, "loss": 0.0325, "step": 151870 }, { "epoch": 0.0294, "grad_norm": 0.05285327881574631, "learning_rate": 7.504774327280648e-06, "loss": 0.0339, "step": 151880 }, { "epoch": 0.02945, "grad_norm": 0.04937724769115448, "learning_rate": 7.501821758780062e-06, "loss": 0.0335, "step": 151890 }, { "epoch": 0.0295, "grad_norm": 0.07506319880485535, "learning_rate": 7.498869668673106e-06, "loss": 0.0333, "step": 151900 }, { "epoch": 0.02955, "grad_norm": 0.056446999311447144, "learning_rate": 7.495918057040474e-06, "loss": 0.033, "step": 151910 }, { "epoch": 0.0296, "grad_norm": 0.04971238970756531, "learning_rate": 7.492966923962869e-06, "loss": 0.0325, "step": 151920 }, { "epoch": 0.02965, "grad_norm": 0.052909985184669495, "learning_rate": 7.490016269520963e-06, "loss": 0.0335, "step": 151930 }, { "epoch": 0.0297, "grad_norm": 0.04486185684800148, "learning_rate": 7.487066093795434e-06, "loss": 0.0324, "step": 151940 }, { "epoch": 0.02975, "grad_norm": 0.05712229385972023, "learning_rate": 7.4841163968669524e-06, "loss": 0.034, "step": 151950 }, { "epoch": 0.0298, "grad_norm": 0.050738777965307236, "learning_rate": 7.481167178816134e-06, "loss": 0.0323, "step": 151960 }, { "epoch": 0.02985, "grad_norm": 0.059770889580249786, "learning_rate": 7.478218439723633e-06, "loss": 0.0336, "step": 151970 }, { "epoch": 0.0299, "grad_norm": 0.04169420152902603, "learning_rate": 7.475270179670046e-06, "loss": 0.0329, "step": 151980 }, { "epoch": 0.02995, "grad_norm": 0.05187474563717842, "learning_rate": 7.472322398735998e-06, "loss": 0.0339, "step": 151990 }, { "epoch": 0.03, "grad_norm": 0.034226927906274796, "learning_rate": 7.469375097002071e-06, "loss": 0.0348, "step": 152000 }, { "epoch": 0.03005, "grad_norm": 0.045822881162166595, "learning_rate": 7.466428274548837e-06, "loss": 0.0347, "step": 152010 }, { "epoch": 0.0301, "grad_norm": 0.051734428852796555, "learning_rate": 7.463481931456873e-06, "loss": 0.0355, "step": 152020 }, { "epoch": 0.03015, "grad_norm": 0.05155622959136963, "learning_rate": 7.460536067806722e-06, "loss": 0.0335, "step": 152030 }, { "epoch": 0.0302, "grad_norm": 0.10209552198648453, "learning_rate": 7.457590683678928e-06, "loss": 0.0371, "step": 152040 }, { "epoch": 0.03025, "grad_norm": 0.06496402621269226, "learning_rate": 7.454645779154007e-06, "loss": 0.0353, "step": 152050 }, { "epoch": 0.0303, "grad_norm": 0.059463124722242355, "learning_rate": 7.451701354312487e-06, "loss": 0.0352, "step": 152060 }, { "epoch": 0.03035, "grad_norm": 0.04688674584031105, "learning_rate": 7.448757409234852e-06, "loss": 0.0346, "step": 152070 }, { "epoch": 0.0304, "grad_norm": 0.05447123199701309, "learning_rate": 7.445813944001601e-06, "loss": 0.034, "step": 152080 }, { "epoch": 0.03045, "grad_norm": 0.05676489695906639, "learning_rate": 7.442870958693204e-06, "loss": 0.0338, "step": 152090 }, { "epoch": 0.0305, "grad_norm": 0.04234858974814415, "learning_rate": 7.439928453390111e-06, "loss": 0.0347, "step": 152100 }, { "epoch": 0.03055, "grad_norm": 0.03998962789773941, "learning_rate": 7.436986428172785e-06, "loss": 0.0328, "step": 152110 }, { "epoch": 0.0306, "grad_norm": 0.046995196491479874, "learning_rate": 7.434044883121652e-06, "loss": 0.0336, "step": 152120 }, { "epoch": 0.03065, "grad_norm": 0.0382130928337574, "learning_rate": 7.431103818317123e-06, "loss": 0.0313, "step": 152130 }, { "epoch": 0.0307, "grad_norm": 0.04772825539112091, "learning_rate": 7.428163233839624e-06, "loss": 0.0323, "step": 152140 }, { "epoch": 0.03075, "grad_norm": 0.04025093838572502, "learning_rate": 7.4252231297695345e-06, "loss": 0.0319, "step": 152150 }, { "epoch": 0.0308, "grad_norm": 0.0439000241458416, "learning_rate": 7.4222835061872554e-06, "loss": 0.0316, "step": 152160 }, { "epoch": 0.03085, "grad_norm": 0.04240845516324043, "learning_rate": 7.419344363173128e-06, "loss": 0.0307, "step": 152170 }, { "epoch": 0.0309, "grad_norm": 0.06312000006437302, "learning_rate": 7.416405700807527e-06, "loss": 0.0328, "step": 152180 }, { "epoch": 0.03095, "grad_norm": 0.04434799775481224, "learning_rate": 7.413467519170783e-06, "loss": 0.0328, "step": 152190 }, { "epoch": 0.031, "grad_norm": 0.046847231686115265, "learning_rate": 7.410529818343237e-06, "loss": 0.0323, "step": 152200 }, { "epoch": 0.03105, "grad_norm": 0.04772993549704552, "learning_rate": 7.407592598405197e-06, "loss": 0.0326, "step": 152210 }, { "epoch": 0.0311, "grad_norm": 0.05239289253950119, "learning_rate": 7.404655859436957e-06, "loss": 0.0325, "step": 152220 }, { "epoch": 0.03115, "grad_norm": 0.042153894901275635, "learning_rate": 7.401719601518825e-06, "loss": 0.0327, "step": 152230 }, { "epoch": 0.0312, "grad_norm": 0.04499209299683571, "learning_rate": 7.398783824731067e-06, "loss": 0.0328, "step": 152240 }, { "epoch": 0.03125, "grad_norm": 0.042803965508937836, "learning_rate": 7.395848529153948e-06, "loss": 0.0333, "step": 152250 }, { "epoch": 0.0313, "grad_norm": 0.039181217551231384, "learning_rate": 7.392913714867708e-06, "loss": 0.0332, "step": 152260 }, { "epoch": 0.03135, "grad_norm": 0.040339015424251556, "learning_rate": 7.3899793819525945e-06, "loss": 0.0377, "step": 152270 }, { "epoch": 0.0314, "grad_norm": 0.05206860229372978, "learning_rate": 7.387045530488834e-06, "loss": 0.0331, "step": 152280 }, { "epoch": 0.03145, "grad_norm": 0.04714643210172653, "learning_rate": 7.384112160556633e-06, "loss": 0.0345, "step": 152290 }, { "epoch": 0.0315, "grad_norm": 0.053952787071466446, "learning_rate": 7.381179272236186e-06, "loss": 0.0344, "step": 152300 }, { "epoch": 0.03155, "grad_norm": 0.04658818617463112, "learning_rate": 7.378246865607672e-06, "loss": 0.0345, "step": 152310 }, { "epoch": 0.0316, "grad_norm": 0.05002017691731453, "learning_rate": 7.375314940751277e-06, "loss": 0.0329, "step": 152320 }, { "epoch": 0.03165, "grad_norm": 0.04997474327683449, "learning_rate": 7.372383497747149e-06, "loss": 0.0338, "step": 152330 }, { "epoch": 0.0317, "grad_norm": 0.04623773321509361, "learning_rate": 7.369452536675425e-06, "loss": 0.0321, "step": 152340 }, { "epoch": 0.03175, "grad_norm": 0.044432058930397034, "learning_rate": 7.366522057616257e-06, "loss": 0.0336, "step": 152350 }, { "epoch": 0.0318, "grad_norm": 0.05239921808242798, "learning_rate": 7.363592060649741e-06, "loss": 0.0328, "step": 152360 }, { "epoch": 0.03185, "grad_norm": 0.04295680671930313, "learning_rate": 7.360662545856006e-06, "loss": 0.0331, "step": 152370 }, { "epoch": 0.0319, "grad_norm": 0.05377604812383652, "learning_rate": 7.357733513315118e-06, "loss": 0.0334, "step": 152380 }, { "epoch": 0.03195, "grad_norm": 0.047571294009685516, "learning_rate": 7.354804963107165e-06, "loss": 0.0328, "step": 152390 }, { "epoch": 0.032, "grad_norm": 0.05396561324596405, "learning_rate": 7.351876895312226e-06, "loss": 0.0365, "step": 152400 }, { "epoch": 0.03205, "grad_norm": 0.04686145484447479, "learning_rate": 7.348949310010339e-06, "loss": 0.0333, "step": 152410 }, { "epoch": 0.0321, "grad_norm": 0.04809960350394249, "learning_rate": 7.34602220728155e-06, "loss": 0.0337, "step": 152420 }, { "epoch": 0.03215, "grad_norm": 0.03867340087890625, "learning_rate": 7.3430955872058724e-06, "loss": 0.0337, "step": 152430 }, { "epoch": 0.0322, "grad_norm": 0.09577205032110214, "learning_rate": 7.340169449863335e-06, "loss": 0.0329, "step": 152440 }, { "epoch": 0.03225, "grad_norm": 0.04986964166164398, "learning_rate": 7.337243795333931e-06, "loss": 0.0346, "step": 152450 }, { "epoch": 0.0323, "grad_norm": 0.05069958046078682, "learning_rate": 7.334318623697639e-06, "loss": 0.0334, "step": 152460 }, { "epoch": 0.03235, "grad_norm": 0.042942311614751816, "learning_rate": 7.3313939350344475e-06, "loss": 0.0362, "step": 152470 }, { "epoch": 0.0324, "grad_norm": 0.05629931390285492, "learning_rate": 7.328469729424301e-06, "loss": 0.0342, "step": 152480 }, { "epoch": 0.03245, "grad_norm": 0.06817825883626938, "learning_rate": 7.325546006947156e-06, "loss": 0.037, "step": 152490 }, { "epoch": 0.0325, "grad_norm": 0.060083985328674316, "learning_rate": 7.322622767682949e-06, "loss": 0.0351, "step": 152500 }, { "epoch": 0.03255, "grad_norm": 0.044535864144563675, "learning_rate": 7.319700011711584e-06, "loss": 0.0335, "step": 152510 }, { "epoch": 0.0326, "grad_norm": 0.0461667962372303, "learning_rate": 7.316777739112985e-06, "loss": 0.033, "step": 152520 }, { "epoch": 0.03265, "grad_norm": 0.05126021057367325, "learning_rate": 7.313855949967041e-06, "loss": 0.0336, "step": 152530 }, { "epoch": 0.0327, "grad_norm": 0.04119449481368065, "learning_rate": 7.310934644353632e-06, "loss": 0.0373, "step": 152540 }, { "epoch": 0.03275, "grad_norm": 0.05020667612552643, "learning_rate": 7.308013822352614e-06, "loss": 0.0315, "step": 152550 }, { "epoch": 0.0328, "grad_norm": 0.040357209742069244, "learning_rate": 7.30509348404386e-06, "loss": 0.0321, "step": 152560 }, { "epoch": 0.03285, "grad_norm": 0.04429003596305847, "learning_rate": 7.3021736295071975e-06, "loss": 0.0324, "step": 152570 }, { "epoch": 0.0329, "grad_norm": 0.07690481096506119, "learning_rate": 7.2992542588224635e-06, "loss": 0.0341, "step": 152580 }, { "epoch": 0.03295, "grad_norm": 0.05730977654457092, "learning_rate": 7.2963353720694685e-06, "loss": 0.034, "step": 152590 }, { "epoch": 0.033, "grad_norm": 0.047369591891765594, "learning_rate": 7.293416969328007e-06, "loss": 0.0321, "step": 152600 }, { "epoch": 0.03305, "grad_norm": 0.06182011216878891, "learning_rate": 7.290499050677882e-06, "loss": 0.0337, "step": 152610 }, { "epoch": 0.0331, "grad_norm": 0.06516768783330917, "learning_rate": 7.287581616198858e-06, "loss": 0.0329, "step": 152620 }, { "epoch": 0.03315, "grad_norm": 0.05089006945490837, "learning_rate": 7.2846646659707005e-06, "loss": 0.0323, "step": 152630 }, { "epoch": 0.0332, "grad_norm": 0.04354912415146828, "learning_rate": 7.281748200073146e-06, "loss": 0.0333, "step": 152640 }, { "epoch": 0.03325, "grad_norm": 0.039361096918582916, "learning_rate": 7.27883221858594e-06, "loss": 0.0312, "step": 152650 }, { "epoch": 0.0333, "grad_norm": 0.050111912190914154, "learning_rate": 7.275916721588818e-06, "loss": 0.0316, "step": 152660 }, { "epoch": 0.03335, "grad_norm": 0.04241053760051727, "learning_rate": 7.273001709161459e-06, "loss": 0.0323, "step": 152670 }, { "epoch": 0.0334, "grad_norm": 0.052290454506874084, "learning_rate": 7.270087181383583e-06, "loss": 0.0337, "step": 152680 }, { "epoch": 0.03345, "grad_norm": 0.048461172729730606, "learning_rate": 7.267173138334854e-06, "loss": 0.0337, "step": 152690 }, { "epoch": 0.0335, "grad_norm": 0.047181010246276855, "learning_rate": 7.264259580094956e-06, "loss": 0.034, "step": 152700 }, { "epoch": 0.03355, "grad_norm": 0.058403100818395615, "learning_rate": 7.261346506743538e-06, "loss": 0.0337, "step": 152710 }, { "epoch": 0.0336, "grad_norm": 0.0491732694208622, "learning_rate": 7.258433918360238e-06, "loss": 0.0338, "step": 152720 }, { "epoch": 0.03365, "grad_norm": 0.04675913229584694, "learning_rate": 7.255521815024694e-06, "loss": 0.0339, "step": 152730 }, { "epoch": 0.0337, "grad_norm": 0.03744709864258766, "learning_rate": 7.252610196816517e-06, "loss": 0.0322, "step": 152740 }, { "epoch": 0.03375, "grad_norm": 0.04835504665970802, "learning_rate": 7.249699063815313e-06, "loss": 0.0326, "step": 152750 }, { "epoch": 0.0338, "grad_norm": 0.04576469957828522, "learning_rate": 7.246788416100658e-06, "loss": 0.0333, "step": 152760 }, { "epoch": 0.03385, "grad_norm": 0.04791948199272156, "learning_rate": 7.24387825375214e-06, "loss": 0.0327, "step": 152770 }, { "epoch": 0.0339, "grad_norm": 0.0478530153632164, "learning_rate": 7.240968576849324e-06, "loss": 0.0324, "step": 152780 }, { "epoch": 0.03395, "grad_norm": 0.04098542034626007, "learning_rate": 7.23805938547176e-06, "loss": 0.0334, "step": 152790 }, { "epoch": 0.034, "grad_norm": 0.047090351581573486, "learning_rate": 7.235150679698977e-06, "loss": 0.0328, "step": 152800 }, { "epoch": 0.03405, "grad_norm": 0.04444635286927223, "learning_rate": 7.232242459610491e-06, "loss": 0.0318, "step": 152810 }, { "epoch": 0.0341, "grad_norm": 0.04326888918876648, "learning_rate": 7.2293347252858305e-06, "loss": 0.0322, "step": 152820 }, { "epoch": 0.03415, "grad_norm": 0.050240468233823776, "learning_rate": 7.226427476804484e-06, "loss": 0.0344, "step": 152830 }, { "epoch": 0.0342, "grad_norm": 0.04395769163966179, "learning_rate": 7.223520714245924e-06, "loss": 0.0321, "step": 152840 }, { "epoch": 0.03425, "grad_norm": 0.04159471392631531, "learning_rate": 7.220614437689638e-06, "loss": 0.0328, "step": 152850 }, { "epoch": 0.0343, "grad_norm": 0.05443045496940613, "learning_rate": 7.217708647215063e-06, "loss": 0.0343, "step": 152860 }, { "epoch": 0.03435, "grad_norm": 0.042212799191474915, "learning_rate": 7.214803342901671e-06, "loss": 0.033, "step": 152870 }, { "epoch": 0.0344, "grad_norm": 0.04545920342206955, "learning_rate": 7.211898524828859e-06, "loss": 0.0319, "step": 152880 }, { "epoch": 0.03445, "grad_norm": 0.04602941870689392, "learning_rate": 7.208994193076057e-06, "loss": 0.0328, "step": 152890 }, { "epoch": 0.0345, "grad_norm": 0.050577692687511444, "learning_rate": 7.20609034772268e-06, "loss": 0.0336, "step": 152900 }, { "epoch": 0.03455, "grad_norm": 0.03855586424469948, "learning_rate": 7.203186988848107e-06, "loss": 0.0316, "step": 152910 }, { "epoch": 0.0346, "grad_norm": 0.048223525285720825, "learning_rate": 7.200284116531716e-06, "loss": 0.0312, "step": 152920 }, { "epoch": 0.03465, "grad_norm": 0.0466330461204052, "learning_rate": 7.197381730852862e-06, "loss": 0.0327, "step": 152930 }, { "epoch": 0.0347, "grad_norm": 0.047722309827804565, "learning_rate": 7.19447983189091e-06, "loss": 0.031, "step": 152940 }, { "epoch": 0.03475, "grad_norm": 0.04887279495596886, "learning_rate": 7.191578419725192e-06, "loss": 0.0313, "step": 152950 }, { "epoch": 0.0348, "grad_norm": 0.0436398871243, "learning_rate": 7.18867749443502e-06, "loss": 0.0312, "step": 152960 }, { "epoch": 0.03485, "grad_norm": 0.040157947689294815, "learning_rate": 7.185777056099724e-06, "loss": 0.0329, "step": 152970 }, { "epoch": 0.0349, "grad_norm": 0.044679731130599976, "learning_rate": 7.182877104798583e-06, "loss": 0.0313, "step": 152980 }, { "epoch": 0.03495, "grad_norm": 0.04023885726928711, "learning_rate": 7.179977640610894e-06, "loss": 0.0307, "step": 152990 }, { "epoch": 0.035, "grad_norm": 0.04457961022853851, "learning_rate": 7.177078663615921e-06, "loss": 0.0306, "step": 153000 }, { "epoch": 0.03505, "grad_norm": 0.04377239570021629, "learning_rate": 7.174180173892925e-06, "loss": 0.0322, "step": 153010 }, { "epoch": 0.0351, "grad_norm": 0.042049601674079895, "learning_rate": 7.171282171521138e-06, "loss": 0.0329, "step": 153020 }, { "epoch": 0.03515, "grad_norm": 0.04242038354277611, "learning_rate": 7.168384656579804e-06, "loss": 0.0319, "step": 153030 }, { "epoch": 0.0352, "grad_norm": 0.052859652787446976, "learning_rate": 7.165487629148135e-06, "loss": 0.0329, "step": 153040 }, { "epoch": 0.03525, "grad_norm": 0.05696389451622963, "learning_rate": 7.162591089305326e-06, "loss": 0.0321, "step": 153050 }, { "epoch": 0.0353, "grad_norm": 0.05143987387418747, "learning_rate": 7.1596950371305845e-06, "loss": 0.0311, "step": 153060 }, { "epoch": 0.03535, "grad_norm": 0.047393981367349625, "learning_rate": 7.156799472703072e-06, "loss": 0.0322, "step": 153070 }, { "epoch": 0.0354, "grad_norm": 0.042954448610544205, "learning_rate": 7.1539043961019706e-06, "loss": 0.0318, "step": 153080 }, { "epoch": 0.03545, "grad_norm": 0.05207926407456398, "learning_rate": 7.151009807406403e-06, "loss": 0.0321, "step": 153090 }, { "epoch": 0.0355, "grad_norm": 0.04027773067355156, "learning_rate": 7.148115706695524e-06, "loss": 0.0329, "step": 153100 }, { "epoch": 0.03555, "grad_norm": 0.0464700311422348, "learning_rate": 7.145222094048462e-06, "loss": 0.0325, "step": 153110 }, { "epoch": 0.0356, "grad_norm": 0.043472904711961746, "learning_rate": 7.142328969544321e-06, "loss": 0.0321, "step": 153120 }, { "epoch": 0.03565, "grad_norm": 0.04800725355744362, "learning_rate": 7.139436333262195e-06, "loss": 0.033, "step": 153130 }, { "epoch": 0.0357, "grad_norm": 0.04517852142453194, "learning_rate": 7.136544185281163e-06, "loss": 0.0322, "step": 153140 }, { "epoch": 0.03575, "grad_norm": 0.047509852796792984, "learning_rate": 7.1336525256803034e-06, "loss": 0.0332, "step": 153150 }, { "epoch": 0.0358, "grad_norm": 0.05477967858314514, "learning_rate": 7.130761354538687e-06, "loss": 0.0331, "step": 153160 }, { "epoch": 0.03585, "grad_norm": 0.0509810596704483, "learning_rate": 7.127870671935324e-06, "loss": 0.0336, "step": 153170 }, { "epoch": 0.0359, "grad_norm": 0.04239951819181442, "learning_rate": 7.124980477949272e-06, "loss": 0.0328, "step": 153180 }, { "epoch": 0.03595, "grad_norm": 0.07508710771799088, "learning_rate": 7.122090772659531e-06, "loss": 0.0355, "step": 153190 }, { "epoch": 0.036, "grad_norm": 0.05007263273000717, "learning_rate": 7.119201556145119e-06, "loss": 0.0331, "step": 153200 }, { "epoch": 0.03605, "grad_norm": 0.08754939585924149, "learning_rate": 7.11631282848502e-06, "loss": 0.0339, "step": 153210 }, { "epoch": 0.0361, "grad_norm": 0.06672363728284836, "learning_rate": 7.1134245897582e-06, "loss": 0.0332, "step": 153220 }, { "epoch": 0.03615, "grad_norm": 0.04547875002026558, "learning_rate": 7.110536840043641e-06, "loss": 0.0334, "step": 153230 }, { "epoch": 0.0362, "grad_norm": 0.04429539293050766, "learning_rate": 7.107649579420283e-06, "loss": 0.0317, "step": 153240 }, { "epoch": 0.03625, "grad_norm": 0.06021294370293617, "learning_rate": 7.104762807967066e-06, "loss": 0.0328, "step": 153250 }, { "epoch": 0.0363, "grad_norm": 0.04863046109676361, "learning_rate": 7.101876525762901e-06, "loss": 0.0321, "step": 153260 }, { "epoch": 0.03635, "grad_norm": 0.04178769141435623, "learning_rate": 7.098990732886718e-06, "loss": 0.0335, "step": 153270 }, { "epoch": 0.0364, "grad_norm": 0.03681156039237976, "learning_rate": 7.096105429417393e-06, "loss": 0.0358, "step": 153280 }, { "epoch": 0.03645, "grad_norm": 0.049410223960876465, "learning_rate": 7.093220615433827e-06, "loss": 0.0334, "step": 153290 }, { "epoch": 0.0365, "grad_norm": 0.04368637502193451, "learning_rate": 7.090336291014884e-06, "loss": 0.0322, "step": 153300 }, { "epoch": 0.03655, "grad_norm": 0.04722609370946884, "learning_rate": 7.08745245623941e-06, "loss": 0.0328, "step": 153310 }, { "epoch": 0.0366, "grad_norm": 0.057430315762758255, "learning_rate": 7.084569111186262e-06, "loss": 0.0339, "step": 153320 }, { "epoch": 0.03665, "grad_norm": 0.054572589695453644, "learning_rate": 7.0816862559342664e-06, "loss": 0.033, "step": 153330 }, { "epoch": 0.0367, "grad_norm": 0.05039558559656143, "learning_rate": 7.07880389056223e-06, "loss": 0.0359, "step": 153340 }, { "epoch": 0.03675, "grad_norm": 0.06452251225709915, "learning_rate": 7.075922015148967e-06, "loss": 0.0326, "step": 153350 }, { "epoch": 0.0368, "grad_norm": 0.04780831187963486, "learning_rate": 7.073040629773259e-06, "loss": 0.0332, "step": 153360 }, { "epoch": 0.03685, "grad_norm": 0.051232628524303436, "learning_rate": 7.070159734513898e-06, "loss": 0.0323, "step": 153370 }, { "epoch": 0.0369, "grad_norm": 0.05242280662059784, "learning_rate": 7.067279329449616e-06, "loss": 0.0331, "step": 153380 }, { "epoch": 0.03695, "grad_norm": 0.04547596722841263, "learning_rate": 7.064399414659193e-06, "loss": 0.0322, "step": 153390 }, { "epoch": 0.037, "grad_norm": 0.051696255803108215, "learning_rate": 7.061519990221341e-06, "loss": 0.0337, "step": 153400 }, { "epoch": 0.03705, "grad_norm": 0.0777956172823906, "learning_rate": 7.058641056214801e-06, "loss": 0.0332, "step": 153410 }, { "epoch": 0.0371, "grad_norm": 0.04949126020073891, "learning_rate": 7.055762612718275e-06, "loss": 0.0323, "step": 153420 }, { "epoch": 0.03715, "grad_norm": 0.05280499532818794, "learning_rate": 7.052884659810452e-06, "loss": 0.0318, "step": 153430 }, { "epoch": 0.0372, "grad_norm": 0.050723519176244736, "learning_rate": 7.050007197570024e-06, "loss": 0.0333, "step": 153440 }, { "epoch": 0.03725, "grad_norm": 0.05991830676794052, "learning_rate": 7.04713022607566e-06, "loss": 0.0326, "step": 153450 }, { "epoch": 0.0373, "grad_norm": 0.04820108786225319, "learning_rate": 7.044253745406007e-06, "loss": 0.0328, "step": 153460 }, { "epoch": 0.03735, "grad_norm": 0.04432525113224983, "learning_rate": 7.0413777556397055e-06, "loss": 0.0323, "step": 153470 }, { "epoch": 0.0374, "grad_norm": 0.04415155202150345, "learning_rate": 7.038502256855389e-06, "loss": 0.0316, "step": 153480 }, { "epoch": 0.03745, "grad_norm": 0.049692247062921524, "learning_rate": 7.035627249131682e-06, "loss": 0.0334, "step": 153490 }, { "epoch": 0.0375, "grad_norm": 0.05337144061923027, "learning_rate": 7.032752732547174e-06, "loss": 0.0322, "step": 153500 }, { "epoch": 0.03755, "grad_norm": 0.03996986150741577, "learning_rate": 7.02987870718046e-06, "loss": 0.0308, "step": 153510 }, { "epoch": 0.0376, "grad_norm": 0.038624316453933716, "learning_rate": 7.027005173110099e-06, "loss": 0.0317, "step": 153520 }, { "epoch": 0.03765, "grad_norm": 0.040363065898418427, "learning_rate": 7.0241321304146765e-06, "loss": 0.0328, "step": 153530 }, { "epoch": 0.0377, "grad_norm": 0.04164469242095947, "learning_rate": 7.021259579172726e-06, "loss": 0.0323, "step": 153540 }, { "epoch": 0.03775, "grad_norm": 0.046082183718681335, "learning_rate": 7.018387519462777e-06, "loss": 0.0325, "step": 153550 }, { "epoch": 0.0378, "grad_norm": 0.04982241988182068, "learning_rate": 7.0155159513633635e-06, "loss": 0.0333, "step": 153560 }, { "epoch": 0.03785, "grad_norm": 0.052763279527425766, "learning_rate": 7.0126448749529836e-06, "loss": 0.0333, "step": 153570 }, { "epoch": 0.0379, "grad_norm": 0.04755253344774246, "learning_rate": 7.009774290310148e-06, "loss": 0.0326, "step": 153580 }, { "epoch": 0.03795, "grad_norm": 0.07079523801803589, "learning_rate": 7.006904197513308e-06, "loss": 0.0348, "step": 153590 }, { "epoch": 0.038, "grad_norm": 0.08428723365068436, "learning_rate": 7.0040345966409514e-06, "loss": 0.0342, "step": 153600 }, { "epoch": 0.03805, "grad_norm": 0.04555227607488632, "learning_rate": 7.001165487771536e-06, "loss": 0.0327, "step": 153610 }, { "epoch": 0.0381, "grad_norm": 0.04715776443481445, "learning_rate": 6.998296870983489e-06, "loss": 0.0338, "step": 153620 }, { "epoch": 0.03815, "grad_norm": 0.04404429718852043, "learning_rate": 6.995428746355248e-06, "loss": 0.0347, "step": 153630 }, { "epoch": 0.0382, "grad_norm": 0.045779477804899216, "learning_rate": 6.99256111396521e-06, "loss": 0.0329, "step": 153640 }, { "epoch": 0.03825, "grad_norm": 0.046304021030664444, "learning_rate": 6.989693973891795e-06, "loss": 0.0338, "step": 153650 }, { "epoch": 0.0383, "grad_norm": 0.051491059362888336, "learning_rate": 6.986827326213383e-06, "loss": 0.0345, "step": 153660 }, { "epoch": 0.03835, "grad_norm": 0.048529427498579025, "learning_rate": 6.9839611710083325e-06, "loss": 0.034, "step": 153670 }, { "epoch": 0.0384, "grad_norm": 0.04330417141318321, "learning_rate": 6.981095508355026e-06, "loss": 0.0332, "step": 153680 }, { "epoch": 0.03845, "grad_norm": 0.044940460473299026, "learning_rate": 6.97823033833179e-06, "loss": 0.033, "step": 153690 }, { "epoch": 0.0385, "grad_norm": 0.03975803032517433, "learning_rate": 6.9753656610169745e-06, "loss": 0.0313, "step": 153700 }, { "epoch": 0.03855, "grad_norm": 0.04785052686929703, "learning_rate": 6.972501476488891e-06, "loss": 0.032, "step": 153710 }, { "epoch": 0.0386, "grad_norm": 0.04156506061553955, "learning_rate": 6.969637784825836e-06, "loss": 0.0321, "step": 153720 }, { "epoch": 0.03865, "grad_norm": 0.0429970882833004, "learning_rate": 6.966774586106117e-06, "loss": 0.0318, "step": 153730 }, { "epoch": 0.0387, "grad_norm": 0.046473126858472824, "learning_rate": 6.963911880408006e-06, "loss": 0.0313, "step": 153740 }, { "epoch": 0.03875, "grad_norm": 0.04150541126728058, "learning_rate": 6.961049667809768e-06, "loss": 0.0317, "step": 153750 }, { "epoch": 0.0388, "grad_norm": 0.04941317439079285, "learning_rate": 6.958187948389649e-06, "loss": 0.0332, "step": 153760 }, { "epoch": 0.03885, "grad_norm": 0.05646712705492973, "learning_rate": 6.955326722225902e-06, "loss": 0.0325, "step": 153770 }, { "epoch": 0.0389, "grad_norm": 0.052297193557024, "learning_rate": 6.952465989396733e-06, "loss": 0.0322, "step": 153780 }, { "epoch": 0.03895, "grad_norm": 0.047596145421266556, "learning_rate": 6.949605749980375e-06, "loss": 0.033, "step": 153790 }, { "epoch": 0.039, "grad_norm": 0.04238370433449745, "learning_rate": 6.9467460040550134e-06, "loss": 0.0339, "step": 153800 }, { "epoch": 0.03905, "grad_norm": 0.04646526649594307, "learning_rate": 6.943886751698825e-06, "loss": 0.0323, "step": 153810 }, { "epoch": 0.0391, "grad_norm": 0.04616091027855873, "learning_rate": 6.941027992989996e-06, "loss": 0.0334, "step": 153820 }, { "epoch": 0.03915, "grad_norm": 0.05092548951506615, "learning_rate": 6.938169728006677e-06, "loss": 0.0351, "step": 153830 }, { "epoch": 0.0392, "grad_norm": 0.0540512315928936, "learning_rate": 6.935311956827015e-06, "loss": 0.0336, "step": 153840 }, { "epoch": 0.03925, "grad_norm": 0.04341093450784683, "learning_rate": 6.932454679529129e-06, "loss": 0.0337, "step": 153850 }, { "epoch": 0.0393, "grad_norm": 0.04935673996806145, "learning_rate": 6.929597896191142e-06, "loss": 0.0352, "step": 153860 }, { "epoch": 0.03935, "grad_norm": 0.052990105003118515, "learning_rate": 6.926741606891179e-06, "loss": 0.0352, "step": 153870 }, { "epoch": 0.0394, "grad_norm": 0.042551975697278976, "learning_rate": 6.923885811707292e-06, "loss": 0.0339, "step": 153880 }, { "epoch": 0.03945, "grad_norm": 0.04354416951537132, "learning_rate": 6.921030510717585e-06, "loss": 0.035, "step": 153890 }, { "epoch": 0.0395, "grad_norm": 0.05921914428472519, "learning_rate": 6.918175704000104e-06, "loss": 0.0327, "step": 153900 }, { "epoch": 0.03955, "grad_norm": 0.05651251971721649, "learning_rate": 6.915321391632915e-06, "loss": 0.0337, "step": 153910 }, { "epoch": 0.0396, "grad_norm": 0.07422874867916107, "learning_rate": 6.912467573694042e-06, "loss": 0.0349, "step": 153920 }, { "epoch": 0.03965, "grad_norm": 0.04788966849446297, "learning_rate": 6.909614250261507e-06, "loss": 0.0325, "step": 153930 }, { "epoch": 0.0397, "grad_norm": 0.0516754575073719, "learning_rate": 6.906761421413327e-06, "loss": 0.0345, "step": 153940 }, { "epoch": 0.03975, "grad_norm": 0.04991249740123749, "learning_rate": 6.903909087227495e-06, "loss": 0.0336, "step": 153950 }, { "epoch": 0.0398, "grad_norm": 0.04570896178483963, "learning_rate": 6.901057247781986e-06, "loss": 0.0346, "step": 153960 }, { "epoch": 0.03985, "grad_norm": 0.04673990607261658, "learning_rate": 6.898205903154767e-06, "loss": 0.0334, "step": 153970 }, { "epoch": 0.0399, "grad_norm": 0.04619419947266579, "learning_rate": 6.895355053423799e-06, "loss": 0.0342, "step": 153980 }, { "epoch": 0.03995, "grad_norm": 0.050655726343393326, "learning_rate": 6.8925046986670295e-06, "loss": 0.0338, "step": 153990 }, { "epoch": 0.04, "grad_norm": 0.047669846564531326, "learning_rate": 6.889654838962379e-06, "loss": 0.0349, "step": 154000 }, { "epoch": 0.04005, "grad_norm": 0.042578499764204025, "learning_rate": 6.886805474387759e-06, "loss": 0.034, "step": 154010 }, { "epoch": 0.0401, "grad_norm": 0.04635762423276901, "learning_rate": 6.883956605021066e-06, "loss": 0.0331, "step": 154020 }, { "epoch": 0.04015, "grad_norm": 0.04816887900233269, "learning_rate": 6.881108230940203e-06, "loss": 0.0351, "step": 154030 }, { "epoch": 0.0402, "grad_norm": 0.0424954779446125, "learning_rate": 6.8782603522230314e-06, "loss": 0.0336, "step": 154040 }, { "epoch": 0.04025, "grad_norm": 0.04485035315155983, "learning_rate": 6.875412968947409e-06, "loss": 0.0339, "step": 154050 }, { "epoch": 0.0403, "grad_norm": 0.044789139181375504, "learning_rate": 6.8725660811911924e-06, "loss": 0.0344, "step": 154060 }, { "epoch": 0.04035, "grad_norm": 0.04421795904636383, "learning_rate": 6.869719689032203e-06, "loss": 0.0338, "step": 154070 }, { "epoch": 0.0404, "grad_norm": 0.047407958656549454, "learning_rate": 6.866873792548281e-06, "loss": 0.0349, "step": 154080 }, { "epoch": 0.04045, "grad_norm": 0.04338439926505089, "learning_rate": 6.864028391817201e-06, "loss": 0.0356, "step": 154090 }, { "epoch": 0.0405, "grad_norm": 0.057887546718120575, "learning_rate": 6.861183486916773e-06, "loss": 0.0339, "step": 154100 }, { "epoch": 0.04055, "grad_norm": 0.05058251693844795, "learning_rate": 6.858339077924777e-06, "loss": 0.0337, "step": 154110 }, { "epoch": 0.0406, "grad_norm": 0.10525360703468323, "learning_rate": 6.855495164918979e-06, "loss": 0.0354, "step": 154120 }, { "epoch": 0.04065, "grad_norm": 0.04736180230975151, "learning_rate": 6.852651747977126e-06, "loss": 0.0337, "step": 154130 }, { "epoch": 0.0407, "grad_norm": 0.04907584190368652, "learning_rate": 6.849808827176951e-06, "loss": 0.0335, "step": 154140 }, { "epoch": 0.04075, "grad_norm": 0.04502156376838684, "learning_rate": 6.846966402596189e-06, "loss": 0.0336, "step": 154150 }, { "epoch": 0.0408, "grad_norm": 0.04660884663462639, "learning_rate": 6.8441244743125466e-06, "loss": 0.033, "step": 154160 }, { "epoch": 0.04085, "grad_norm": 0.0450296513736248, "learning_rate": 6.841283042403712e-06, "loss": 0.0334, "step": 154170 }, { "epoch": 0.0409, "grad_norm": 0.043547023087739944, "learning_rate": 6.838442106947385e-06, "loss": 0.0321, "step": 154180 }, { "epoch": 0.04095, "grad_norm": 0.05435403436422348, "learning_rate": 6.8356016680212215e-06, "loss": 0.0336, "step": 154190 }, { "epoch": 0.041, "grad_norm": 0.053517408668994904, "learning_rate": 6.832761725702891e-06, "loss": 0.0331, "step": 154200 }, { "epoch": 0.04105, "grad_norm": 0.05222579836845398, "learning_rate": 6.829922280070028e-06, "loss": 0.0327, "step": 154210 }, { "epoch": 0.0411, "grad_norm": 0.04833085462450981, "learning_rate": 6.827083331200265e-06, "loss": 0.0324, "step": 154220 }, { "epoch": 0.04115, "grad_norm": 0.04768088459968567, "learning_rate": 6.82424487917121e-06, "loss": 0.0331, "step": 154230 }, { "epoch": 0.0412, "grad_norm": 0.057974107563495636, "learning_rate": 6.821406924060478e-06, "loss": 0.034, "step": 154240 }, { "epoch": 0.04125, "grad_norm": 0.05497260019183159, "learning_rate": 6.818569465945654e-06, "loss": 0.0324, "step": 154250 }, { "epoch": 0.0413, "grad_norm": 0.05150937661528587, "learning_rate": 6.815732504904298e-06, "loss": 0.0322, "step": 154260 }, { "epoch": 0.04135, "grad_norm": 0.0442415289580822, "learning_rate": 6.812896041013994e-06, "loss": 0.0325, "step": 154270 }, { "epoch": 0.0414, "grad_norm": 0.04612816125154495, "learning_rate": 6.810060074352273e-06, "loss": 0.0333, "step": 154280 }, { "epoch": 0.04145, "grad_norm": 0.054583415389060974, "learning_rate": 6.80722460499669e-06, "loss": 0.0339, "step": 154290 }, { "epoch": 0.0415, "grad_norm": 0.05033816769719124, "learning_rate": 6.804389633024738e-06, "loss": 0.0352, "step": 154300 }, { "epoch": 0.04155, "grad_norm": 0.04901561886072159, "learning_rate": 6.801555158513937e-06, "loss": 0.0346, "step": 154310 }, { "epoch": 0.0416, "grad_norm": 0.053737394511699677, "learning_rate": 6.798721181541787e-06, "loss": 0.0341, "step": 154320 }, { "epoch": 0.04165, "grad_norm": 0.05055680498480797, "learning_rate": 6.795887702185763e-06, "loss": 0.0339, "step": 154330 }, { "epoch": 0.0417, "grad_norm": 0.04323967546224594, "learning_rate": 6.793054720523329e-06, "loss": 0.0324, "step": 154340 }, { "epoch": 0.04175, "grad_norm": 0.047659508883953094, "learning_rate": 6.790222236631933e-06, "loss": 0.0325, "step": 154350 }, { "epoch": 0.0418, "grad_norm": 0.03829479217529297, "learning_rate": 6.787390250589018e-06, "loss": 0.0328, "step": 154360 }, { "epoch": 0.04185, "grad_norm": 0.06552759557962418, "learning_rate": 6.784558762472029e-06, "loss": 0.034, "step": 154370 }, { "epoch": 0.0419, "grad_norm": 0.054503217339515686, "learning_rate": 6.781727772358346e-06, "loss": 0.0362, "step": 154380 }, { "epoch": 0.04195, "grad_norm": 0.07738342881202698, "learning_rate": 6.778897280325386e-06, "loss": 0.0356, "step": 154390 }, { "epoch": 0.042, "grad_norm": 0.03825148195028305, "learning_rate": 6.776067286450521e-06, "loss": 0.0333, "step": 154400 }, { "epoch": 0.04205, "grad_norm": 0.04342617094516754, "learning_rate": 6.773237790811141e-06, "loss": 0.0334, "step": 154410 }, { "epoch": 0.0421, "grad_norm": 0.04140163213014603, "learning_rate": 6.77040879348459e-06, "loss": 0.0331, "step": 154420 }, { "epoch": 0.04215, "grad_norm": 0.047371137887239456, "learning_rate": 6.767580294548207e-06, "loss": 0.0327, "step": 154430 }, { "epoch": 0.0422, "grad_norm": 0.03845300152897835, "learning_rate": 6.764752294079335e-06, "loss": 0.0321, "step": 154440 }, { "epoch": 0.04225, "grad_norm": 0.0411493293941021, "learning_rate": 6.7619247921552854e-06, "loss": 0.0314, "step": 154450 }, { "epoch": 0.0423, "grad_norm": 0.07412539422512054, "learning_rate": 6.759097788853364e-06, "loss": 0.0325, "step": 154460 }, { "epoch": 0.04235, "grad_norm": 0.0695980116724968, "learning_rate": 6.756271284250845e-06, "loss": 0.0325, "step": 154470 }, { "epoch": 0.0424, "grad_norm": 0.050232771784067154, "learning_rate": 6.753445278425022e-06, "loss": 0.0323, "step": 154480 }, { "epoch": 0.04245, "grad_norm": 0.04724445194005966, "learning_rate": 6.750619771453146e-06, "loss": 0.0329, "step": 154490 }, { "epoch": 0.0425, "grad_norm": 0.051420923322439194, "learning_rate": 6.7477947634124735e-06, "loss": 0.0329, "step": 154500 }, { "epoch": 0.04255, "grad_norm": 0.04676588997244835, "learning_rate": 6.744970254380237e-06, "loss": 0.0329, "step": 154510 }, { "epoch": 0.0426, "grad_norm": 0.04401226341724396, "learning_rate": 6.742146244433648e-06, "loss": 0.0328, "step": 154520 }, { "epoch": 0.04265, "grad_norm": 0.042951542884111404, "learning_rate": 6.73932273364993e-06, "loss": 0.0328, "step": 154530 }, { "epoch": 0.0427, "grad_norm": 0.03603766858577728, "learning_rate": 6.736499722106266e-06, "loss": 0.0336, "step": 154540 }, { "epoch": 0.04275, "grad_norm": 0.04851701110601425, "learning_rate": 6.733677209879832e-06, "loss": 0.033, "step": 154550 }, { "epoch": 0.0428, "grad_norm": 0.04410025477409363, "learning_rate": 6.7308551970478085e-06, "loss": 0.035, "step": 154560 }, { "epoch": 0.04285, "grad_norm": 0.04761466011404991, "learning_rate": 6.728033683687332e-06, "loss": 0.034, "step": 154570 }, { "epoch": 0.0429, "grad_norm": 0.051995132118463516, "learning_rate": 6.725212669875567e-06, "loss": 0.0344, "step": 154580 }, { "epoch": 0.04295, "grad_norm": 0.04637273773550987, "learning_rate": 6.722392155689605e-06, "loss": 0.0343, "step": 154590 }, { "epoch": 0.043, "grad_norm": 0.04322204366326332, "learning_rate": 6.719572141206584e-06, "loss": 0.0354, "step": 154600 }, { "epoch": 0.04305, "grad_norm": 0.04716825112700462, "learning_rate": 6.716752626503586e-06, "loss": 0.0342, "step": 154610 }, { "epoch": 0.0431, "grad_norm": 0.04313306137919426, "learning_rate": 6.713933611657708e-06, "loss": 0.0336, "step": 154620 }, { "epoch": 0.04315, "grad_norm": 0.05106494948267937, "learning_rate": 6.7111150967460155e-06, "loss": 0.0349, "step": 154630 }, { "epoch": 0.0432, "grad_norm": 0.04225478321313858, "learning_rate": 6.7082970818455605e-06, "loss": 0.0339, "step": 154640 }, { "epoch": 0.04325, "grad_norm": 0.04658160358667374, "learning_rate": 6.705479567033396e-06, "loss": 0.034, "step": 154650 }, { "epoch": 0.0433, "grad_norm": 0.04925062134861946, "learning_rate": 6.7026625523865485e-06, "loss": 0.0331, "step": 154660 }, { "epoch": 0.04335, "grad_norm": 0.04897158220410347, "learning_rate": 6.699846037982033e-06, "loss": 0.0332, "step": 154670 }, { "epoch": 0.0434, "grad_norm": 0.044098395854234695, "learning_rate": 6.697030023896844e-06, "loss": 0.0328, "step": 154680 }, { "epoch": 0.04345, "grad_norm": 0.04288400709629059, "learning_rate": 6.694214510207978e-06, "loss": 0.034, "step": 154690 }, { "epoch": 0.0435, "grad_norm": 0.04962920770049095, "learning_rate": 6.6913994969924176e-06, "loss": 0.0339, "step": 154700 }, { "epoch": 0.04355, "grad_norm": 0.044396474957466125, "learning_rate": 6.688584984327115e-06, "loss": 0.0345, "step": 154710 }, { "epoch": 0.0436, "grad_norm": 0.04685245454311371, "learning_rate": 6.685770972289021e-06, "loss": 0.0327, "step": 154720 }, { "epoch": 0.04365, "grad_norm": 0.050125036388635635, "learning_rate": 6.682957460955061e-06, "loss": 0.0342, "step": 154730 }, { "epoch": 0.0437, "grad_norm": 0.048011697828769684, "learning_rate": 6.68014445040217e-06, "loss": 0.0327, "step": 154740 }, { "epoch": 0.04375, "grad_norm": 0.03696242347359657, "learning_rate": 6.677331940707249e-06, "loss": 0.0325, "step": 154750 }, { "epoch": 0.0438, "grad_norm": 0.03834865242242813, "learning_rate": 6.674519931947179e-06, "loss": 0.0327, "step": 154760 }, { "epoch": 0.04385, "grad_norm": 0.038775499910116196, "learning_rate": 6.671708424198858e-06, "loss": 0.0329, "step": 154770 }, { "epoch": 0.0439, "grad_norm": 0.0430787093937397, "learning_rate": 6.668897417539136e-06, "loss": 0.0323, "step": 154780 }, { "epoch": 0.04395, "grad_norm": 0.04222596436738968, "learning_rate": 6.666086912044889e-06, "loss": 0.0317, "step": 154790 }, { "epoch": 0.044, "grad_norm": 0.04960113391280174, "learning_rate": 6.663276907792921e-06, "loss": 0.0329, "step": 154800 }, { "epoch": 0.04405, "grad_norm": 0.04817935824394226, "learning_rate": 6.660467404860071e-06, "loss": 0.0314, "step": 154810 }, { "epoch": 0.0441, "grad_norm": 0.057387106120586395, "learning_rate": 6.657658403323164e-06, "loss": 0.0339, "step": 154820 }, { "epoch": 0.04415, "grad_norm": 0.05020061880350113, "learning_rate": 6.654849903258983e-06, "loss": 0.032, "step": 154830 }, { "epoch": 0.0442, "grad_norm": 0.05041590705513954, "learning_rate": 6.652041904744314e-06, "loss": 0.0337, "step": 154840 }, { "epoch": 0.04425, "grad_norm": 0.048387397080659866, "learning_rate": 6.64923440785592e-06, "loss": 0.0318, "step": 154850 }, { "epoch": 0.0443, "grad_norm": 0.04516212269663811, "learning_rate": 6.646427412670569e-06, "loss": 0.0328, "step": 154860 }, { "epoch": 0.04435, "grad_norm": 0.04717303439974785, "learning_rate": 6.643620919264998e-06, "loss": 0.0315, "step": 154870 }, { "epoch": 0.0444, "grad_norm": 0.04056984931230545, "learning_rate": 6.640814927715927e-06, "loss": 0.0315, "step": 154880 }, { "epoch": 0.04445, "grad_norm": 0.04171687737107277, "learning_rate": 6.6380094381000875e-06, "loss": 0.0314, "step": 154890 }, { "epoch": 0.0445, "grad_norm": 0.039897263050079346, "learning_rate": 6.635204450494162e-06, "loss": 0.0317, "step": 154900 }, { "epoch": 0.04455, "grad_norm": 0.03952673822641373, "learning_rate": 6.632399964974856e-06, "loss": 0.0331, "step": 154910 }, { "epoch": 0.0446, "grad_norm": 0.04094167798757553, "learning_rate": 6.629595981618836e-06, "loss": 0.0326, "step": 154920 }, { "epoch": 0.04465, "grad_norm": 0.04107446223497391, "learning_rate": 6.6267925005027515e-06, "loss": 0.0342, "step": 154930 }, { "epoch": 0.0447, "grad_norm": 0.03734170272946358, "learning_rate": 6.623989521703267e-06, "loss": 0.0332, "step": 154940 }, { "epoch": 0.04475, "grad_norm": 0.0554497204720974, "learning_rate": 6.621187045297003e-06, "loss": 0.0332, "step": 154950 }, { "epoch": 0.0448, "grad_norm": 0.04572724923491478, "learning_rate": 6.618385071360581e-06, "loss": 0.0343, "step": 154960 }, { "epoch": 0.04485, "grad_norm": 0.045753978192806244, "learning_rate": 6.615583599970598e-06, "loss": 0.0332, "step": 154970 }, { "epoch": 0.0449, "grad_norm": 0.061113327741622925, "learning_rate": 6.612782631203662e-06, "loss": 0.0339, "step": 154980 }, { "epoch": 0.04495, "grad_norm": 0.0401221327483654, "learning_rate": 6.609982165136331e-06, "loss": 0.0333, "step": 154990 }, { "epoch": 0.045, "grad_norm": 0.05207759141921997, "learning_rate": 6.607182201845188e-06, "loss": 0.0333, "step": 155000 }, { "epoch": 0.04505, "grad_norm": 0.048382531851530075, "learning_rate": 6.604382741406773e-06, "loss": 0.0345, "step": 155010 }, { "epoch": 0.0451, "grad_norm": 0.050435684621334076, "learning_rate": 6.601583783897617e-06, "loss": 0.0339, "step": 155020 }, { "epoch": 0.04515, "grad_norm": 0.052984338253736496, "learning_rate": 6.598785329394252e-06, "loss": 0.033, "step": 155030 }, { "epoch": 0.0452, "grad_norm": 0.04428156837821007, "learning_rate": 6.595987377973187e-06, "loss": 0.0335, "step": 155040 }, { "epoch": 0.04525, "grad_norm": 0.041596777737140656, "learning_rate": 6.5931899297109106e-06, "loss": 0.0336, "step": 155050 }, { "epoch": 0.0453, "grad_norm": 0.03791414573788643, "learning_rate": 6.590392984683902e-06, "loss": 0.0329, "step": 155060 }, { "epoch": 0.04535, "grad_norm": 0.050665635615587234, "learning_rate": 6.58759654296863e-06, "loss": 0.0341, "step": 155070 }, { "epoch": 0.0454, "grad_norm": 0.05314168706536293, "learning_rate": 6.584800604641567e-06, "loss": 0.0333, "step": 155080 }, { "epoch": 0.04545, "grad_norm": 0.045230139046907425, "learning_rate": 6.582005169779123e-06, "loss": 0.0336, "step": 155090 }, { "epoch": 0.0455, "grad_norm": 0.03916275128722191, "learning_rate": 6.579210238457745e-06, "loss": 0.0324, "step": 155100 }, { "epoch": 0.04555, "grad_norm": 0.05019238591194153, "learning_rate": 6.576415810753833e-06, "loss": 0.0336, "step": 155110 }, { "epoch": 0.0456, "grad_norm": 0.041514333337545395, "learning_rate": 6.5736218867437974e-06, "loss": 0.0331, "step": 155120 }, { "epoch": 0.04565, "grad_norm": 0.0469856783747673, "learning_rate": 6.570828466504017e-06, "loss": 0.0325, "step": 155130 }, { "epoch": 0.0457, "grad_norm": 0.045561276376247406, "learning_rate": 6.568035550110855e-06, "loss": 0.0336, "step": 155140 }, { "epoch": 0.04575, "grad_norm": 0.044542096555233, "learning_rate": 6.565243137640683e-06, "loss": 0.0334, "step": 155150 }, { "epoch": 0.0458, "grad_norm": 0.04612474516034126, "learning_rate": 6.562451229169839e-06, "loss": 0.033, "step": 155160 }, { "epoch": 0.04585, "grad_norm": 0.052123814821243286, "learning_rate": 6.559659824774652e-06, "loss": 0.0335, "step": 155170 }, { "epoch": 0.0459, "grad_norm": 0.051325369626283646, "learning_rate": 6.556868924531431e-06, "loss": 0.0314, "step": 155180 }, { "epoch": 0.04595, "grad_norm": 0.04750853776931763, "learning_rate": 6.554078528516483e-06, "loss": 0.0328, "step": 155190 }, { "epoch": 0.046, "grad_norm": 0.04768137261271477, "learning_rate": 6.551288636806108e-06, "loss": 0.0327, "step": 155200 }, { "epoch": 0.04605, "grad_norm": 0.03860845789313316, "learning_rate": 6.54849924947657e-06, "loss": 0.0339, "step": 155210 }, { "epoch": 0.0461, "grad_norm": 0.05007299780845642, "learning_rate": 6.54571036660413e-06, "loss": 0.0331, "step": 155220 }, { "epoch": 0.04615, "grad_norm": 0.07463549077510834, "learning_rate": 6.5429219882650275e-06, "loss": 0.0351, "step": 155230 }, { "epoch": 0.0462, "grad_norm": 0.07337648421525955, "learning_rate": 6.540134114535512e-06, "loss": 0.0346, "step": 155240 }, { "epoch": 0.04625, "grad_norm": 0.039757709950208664, "learning_rate": 6.5373467454917955e-06, "loss": 0.0334, "step": 155250 }, { "epoch": 0.0463, "grad_norm": 0.058458272367715836, "learning_rate": 6.534559881210073e-06, "loss": 0.034, "step": 155260 }, { "epoch": 0.04635, "grad_norm": 0.04493442922830582, "learning_rate": 6.531773521766557e-06, "loss": 0.0322, "step": 155270 }, { "epoch": 0.0464, "grad_norm": 0.04561833664774895, "learning_rate": 6.528987667237405e-06, "loss": 0.0322, "step": 155280 }, { "epoch": 0.04645, "grad_norm": 0.04375355690717697, "learning_rate": 6.5262023176988065e-06, "loss": 0.0322, "step": 155290 }, { "epoch": 0.0465, "grad_norm": 0.043602511286735535, "learning_rate": 6.5234174732268814e-06, "loss": 0.0317, "step": 155300 }, { "epoch": 0.04655, "grad_norm": 0.04093132168054581, "learning_rate": 6.520633133897783e-06, "loss": 0.032, "step": 155310 }, { "epoch": 0.0466, "grad_norm": 0.06170346215367317, "learning_rate": 6.517849299787637e-06, "loss": 0.0348, "step": 155320 }, { "epoch": 0.04665, "grad_norm": 0.04812345653772354, "learning_rate": 6.515065970972548e-06, "loss": 0.0332, "step": 155330 }, { "epoch": 0.0467, "grad_norm": 0.058116111904382706, "learning_rate": 6.5122831475286115e-06, "loss": 0.0336, "step": 155340 }, { "epoch": 0.04675, "grad_norm": 0.05342204123735428, "learning_rate": 6.509500829531901e-06, "loss": 0.0325, "step": 155350 }, { "epoch": 0.0468, "grad_norm": 0.046791404485702515, "learning_rate": 6.506719017058499e-06, "loss": 0.033, "step": 155360 }, { "epoch": 0.04685, "grad_norm": 0.0552666075527668, "learning_rate": 6.503937710184452e-06, "loss": 0.0333, "step": 155370 }, { "epoch": 0.0469, "grad_norm": 0.05266944691538811, "learning_rate": 6.501156908985792e-06, "loss": 0.0367, "step": 155380 }, { "epoch": 0.04695, "grad_norm": 0.06553314626216888, "learning_rate": 6.498376613538556e-06, "loss": 0.0331, "step": 155390 }, { "epoch": 0.047, "grad_norm": 0.04647262021899223, "learning_rate": 6.4955968239187505e-06, "loss": 0.034, "step": 155400 }, { "epoch": 0.04705, "grad_norm": 0.05105951428413391, "learning_rate": 6.492817540202381e-06, "loss": 0.0336, "step": 155410 }, { "epoch": 0.0471, "grad_norm": 0.05529676377773285, "learning_rate": 6.490038762465428e-06, "loss": 0.0338, "step": 155420 }, { "epoch": 0.04715, "grad_norm": 0.05912640690803528, "learning_rate": 6.487260490783859e-06, "loss": 0.0333, "step": 155430 }, { "epoch": 0.0472, "grad_norm": 0.047453779727220535, "learning_rate": 6.484482725233629e-06, "loss": 0.0316, "step": 155440 }, { "epoch": 0.04725, "grad_norm": 0.04527302831411362, "learning_rate": 6.481705465890689e-06, "loss": 0.0322, "step": 155450 }, { "epoch": 0.0473, "grad_norm": 0.04401553422212601, "learning_rate": 6.478928712830967e-06, "loss": 0.0317, "step": 155460 }, { "epoch": 0.04735, "grad_norm": 0.045882705599069595, "learning_rate": 6.4761524661303695e-06, "loss": 0.0319, "step": 155470 }, { "epoch": 0.0474, "grad_norm": 0.04246285557746887, "learning_rate": 6.47337672586481e-06, "loss": 0.032, "step": 155480 }, { "epoch": 0.04745, "grad_norm": 0.0423617921769619, "learning_rate": 6.4706014921101645e-06, "loss": 0.0322, "step": 155490 }, { "epoch": 0.0475, "grad_norm": 0.043872471898794174, "learning_rate": 6.467826764942319e-06, "loss": 0.032, "step": 155500 }, { "epoch": 0.04755, "grad_norm": 0.04625878110527992, "learning_rate": 6.465052544437131e-06, "loss": 0.0328, "step": 155510 }, { "epoch": 0.0476, "grad_norm": 0.046431832015514374, "learning_rate": 6.462278830670432e-06, "loss": 0.0314, "step": 155520 }, { "epoch": 0.04765, "grad_norm": 0.04099517688155174, "learning_rate": 6.459505623718073e-06, "loss": 0.0321, "step": 155530 }, { "epoch": 0.0477, "grad_norm": 0.04131109267473221, "learning_rate": 6.456732923655867e-06, "loss": 0.0324, "step": 155540 }, { "epoch": 0.04775, "grad_norm": 0.040093787014484406, "learning_rate": 6.453960730559616e-06, "loss": 0.032, "step": 155550 }, { "epoch": 0.0478, "grad_norm": 0.04762173816561699, "learning_rate": 6.451189044505104e-06, "loss": 0.0334, "step": 155560 }, { "epoch": 0.04785, "grad_norm": 0.05464419350028038, "learning_rate": 6.4484178655681125e-06, "loss": 0.0322, "step": 155570 }, { "epoch": 0.0479, "grad_norm": 0.04581758379936218, "learning_rate": 6.445647193824425e-06, "loss": 0.0329, "step": 155580 }, { "epoch": 0.04795, "grad_norm": 0.05991151183843613, "learning_rate": 6.442877029349756e-06, "loss": 0.0352, "step": 155590 }, { "epoch": 0.048, "grad_norm": 0.06271591037511826, "learning_rate": 6.4401073722198665e-06, "loss": 0.0339, "step": 155600 }, { "epoch": 0.04805, "grad_norm": 0.04929928481578827, "learning_rate": 6.437338222510461e-06, "loss": 0.0324, "step": 155610 }, { "epoch": 0.0481, "grad_norm": 0.04830869287252426, "learning_rate": 6.4345695802972615e-06, "loss": 0.0336, "step": 155620 }, { "epoch": 0.04815, "grad_norm": 0.05835934728384018, "learning_rate": 6.431801445655955e-06, "loss": 0.0332, "step": 155630 }, { "epoch": 0.0482, "grad_norm": 0.0470610111951828, "learning_rate": 6.429033818662214e-06, "loss": 0.0326, "step": 155640 }, { "epoch": 0.04825, "grad_norm": 0.04287794232368469, "learning_rate": 6.426266699391717e-06, "loss": 0.0321, "step": 155650 }, { "epoch": 0.0483, "grad_norm": 0.03915727511048317, "learning_rate": 6.4235000879201145e-06, "loss": 0.0317, "step": 155660 }, { "epoch": 0.04835, "grad_norm": 0.0421532541513443, "learning_rate": 6.420733984323038e-06, "loss": 0.0327, "step": 155670 }, { "epoch": 0.0484, "grad_norm": 0.04445471987128258, "learning_rate": 6.417968388676107e-06, "loss": 0.0327, "step": 155680 }, { "epoch": 0.04845, "grad_norm": 0.0533364862203598, "learning_rate": 6.415203301054948e-06, "loss": 0.033, "step": 155690 }, { "epoch": 0.0485, "grad_norm": 0.04205596446990967, "learning_rate": 6.412438721535141e-06, "loss": 0.0324, "step": 155700 }, { "epoch": 0.04855, "grad_norm": 0.0469808354973793, "learning_rate": 6.409674650192282e-06, "loss": 0.032, "step": 155710 }, { "epoch": 0.0486, "grad_norm": 0.05807473510503769, "learning_rate": 6.406911087101938e-06, "loss": 0.0322, "step": 155720 }, { "epoch": 0.04865, "grad_norm": 0.05198590084910393, "learning_rate": 6.404148032339649e-06, "loss": 0.0324, "step": 155730 }, { "epoch": 0.0487, "grad_norm": 0.08055607229471207, "learning_rate": 6.401385485980976e-06, "loss": 0.0339, "step": 155740 }, { "epoch": 0.04875, "grad_norm": 0.048523034900426865, "learning_rate": 6.398623448101434e-06, "loss": 0.0349, "step": 155750 }, { "epoch": 0.0488, "grad_norm": 0.0461469441652298, "learning_rate": 6.395861918776533e-06, "loss": 0.0327, "step": 155760 }, { "epoch": 0.04885, "grad_norm": 0.04415708780288696, "learning_rate": 6.393100898081786e-06, "loss": 0.0333, "step": 155770 }, { "epoch": 0.0489, "grad_norm": 0.04208403080701828, "learning_rate": 6.390340386092664e-06, "loss": 0.0333, "step": 155780 }, { "epoch": 0.04895, "grad_norm": 0.03819586709141731, "learning_rate": 6.387580382884656e-06, "loss": 0.0326, "step": 155790 }, { "epoch": 0.049, "grad_norm": 0.049763914197683334, "learning_rate": 6.384820888533194e-06, "loss": 0.0337, "step": 155800 }, { "epoch": 0.04905, "grad_norm": 0.04622012749314308, "learning_rate": 6.382061903113743e-06, "loss": 0.0335, "step": 155810 }, { "epoch": 0.0491, "grad_norm": 0.039324529469013214, "learning_rate": 6.379303426701719e-06, "loss": 0.0327, "step": 155820 }, { "epoch": 0.04915, "grad_norm": 0.0509813018143177, "learning_rate": 6.376545459372552e-06, "loss": 0.0343, "step": 155830 }, { "epoch": 0.0492, "grad_norm": 0.06558476388454437, "learning_rate": 6.3737880012016335e-06, "loss": 0.0355, "step": 155840 }, { "epoch": 0.04925, "grad_norm": 0.05903024226427078, "learning_rate": 6.3710310522643455e-06, "loss": 0.0338, "step": 155850 }, { "epoch": 0.0493, "grad_norm": 0.04996471479535103, "learning_rate": 6.36827461263608e-06, "loss": 0.0347, "step": 155860 }, { "epoch": 0.04935, "grad_norm": 0.05509728565812111, "learning_rate": 6.365518682392186e-06, "loss": 0.034, "step": 155870 }, { "epoch": 0.0494, "grad_norm": 0.047669917345047, "learning_rate": 6.3627632616080095e-06, "loss": 0.0344, "step": 155880 }, { "epoch": 0.04945, "grad_norm": 0.04750572144985199, "learning_rate": 6.360008350358876e-06, "loss": 0.0334, "step": 155890 }, { "epoch": 0.0495, "grad_norm": 0.04436175152659416, "learning_rate": 6.357253948720113e-06, "loss": 0.0359, "step": 155900 }, { "epoch": 0.04955, "grad_norm": 0.05103078484535217, "learning_rate": 6.354500056767029e-06, "loss": 0.0332, "step": 155910 }, { "epoch": 0.0496, "grad_norm": 0.04148725047707558, "learning_rate": 6.35174667457491e-06, "loss": 0.0343, "step": 155920 }, { "epoch": 0.04965, "grad_norm": 0.04589519277215004, "learning_rate": 6.348993802219031e-06, "loss": 0.0331, "step": 155930 }, { "epoch": 0.0497, "grad_norm": 0.04733560234308243, "learning_rate": 6.346241439774648e-06, "loss": 0.0326, "step": 155940 }, { "epoch": 0.04975, "grad_norm": 0.047574255615472794, "learning_rate": 6.343489587317022e-06, "loss": 0.0341, "step": 155950 }, { "epoch": 0.0498, "grad_norm": 0.04523150995373726, "learning_rate": 6.340738244921382e-06, "loss": 0.0331, "step": 155960 }, { "epoch": 0.04985, "grad_norm": 0.04262368753552437, "learning_rate": 6.337987412662941e-06, "loss": 0.0326, "step": 155970 }, { "epoch": 0.0499, "grad_norm": 0.05411456897854805, "learning_rate": 6.335237090616922e-06, "loss": 0.0344, "step": 155980 }, { "epoch": 0.04995, "grad_norm": 0.04779418185353279, "learning_rate": 6.332487278858498e-06, "loss": 0.0322, "step": 155990 }, { "epoch": 0.05, "grad_norm": 0.04428921639919281, "learning_rate": 6.329737977462877e-06, "loss": 0.0326, "step": 156000 }, { "epoch": 0.05005, "grad_norm": 0.051774270832538605, "learning_rate": 6.326989186505192e-06, "loss": 0.0349, "step": 156010 }, { "epoch": 0.0501, "grad_norm": 0.05182502418756485, "learning_rate": 6.324240906060602e-06, "loss": 0.0343, "step": 156020 }, { "epoch": 0.05015, "grad_norm": 0.047747816890478134, "learning_rate": 6.321493136204262e-06, "loss": 0.034, "step": 156030 }, { "epoch": 0.0502, "grad_norm": 0.043513353914022446, "learning_rate": 6.318745877011281e-06, "loss": 0.0317, "step": 156040 }, { "epoch": 0.05025, "grad_norm": 0.04383932426571846, "learning_rate": 6.315999128556768e-06, "loss": 0.0328, "step": 156050 }, { "epoch": 0.0503, "grad_norm": 0.04224616289138794, "learning_rate": 6.313252890915813e-06, "loss": 0.0327, "step": 156060 }, { "epoch": 0.05035, "grad_norm": 0.04971592128276825, "learning_rate": 6.310507164163512e-06, "loss": 0.0346, "step": 156070 }, { "epoch": 0.0504, "grad_norm": 0.060726605355739594, "learning_rate": 6.307761948374924e-06, "loss": 0.0324, "step": 156080 }, { "epoch": 0.05045, "grad_norm": 0.04588589072227478, "learning_rate": 6.305017243625094e-06, "loss": 0.032, "step": 156090 }, { "epoch": 0.0505, "grad_norm": 0.04747506603598595, "learning_rate": 6.302273049989077e-06, "loss": 0.0326, "step": 156100 }, { "epoch": 0.05055, "grad_norm": 0.043740060180425644, "learning_rate": 6.299529367541882e-06, "loss": 0.0337, "step": 156110 }, { "epoch": 0.0506, "grad_norm": 0.03844473138451576, "learning_rate": 6.296786196358537e-06, "loss": 0.0325, "step": 156120 }, { "epoch": 0.05065, "grad_norm": 0.045568566769361496, "learning_rate": 6.29404353651403e-06, "loss": 0.0335, "step": 156130 }, { "epoch": 0.0507, "grad_norm": 0.04699577018618584, "learning_rate": 6.291301388083337e-06, "loss": 0.0328, "step": 156140 }, { "epoch": 0.05075, "grad_norm": 0.04970989748835564, "learning_rate": 6.288559751141443e-06, "loss": 0.0323, "step": 156150 }, { "epoch": 0.0508, "grad_norm": 0.046929433941841125, "learning_rate": 6.285818625763299e-06, "loss": 0.0334, "step": 156160 }, { "epoch": 0.05085, "grad_norm": 0.050449028611183167, "learning_rate": 6.283078012023841e-06, "loss": 0.0338, "step": 156170 }, { "epoch": 0.0509, "grad_norm": 0.05461689457297325, "learning_rate": 6.280337909997991e-06, "loss": 0.0355, "step": 156180 }, { "epoch": 0.05095, "grad_norm": 0.04760809615254402, "learning_rate": 6.277598319760677e-06, "loss": 0.0343, "step": 156190 }, { "epoch": 0.051, "grad_norm": 0.04256792366504669, "learning_rate": 6.2748592413867854e-06, "loss": 0.0336, "step": 156200 }, { "epoch": 0.05105, "grad_norm": 0.039392486214637756, "learning_rate": 6.272120674951212e-06, "loss": 0.0327, "step": 156210 }, { "epoch": 0.0511, "grad_norm": 0.03896992281079292, "learning_rate": 6.269382620528827e-06, "loss": 0.0339, "step": 156220 }, { "epoch": 0.05115, "grad_norm": 0.04479774087667465, "learning_rate": 6.266645078194475e-06, "loss": 0.0329, "step": 156230 }, { "epoch": 0.0512, "grad_norm": 0.045208126306533813, "learning_rate": 6.263908048023015e-06, "loss": 0.0339, "step": 156240 }, { "epoch": 0.05125, "grad_norm": 0.048391927033662796, "learning_rate": 6.2611715300892715e-06, "loss": 0.0336, "step": 156250 }, { "epoch": 0.0513, "grad_norm": 0.048994030803442, "learning_rate": 6.258435524468059e-06, "loss": 0.033, "step": 156260 }, { "epoch": 0.05135, "grad_norm": 0.05166760832071304, "learning_rate": 6.2557000312341715e-06, "loss": 0.0328, "step": 156270 }, { "epoch": 0.0514, "grad_norm": 0.08172396570444107, "learning_rate": 6.252965050462403e-06, "loss": 0.032, "step": 156280 }, { "epoch": 0.05145, "grad_norm": 0.057565782219171524, "learning_rate": 6.250230582227539e-06, "loss": 0.0335, "step": 156290 }, { "epoch": 0.0515, "grad_norm": 0.047792620956897736, "learning_rate": 6.247496626604316e-06, "loss": 0.0331, "step": 156300 }, { "epoch": 0.05155, "grad_norm": 0.04930257424712181, "learning_rate": 6.244763183667496e-06, "loss": 0.0315, "step": 156310 }, { "epoch": 0.0516, "grad_norm": 0.053147315979003906, "learning_rate": 6.242030253491798e-06, "loss": 0.0344, "step": 156320 }, { "epoch": 0.05165, "grad_norm": 0.0465032234787941, "learning_rate": 6.2392978361519525e-06, "loss": 0.0316, "step": 156330 }, { "epoch": 0.0517, "grad_norm": 0.05579762905836105, "learning_rate": 6.2365659317226545e-06, "loss": 0.0348, "step": 156340 }, { "epoch": 0.05175, "grad_norm": 0.04690273851156235, "learning_rate": 6.233834540278591e-06, "loss": 0.0325, "step": 156350 }, { "epoch": 0.0518, "grad_norm": 0.03898358345031738, "learning_rate": 6.2311036618944464e-06, "loss": 0.0333, "step": 156360 }, { "epoch": 0.05185, "grad_norm": 0.05056428536772728, "learning_rate": 6.228373296644877e-06, "loss": 0.0361, "step": 156370 }, { "epoch": 0.0519, "grad_norm": 0.04741507023572922, "learning_rate": 6.225643444604529e-06, "loss": 0.0335, "step": 156380 }, { "epoch": 0.05195, "grad_norm": 0.0468575656414032, "learning_rate": 6.2229141058480265e-06, "loss": 0.0338, "step": 156390 }, { "epoch": 0.052, "grad_norm": 0.046106282621622086, "learning_rate": 6.2201852804500025e-06, "loss": 0.0337, "step": 156400 }, { "epoch": 0.05205, "grad_norm": 0.050913646817207336, "learning_rate": 6.217456968485061e-06, "loss": 0.0339, "step": 156410 }, { "epoch": 0.0521, "grad_norm": 0.04370098561048508, "learning_rate": 6.214729170027792e-06, "loss": 0.0328, "step": 156420 }, { "epoch": 0.05215, "grad_norm": 0.04934019222855568, "learning_rate": 6.212001885152771e-06, "loss": 0.0344, "step": 156430 }, { "epoch": 0.0522, "grad_norm": 0.04041753336787224, "learning_rate": 6.209275113934552e-06, "loss": 0.0334, "step": 156440 }, { "epoch": 0.05225, "grad_norm": 0.05130622163414955, "learning_rate": 6.206548856447697e-06, "loss": 0.0351, "step": 156450 }, { "epoch": 0.0523, "grad_norm": 0.04814887419342995, "learning_rate": 6.203823112766738e-06, "loss": 0.0332, "step": 156460 }, { "epoch": 0.05235, "grad_norm": 0.050084177404642105, "learning_rate": 6.201097882966186e-06, "loss": 0.0332, "step": 156470 }, { "epoch": 0.0524, "grad_norm": 0.05311131477355957, "learning_rate": 6.198373167120564e-06, "loss": 0.035, "step": 156480 }, { "epoch": 0.05245, "grad_norm": 0.054679013788700104, "learning_rate": 6.195648965304348e-06, "loss": 0.035, "step": 156490 }, { "epoch": 0.0525, "grad_norm": 0.042142126709222794, "learning_rate": 6.19292527759204e-06, "loss": 0.034, "step": 156500 }, { "epoch": 0.05255, "grad_norm": 0.04101485759019852, "learning_rate": 6.190202104058074e-06, "loss": 0.033, "step": 156510 }, { "epoch": 0.0526, "grad_norm": 0.05309950187802315, "learning_rate": 6.187479444776914e-06, "loss": 0.0327, "step": 156520 }, { "epoch": 0.05265, "grad_norm": 0.05121513828635216, "learning_rate": 6.18475729982301e-06, "loss": 0.0348, "step": 156530 }, { "epoch": 0.0527, "grad_norm": 0.04467151314020157, "learning_rate": 6.182035669270769e-06, "loss": 0.0332, "step": 156540 }, { "epoch": 0.05275, "grad_norm": 0.04641894996166229, "learning_rate": 6.179314553194607e-06, "loss": 0.0321, "step": 156550 }, { "epoch": 0.0528, "grad_norm": 0.05059409141540527, "learning_rate": 6.1765939516689045e-06, "loss": 0.0331, "step": 156560 }, { "epoch": 0.05285, "grad_norm": 0.049843620508909225, "learning_rate": 6.173873864768059e-06, "loss": 0.0334, "step": 156570 }, { "epoch": 0.0529, "grad_norm": 0.03690807521343231, "learning_rate": 6.1711542925664305e-06, "loss": 0.0313, "step": 156580 }, { "epoch": 0.05295, "grad_norm": 0.03983112797141075, "learning_rate": 6.168435235138362e-06, "loss": 0.0328, "step": 156590 }, { "epoch": 0.053, "grad_norm": 0.04444614425301552, "learning_rate": 6.1657166925582075e-06, "loss": 0.0355, "step": 156600 }, { "epoch": 0.05305, "grad_norm": 0.04253576695919037, "learning_rate": 6.162998664900274e-06, "loss": 0.0324, "step": 156610 }, { "epoch": 0.0531, "grad_norm": 0.06054367870092392, "learning_rate": 6.160281152238889e-06, "loss": 0.0344, "step": 156620 }, { "epoch": 0.05315, "grad_norm": 0.038273777812719345, "learning_rate": 6.1575641546483405e-06, "loss": 0.0319, "step": 156630 }, { "epoch": 0.0532, "grad_norm": 0.049611713737249374, "learning_rate": 6.154847672202907e-06, "loss": 0.0311, "step": 156640 }, { "epoch": 0.05325, "grad_norm": 0.04181080684065819, "learning_rate": 6.1521317049768515e-06, "loss": 0.0318, "step": 156650 }, { "epoch": 0.0533, "grad_norm": 0.0436883345246315, "learning_rate": 6.149416253044443e-06, "loss": 0.0322, "step": 156660 }, { "epoch": 0.05335, "grad_norm": 0.0463520772755146, "learning_rate": 6.146701316479911e-06, "loss": 0.0325, "step": 156670 }, { "epoch": 0.0534, "grad_norm": 0.052116844803094864, "learning_rate": 6.143986895357476e-06, "loss": 0.0327, "step": 156680 }, { "epoch": 0.05345, "grad_norm": 0.03883455693721771, "learning_rate": 6.141272989751362e-06, "loss": 0.0327, "step": 156690 }, { "epoch": 0.0535, "grad_norm": 0.04669365659356117, "learning_rate": 6.138559599735752e-06, "loss": 0.0325, "step": 156700 }, { "epoch": 0.05355, "grad_norm": 0.048563260585069656, "learning_rate": 6.135846725384844e-06, "loss": 0.0342, "step": 156710 }, { "epoch": 0.0536, "grad_norm": 0.04328960180282593, "learning_rate": 6.1331343667728e-06, "loss": 0.0333, "step": 156720 }, { "epoch": 0.05365, "grad_norm": 0.051987145096063614, "learning_rate": 6.130422523973766e-06, "loss": 0.0331, "step": 156730 }, { "epoch": 0.0537, "grad_norm": 0.04236971586942673, "learning_rate": 6.1277111970619e-06, "loss": 0.0328, "step": 156740 }, { "epoch": 0.05375, "grad_norm": 0.050633545964956284, "learning_rate": 6.125000386111321e-06, "loss": 0.0344, "step": 156750 }, { "epoch": 0.0538, "grad_norm": 0.06444554030895233, "learning_rate": 6.122290091196137e-06, "loss": 0.0337, "step": 156760 }, { "epoch": 0.05385, "grad_norm": 0.04705757647752762, "learning_rate": 6.119580312390447e-06, "loss": 0.034, "step": 156770 }, { "epoch": 0.0539, "grad_norm": 0.049717262387275696, "learning_rate": 6.116871049768333e-06, "loss": 0.0346, "step": 156780 }, { "epoch": 0.05395, "grad_norm": 0.056450214236974716, "learning_rate": 6.114162303403889e-06, "loss": 0.0346, "step": 156790 }, { "epoch": 0.054, "grad_norm": 0.053649645298719406, "learning_rate": 6.111454073371136e-06, "loss": 0.0349, "step": 156800 }, { "epoch": 0.05405, "grad_norm": 0.04505099356174469, "learning_rate": 6.108746359744141e-06, "loss": 0.0354, "step": 156810 }, { "epoch": 0.0541, "grad_norm": 0.05334670469164848, "learning_rate": 6.106039162596916e-06, "loss": 0.0331, "step": 156820 }, { "epoch": 0.05415, "grad_norm": 0.0388176292181015, "learning_rate": 6.103332482003488e-06, "loss": 0.0326, "step": 156830 }, { "epoch": 0.0542, "grad_norm": 0.04762693867087364, "learning_rate": 6.100626318037853e-06, "loss": 0.0333, "step": 156840 }, { "epoch": 0.05425, "grad_norm": 0.0472024604678154, "learning_rate": 6.097920670773985e-06, "loss": 0.0339, "step": 156850 }, { "epoch": 0.0543, "grad_norm": 0.04920189082622528, "learning_rate": 6.095215540285873e-06, "loss": 0.0352, "step": 156860 }, { "epoch": 0.05435, "grad_norm": 0.049344856292009354, "learning_rate": 6.092510926647466e-06, "loss": 0.0332, "step": 156870 }, { "epoch": 0.0544, "grad_norm": 0.042951151728630066, "learning_rate": 6.089806829932707e-06, "loss": 0.0332, "step": 156880 }, { "epoch": 0.05445, "grad_norm": 0.04782388359308243, "learning_rate": 6.087103250215518e-06, "loss": 0.0338, "step": 156890 }, { "epoch": 0.0545, "grad_norm": 0.04540665075182915, "learning_rate": 6.0844001875698275e-06, "loss": 0.034, "step": 156900 }, { "epoch": 0.05455, "grad_norm": 0.05436317250132561, "learning_rate": 6.081697642069523e-06, "loss": 0.0345, "step": 156910 }, { "epoch": 0.0546, "grad_norm": 0.04602791741490364, "learning_rate": 6.0789956137885045e-06, "loss": 0.0341, "step": 156920 }, { "epoch": 0.05465, "grad_norm": 0.04309307783842087, "learning_rate": 6.0762941028006365e-06, "loss": 0.0332, "step": 156930 }, { "epoch": 0.0547, "grad_norm": 0.040035031735897064, "learning_rate": 6.0735931091797735e-06, "loss": 0.0343, "step": 156940 }, { "epoch": 0.05475, "grad_norm": 0.04369068890810013, "learning_rate": 6.070892632999769e-06, "loss": 0.0346, "step": 156950 }, { "epoch": 0.0548, "grad_norm": 0.04811226949095726, "learning_rate": 6.068192674334449e-06, "loss": 0.0339, "step": 156960 }, { "epoch": 0.05485, "grad_norm": 0.048526640981435776, "learning_rate": 6.065493233257624e-06, "loss": 0.0355, "step": 156970 }, { "epoch": 0.0549, "grad_norm": 0.0485498420894146, "learning_rate": 6.062794309843106e-06, "loss": 0.0344, "step": 156980 }, { "epoch": 0.05495, "grad_norm": 0.05765537545084953, "learning_rate": 6.060095904164673e-06, "loss": 0.0348, "step": 156990 }, { "epoch": 0.055, "grad_norm": 0.056350041180849075, "learning_rate": 6.0573980162961145e-06, "loss": 0.0336, "step": 157000 }, { "epoch": 0.05505, "grad_norm": 0.05552685633301735, "learning_rate": 6.0547006463111625e-06, "loss": 0.0353, "step": 157010 }, { "epoch": 0.0551, "grad_norm": 0.05745597183704376, "learning_rate": 6.0520037942835865e-06, "loss": 0.0361, "step": 157020 }, { "epoch": 0.05515, "grad_norm": 0.04678814485669136, "learning_rate": 6.049307460287101e-06, "loss": 0.0348, "step": 157030 }, { "epoch": 0.0552, "grad_norm": 0.04759768024086952, "learning_rate": 6.046611644395437e-06, "loss": 0.0343, "step": 157040 }, { "epoch": 0.05525, "grad_norm": 0.04795956611633301, "learning_rate": 6.043916346682288e-06, "loss": 0.0338, "step": 157050 }, { "epoch": 0.0553, "grad_norm": 0.0424167737364769, "learning_rate": 6.041221567221339e-06, "loss": 0.0335, "step": 157060 }, { "epoch": 0.05535, "grad_norm": 0.04473331198096275, "learning_rate": 6.0385273060862775e-06, "loss": 0.0334, "step": 157070 }, { "epoch": 0.0554, "grad_norm": 0.04596247524023056, "learning_rate": 6.035833563350757e-06, "loss": 0.0337, "step": 157080 }, { "epoch": 0.05545, "grad_norm": 0.05837064981460571, "learning_rate": 6.033140339088422e-06, "loss": 0.0325, "step": 157090 }, { "epoch": 0.0555, "grad_norm": 0.04107905924320221, "learning_rate": 6.030447633372896e-06, "loss": 0.0337, "step": 157100 }, { "epoch": 0.05555, "grad_norm": 0.05001448467373848, "learning_rate": 6.027755446277805e-06, "loss": 0.0328, "step": 157110 }, { "epoch": 0.0556, "grad_norm": 0.04254784807562828, "learning_rate": 6.025063777876761e-06, "loss": 0.0314, "step": 157120 }, { "epoch": 0.05565, "grad_norm": 0.043021947145462036, "learning_rate": 6.0223726282433445e-06, "loss": 0.0329, "step": 157130 }, { "epoch": 0.0557, "grad_norm": 0.039636269211769104, "learning_rate": 6.019681997451132e-06, "loss": 0.0358, "step": 157140 }, { "epoch": 0.05575, "grad_norm": 0.04664872959256172, "learning_rate": 6.016991885573672e-06, "loss": 0.0316, "step": 157150 }, { "epoch": 0.0558, "grad_norm": 0.04457870125770569, "learning_rate": 6.014302292684534e-06, "loss": 0.0323, "step": 157160 }, { "epoch": 0.05585, "grad_norm": 0.05264793708920479, "learning_rate": 6.011613218857237e-06, "loss": 0.0331, "step": 157170 }, { "epoch": 0.0559, "grad_norm": 0.04360983520746231, "learning_rate": 6.0089246641652916e-06, "loss": 0.0317, "step": 157180 }, { "epoch": 0.05595, "grad_norm": 0.04845651239156723, "learning_rate": 6.006236628682221e-06, "loss": 0.0336, "step": 157190 }, { "epoch": 0.056, "grad_norm": 0.04868089035153389, "learning_rate": 6.003549112481496e-06, "loss": 0.0322, "step": 157200 }, { "epoch": 0.05605, "grad_norm": 0.03788581117987633, "learning_rate": 6.0008621156366184e-06, "loss": 0.0335, "step": 157210 }, { "epoch": 0.0561, "grad_norm": 0.03969200700521469, "learning_rate": 5.998175638221018e-06, "loss": 0.0322, "step": 157220 }, { "epoch": 0.05615, "grad_norm": 0.050981346517801285, "learning_rate": 5.9954896803081585e-06, "loss": 0.0323, "step": 157230 }, { "epoch": 0.0562, "grad_norm": 0.043794598430395126, "learning_rate": 5.992804241971475e-06, "loss": 0.034, "step": 157240 }, { "epoch": 0.05625, "grad_norm": 0.04196935519576073, "learning_rate": 5.990119323284385e-06, "loss": 0.0321, "step": 157250 }, { "epoch": 0.0563, "grad_norm": 0.05080387368798256, "learning_rate": 5.98743492432029e-06, "loss": 0.0323, "step": 157260 }, { "epoch": 0.05635, "grad_norm": 0.04499037191271782, "learning_rate": 5.984751045152576e-06, "loss": 0.0325, "step": 157270 }, { "epoch": 0.0564, "grad_norm": 0.05216510593891144, "learning_rate": 5.982067685854631e-06, "loss": 0.0325, "step": 157280 }, { "epoch": 0.05645, "grad_norm": 0.04326760023832321, "learning_rate": 5.979384846499811e-06, "loss": 0.0328, "step": 157290 }, { "epoch": 0.0565, "grad_norm": 0.052308179438114166, "learning_rate": 5.976702527161457e-06, "loss": 0.0344, "step": 157300 }, { "epoch": 0.05655, "grad_norm": 0.042604342103004456, "learning_rate": 5.974020727912913e-06, "loss": 0.0325, "step": 157310 }, { "epoch": 0.0566, "grad_norm": 0.047163888812065125, "learning_rate": 5.97133944882749e-06, "loss": 0.0336, "step": 157320 }, { "epoch": 0.05665, "grad_norm": 0.04162364825606346, "learning_rate": 5.9686586899785065e-06, "loss": 0.0366, "step": 157330 }, { "epoch": 0.0567, "grad_norm": 0.041386570781469345, "learning_rate": 5.965978451439242e-06, "loss": 0.0344, "step": 157340 }, { "epoch": 0.05675, "grad_norm": 0.0403309091925621, "learning_rate": 5.963298733282968e-06, "loss": 0.0338, "step": 157350 }, { "epoch": 0.0568, "grad_norm": 0.04396276921033859, "learning_rate": 5.960619535582965e-06, "loss": 0.0346, "step": 157360 }, { "epoch": 0.05685, "grad_norm": 0.04074764624238014, "learning_rate": 5.957940858412469e-06, "loss": 0.0346, "step": 157370 }, { "epoch": 0.0569, "grad_norm": 0.053959768265485764, "learning_rate": 5.9552627018447185e-06, "loss": 0.0358, "step": 157380 }, { "epoch": 0.05695, "grad_norm": 0.04601474478840828, "learning_rate": 5.952585065952923e-06, "loss": 0.0332, "step": 157390 }, { "epoch": 0.057, "grad_norm": 0.050543323159217834, "learning_rate": 5.949907950810302e-06, "loss": 0.0347, "step": 157400 }, { "epoch": 0.05705, "grad_norm": 0.045483339577913284, "learning_rate": 5.9472313564900325e-06, "loss": 0.0338, "step": 157410 }, { "epoch": 0.0571, "grad_norm": 0.039692364633083344, "learning_rate": 5.944555283065309e-06, "loss": 0.034, "step": 157420 }, { "epoch": 0.05715, "grad_norm": 0.03985406085848808, "learning_rate": 5.941879730609284e-06, "loss": 0.0333, "step": 157430 }, { "epoch": 0.0572, "grad_norm": 0.03966673091053963, "learning_rate": 5.939204699195103e-06, "loss": 0.0339, "step": 157440 }, { "epoch": 0.05725, "grad_norm": 0.05533109977841377, "learning_rate": 5.936530188895908e-06, "loss": 0.0345, "step": 157450 }, { "epoch": 0.0573, "grad_norm": 0.04460675269365311, "learning_rate": 5.933856199784821e-06, "loss": 0.0347, "step": 157460 }, { "epoch": 0.05735, "grad_norm": 0.0479511059820652, "learning_rate": 5.931182731934939e-06, "loss": 0.0345, "step": 157470 }, { "epoch": 0.0574, "grad_norm": 0.054211050271987915, "learning_rate": 5.92850978541935e-06, "loss": 0.0335, "step": 157480 }, { "epoch": 0.05745, "grad_norm": 0.05536212399601936, "learning_rate": 5.92583736031114e-06, "loss": 0.0364, "step": 157490 }, { "epoch": 0.0575, "grad_norm": 0.04610919579863548, "learning_rate": 5.923165456683383e-06, "loss": 0.0335, "step": 157500 }, { "epoch": 0.05755, "grad_norm": 0.04443402588367462, "learning_rate": 5.920494074609104e-06, "loss": 0.0351, "step": 157510 }, { "epoch": 0.0576, "grad_norm": 0.038978736847639084, "learning_rate": 5.917823214161356e-06, "loss": 0.0325, "step": 157520 }, { "epoch": 0.05765, "grad_norm": 0.04635247588157654, "learning_rate": 5.915152875413144e-06, "loss": 0.0336, "step": 157530 }, { "epoch": 0.0577, "grad_norm": 0.05298512056469917, "learning_rate": 5.912483058437487e-06, "loss": 0.0337, "step": 157540 }, { "epoch": 0.05775, "grad_norm": 0.04913019761443138, "learning_rate": 5.909813763307376e-06, "loss": 0.0324, "step": 157550 }, { "epoch": 0.0578, "grad_norm": 0.04847853630781174, "learning_rate": 5.907144990095778e-06, "loss": 0.0339, "step": 157560 }, { "epoch": 0.05785, "grad_norm": 0.048599254339933395, "learning_rate": 5.9044767388756695e-06, "loss": 0.0323, "step": 157570 }, { "epoch": 0.0579, "grad_norm": 0.044404398649930954, "learning_rate": 5.9018090097199915e-06, "loss": 0.0332, "step": 157580 }, { "epoch": 0.05795, "grad_norm": 0.05398423969745636, "learning_rate": 5.899141802701683e-06, "loss": 0.0321, "step": 157590 }, { "epoch": 0.058, "grad_norm": 0.0505392923951149, "learning_rate": 5.8964751178936516e-06, "loss": 0.0327, "step": 157600 }, { "epoch": 0.05805, "grad_norm": 0.06475657224655151, "learning_rate": 5.893808955368818e-06, "loss": 0.0341, "step": 157610 }, { "epoch": 0.0581, "grad_norm": 0.05430496111512184, "learning_rate": 5.891143315200073e-06, "loss": 0.0329, "step": 157620 }, { "epoch": 0.05815, "grad_norm": 0.045613840222358704, "learning_rate": 5.888478197460293e-06, "loss": 0.0308, "step": 157630 }, { "epoch": 0.0582, "grad_norm": 0.0437619574368, "learning_rate": 5.885813602222337e-06, "loss": 0.0318, "step": 157640 }, { "epoch": 0.05825, "grad_norm": 0.04621758684515953, "learning_rate": 5.883149529559051e-06, "loss": 0.0322, "step": 157650 }, { "epoch": 0.0583, "grad_norm": 0.048031993210315704, "learning_rate": 5.880485979543282e-06, "loss": 0.0331, "step": 157660 }, { "epoch": 0.05835, "grad_norm": 0.04733674228191376, "learning_rate": 5.877822952247841e-06, "loss": 0.0322, "step": 157670 }, { "epoch": 0.0584, "grad_norm": 0.04080720245838165, "learning_rate": 5.875160447745534e-06, "loss": 0.0322, "step": 157680 }, { "epoch": 0.05845, "grad_norm": 0.04310221970081329, "learning_rate": 5.872498466109158e-06, "loss": 0.0342, "step": 157690 }, { "epoch": 0.0585, "grad_norm": 0.046025704592466354, "learning_rate": 5.869837007411483e-06, "loss": 0.0326, "step": 157700 }, { "epoch": 0.05855, "grad_norm": 0.0485345833003521, "learning_rate": 5.867176071725292e-06, "loss": 0.0331, "step": 157710 }, { "epoch": 0.0586, "grad_norm": 0.04319790005683899, "learning_rate": 5.864515659123304e-06, "loss": 0.0322, "step": 157720 }, { "epoch": 0.05865, "grad_norm": 0.047216981649398804, "learning_rate": 5.861855769678271e-06, "loss": 0.0323, "step": 157730 }, { "epoch": 0.0587, "grad_norm": 0.045297492295503616, "learning_rate": 5.859196403462916e-06, "loss": 0.0323, "step": 157740 }, { "epoch": 0.05875, "grad_norm": 0.03981221094727516, "learning_rate": 5.856537560549943e-06, "loss": 0.0324, "step": 157750 }, { "epoch": 0.0588, "grad_norm": 0.04106670618057251, "learning_rate": 5.853879241012039e-06, "loss": 0.0333, "step": 157760 }, { "epoch": 0.05885, "grad_norm": 0.04203006625175476, "learning_rate": 5.851221444921878e-06, "loss": 0.0335, "step": 157770 }, { "epoch": 0.0589, "grad_norm": 0.04400645196437836, "learning_rate": 5.8485641723521364e-06, "loss": 0.0338, "step": 157780 }, { "epoch": 0.05895, "grad_norm": 0.04924129322171211, "learning_rate": 5.845907423375455e-06, "loss": 0.0328, "step": 157790 }, { "epoch": 0.059, "grad_norm": 0.05753031000494957, "learning_rate": 5.843251198064459e-06, "loss": 0.0362, "step": 157800 }, { "epoch": 0.05905, "grad_norm": 0.08049297332763672, "learning_rate": 5.840595496491788e-06, "loss": 0.0344, "step": 157810 }, { "epoch": 0.0591, "grad_norm": 0.05114581808447838, "learning_rate": 5.837940318730031e-06, "loss": 0.0337, "step": 157820 }, { "epoch": 0.05915, "grad_norm": 0.05438638851046562, "learning_rate": 5.8352856648517945e-06, "loss": 0.0325, "step": 157830 }, { "epoch": 0.0592, "grad_norm": 0.053465284407138824, "learning_rate": 5.8326315349296476e-06, "loss": 0.0321, "step": 157840 }, { "epoch": 0.05925, "grad_norm": 0.044169776141643524, "learning_rate": 5.829977929036154e-06, "loss": 0.0323, "step": 157850 }, { "epoch": 0.0593, "grad_norm": 0.041343994438648224, "learning_rate": 5.827324847243853e-06, "loss": 0.0341, "step": 157860 }, { "epoch": 0.05935, "grad_norm": 0.04701950401067734, "learning_rate": 5.824672289625297e-06, "loss": 0.0324, "step": 157870 }, { "epoch": 0.0594, "grad_norm": 0.043092407286167145, "learning_rate": 5.822020256252997e-06, "loss": 0.0326, "step": 157880 }, { "epoch": 0.05945, "grad_norm": 0.042744092643260956, "learning_rate": 5.81936874719945e-06, "loss": 0.0322, "step": 157890 }, { "epoch": 0.0595, "grad_norm": 0.04643106460571289, "learning_rate": 5.816717762537163e-06, "loss": 0.0335, "step": 157900 }, { "epoch": 0.05955, "grad_norm": 0.042287472635507584, "learning_rate": 5.8140673023385965e-06, "loss": 0.0325, "step": 157910 }, { "epoch": 0.0596, "grad_norm": 0.04261939972639084, "learning_rate": 5.8114173666762335e-06, "loss": 0.0339, "step": 157920 }, { "epoch": 0.05965, "grad_norm": 0.04192541539669037, "learning_rate": 5.8087679556225075e-06, "loss": 0.034, "step": 157930 }, { "epoch": 0.0597, "grad_norm": 0.045936793088912964, "learning_rate": 5.806119069249849e-06, "loss": 0.0338, "step": 157940 }, { "epoch": 0.05975, "grad_norm": 0.03764326870441437, "learning_rate": 5.803470707630692e-06, "loss": 0.0325, "step": 157950 }, { "epoch": 0.0598, "grad_norm": 0.05140896141529083, "learning_rate": 5.800822870837436e-06, "loss": 0.0358, "step": 157960 }, { "epoch": 0.05985, "grad_norm": 0.042791955173015594, "learning_rate": 5.798175558942468e-06, "loss": 0.0345, "step": 157970 }, { "epoch": 0.0599, "grad_norm": 0.05092308670282364, "learning_rate": 5.7955287720181574e-06, "loss": 0.0329, "step": 157980 }, { "epoch": 0.05995, "grad_norm": 0.04748637229204178, "learning_rate": 5.792882510136879e-06, "loss": 0.0333, "step": 157990 }, { "epoch": 0.06, "grad_norm": 0.05431441217660904, "learning_rate": 5.7902367733709915e-06, "loss": 0.0342, "step": 158000 }, { "epoch": 0.06005, "grad_norm": 0.04721912741661072, "learning_rate": 5.787591561792796e-06, "loss": 0.036, "step": 158010 }, { "epoch": 0.0601, "grad_norm": 0.046072278171777725, "learning_rate": 5.784946875474639e-06, "loss": 0.0349, "step": 158020 }, { "epoch": 0.06015, "grad_norm": 0.05052720382809639, "learning_rate": 5.7823027144888075e-06, "loss": 0.0341, "step": 158030 }, { "epoch": 0.0602, "grad_norm": 0.043350934982299805, "learning_rate": 5.779659078907607e-06, "loss": 0.0332, "step": 158040 }, { "epoch": 0.06025, "grad_norm": 0.04384405538439751, "learning_rate": 5.777015968803307e-06, "loss": 0.0332, "step": 158050 }, { "epoch": 0.0603, "grad_norm": 0.050363317131996155, "learning_rate": 5.774373384248163e-06, "loss": 0.0332, "step": 158060 }, { "epoch": 0.06035, "grad_norm": 0.04257839918136597, "learning_rate": 5.771731325314433e-06, "loss": 0.032, "step": 158070 }, { "epoch": 0.0604, "grad_norm": 0.058831073343753815, "learning_rate": 5.769089792074345e-06, "loss": 0.0332, "step": 158080 }, { "epoch": 0.06045, "grad_norm": 0.044406186789274216, "learning_rate": 5.766448784600117e-06, "loss": 0.034, "step": 158090 }, { "epoch": 0.0605, "grad_norm": 0.05047066509723663, "learning_rate": 5.763808302963949e-06, "loss": 0.0356, "step": 158100 }, { "epoch": 0.06055, "grad_norm": 0.0494023896753788, "learning_rate": 5.761168347238041e-06, "loss": 0.0341, "step": 158110 }, { "epoch": 0.0606, "grad_norm": 0.05668467655777931, "learning_rate": 5.758528917494554e-06, "loss": 0.0344, "step": 158120 }, { "epoch": 0.06065, "grad_norm": 0.04482339695096016, "learning_rate": 5.7558900138056676e-06, "loss": 0.0346, "step": 158130 }, { "epoch": 0.0607, "grad_norm": 0.04861469194293022, "learning_rate": 5.753251636243518e-06, "loss": 0.0322, "step": 158140 }, { "epoch": 0.06075, "grad_norm": 0.04743332415819168, "learning_rate": 5.7506137848802295e-06, "loss": 0.0345, "step": 158150 }, { "epoch": 0.0608, "grad_norm": 0.038775719702243805, "learning_rate": 5.747976459787935e-06, "loss": 0.0335, "step": 158160 }, { "epoch": 0.06085, "grad_norm": 0.048766493797302246, "learning_rate": 5.745339661038732e-06, "loss": 0.0332, "step": 158170 }, { "epoch": 0.0609, "grad_norm": 0.04896395280957222, "learning_rate": 5.7427033887047036e-06, "loss": 0.0341, "step": 158180 }, { "epoch": 0.06095, "grad_norm": 0.04900505766272545, "learning_rate": 5.7400676428579356e-06, "loss": 0.0339, "step": 158190 }, { "epoch": 0.061, "grad_norm": 0.04926890507340431, "learning_rate": 5.737432423570477e-06, "loss": 0.0348, "step": 158200 }, { "epoch": 0.06105, "grad_norm": 0.04470258206129074, "learning_rate": 5.7347977309143904e-06, "loss": 0.0334, "step": 158210 }, { "epoch": 0.0611, "grad_norm": 0.04631470888853073, "learning_rate": 5.732163564961684e-06, "loss": 0.0334, "step": 158220 }, { "epoch": 0.06115, "grad_norm": 0.03767970949411392, "learning_rate": 5.729529925784394e-06, "loss": 0.0331, "step": 158230 }, { "epoch": 0.0612, "grad_norm": 0.044212449342012405, "learning_rate": 5.726896813454511e-06, "loss": 0.0328, "step": 158240 }, { "epoch": 0.06125, "grad_norm": 0.04186420887708664, "learning_rate": 5.724264228044032e-06, "loss": 0.0326, "step": 158250 }, { "epoch": 0.0613, "grad_norm": 0.042783111333847046, "learning_rate": 5.72163216962493e-06, "loss": 0.0328, "step": 158260 }, { "epoch": 0.06135, "grad_norm": 0.04458148777484894, "learning_rate": 5.719000638269154e-06, "loss": 0.0328, "step": 158270 }, { "epoch": 0.0614, "grad_norm": 0.053216926753520966, "learning_rate": 5.716369634048665e-06, "loss": 0.0331, "step": 158280 }, { "epoch": 0.06145, "grad_norm": 0.04896605387330055, "learning_rate": 5.713739157035386e-06, "loss": 0.0336, "step": 158290 }, { "epoch": 0.0615, "grad_norm": 0.04810934141278267, "learning_rate": 5.711109207301232e-06, "loss": 0.0333, "step": 158300 }, { "epoch": 0.06155, "grad_norm": 0.04081054776906967, "learning_rate": 5.708479784918097e-06, "loss": 0.0326, "step": 158310 }, { "epoch": 0.0616, "grad_norm": 0.04219655692577362, "learning_rate": 5.705850889957881e-06, "loss": 0.0323, "step": 158320 }, { "epoch": 0.06165, "grad_norm": 0.03747668117284775, "learning_rate": 5.703222522492457e-06, "loss": 0.0356, "step": 158330 }, { "epoch": 0.0617, "grad_norm": 0.040792036801576614, "learning_rate": 5.700594682593682e-06, "loss": 0.0341, "step": 158340 }, { "epoch": 0.06175, "grad_norm": 0.04115452989935875, "learning_rate": 5.697967370333396e-06, "loss": 0.033, "step": 158350 }, { "epoch": 0.0618, "grad_norm": 0.04136979579925537, "learning_rate": 5.695340585783424e-06, "loss": 0.0339, "step": 158360 }, { "epoch": 0.06185, "grad_norm": 0.040849775075912476, "learning_rate": 5.692714329015597e-06, "loss": 0.0333, "step": 158370 }, { "epoch": 0.0619, "grad_norm": 0.047011423856019974, "learning_rate": 5.690088600101703e-06, "loss": 0.034, "step": 158380 }, { "epoch": 0.06195, "grad_norm": 0.04129017889499664, "learning_rate": 5.68746339911353e-06, "loss": 0.0357, "step": 158390 }, { "epoch": 0.062, "grad_norm": 0.045220695436000824, "learning_rate": 5.684838726122854e-06, "loss": 0.0331, "step": 158400 }, { "epoch": 0.06205, "grad_norm": 0.043388549238443375, "learning_rate": 5.6822145812014285e-06, "loss": 0.0341, "step": 158410 }, { "epoch": 0.0621, "grad_norm": 0.04896579682826996, "learning_rate": 5.6795909644210114e-06, "loss": 0.0332, "step": 158420 }, { "epoch": 0.06215, "grad_norm": 0.040435150265693665, "learning_rate": 5.676967875853303e-06, "loss": 0.0336, "step": 158430 }, { "epoch": 0.0622, "grad_norm": 0.03940362483263016, "learning_rate": 5.674345315570037e-06, "loss": 0.0334, "step": 158440 }, { "epoch": 0.06225, "grad_norm": 0.047289952635765076, "learning_rate": 5.671723283642916e-06, "loss": 0.0353, "step": 158450 }, { "epoch": 0.0623, "grad_norm": 0.04400908201932907, "learning_rate": 5.669101780143618e-06, "loss": 0.0326, "step": 158460 }, { "epoch": 0.06235, "grad_norm": 0.03765032812952995, "learning_rate": 5.666480805143815e-06, "loss": 0.0329, "step": 158470 }, { "epoch": 0.0624, "grad_norm": 0.05119500309228897, "learning_rate": 5.663860358715156e-06, "loss": 0.0322, "step": 158480 }, { "epoch": 0.06245, "grad_norm": 0.03913963586091995, "learning_rate": 5.6612404409293e-06, "loss": 0.0334, "step": 158490 }, { "epoch": 0.0625, "grad_norm": 0.03637481853365898, "learning_rate": 5.658621051857863e-06, "loss": 0.0342, "step": 158500 }, { "epoch": 0.06255, "grad_norm": 0.04477520287036896, "learning_rate": 5.656002191572452e-06, "loss": 0.0338, "step": 158510 }, { "epoch": 0.0626, "grad_norm": 0.06019911915063858, "learning_rate": 5.6533838601446845e-06, "loss": 0.0331, "step": 158520 }, { "epoch": 0.06265, "grad_norm": 0.04236951097846031, "learning_rate": 5.650766057646123e-06, "loss": 0.0328, "step": 158530 }, { "epoch": 0.0627, "grad_norm": 0.05303535982966423, "learning_rate": 5.648148784148358e-06, "loss": 0.0345, "step": 158540 }, { "epoch": 0.06275, "grad_norm": 0.043178990483284, "learning_rate": 5.6455320397229336e-06, "loss": 0.033, "step": 158550 }, { "epoch": 0.0628, "grad_norm": 0.04464222490787506, "learning_rate": 5.642915824441386e-06, "loss": 0.0339, "step": 158560 }, { "epoch": 0.06285, "grad_norm": 0.047780852764844894, "learning_rate": 5.640300138375257e-06, "loss": 0.0334, "step": 158570 }, { "epoch": 0.0629, "grad_norm": 0.04555336758494377, "learning_rate": 5.637684981596045e-06, "loss": 0.0341, "step": 158580 }, { "epoch": 0.06295, "grad_norm": 0.05541256442666054, "learning_rate": 5.635070354175254e-06, "loss": 0.034, "step": 158590 }, { "epoch": 0.063, "grad_norm": 0.04075656086206436, "learning_rate": 5.632456256184357e-06, "loss": 0.0342, "step": 158600 }, { "epoch": 0.06305, "grad_norm": 0.04417794942855835, "learning_rate": 5.629842687694837e-06, "loss": 0.0338, "step": 158610 }, { "epoch": 0.0631, "grad_norm": 0.04109339043498039, "learning_rate": 5.627229648778132e-06, "loss": 0.0331, "step": 158620 }, { "epoch": 0.06315, "grad_norm": 0.0533117949962616, "learning_rate": 5.6246171395057e-06, "loss": 0.0337, "step": 158630 }, { "epoch": 0.0632, "grad_norm": 0.04230556637048721, "learning_rate": 5.622005159948957e-06, "loss": 0.0328, "step": 158640 }, { "epoch": 0.06325, "grad_norm": 0.04738219827413559, "learning_rate": 5.619393710179302e-06, "loss": 0.0346, "step": 158650 }, { "epoch": 0.0633, "grad_norm": 0.057871103286743164, "learning_rate": 5.616782790268152e-06, "loss": 0.0343, "step": 158660 }, { "epoch": 0.06335, "grad_norm": 0.056884873658418655, "learning_rate": 5.614172400286877e-06, "loss": 0.0343, "step": 158670 }, { "epoch": 0.0634, "grad_norm": 0.044703949242830276, "learning_rate": 5.611562540306847e-06, "loss": 0.0363, "step": 158680 }, { "epoch": 0.06345, "grad_norm": 0.04357363283634186, "learning_rate": 5.608953210399406e-06, "loss": 0.0368, "step": 158690 }, { "epoch": 0.0635, "grad_norm": 0.050504423677921295, "learning_rate": 5.6063444106359e-06, "loss": 0.0342, "step": 158700 }, { "epoch": 0.06355, "grad_norm": 0.04960758984088898, "learning_rate": 5.6037361410876645e-06, "loss": 0.0331, "step": 158710 }, { "epoch": 0.0636, "grad_norm": 0.04468027502298355, "learning_rate": 5.601128401825984e-06, "loss": 0.0335, "step": 158720 }, { "epoch": 0.06365, "grad_norm": 0.045513805001974106, "learning_rate": 5.598521192922171e-06, "loss": 0.0335, "step": 158730 }, { "epoch": 0.0637, "grad_norm": 0.04335379600524902, "learning_rate": 5.595914514447493e-06, "loss": 0.0328, "step": 158740 }, { "epoch": 0.06375, "grad_norm": 0.04896742105484009, "learning_rate": 5.593308366473227e-06, "loss": 0.0331, "step": 158750 }, { "epoch": 0.0638, "grad_norm": 0.04479243606328964, "learning_rate": 5.5907027490706225e-06, "loss": 0.0326, "step": 158760 }, { "epoch": 0.06385, "grad_norm": 0.04396402835845947, "learning_rate": 5.5880976623109036e-06, "loss": 0.0327, "step": 158770 }, { "epoch": 0.0639, "grad_norm": 0.043852224946022034, "learning_rate": 5.5854931062653105e-06, "loss": 0.033, "step": 158780 }, { "epoch": 0.06395, "grad_norm": 0.043481361120939255, "learning_rate": 5.582889081005044e-06, "loss": 0.0325, "step": 158790 }, { "epoch": 0.064, "grad_norm": 0.04225054010748863, "learning_rate": 5.5802855866012915e-06, "loss": 0.0322, "step": 158800 }, { "epoch": 0.06405, "grad_norm": 0.03953962028026581, "learning_rate": 5.577682623125233e-06, "loss": 0.0316, "step": 158810 }, { "epoch": 0.0641, "grad_norm": 0.03687303513288498, "learning_rate": 5.57508019064803e-06, "loss": 0.0313, "step": 158820 }, { "epoch": 0.06415, "grad_norm": 0.04551881551742554, "learning_rate": 5.572478289240849e-06, "loss": 0.0343, "step": 158830 }, { "epoch": 0.0642, "grad_norm": 0.05536499246954918, "learning_rate": 5.569876918974809e-06, "loss": 0.0339, "step": 158840 }, { "epoch": 0.06425, "grad_norm": 0.049355749040842056, "learning_rate": 5.567276079921036e-06, "loss": 0.0326, "step": 158850 }, { "epoch": 0.0643, "grad_norm": 0.04733234643936157, "learning_rate": 5.564675772150626e-06, "loss": 0.033, "step": 158860 }, { "epoch": 0.06435, "grad_norm": 0.06434501707553864, "learning_rate": 5.562075995734689e-06, "loss": 0.0345, "step": 158870 }, { "epoch": 0.0644, "grad_norm": 0.04427826777100563, "learning_rate": 5.559476750744288e-06, "loss": 0.0323, "step": 158880 }, { "epoch": 0.06445, "grad_norm": 0.04419584572315216, "learning_rate": 5.5568780372504845e-06, "loss": 0.0329, "step": 158890 }, { "epoch": 0.0645, "grad_norm": 0.05121339112520218, "learning_rate": 5.554279855324337e-06, "loss": 0.0328, "step": 158900 }, { "epoch": 0.06455, "grad_norm": 0.05606408789753914, "learning_rate": 5.551682205036867e-06, "loss": 0.0343, "step": 158910 }, { "epoch": 0.0646, "grad_norm": 0.051648642867803574, "learning_rate": 5.549085086459113e-06, "loss": 0.032, "step": 158920 }, { "epoch": 0.06465, "grad_norm": 0.05112677812576294, "learning_rate": 5.54648849966205e-06, "loss": 0.0316, "step": 158930 }, { "epoch": 0.0647, "grad_norm": 0.07420983165502548, "learning_rate": 5.543892444716686e-06, "loss": 0.0328, "step": 158940 }, { "epoch": 0.06475, "grad_norm": 0.05548356845974922, "learning_rate": 5.541296921693998e-06, "loss": 0.0328, "step": 158950 }, { "epoch": 0.0648, "grad_norm": 0.04403974860906601, "learning_rate": 5.538701930664941e-06, "loss": 0.0327, "step": 158960 }, { "epoch": 0.06485, "grad_norm": 0.04643482714891434, "learning_rate": 5.536107471700463e-06, "loss": 0.0321, "step": 158970 }, { "epoch": 0.0649, "grad_norm": 0.04126209765672684, "learning_rate": 5.533513544871488e-06, "loss": 0.0318, "step": 158980 }, { "epoch": 0.06495, "grad_norm": 0.04286893084645271, "learning_rate": 5.5309201502489475e-06, "loss": 0.0321, "step": 158990 }, { "epoch": 0.065, "grad_norm": 0.05262665078043938, "learning_rate": 5.528327287903734e-06, "loss": 0.0329, "step": 159000 }, { "epoch": 0.06505, "grad_norm": 0.051432929933071136, "learning_rate": 5.525734957906731e-06, "loss": 0.0332, "step": 159010 }, { "epoch": 0.0651, "grad_norm": 0.06794150918722153, "learning_rate": 5.523143160328823e-06, "loss": 0.0355, "step": 159020 }, { "epoch": 0.06515, "grad_norm": 0.06131485849618912, "learning_rate": 5.520551895240858e-06, "loss": 0.0338, "step": 159030 }, { "epoch": 0.0652, "grad_norm": 0.09646933525800705, "learning_rate": 5.517961162713695e-06, "loss": 0.0347, "step": 159040 }, { "epoch": 0.06525, "grad_norm": 0.052614159882068634, "learning_rate": 5.5153709628181534e-06, "loss": 0.0328, "step": 159050 }, { "epoch": 0.0653, "grad_norm": 0.0494559109210968, "learning_rate": 5.51278129562505e-06, "loss": 0.0336, "step": 159060 }, { "epoch": 0.06535, "grad_norm": 0.04907679557800293, "learning_rate": 5.510192161205177e-06, "loss": 0.0345, "step": 159070 }, { "epoch": 0.0654, "grad_norm": 0.0441729798913002, "learning_rate": 5.507603559629337e-06, "loss": 0.0333, "step": 159080 }, { "epoch": 0.06545, "grad_norm": 0.05236222967505455, "learning_rate": 5.505015490968291e-06, "loss": 0.0328, "step": 159090 }, { "epoch": 0.0655, "grad_norm": 0.038976993411779404, "learning_rate": 5.502427955292791e-06, "loss": 0.0338, "step": 159100 }, { "epoch": 0.06555, "grad_norm": 0.040054868906736374, "learning_rate": 5.499840952673593e-06, "loss": 0.0332, "step": 159110 }, { "epoch": 0.0656, "grad_norm": 0.0409533828496933, "learning_rate": 5.497254483181413e-06, "loss": 0.0335, "step": 159120 }, { "epoch": 0.06565, "grad_norm": 0.051507771015167236, "learning_rate": 5.4946685468869715e-06, "loss": 0.034, "step": 159130 }, { "epoch": 0.0657, "grad_norm": 0.05494391545653343, "learning_rate": 5.492083143860966e-06, "loss": 0.0344, "step": 159140 }, { "epoch": 0.06575, "grad_norm": 0.039874982088804245, "learning_rate": 5.489498274174071e-06, "loss": 0.0329, "step": 159150 }, { "epoch": 0.0658, "grad_norm": 0.04216877371072769, "learning_rate": 5.486913937896973e-06, "loss": 0.0344, "step": 159160 }, { "epoch": 0.06585, "grad_norm": 0.04714996740221977, "learning_rate": 5.484330135100313e-06, "loss": 0.0337, "step": 159170 }, { "epoch": 0.0659, "grad_norm": 0.04491034150123596, "learning_rate": 5.48174686585474e-06, "loss": 0.0338, "step": 159180 }, { "epoch": 0.06595, "grad_norm": 0.04163685441017151, "learning_rate": 5.479164130230862e-06, "loss": 0.0338, "step": 159190 }, { "epoch": 0.066, "grad_norm": 0.04118207469582558, "learning_rate": 5.47658192829931e-06, "loss": 0.0337, "step": 159200 }, { "epoch": 0.06605, "grad_norm": 0.04150707647204399, "learning_rate": 5.474000260130682e-06, "loss": 0.0332, "step": 159210 }, { "epoch": 0.0661, "grad_norm": 0.041581131517887115, "learning_rate": 5.471419125795541e-06, "loss": 0.0335, "step": 159220 }, { "epoch": 0.06615, "grad_norm": 0.044462401419878006, "learning_rate": 5.46883852536447e-06, "loss": 0.0326, "step": 159230 }, { "epoch": 0.0662, "grad_norm": 0.04261643439531326, "learning_rate": 5.466258458908008e-06, "loss": 0.0332, "step": 159240 }, { "epoch": 0.06625, "grad_norm": 0.046739377081394196, "learning_rate": 5.46367892649671e-06, "loss": 0.0334, "step": 159250 }, { "epoch": 0.0663, "grad_norm": 0.036309320479631424, "learning_rate": 5.461099928201088e-06, "loss": 0.0325, "step": 159260 }, { "epoch": 0.06635, "grad_norm": 0.04956991970539093, "learning_rate": 5.458521464091648e-06, "loss": 0.0333, "step": 159270 }, { "epoch": 0.0664, "grad_norm": 0.042923565953969955, "learning_rate": 5.4559435342389e-06, "loss": 0.0324, "step": 159280 }, { "epoch": 0.06645, "grad_norm": 0.040057938545942307, "learning_rate": 5.453366138713309e-06, "loss": 0.0327, "step": 159290 }, { "epoch": 0.0665, "grad_norm": 0.03960050642490387, "learning_rate": 5.450789277585347e-06, "loss": 0.0331, "step": 159300 }, { "epoch": 0.06655, "grad_norm": 0.04826389253139496, "learning_rate": 5.448212950925455e-06, "loss": 0.0332, "step": 159310 }, { "epoch": 0.0666, "grad_norm": 0.03730017691850662, "learning_rate": 5.445637158804082e-06, "loss": 0.0331, "step": 159320 }, { "epoch": 0.06665, "grad_norm": 0.03824429586529732, "learning_rate": 5.443061901291635e-06, "loss": 0.0332, "step": 159330 }, { "epoch": 0.0667, "grad_norm": 0.04533061012625694, "learning_rate": 5.440487178458533e-06, "loss": 0.0335, "step": 159340 }, { "epoch": 0.06675, "grad_norm": 0.0460333377122879, "learning_rate": 5.437912990375169e-06, "loss": 0.034, "step": 159350 }, { "epoch": 0.0668, "grad_norm": 0.04362259805202484, "learning_rate": 5.435339337111905e-06, "loss": 0.034, "step": 159360 }, { "epoch": 0.06685, "grad_norm": 0.04969523474574089, "learning_rate": 5.432766218739118e-06, "loss": 0.0331, "step": 159370 }, { "epoch": 0.0669, "grad_norm": 0.04067446291446686, "learning_rate": 5.430193635327155e-06, "loss": 0.0322, "step": 159380 }, { "epoch": 0.06695, "grad_norm": 0.04759836196899414, "learning_rate": 5.427621586946338e-06, "loss": 0.034, "step": 159390 }, { "epoch": 0.067, "grad_norm": 0.04831007868051529, "learning_rate": 5.425050073667002e-06, "loss": 0.033, "step": 159400 }, { "epoch": 0.06705, "grad_norm": 0.04356855899095535, "learning_rate": 5.422479095559435e-06, "loss": 0.033, "step": 159410 }, { "epoch": 0.0671, "grad_norm": 0.03892563655972481, "learning_rate": 5.419908652693947e-06, "loss": 0.0338, "step": 159420 }, { "epoch": 0.06715, "grad_norm": 0.05179993808269501, "learning_rate": 5.417338745140788e-06, "loss": 0.0343, "step": 159430 }, { "epoch": 0.0672, "grad_norm": 0.041009191423654556, "learning_rate": 5.414769372970238e-06, "loss": 0.0319, "step": 159440 }, { "epoch": 0.06725, "grad_norm": 0.04942528158426285, "learning_rate": 5.412200536252529e-06, "loss": 0.0351, "step": 159450 }, { "epoch": 0.0673, "grad_norm": 0.0511196106672287, "learning_rate": 5.409632235057904e-06, "loss": 0.0343, "step": 159460 }, { "epoch": 0.06735, "grad_norm": 0.04779931902885437, "learning_rate": 5.4070644694565745e-06, "loss": 0.0322, "step": 159470 }, { "epoch": 0.0674, "grad_norm": 0.040881089866161346, "learning_rate": 5.404497239518735e-06, "loss": 0.0328, "step": 159480 }, { "epoch": 0.06745, "grad_norm": 0.04905702546238899, "learning_rate": 5.401930545314587e-06, "loss": 0.0342, "step": 159490 }, { "epoch": 0.0675, "grad_norm": 0.04856644570827484, "learning_rate": 5.399364386914294e-06, "loss": 0.0331, "step": 159500 }, { "epoch": 0.06755, "grad_norm": 0.04014168679714203, "learning_rate": 5.396798764388017e-06, "loss": 0.033, "step": 159510 }, { "epoch": 0.0676, "grad_norm": 0.036229848861694336, "learning_rate": 5.3942336778058875e-06, "loss": 0.0323, "step": 159520 }, { "epoch": 0.06765, "grad_norm": 0.05225673317909241, "learning_rate": 5.391669127238044e-06, "loss": 0.0342, "step": 159530 }, { "epoch": 0.0677, "grad_norm": 0.0477435439825058, "learning_rate": 5.389105112754609e-06, "loss": 0.0331, "step": 159540 }, { "epoch": 0.06775, "grad_norm": 0.04617225006222725, "learning_rate": 5.3865416344256705e-06, "loss": 0.0336, "step": 159550 }, { "epoch": 0.0678, "grad_norm": 0.05308155715465546, "learning_rate": 5.3839786923213175e-06, "loss": 0.0374, "step": 159560 }, { "epoch": 0.06785, "grad_norm": 0.03811059147119522, "learning_rate": 5.3814162865116094e-06, "loss": 0.0335, "step": 159570 }, { "epoch": 0.0679, "grad_norm": 0.04400903731584549, "learning_rate": 5.378854417066612e-06, "loss": 0.0346, "step": 159580 }, { "epoch": 0.06795, "grad_norm": 0.0434289425611496, "learning_rate": 5.376293084056375e-06, "loss": 0.0348, "step": 159590 }, { "epoch": 0.068, "grad_norm": 0.04518948495388031, "learning_rate": 5.373732287550897e-06, "loss": 0.0364, "step": 159600 }, { "epoch": 0.06805, "grad_norm": 0.04921133071184158, "learning_rate": 5.371172027620213e-06, "loss": 0.0354, "step": 159610 }, { "epoch": 0.0681, "grad_norm": 0.044258587062358856, "learning_rate": 5.368612304334308e-06, "loss": 0.034, "step": 159620 }, { "epoch": 0.06815, "grad_norm": 0.03922766447067261, "learning_rate": 5.366053117763179e-06, "loss": 0.0336, "step": 159630 }, { "epoch": 0.0682, "grad_norm": 0.04011528939008713, "learning_rate": 5.363494467976768e-06, "loss": 0.0344, "step": 159640 }, { "epoch": 0.06825, "grad_norm": 0.04395853728055954, "learning_rate": 5.360936355045041e-06, "loss": 0.033, "step": 159650 }, { "epoch": 0.0683, "grad_norm": 0.04406457394361496, "learning_rate": 5.3583787790379424e-06, "loss": 0.0346, "step": 159660 }, { "epoch": 0.06835, "grad_norm": 0.041080668568611145, "learning_rate": 5.355821740025391e-06, "loss": 0.0343, "step": 159670 }, { "epoch": 0.0684, "grad_norm": 0.04846780747175217, "learning_rate": 5.3532652380772904e-06, "loss": 0.0346, "step": 159680 }, { "epoch": 0.06845, "grad_norm": 0.05273908004164696, "learning_rate": 5.350709273263533e-06, "loss": 0.0334, "step": 159690 }, { "epoch": 0.0685, "grad_norm": 0.04335326701402664, "learning_rate": 5.348153845654008e-06, "loss": 0.0331, "step": 159700 }, { "epoch": 0.06855, "grad_norm": 0.049266718327999115, "learning_rate": 5.345598955318565e-06, "loss": 0.0328, "step": 159710 }, { "epoch": 0.0686, "grad_norm": 0.07993786036968231, "learning_rate": 5.343044602327072e-06, "loss": 0.033, "step": 159720 }, { "epoch": 0.06865, "grad_norm": 0.05523889511823654, "learning_rate": 5.340490786749355e-06, "loss": 0.0325, "step": 159730 }, { "epoch": 0.0687, "grad_norm": 0.046398960053920746, "learning_rate": 5.337937508655228e-06, "loss": 0.0335, "step": 159740 }, { "epoch": 0.06875, "grad_norm": 0.05909277871251106, "learning_rate": 5.3353847681145066e-06, "loss": 0.0319, "step": 159750 }, { "epoch": 0.0688, "grad_norm": 0.05883355066180229, "learning_rate": 5.3328325651969795e-06, "loss": 0.0329, "step": 159760 }, { "epoch": 0.06885, "grad_norm": 0.044400133192539215, "learning_rate": 5.330280899972415e-06, "loss": 0.0325, "step": 159770 }, { "epoch": 0.0689, "grad_norm": 0.050274547189474106, "learning_rate": 5.327729772510587e-06, "loss": 0.0328, "step": 159780 }, { "epoch": 0.06895, "grad_norm": 0.06526787579059601, "learning_rate": 5.325179182881232e-06, "loss": 0.0331, "step": 159790 }, { "epoch": 0.069, "grad_norm": 0.05052199214696884, "learning_rate": 5.322629131154097e-06, "loss": 0.0333, "step": 159800 }, { "epoch": 0.06905, "grad_norm": 0.04053528234362602, "learning_rate": 5.320079617398879e-06, "loss": 0.033, "step": 159810 }, { "epoch": 0.0691, "grad_norm": 0.05289068445563316, "learning_rate": 5.3175306416852945e-06, "loss": 0.0334, "step": 159820 }, { "epoch": 0.06915, "grad_norm": 0.037741102278232574, "learning_rate": 5.314982204083025e-06, "loss": 0.032, "step": 159830 }, { "epoch": 0.0692, "grad_norm": 0.03996310755610466, "learning_rate": 5.312434304661748e-06, "loss": 0.0335, "step": 159840 }, { "epoch": 0.06925, "grad_norm": 0.03911514952778816, "learning_rate": 5.3098869434911245e-06, "loss": 0.0324, "step": 159850 }, { "epoch": 0.0693, "grad_norm": 0.04355086758732796, "learning_rate": 5.307340120640789e-06, "loss": 0.032, "step": 159860 }, { "epoch": 0.06935, "grad_norm": 0.038841210305690765, "learning_rate": 5.3047938361803804e-06, "loss": 0.0339, "step": 159870 }, { "epoch": 0.0694, "grad_norm": 0.04024119675159454, "learning_rate": 5.3022480901795096e-06, "loss": 0.0321, "step": 159880 }, { "epoch": 0.06945, "grad_norm": 0.05228933319449425, "learning_rate": 5.299702882707777e-06, "loss": 0.0341, "step": 159890 }, { "epoch": 0.0695, "grad_norm": 0.052483391016721725, "learning_rate": 5.29715821383476e-06, "loss": 0.0331, "step": 159900 }, { "epoch": 0.06955, "grad_norm": 0.042383596301078796, "learning_rate": 5.294614083630034e-06, "loss": 0.0333, "step": 159910 }, { "epoch": 0.0696, "grad_norm": 0.04163794964551926, "learning_rate": 5.292070492163165e-06, "loss": 0.0328, "step": 159920 }, { "epoch": 0.06965, "grad_norm": 0.04140407219529152, "learning_rate": 5.289527439503683e-06, "loss": 0.032, "step": 159930 }, { "epoch": 0.0697, "grad_norm": 0.04909062013030052, "learning_rate": 5.286984925721117e-06, "loss": 0.0339, "step": 159940 }, { "epoch": 0.06975, "grad_norm": 0.040237706154584885, "learning_rate": 5.284442950884969e-06, "loss": 0.0319, "step": 159950 }, { "epoch": 0.0698, "grad_norm": 0.0394199937582016, "learning_rate": 5.28190151506475e-06, "loss": 0.0324, "step": 159960 }, { "epoch": 0.06985, "grad_norm": 0.040862519294023514, "learning_rate": 5.279360618329937e-06, "loss": 0.0329, "step": 159970 }, { "epoch": 0.0699, "grad_norm": 0.03927301615476608, "learning_rate": 5.27682026074999e-06, "loss": 0.0317, "step": 159980 }, { "epoch": 0.06995, "grad_norm": 0.042789362370967865, "learning_rate": 5.274280442394375e-06, "loss": 0.0327, "step": 159990 }, { "epoch": 0.07, "grad_norm": 0.042050816118717194, "learning_rate": 5.271741163332514e-06, "loss": 0.0324, "step": 160000 }, { "epoch": 0.07005, "grad_norm": 0.04403001442551613, "learning_rate": 5.269202423633851e-06, "loss": 0.0322, "step": 160010 }, { "epoch": 0.0701, "grad_norm": 0.03922681510448456, "learning_rate": 5.2666642233677676e-06, "loss": 0.032, "step": 160020 }, { "epoch": 0.07015, "grad_norm": 0.044261377304792404, "learning_rate": 5.264126562603672e-06, "loss": 0.033, "step": 160030 }, { "epoch": 0.0702, "grad_norm": 0.03984180465340614, "learning_rate": 5.26158944141095e-06, "loss": 0.0327, "step": 160040 }, { "epoch": 0.07025, "grad_norm": 0.04177449643611908, "learning_rate": 5.259052859858954e-06, "loss": 0.0331, "step": 160050 }, { "epoch": 0.0703, "grad_norm": 0.04058413580060005, "learning_rate": 5.2565168180170374e-06, "loss": 0.0329, "step": 160060 }, { "epoch": 0.07035, "grad_norm": 0.04518529027700424, "learning_rate": 5.253981315954528e-06, "loss": 0.0328, "step": 160070 }, { "epoch": 0.0704, "grad_norm": 0.04935334622859955, "learning_rate": 5.2514463537407576e-06, "loss": 0.0318, "step": 160080 }, { "epoch": 0.07045, "grad_norm": 0.04204292595386505, "learning_rate": 5.248911931445024e-06, "loss": 0.032, "step": 160090 }, { "epoch": 0.0705, "grad_norm": 0.040166597813367844, "learning_rate": 5.24637804913661e-06, "loss": 0.0312, "step": 160100 }, { "epoch": 0.07055, "grad_norm": 0.06826286762952805, "learning_rate": 5.24384470688481e-06, "loss": 0.0332, "step": 160110 }, { "epoch": 0.0706, "grad_norm": 0.041277870535850525, "learning_rate": 5.241311904758864e-06, "loss": 0.0321, "step": 160120 }, { "epoch": 0.07065, "grad_norm": 0.04410373792052269, "learning_rate": 5.238779642828034e-06, "loss": 0.0341, "step": 160130 }, { "epoch": 0.0707, "grad_norm": 0.046558964997529984, "learning_rate": 5.2362479211615466e-06, "loss": 0.0325, "step": 160140 }, { "epoch": 0.07075, "grad_norm": 0.03963978961110115, "learning_rate": 5.233716739828606e-06, "loss": 0.0327, "step": 160150 }, { "epoch": 0.0708, "grad_norm": 0.03820747137069702, "learning_rate": 5.231186098898433e-06, "loss": 0.0319, "step": 160160 }, { "epoch": 0.07085, "grad_norm": 0.03716480731964111, "learning_rate": 5.2286559984402075e-06, "loss": 0.0323, "step": 160170 }, { "epoch": 0.0709, "grad_norm": 0.04506603255867958, "learning_rate": 5.2261264385230964e-06, "loss": 0.0328, "step": 160180 }, { "epoch": 0.07095, "grad_norm": 0.05314488336443901, "learning_rate": 5.223597419216253e-06, "loss": 0.0323, "step": 160190 }, { "epoch": 0.071, "grad_norm": 0.043797120451927185, "learning_rate": 5.221068940588833e-06, "loss": 0.0339, "step": 160200 }, { "epoch": 0.07105, "grad_norm": 0.03779615834355354, "learning_rate": 5.21854100270995e-06, "loss": 0.0319, "step": 160210 }, { "epoch": 0.0711, "grad_norm": 0.03833272308111191, "learning_rate": 5.216013605648734e-06, "loss": 0.0319, "step": 160220 }, { "epoch": 0.07115, "grad_norm": 0.04615391418337822, "learning_rate": 5.213486749474273e-06, "loss": 0.0327, "step": 160230 }, { "epoch": 0.0712, "grad_norm": 0.04216158762574196, "learning_rate": 5.210960434255643e-06, "loss": 0.0328, "step": 160240 }, { "epoch": 0.07125, "grad_norm": 0.039154306054115295, "learning_rate": 5.208434660061928e-06, "loss": 0.0327, "step": 160250 }, { "epoch": 0.0713, "grad_norm": 0.04261749982833862, "learning_rate": 5.2059094269621715e-06, "loss": 0.0336, "step": 160260 }, { "epoch": 0.07135, "grad_norm": 0.04285505414009094, "learning_rate": 5.203384735025418e-06, "loss": 0.0342, "step": 160270 }, { "epoch": 0.0714, "grad_norm": 0.04513030871748924, "learning_rate": 5.20086058432068e-06, "loss": 0.0327, "step": 160280 }, { "epoch": 0.07145, "grad_norm": 0.042512886226177216, "learning_rate": 5.198336974916976e-06, "loss": 0.0339, "step": 160290 }, { "epoch": 0.0715, "grad_norm": 0.038196902722120285, "learning_rate": 5.195813906883315e-06, "loss": 0.0348, "step": 160300 }, { "epoch": 0.07155, "grad_norm": 0.040633756667375565, "learning_rate": 5.193291380288648e-06, "loss": 0.0332, "step": 160310 }, { "epoch": 0.0716, "grad_norm": 0.03849083557724953, "learning_rate": 5.1907693952019585e-06, "loss": 0.0336, "step": 160320 }, { "epoch": 0.07165, "grad_norm": 0.04616985470056534, "learning_rate": 5.188247951692185e-06, "loss": 0.0335, "step": 160330 }, { "epoch": 0.0717, "grad_norm": 0.04377584904432297, "learning_rate": 5.185727049828276e-06, "loss": 0.0338, "step": 160340 }, { "epoch": 0.07175, "grad_norm": 0.04809568449854851, "learning_rate": 5.183206689679148e-06, "loss": 0.0323, "step": 160350 }, { "epoch": 0.0718, "grad_norm": 0.04436049982905388, "learning_rate": 5.180686871313695e-06, "loss": 0.0328, "step": 160360 }, { "epoch": 0.07185, "grad_norm": 0.041805848479270935, "learning_rate": 5.178167594800825e-06, "loss": 0.0324, "step": 160370 }, { "epoch": 0.0719, "grad_norm": 0.03261794522404671, "learning_rate": 5.175648860209406e-06, "loss": 0.0331, "step": 160380 }, { "epoch": 0.07195, "grad_norm": 0.03945595771074295, "learning_rate": 5.1731306676083e-06, "loss": 0.0337, "step": 160390 }, { "epoch": 0.072, "grad_norm": 0.044130872935056686, "learning_rate": 5.1706130170663474e-06, "loss": 0.0331, "step": 160400 }, { "epoch": 0.07205, "grad_norm": 0.04020753875374794, "learning_rate": 5.1680959086523845e-06, "loss": 0.0353, "step": 160410 }, { "epoch": 0.0721, "grad_norm": 0.03573004901409149, "learning_rate": 5.165579342435234e-06, "loss": 0.0323, "step": 160420 }, { "epoch": 0.07215, "grad_norm": 0.05447396636009216, "learning_rate": 5.163063318483694e-06, "loss": 0.0343, "step": 160430 }, { "epoch": 0.0722, "grad_norm": 0.041657350957393646, "learning_rate": 5.16054783686655e-06, "loss": 0.0323, "step": 160440 }, { "epoch": 0.07225, "grad_norm": 0.053743381053209305, "learning_rate": 5.15803289765257e-06, "loss": 0.0342, "step": 160450 }, { "epoch": 0.0723, "grad_norm": 0.045378465205430984, "learning_rate": 5.155518500910522e-06, "loss": 0.0334, "step": 160460 }, { "epoch": 0.07235, "grad_norm": 0.04740902781486511, "learning_rate": 5.153004646709142e-06, "loss": 0.0344, "step": 160470 }, { "epoch": 0.0724, "grad_norm": 0.04553137347102165, "learning_rate": 5.150491335117153e-06, "loss": 0.0333, "step": 160480 }, { "epoch": 0.07245, "grad_norm": 0.03902588039636612, "learning_rate": 5.1479785662032795e-06, "loss": 0.0341, "step": 160490 }, { "epoch": 0.0725, "grad_norm": 0.04232773184776306, "learning_rate": 5.145466340036206e-06, "loss": 0.0339, "step": 160500 }, { "epoch": 0.07255, "grad_norm": 0.039230186492204666, "learning_rate": 5.14295465668464e-06, "loss": 0.0346, "step": 160510 }, { "epoch": 0.0726, "grad_norm": 0.04060075804591179, "learning_rate": 5.14044351621722e-06, "loss": 0.0353, "step": 160520 }, { "epoch": 0.07265, "grad_norm": 0.03841876611113548, "learning_rate": 5.13793291870262e-06, "loss": 0.0332, "step": 160530 }, { "epoch": 0.0727, "grad_norm": 0.04524306207895279, "learning_rate": 5.1354228642094635e-06, "loss": 0.0336, "step": 160540 }, { "epoch": 0.07275, "grad_norm": 0.040125150233507156, "learning_rate": 5.132913352806393e-06, "loss": 0.0342, "step": 160550 }, { "epoch": 0.0728, "grad_norm": 0.051557350903749466, "learning_rate": 5.1304043845620045e-06, "loss": 0.0344, "step": 160560 }, { "epoch": 0.07285, "grad_norm": 0.04546048492193222, "learning_rate": 5.12789595954489e-06, "loss": 0.0338, "step": 160570 }, { "epoch": 0.0729, "grad_norm": 0.04418913647532463, "learning_rate": 5.125388077823642e-06, "loss": 0.0341, "step": 160580 }, { "epoch": 0.07295, "grad_norm": 0.04851749911904335, "learning_rate": 5.122880739466818e-06, "loss": 0.0345, "step": 160590 }, { "epoch": 0.073, "grad_norm": 0.047638460993766785, "learning_rate": 5.120373944542958e-06, "loss": 0.0329, "step": 160600 }, { "epoch": 0.07305, "grad_norm": 0.0418444387614727, "learning_rate": 5.117867693120612e-06, "loss": 0.033, "step": 160610 }, { "epoch": 0.0731, "grad_norm": 0.03957684710621834, "learning_rate": 5.115361985268291e-06, "loss": 0.0333, "step": 160620 }, { "epoch": 0.07315, "grad_norm": 0.04226352646946907, "learning_rate": 5.112856821054507e-06, "loss": 0.0331, "step": 160630 }, { "epoch": 0.0732, "grad_norm": 0.0388200469315052, "learning_rate": 5.110352200547747e-06, "loss": 0.0327, "step": 160640 }, { "epoch": 0.07325, "grad_norm": 0.053701531141996384, "learning_rate": 5.107848123816486e-06, "loss": 0.034, "step": 160650 }, { "epoch": 0.0733, "grad_norm": 0.05368814989924431, "learning_rate": 5.105344590929176e-06, "loss": 0.0372, "step": 160660 }, { "epoch": 0.07335, "grad_norm": 0.05174095556139946, "learning_rate": 5.102841601954278e-06, "loss": 0.0325, "step": 160670 }, { "epoch": 0.0734, "grad_norm": 0.041240040212869644, "learning_rate": 5.100339156960218e-06, "loss": 0.0335, "step": 160680 }, { "epoch": 0.07345, "grad_norm": 0.05635536462068558, "learning_rate": 5.0978372560154e-06, "loss": 0.0335, "step": 160690 }, { "epoch": 0.0735, "grad_norm": 0.04730790853500366, "learning_rate": 5.095335899188241e-06, "loss": 0.0334, "step": 160700 }, { "epoch": 0.07355, "grad_norm": 0.043954696506261826, "learning_rate": 5.092835086547115e-06, "loss": 0.0338, "step": 160710 }, { "epoch": 0.0736, "grad_norm": 0.0432947538793087, "learning_rate": 5.090334818160414e-06, "loss": 0.0329, "step": 160720 }, { "epoch": 0.07365, "grad_norm": 0.05044640228152275, "learning_rate": 5.087835094096463e-06, "loss": 0.034, "step": 160730 }, { "epoch": 0.0737, "grad_norm": 0.04494147375226021, "learning_rate": 5.085335914423622e-06, "loss": 0.0332, "step": 160740 }, { "epoch": 0.07375, "grad_norm": 0.04129369556903839, "learning_rate": 5.08283727921022e-06, "loss": 0.0324, "step": 160750 }, { "epoch": 0.0738, "grad_norm": 0.03822697326540947, "learning_rate": 5.080339188524566e-06, "loss": 0.0311, "step": 160760 }, { "epoch": 0.07385, "grad_norm": 0.053277526050806046, "learning_rate": 5.077841642434955e-06, "loss": 0.0333, "step": 160770 }, { "epoch": 0.0739, "grad_norm": 0.054511070251464844, "learning_rate": 5.075344641009663e-06, "loss": 0.0319, "step": 160780 }, { "epoch": 0.07395, "grad_norm": 0.04316005855798721, "learning_rate": 5.072848184316964e-06, "loss": 0.0328, "step": 160790 }, { "epoch": 0.074, "grad_norm": 0.041936952620744705, "learning_rate": 5.070352272425119e-06, "loss": 0.0329, "step": 160800 }, { "epoch": 0.07405, "grad_norm": 0.04728446528315544, "learning_rate": 5.067856905402346e-06, "loss": 0.0341, "step": 160810 }, { "epoch": 0.0741, "grad_norm": 0.04272538423538208, "learning_rate": 5.065362083316882e-06, "loss": 0.0324, "step": 160820 }, { "epoch": 0.07415, "grad_norm": 0.042639367282390594, "learning_rate": 5.062867806236923e-06, "loss": 0.0333, "step": 160830 }, { "epoch": 0.0742, "grad_norm": 0.03823878616094589, "learning_rate": 5.0603740742306755e-06, "loss": 0.0322, "step": 160840 }, { "epoch": 0.07425, "grad_norm": 0.0475417897105217, "learning_rate": 5.057880887366309e-06, "loss": 0.0345, "step": 160850 }, { "epoch": 0.0743, "grad_norm": 0.04159359261393547, "learning_rate": 5.055388245711978e-06, "loss": 0.036, "step": 160860 }, { "epoch": 0.07435, "grad_norm": 0.044668398797512054, "learning_rate": 5.052896149335851e-06, "loss": 0.0321, "step": 160870 }, { "epoch": 0.0744, "grad_norm": 0.03785253316164017, "learning_rate": 5.0504045983060465e-06, "loss": 0.0326, "step": 160880 }, { "epoch": 0.07445, "grad_norm": 0.044448185712099075, "learning_rate": 5.0479135926906865e-06, "loss": 0.0319, "step": 160890 }, { "epoch": 0.0745, "grad_norm": 0.04127410799264908, "learning_rate": 5.0454231325578666e-06, "loss": 0.0325, "step": 160900 }, { "epoch": 0.07455, "grad_norm": 0.04340953379869461, "learning_rate": 5.042933217975687e-06, "loss": 0.0328, "step": 160910 }, { "epoch": 0.0746, "grad_norm": 0.0437263585627079, "learning_rate": 5.040443849012211e-06, "loss": 0.0327, "step": 160920 }, { "epoch": 0.07465, "grad_norm": 0.07725926488637924, "learning_rate": 5.037955025735508e-06, "loss": 0.036, "step": 160930 }, { "epoch": 0.0747, "grad_norm": 0.03931087255477905, "learning_rate": 5.035466748213616e-06, "loss": 0.033, "step": 160940 }, { "epoch": 0.07475, "grad_norm": 0.04674782231450081, "learning_rate": 5.032979016514555e-06, "loss": 0.0341, "step": 160950 }, { "epoch": 0.0748, "grad_norm": 0.0424048937857151, "learning_rate": 5.030491830706352e-06, "loss": 0.0331, "step": 160960 }, { "epoch": 0.07485, "grad_norm": 0.04696732386946678, "learning_rate": 5.028005190857002e-06, "loss": 0.0326, "step": 160970 }, { "epoch": 0.0749, "grad_norm": 0.03869684040546417, "learning_rate": 5.025519097034478e-06, "loss": 0.034, "step": 160980 }, { "epoch": 0.07495, "grad_norm": 0.04522455111145973, "learning_rate": 5.023033549306766e-06, "loss": 0.0329, "step": 160990 }, { "epoch": 0.075, "grad_norm": 0.04113109037280083, "learning_rate": 5.020548547741805e-06, "loss": 0.0335, "step": 161000 }, { "epoch": 0.07505, "grad_norm": 0.0423266626894474, "learning_rate": 5.018064092407554e-06, "loss": 0.0338, "step": 161010 }, { "epoch": 0.0751, "grad_norm": 0.05098946765065193, "learning_rate": 5.015580183371912e-06, "loss": 0.0346, "step": 161020 }, { "epoch": 0.07515, "grad_norm": 0.04282857105135918, "learning_rate": 5.013096820702804e-06, "loss": 0.0341, "step": 161030 }, { "epoch": 0.0752, "grad_norm": 0.03867142274975777, "learning_rate": 5.010614004468114e-06, "loss": 0.0343, "step": 161040 }, { "epoch": 0.07525, "grad_norm": 0.04410089552402496, "learning_rate": 5.008131734735735e-06, "loss": 0.0327, "step": 161050 }, { "epoch": 0.0753, "grad_norm": 0.043044958263635635, "learning_rate": 5.005650011573526e-06, "loss": 0.0331, "step": 161060 }, { "epoch": 0.07535, "grad_norm": 0.041100747883319855, "learning_rate": 5.003168835049324e-06, "loss": 0.0323, "step": 161070 }, { "epoch": 0.0754, "grad_norm": 0.04438444972038269, "learning_rate": 5.000688205230983e-06, "loss": 0.0333, "step": 161080 }, { "epoch": 0.07545, "grad_norm": 0.04496739059686661, "learning_rate": 4.9982081221863094e-06, "loss": 0.0333, "step": 161090 }, { "epoch": 0.0755, "grad_norm": 0.04447561874985695, "learning_rate": 4.995728585983114e-06, "loss": 0.0326, "step": 161100 }, { "epoch": 0.07555, "grad_norm": 0.04621170461177826, "learning_rate": 4.993249596689179e-06, "loss": 0.0325, "step": 161110 }, { "epoch": 0.0756, "grad_norm": 0.04111267998814583, "learning_rate": 4.990771154372281e-06, "loss": 0.0331, "step": 161120 }, { "epoch": 0.07565, "grad_norm": 0.04611721634864807, "learning_rate": 4.988293259100188e-06, "loss": 0.0335, "step": 161130 }, { "epoch": 0.0757, "grad_norm": 0.040240366011857986, "learning_rate": 4.985815910940641e-06, "loss": 0.033, "step": 161140 }, { "epoch": 0.07575, "grad_norm": 0.04606426879763603, "learning_rate": 4.9833391099613685e-06, "loss": 0.0343, "step": 161150 }, { "epoch": 0.0758, "grad_norm": 0.04275064542889595, "learning_rate": 4.980862856230079e-06, "loss": 0.0337, "step": 161160 }, { "epoch": 0.07585, "grad_norm": 0.04866638407111168, "learning_rate": 4.978387149814481e-06, "loss": 0.0338, "step": 161170 }, { "epoch": 0.0759, "grad_norm": 0.04923534020781517, "learning_rate": 4.975911990782262e-06, "loss": 0.0335, "step": 161180 }, { "epoch": 0.07595, "grad_norm": 0.036708369851112366, "learning_rate": 4.973437379201076e-06, "loss": 0.0326, "step": 161190 }, { "epoch": 0.076, "grad_norm": 0.040371738374233246, "learning_rate": 4.970963315138596e-06, "loss": 0.0322, "step": 161200 }, { "epoch": 0.07605, "grad_norm": 0.040207087993621826, "learning_rate": 4.968489798662445e-06, "loss": 0.0326, "step": 161210 }, { "epoch": 0.0761, "grad_norm": 0.0415315218269825, "learning_rate": 4.966016829840276e-06, "loss": 0.0323, "step": 161220 }, { "epoch": 0.07615, "grad_norm": 0.038596540689468384, "learning_rate": 4.963544408739665e-06, "loss": 0.0326, "step": 161230 }, { "epoch": 0.0762, "grad_norm": 0.0413893461227417, "learning_rate": 4.9610725354282216e-06, "loss": 0.0335, "step": 161240 }, { "epoch": 0.07625, "grad_norm": 0.03666406869888306, "learning_rate": 4.958601209973535e-06, "loss": 0.034, "step": 161250 }, { "epoch": 0.0763, "grad_norm": 0.03784109652042389, "learning_rate": 4.956130432443159e-06, "loss": 0.0315, "step": 161260 }, { "epoch": 0.07635, "grad_norm": 0.03671599552035332, "learning_rate": 4.953660202904651e-06, "loss": 0.0319, "step": 161270 }, { "epoch": 0.0764, "grad_norm": 0.03958893194794655, "learning_rate": 4.951190521425531e-06, "loss": 0.0331, "step": 161280 }, { "epoch": 0.07645, "grad_norm": 0.042154353111982346, "learning_rate": 4.948721388073341e-06, "loss": 0.0341, "step": 161290 }, { "epoch": 0.0765, "grad_norm": 0.03948592394590378, "learning_rate": 4.9462528029155715e-06, "loss": 0.0324, "step": 161300 }, { "epoch": 0.07655, "grad_norm": 0.04275766387581825, "learning_rate": 4.943784766019713e-06, "loss": 0.032, "step": 161310 }, { "epoch": 0.0766, "grad_norm": 0.04145859181880951, "learning_rate": 4.94131727745325e-06, "loss": 0.0339, "step": 161320 }, { "epoch": 0.07665, "grad_norm": 0.0472242496907711, "learning_rate": 4.938850337283629e-06, "loss": 0.033, "step": 161330 }, { "epoch": 0.0767, "grad_norm": 0.04166737571358681, "learning_rate": 4.9363839455783104e-06, "loss": 0.0329, "step": 161340 }, { "epoch": 0.07675, "grad_norm": 0.04382137209177017, "learning_rate": 4.933918102404717e-06, "loss": 0.033, "step": 161350 }, { "epoch": 0.0768, "grad_norm": 0.04431990534067154, "learning_rate": 4.931452807830259e-06, "loss": 0.0345, "step": 161360 }, { "epoch": 0.07685, "grad_norm": 0.057345159351825714, "learning_rate": 4.928988061922349e-06, "loss": 0.0325, "step": 161370 }, { "epoch": 0.0769, "grad_norm": 0.048131149262189865, "learning_rate": 4.926523864748362e-06, "loss": 0.0322, "step": 161380 }, { "epoch": 0.07695, "grad_norm": 0.04350270330905914, "learning_rate": 4.924060216375672e-06, "loss": 0.0333, "step": 161390 }, { "epoch": 0.077, "grad_norm": 0.04744592681527138, "learning_rate": 4.921597116871629e-06, "loss": 0.0325, "step": 161400 }, { "epoch": 0.07705, "grad_norm": 0.043828800320625305, "learning_rate": 4.919134566303582e-06, "loss": 0.0319, "step": 161410 }, { "epoch": 0.0771, "grad_norm": 0.052718572318553925, "learning_rate": 4.916672564738847e-06, "loss": 0.0317, "step": 161420 }, { "epoch": 0.07715, "grad_norm": 0.04408946633338928, "learning_rate": 4.914211112244746e-06, "loss": 0.0329, "step": 161430 }, { "epoch": 0.0772, "grad_norm": 0.040304381400346756, "learning_rate": 4.9117502088885654e-06, "loss": 0.0319, "step": 161440 }, { "epoch": 0.07725, "grad_norm": 0.0368029847741127, "learning_rate": 4.909289854737581e-06, "loss": 0.0324, "step": 161450 }, { "epoch": 0.0773, "grad_norm": 0.040707238018512726, "learning_rate": 4.906830049859074e-06, "loss": 0.0304, "step": 161460 }, { "epoch": 0.07735, "grad_norm": 0.03966325893998146, "learning_rate": 4.9043707943202815e-06, "loss": 0.0314, "step": 161470 }, { "epoch": 0.0774, "grad_norm": 0.041993141174316406, "learning_rate": 4.901912088188443e-06, "loss": 0.0325, "step": 161480 }, { "epoch": 0.07745, "grad_norm": 0.03906678408384323, "learning_rate": 4.8994539315307705e-06, "loss": 0.0328, "step": 161490 }, { "epoch": 0.0775, "grad_norm": 0.03417155519127846, "learning_rate": 4.896996324414477e-06, "loss": 0.0316, "step": 161500 }, { "epoch": 0.07755, "grad_norm": 0.03894183784723282, "learning_rate": 4.894539266906764e-06, "loss": 0.0318, "step": 161510 }, { "epoch": 0.0776, "grad_norm": 0.047191206365823746, "learning_rate": 4.892082759074781e-06, "loss": 0.032, "step": 161520 }, { "epoch": 0.07765, "grad_norm": 0.03669824078679085, "learning_rate": 4.889626800985708e-06, "loss": 0.0316, "step": 161530 }, { "epoch": 0.0777, "grad_norm": 0.049897853285074234, "learning_rate": 4.8871713927066745e-06, "loss": 0.0326, "step": 161540 }, { "epoch": 0.07775, "grad_norm": 0.0515248104929924, "learning_rate": 4.884716534304829e-06, "loss": 0.0321, "step": 161550 }, { "epoch": 0.0778, "grad_norm": 0.04020237550139427, "learning_rate": 4.8822622258472755e-06, "loss": 0.0334, "step": 161560 }, { "epoch": 0.07785, "grad_norm": 0.0410909429192543, "learning_rate": 4.879808467401106e-06, "loss": 0.0328, "step": 161570 }, { "epoch": 0.0779, "grad_norm": 0.04371938109397888, "learning_rate": 4.877355259033423e-06, "loss": 0.034, "step": 161580 }, { "epoch": 0.07795, "grad_norm": 0.04580468684434891, "learning_rate": 4.874902600811287e-06, "loss": 0.0336, "step": 161590 }, { "epoch": 0.078, "grad_norm": 0.04708908125758171, "learning_rate": 4.872450492801753e-06, "loss": 0.0332, "step": 161600 }, { "epoch": 0.07805, "grad_norm": 0.04521835222840309, "learning_rate": 4.869998935071856e-06, "loss": 0.0335, "step": 161610 }, { "epoch": 0.0781, "grad_norm": 0.03202797472476959, "learning_rate": 4.867547927688623e-06, "loss": 0.0325, "step": 161620 }, { "epoch": 0.07815, "grad_norm": 0.05318346619606018, "learning_rate": 4.8650974707190765e-06, "loss": 0.034, "step": 161630 }, { "epoch": 0.0782, "grad_norm": 0.039052218198776245, "learning_rate": 4.8626475642301964e-06, "loss": 0.0319, "step": 161640 }, { "epoch": 0.07825, "grad_norm": 0.03733101114630699, "learning_rate": 4.860198208288969e-06, "loss": 0.0335, "step": 161650 }, { "epoch": 0.0783, "grad_norm": 0.0385499969124794, "learning_rate": 4.85774940296235e-06, "loss": 0.0331, "step": 161660 }, { "epoch": 0.07835, "grad_norm": 0.043056417256593704, "learning_rate": 4.855301148317301e-06, "loss": 0.0331, "step": 161670 }, { "epoch": 0.0784, "grad_norm": 0.046939749270677567, "learning_rate": 4.852853444420752e-06, "loss": 0.0324, "step": 161680 }, { "epoch": 0.07845, "grad_norm": 0.042191725224256516, "learning_rate": 4.850406291339612e-06, "loss": 0.0333, "step": 161690 }, { "epoch": 0.0785, "grad_norm": 0.04209177568554878, "learning_rate": 4.847959689140802e-06, "loss": 0.0318, "step": 161700 }, { "epoch": 0.07855, "grad_norm": 0.03851882740855217, "learning_rate": 4.845513637891197e-06, "loss": 0.0316, "step": 161710 }, { "epoch": 0.0786, "grad_norm": 0.03582446649670601, "learning_rate": 4.843068137657692e-06, "loss": 0.0351, "step": 161720 }, { "epoch": 0.07865, "grad_norm": 0.04331406578421593, "learning_rate": 4.840623188507115e-06, "loss": 0.0327, "step": 161730 }, { "epoch": 0.0787, "grad_norm": 0.0433661974966526, "learning_rate": 4.838178790506328e-06, "loss": 0.0322, "step": 161740 }, { "epoch": 0.07875, "grad_norm": 0.043242111802101135, "learning_rate": 4.835734943722167e-06, "loss": 0.0318, "step": 161750 }, { "epoch": 0.0788, "grad_norm": 0.04517514258623123, "learning_rate": 4.833291648221436e-06, "loss": 0.0334, "step": 161760 }, { "epoch": 0.07885, "grad_norm": 0.03985520452260971, "learning_rate": 4.830848904070934e-06, "loss": 0.0318, "step": 161770 }, { "epoch": 0.0789, "grad_norm": 0.03926889970898628, "learning_rate": 4.828406711337441e-06, "loss": 0.0326, "step": 161780 }, { "epoch": 0.07895, "grad_norm": 0.05253654345870018, "learning_rate": 4.825965070087735e-06, "loss": 0.0333, "step": 161790 }, { "epoch": 0.079, "grad_norm": 0.04187209531664848, "learning_rate": 4.823523980388564e-06, "loss": 0.0342, "step": 161800 }, { "epoch": 0.07905, "grad_norm": 0.05304960161447525, "learning_rate": 4.821083442306665e-06, "loss": 0.0332, "step": 161810 }, { "epoch": 0.0791, "grad_norm": 0.042362842708826065, "learning_rate": 4.818643455908767e-06, "loss": 0.0319, "step": 161820 }, { "epoch": 0.07915, "grad_norm": 0.044127993285655975, "learning_rate": 4.8162040212615695e-06, "loss": 0.033, "step": 161830 }, { "epoch": 0.0792, "grad_norm": 0.04105303809046745, "learning_rate": 4.8137651384317775e-06, "loss": 0.0319, "step": 161840 }, { "epoch": 0.07925, "grad_norm": 0.04213591665029526, "learning_rate": 4.8113268074860634e-06, "loss": 0.032, "step": 161850 }, { "epoch": 0.0793, "grad_norm": 0.04188720881938934, "learning_rate": 4.80888902849109e-06, "loss": 0.0324, "step": 161860 }, { "epoch": 0.07935, "grad_norm": 0.04474461078643799, "learning_rate": 4.806451801513498e-06, "loss": 0.0314, "step": 161870 }, { "epoch": 0.0794, "grad_norm": 0.05128539726138115, "learning_rate": 4.804015126619934e-06, "loss": 0.0333, "step": 161880 }, { "epoch": 0.07945, "grad_norm": 0.08220715075731277, "learning_rate": 4.80157900387701e-06, "loss": 0.0331, "step": 161890 }, { "epoch": 0.0795, "grad_norm": 0.04189962521195412, "learning_rate": 4.799143433351322e-06, "loss": 0.0319, "step": 161900 }, { "epoch": 0.07955, "grad_norm": 0.0444892942905426, "learning_rate": 4.796708415109469e-06, "loss": 0.0323, "step": 161910 }, { "epoch": 0.0796, "grad_norm": 0.041526589542627335, "learning_rate": 4.794273949218009e-06, "loss": 0.0325, "step": 161920 }, { "epoch": 0.07965, "grad_norm": 0.044409725815057755, "learning_rate": 4.791840035743525e-06, "loss": 0.0329, "step": 161930 }, { "epoch": 0.0797, "grad_norm": 0.037172187119722366, "learning_rate": 4.789406674752528e-06, "loss": 0.0334, "step": 161940 }, { "epoch": 0.07975, "grad_norm": 0.04312952980399132, "learning_rate": 4.786973866311559e-06, "loss": 0.0324, "step": 161950 }, { "epoch": 0.0798, "grad_norm": 0.05106675252318382, "learning_rate": 4.784541610487139e-06, "loss": 0.0349, "step": 161960 }, { "epoch": 0.07985, "grad_norm": 0.04369065910577774, "learning_rate": 4.7821099073457554e-06, "loss": 0.0332, "step": 161970 }, { "epoch": 0.0799, "grad_norm": 0.040114372968673706, "learning_rate": 4.779678756953893e-06, "loss": 0.0328, "step": 161980 }, { "epoch": 0.07995, "grad_norm": 0.04935789853334427, "learning_rate": 4.7772481593780084e-06, "loss": 0.0329, "step": 161990 }, { "epoch": 0.08, "grad_norm": 0.04562026634812355, "learning_rate": 4.7748181146845626e-06, "loss": 0.0338, "step": 162000 }, { "epoch": 0.08005, "grad_norm": 0.04056199640035629, "learning_rate": 4.772388622940005e-06, "loss": 0.0328, "step": 162010 }, { "epoch": 0.0801, "grad_norm": 0.03721732646226883, "learning_rate": 4.769959684210728e-06, "loss": 0.0342, "step": 162020 }, { "epoch": 0.08015, "grad_norm": 0.047930192202329636, "learning_rate": 4.767531298563163e-06, "loss": 0.0331, "step": 162030 }, { "epoch": 0.0802, "grad_norm": 0.043639253824949265, "learning_rate": 4.765103466063683e-06, "loss": 0.0324, "step": 162040 }, { "epoch": 0.08025, "grad_norm": 0.05039869248867035, "learning_rate": 4.762676186778678e-06, "loss": 0.0343, "step": 162050 }, { "epoch": 0.0803, "grad_norm": 0.05747959762811661, "learning_rate": 4.760249460774505e-06, "loss": 0.0343, "step": 162060 }, { "epoch": 0.08035, "grad_norm": 0.042816489934921265, "learning_rate": 4.757823288117502e-06, "loss": 0.0342, "step": 162070 }, { "epoch": 0.0804, "grad_norm": 0.04397408664226532, "learning_rate": 4.755397668874009e-06, "loss": 0.0335, "step": 162080 }, { "epoch": 0.08045, "grad_norm": 0.040711622685194016, "learning_rate": 4.752972603110342e-06, "loss": 0.033, "step": 162090 }, { "epoch": 0.0805, "grad_norm": 0.04967189207673073, "learning_rate": 4.750548090892795e-06, "loss": 0.0342, "step": 162100 }, { "epoch": 0.08055, "grad_norm": 0.046030282974243164, "learning_rate": 4.748124132287651e-06, "loss": 0.0344, "step": 162110 }, { "epoch": 0.0806, "grad_norm": 0.04683135077357292, "learning_rate": 4.745700727361191e-06, "loss": 0.0337, "step": 162120 }, { "epoch": 0.08065, "grad_norm": 0.04129005968570709, "learning_rate": 4.7432778761796554e-06, "loss": 0.0328, "step": 162130 }, { "epoch": 0.0807, "grad_norm": 0.04156533256173134, "learning_rate": 4.7408555788093e-06, "loss": 0.033, "step": 162140 }, { "epoch": 0.08075, "grad_norm": 0.046787671744823456, "learning_rate": 4.738433835316344e-06, "loss": 0.0331, "step": 162150 }, { "epoch": 0.0808, "grad_norm": 0.0452694408595562, "learning_rate": 4.7360126457669876e-06, "loss": 0.034, "step": 162160 }, { "epoch": 0.08085, "grad_norm": 0.05058307200670242, "learning_rate": 4.733592010227439e-06, "loss": 0.0327, "step": 162170 }, { "epoch": 0.0809, "grad_norm": 0.05251476168632507, "learning_rate": 4.73117192876387e-06, "loss": 0.0327, "step": 162180 }, { "epoch": 0.08095, "grad_norm": 0.04287990555167198, "learning_rate": 4.728752401442441e-06, "loss": 0.0325, "step": 162190 }, { "epoch": 0.081, "grad_norm": 0.039758551865816116, "learning_rate": 4.72633342832931e-06, "loss": 0.0327, "step": 162200 }, { "epoch": 0.08105, "grad_norm": 0.03664357587695122, "learning_rate": 4.723915009490601e-06, "loss": 0.0317, "step": 162210 }, { "epoch": 0.0811, "grad_norm": 0.0380556657910347, "learning_rate": 4.7214971449924535e-06, "loss": 0.0321, "step": 162220 }, { "epoch": 0.08115, "grad_norm": 0.04451322183012962, "learning_rate": 4.719079834900941e-06, "loss": 0.0321, "step": 162230 }, { "epoch": 0.0812, "grad_norm": 0.040741708129644394, "learning_rate": 4.716663079282174e-06, "loss": 0.0316, "step": 162240 }, { "epoch": 0.08125, "grad_norm": 0.0428212434053421, "learning_rate": 4.714246878202211e-06, "loss": 0.0321, "step": 162250 }, { "epoch": 0.0813, "grad_norm": 0.03966105729341507, "learning_rate": 4.711831231727123e-06, "loss": 0.0314, "step": 162260 }, { "epoch": 0.08135, "grad_norm": 0.04452461376786232, "learning_rate": 4.709416139922948e-06, "loss": 0.0321, "step": 162270 }, { "epoch": 0.0814, "grad_norm": 0.049640312790870667, "learning_rate": 4.707001602855707e-06, "loss": 0.0351, "step": 162280 }, { "epoch": 0.08145, "grad_norm": 0.05360918492078781, "learning_rate": 4.704587620591425e-06, "loss": 0.0322, "step": 162290 }, { "epoch": 0.0815, "grad_norm": 0.04629068076610565, "learning_rate": 4.70217419319609e-06, "loss": 0.0332, "step": 162300 }, { "epoch": 0.08155, "grad_norm": 0.04976212605834007, "learning_rate": 4.69976132073569e-06, "loss": 0.0338, "step": 162310 }, { "epoch": 0.0816, "grad_norm": 0.046391867101192474, "learning_rate": 4.697349003276183e-06, "loss": 0.0352, "step": 162320 }, { "epoch": 0.08165, "grad_norm": 0.04320315644145012, "learning_rate": 4.694937240883527e-06, "loss": 0.0324, "step": 162330 }, { "epoch": 0.0817, "grad_norm": 0.044961974024772644, "learning_rate": 4.692526033623662e-06, "loss": 0.0319, "step": 162340 }, { "epoch": 0.08175, "grad_norm": 0.046283621340990067, "learning_rate": 4.6901153815625095e-06, "loss": 0.0358, "step": 162350 }, { "epoch": 0.0818, "grad_norm": 0.05020609870553017, "learning_rate": 4.6877052847659695e-06, "loss": 0.0322, "step": 162360 }, { "epoch": 0.08185, "grad_norm": 0.03718167915940285, "learning_rate": 4.68529574329993e-06, "loss": 0.0312, "step": 162370 }, { "epoch": 0.0819, "grad_norm": 0.03635135665535927, "learning_rate": 4.682886757230282e-06, "loss": 0.0311, "step": 162380 }, { "epoch": 0.08195, "grad_norm": 0.03812402859330177, "learning_rate": 4.680478326622875e-06, "loss": 0.0318, "step": 162390 }, { "epoch": 0.082, "grad_norm": 0.04369513317942619, "learning_rate": 4.678070451543551e-06, "loss": 0.0336, "step": 162400 }, { "epoch": 0.08205, "grad_norm": 0.041224800050258636, "learning_rate": 4.675663132058153e-06, "loss": 0.0314, "step": 162410 }, { "epoch": 0.0821, "grad_norm": 0.04262978956103325, "learning_rate": 4.673256368232482e-06, "loss": 0.0315, "step": 162420 }, { "epoch": 0.08215, "grad_norm": 0.045711975544691086, "learning_rate": 4.670850160132359e-06, "loss": 0.0326, "step": 162430 }, { "epoch": 0.0822, "grad_norm": 0.05547575652599335, "learning_rate": 4.668444507823544e-06, "loss": 0.0319, "step": 162440 }, { "epoch": 0.08225, "grad_norm": 0.03910272195935249, "learning_rate": 4.666039411371817e-06, "loss": 0.0317, "step": 162450 }, { "epoch": 0.0823, "grad_norm": 0.03833167999982834, "learning_rate": 4.6636348708429394e-06, "loss": 0.0322, "step": 162460 }, { "epoch": 0.08235, "grad_norm": 0.04456144571304321, "learning_rate": 4.661230886302642e-06, "loss": 0.0341, "step": 162470 }, { "epoch": 0.0824, "grad_norm": 0.04156813398003578, "learning_rate": 4.658827457816656e-06, "loss": 0.0318, "step": 162480 }, { "epoch": 0.08245, "grad_norm": 0.03893129900097847, "learning_rate": 4.656424585450675e-06, "loss": 0.0332, "step": 162490 }, { "epoch": 0.0825, "grad_norm": 0.04058993235230446, "learning_rate": 4.654022269270411e-06, "loss": 0.0342, "step": 162500 }, { "epoch": 0.08255, "grad_norm": 0.04272441565990448, "learning_rate": 4.651620509341537e-06, "loss": 0.0328, "step": 162510 }, { "epoch": 0.0826, "grad_norm": 0.04472669959068298, "learning_rate": 4.649219305729705e-06, "loss": 0.0327, "step": 162520 }, { "epoch": 0.08265, "grad_norm": 0.04321610555052757, "learning_rate": 4.646818658500576e-06, "loss": 0.0322, "step": 162530 }, { "epoch": 0.0827, "grad_norm": 0.034631673246622086, "learning_rate": 4.644418567719774e-06, "loss": 0.0326, "step": 162540 }, { "epoch": 0.08275, "grad_norm": 0.04054947569966316, "learning_rate": 4.642019033452929e-06, "loss": 0.0325, "step": 162550 }, { "epoch": 0.0828, "grad_norm": 0.043881386518478394, "learning_rate": 4.639620055765634e-06, "loss": 0.0352, "step": 162560 }, { "epoch": 0.08285, "grad_norm": 0.04789385944604874, "learning_rate": 4.637221634723471e-06, "loss": 0.0334, "step": 162570 }, { "epoch": 0.0829, "grad_norm": 0.047630563378334045, "learning_rate": 4.634823770392027e-06, "loss": 0.0325, "step": 162580 }, { "epoch": 0.08295, "grad_norm": 0.043394166976213455, "learning_rate": 4.632426462836848e-06, "loss": 0.0352, "step": 162590 }, { "epoch": 0.083, "grad_norm": 0.04159924015402794, "learning_rate": 4.6300297121234795e-06, "loss": 0.0331, "step": 162600 }, { "epoch": 0.08305, "grad_norm": 0.04577537253499031, "learning_rate": 4.627633518317439e-06, "loss": 0.0332, "step": 162610 }, { "epoch": 0.0831, "grad_norm": 0.04137266427278519, "learning_rate": 4.625237881484251e-06, "loss": 0.0331, "step": 162620 }, { "epoch": 0.08315, "grad_norm": 0.05932014808058739, "learning_rate": 4.622842801689397e-06, "loss": 0.034, "step": 162630 }, { "epoch": 0.0832, "grad_norm": 0.044636886566877365, "learning_rate": 4.620448278998374e-06, "loss": 0.0352, "step": 162640 }, { "epoch": 0.08325, "grad_norm": 0.04746469110250473, "learning_rate": 4.618054313476639e-06, "loss": 0.034, "step": 162650 }, { "epoch": 0.0833, "grad_norm": 0.04262920096516609, "learning_rate": 4.615660905189633e-06, "loss": 0.0352, "step": 162660 }, { "epoch": 0.08335, "grad_norm": 0.04448793828487396, "learning_rate": 4.6132680542028075e-06, "loss": 0.0339, "step": 162670 }, { "epoch": 0.0834, "grad_norm": 0.061389073729515076, "learning_rate": 4.610875760581573e-06, "loss": 0.0351, "step": 162680 }, { "epoch": 0.08345, "grad_norm": 0.047155145555734634, "learning_rate": 4.608484024391338e-06, "loss": 0.0363, "step": 162690 }, { "epoch": 0.0835, "grad_norm": 0.05749020725488663, "learning_rate": 4.6060928456974825e-06, "loss": 0.0354, "step": 162700 }, { "epoch": 0.08355, "grad_norm": 0.04922579973936081, "learning_rate": 4.603702224565384e-06, "loss": 0.0324, "step": 162710 }, { "epoch": 0.0836, "grad_norm": 0.048341743648052216, "learning_rate": 4.6013121610604196e-06, "loss": 0.0359, "step": 162720 }, { "epoch": 0.08365, "grad_norm": 0.05521691218018532, "learning_rate": 4.598922655247906e-06, "loss": 0.0341, "step": 162730 }, { "epoch": 0.0837, "grad_norm": 0.061152759939432144, "learning_rate": 4.596533707193185e-06, "loss": 0.0356, "step": 162740 }, { "epoch": 0.08375, "grad_norm": 0.05486699938774109, "learning_rate": 4.594145316961562e-06, "loss": 0.0361, "step": 162750 }, { "epoch": 0.0838, "grad_norm": 0.0427870899438858, "learning_rate": 4.591757484618348e-06, "loss": 0.0343, "step": 162760 }, { "epoch": 0.08385, "grad_norm": 0.05687882378697395, "learning_rate": 4.589370210228816e-06, "loss": 0.0374, "step": 162770 }, { "epoch": 0.0839, "grad_norm": 0.04917405545711517, "learning_rate": 4.5869834938582295e-06, "loss": 0.0335, "step": 162780 }, { "epoch": 0.08395, "grad_norm": 0.04779008403420448, "learning_rate": 4.58459733557185e-06, "loss": 0.034, "step": 162790 }, { "epoch": 0.084, "grad_norm": 0.04037446901202202, "learning_rate": 4.582211735434911e-06, "loss": 0.034, "step": 162800 }, { "epoch": 0.08405, "grad_norm": 0.038166724145412445, "learning_rate": 4.579826693512632e-06, "loss": 0.0332, "step": 162810 }, { "epoch": 0.0841, "grad_norm": 0.04677105322480202, "learning_rate": 4.577442209870214e-06, "loss": 0.0341, "step": 162820 }, { "epoch": 0.08415, "grad_norm": 0.037814054638147354, "learning_rate": 4.575058284572853e-06, "loss": 0.0343, "step": 162830 }, { "epoch": 0.0842, "grad_norm": 0.048276208341121674, "learning_rate": 4.572674917685732e-06, "loss": 0.0336, "step": 162840 }, { "epoch": 0.08425, "grad_norm": 0.04171053692698479, "learning_rate": 4.570292109274005e-06, "loss": 0.0338, "step": 162850 }, { "epoch": 0.0843, "grad_norm": 0.04667214676737785, "learning_rate": 4.5679098594028135e-06, "loss": 0.0335, "step": 162860 }, { "epoch": 0.08435, "grad_norm": 0.04163886606693268, "learning_rate": 4.5655281681372865e-06, "loss": 0.0341, "step": 162870 }, { "epoch": 0.0844, "grad_norm": 0.043504420667886734, "learning_rate": 4.56314703554255e-06, "loss": 0.0376, "step": 162880 }, { "epoch": 0.08445, "grad_norm": 0.040289878845214844, "learning_rate": 4.5607664616836935e-06, "loss": 0.0327, "step": 162890 }, { "epoch": 0.0845, "grad_norm": 0.0437614843249321, "learning_rate": 4.558386446625798e-06, "loss": 0.034, "step": 162900 }, { "epoch": 0.08455, "grad_norm": 0.038885943591594696, "learning_rate": 4.5560069904339445e-06, "loss": 0.0332, "step": 162910 }, { "epoch": 0.0846, "grad_norm": 0.04464210197329521, "learning_rate": 4.553628093173173e-06, "loss": 0.033, "step": 162920 }, { "epoch": 0.08465, "grad_norm": 0.03692776337265968, "learning_rate": 4.551249754908541e-06, "loss": 0.0318, "step": 162930 }, { "epoch": 0.0847, "grad_norm": 0.039213284850120544, "learning_rate": 4.548871975705043e-06, "loss": 0.033, "step": 162940 }, { "epoch": 0.08475, "grad_norm": 0.038259051740169525, "learning_rate": 4.546494755627703e-06, "loss": 0.0313, "step": 162950 }, { "epoch": 0.0848, "grad_norm": 0.0368780642747879, "learning_rate": 4.54411809474152e-06, "loss": 0.0321, "step": 162960 }, { "epoch": 0.08485, "grad_norm": 0.03567549213767052, "learning_rate": 4.541741993111465e-06, "loss": 0.0313, "step": 162970 }, { "epoch": 0.0849, "grad_norm": 0.043377235531806946, "learning_rate": 4.539366450802496e-06, "loss": 0.0325, "step": 162980 }, { "epoch": 0.08495, "grad_norm": 0.05000419542193413, "learning_rate": 4.5369914678795535e-06, "loss": 0.0317, "step": 162990 }, { "epoch": 0.085, "grad_norm": 0.04062369838356972, "learning_rate": 4.534617044407586e-06, "loss": 0.0318, "step": 163000 }, { "epoch": 0.08505, "grad_norm": 0.03969515115022659, "learning_rate": 4.532243180451498e-06, "loss": 0.0329, "step": 163010 }, { "epoch": 0.0851, "grad_norm": 0.0338253490626812, "learning_rate": 4.529869876076187e-06, "loss": 0.0323, "step": 163020 }, { "epoch": 0.08515, "grad_norm": 0.03509035333991051, "learning_rate": 4.52749713134655e-06, "loss": 0.0331, "step": 163030 }, { "epoch": 0.0852, "grad_norm": 0.04538458585739136, "learning_rate": 4.525124946327444e-06, "loss": 0.0325, "step": 163040 }, { "epoch": 0.08525, "grad_norm": 0.045619990676641464, "learning_rate": 4.522753321083734e-06, "loss": 0.0348, "step": 163050 }, { "epoch": 0.0853, "grad_norm": 0.0390336811542511, "learning_rate": 4.5203822556802586e-06, "loss": 0.0327, "step": 163060 }, { "epoch": 0.08535, "grad_norm": 0.04070988669991493, "learning_rate": 4.518011750181836e-06, "loss": 0.0343, "step": 163070 }, { "epoch": 0.0854, "grad_norm": 0.04247087240219116, "learning_rate": 4.51564180465327e-06, "loss": 0.0344, "step": 163080 }, { "epoch": 0.08545, "grad_norm": 0.08123894035816193, "learning_rate": 4.51327241915937e-06, "loss": 0.0335, "step": 163090 }, { "epoch": 0.0855, "grad_norm": 0.05599669739603996, "learning_rate": 4.510903593764906e-06, "loss": 0.0319, "step": 163100 }, { "epoch": 0.08555, "grad_norm": 0.03611220419406891, "learning_rate": 4.508535328534632e-06, "loss": 0.032, "step": 163110 }, { "epoch": 0.0856, "grad_norm": 0.04675504192709923, "learning_rate": 4.506167623533311e-06, "loss": 0.0332, "step": 163120 }, { "epoch": 0.08565, "grad_norm": 0.03999366983771324, "learning_rate": 4.5038004788256625e-06, "loss": 0.0316, "step": 163130 }, { "epoch": 0.0857, "grad_norm": 0.04325946420431137, "learning_rate": 4.50143389447642e-06, "loss": 0.0325, "step": 163140 }, { "epoch": 0.08575, "grad_norm": 0.03891367092728615, "learning_rate": 4.4990678705502635e-06, "loss": 0.0327, "step": 163150 }, { "epoch": 0.0858, "grad_norm": 0.0469384640455246, "learning_rate": 4.496702407111888e-06, "loss": 0.0319, "step": 163160 }, { "epoch": 0.08585, "grad_norm": 0.04036073386669159, "learning_rate": 4.494337504225971e-06, "loss": 0.0314, "step": 163170 }, { "epoch": 0.0859, "grad_norm": 0.03913462162017822, "learning_rate": 4.491973161957167e-06, "loss": 0.0325, "step": 163180 }, { "epoch": 0.08595, "grad_norm": 0.039492640644311905, "learning_rate": 4.4896093803701076e-06, "loss": 0.0318, "step": 163190 }, { "epoch": 0.086, "grad_norm": 0.043633848428726196, "learning_rate": 4.487246159529418e-06, "loss": 0.031, "step": 163200 }, { "epoch": 0.08605, "grad_norm": 0.03938918560743332, "learning_rate": 4.484883499499712e-06, "loss": 0.0338, "step": 163210 }, { "epoch": 0.0861, "grad_norm": 0.036930423229932785, "learning_rate": 4.482521400345599e-06, "loss": 0.032, "step": 163220 }, { "epoch": 0.08615, "grad_norm": 0.0479891262948513, "learning_rate": 4.4801598621316274e-06, "loss": 0.0336, "step": 163230 }, { "epoch": 0.0862, "grad_norm": 0.04100598394870758, "learning_rate": 4.477798884922382e-06, "loss": 0.0351, "step": 163240 }, { "epoch": 0.08625, "grad_norm": 0.0451224111020565, "learning_rate": 4.4754384687824e-06, "loss": 0.0335, "step": 163250 }, { "epoch": 0.0863, "grad_norm": 0.03889250010251999, "learning_rate": 4.473078613776227e-06, "loss": 0.035, "step": 163260 }, { "epoch": 0.08635, "grad_norm": 0.04930736497044563, "learning_rate": 4.470719319968372e-06, "loss": 0.0342, "step": 163270 }, { "epoch": 0.0864, "grad_norm": 0.04826205223798752, "learning_rate": 4.4683605874233315e-06, "loss": 0.0338, "step": 163280 }, { "epoch": 0.08645, "grad_norm": 0.04418352618813515, "learning_rate": 4.466002416205606e-06, "loss": 0.0324, "step": 163290 }, { "epoch": 0.0865, "grad_norm": 0.04168399050831795, "learning_rate": 4.4636448063796605e-06, "loss": 0.0321, "step": 163300 }, { "epoch": 0.08655, "grad_norm": 0.044045589864254, "learning_rate": 4.461287758009949e-06, "loss": 0.0341, "step": 163310 }, { "epoch": 0.0866, "grad_norm": 0.03900770843029022, "learning_rate": 4.45893127116091e-06, "loss": 0.0329, "step": 163320 }, { "epoch": 0.08665, "grad_norm": 0.0450579933822155, "learning_rate": 4.45657534589698e-06, "loss": 0.0336, "step": 163330 }, { "epoch": 0.0867, "grad_norm": 0.04815605282783508, "learning_rate": 4.454219982282554e-06, "loss": 0.0338, "step": 163340 }, { "epoch": 0.08675, "grad_norm": 0.043602343648672104, "learning_rate": 4.451865180382042e-06, "loss": 0.0337, "step": 163350 }, { "epoch": 0.0868, "grad_norm": 0.044232890009880066, "learning_rate": 4.449510940259819e-06, "loss": 0.0341, "step": 163360 }, { "epoch": 0.08685, "grad_norm": 0.041185762733221054, "learning_rate": 4.447157261980237e-06, "loss": 0.0337, "step": 163370 }, { "epoch": 0.0869, "grad_norm": 0.043885987251996994, "learning_rate": 4.444804145607659e-06, "loss": 0.0331, "step": 163380 }, { "epoch": 0.08695, "grad_norm": 0.04258694499731064, "learning_rate": 4.442451591206417e-06, "loss": 0.0328, "step": 163390 }, { "epoch": 0.087, "grad_norm": 0.04276786372065544, "learning_rate": 4.440099598840816e-06, "loss": 0.0336, "step": 163400 }, { "epoch": 0.08705, "grad_norm": 0.04454897344112396, "learning_rate": 4.437748168575176e-06, "loss": 0.0334, "step": 163410 }, { "epoch": 0.0871, "grad_norm": 0.039518099278211594, "learning_rate": 4.43539730047377e-06, "loss": 0.0322, "step": 163420 }, { "epoch": 0.08715, "grad_norm": 0.04390877112746239, "learning_rate": 4.433046994600889e-06, "loss": 0.0335, "step": 163430 }, { "epoch": 0.0872, "grad_norm": 0.03864302486181259, "learning_rate": 4.4306972510207625e-06, "loss": 0.0318, "step": 163440 }, { "epoch": 0.08725, "grad_norm": 0.04346649348735809, "learning_rate": 4.428348069797653e-06, "loss": 0.0339, "step": 163450 }, { "epoch": 0.0873, "grad_norm": 0.05340910330414772, "learning_rate": 4.425999450995771e-06, "loss": 0.0348, "step": 163460 }, { "epoch": 0.08735, "grad_norm": 0.04619944468140602, "learning_rate": 4.423651394679343e-06, "loss": 0.0316, "step": 163470 }, { "epoch": 0.0874, "grad_norm": 0.0388994924724102, "learning_rate": 4.421303900912555e-06, "loss": 0.0323, "step": 163480 }, { "epoch": 0.08745, "grad_norm": 0.043405961245298386, "learning_rate": 4.418956969759583e-06, "loss": 0.0348, "step": 163490 }, { "epoch": 0.0875, "grad_norm": 0.04131999611854553, "learning_rate": 4.416610601284599e-06, "loss": 0.0321, "step": 163500 }, { "epoch": 0.08755, "grad_norm": 0.03781171143054962, "learning_rate": 4.414264795551748e-06, "loss": 0.0328, "step": 163510 }, { "epoch": 0.0876, "grad_norm": 0.040955789387226105, "learning_rate": 4.411919552625165e-06, "loss": 0.0339, "step": 163520 }, { "epoch": 0.08765, "grad_norm": 0.040484536439180374, "learning_rate": 4.409574872568961e-06, "loss": 0.0349, "step": 163530 }, { "epoch": 0.0877, "grad_norm": 0.04398057982325554, "learning_rate": 4.407230755447245e-06, "loss": 0.0346, "step": 163540 }, { "epoch": 0.08775, "grad_norm": 0.038379691541194916, "learning_rate": 4.404887201324107e-06, "loss": 0.0326, "step": 163550 }, { "epoch": 0.0878, "grad_norm": 0.04793104901909828, "learning_rate": 4.402544210263618e-06, "loss": 0.0342, "step": 163560 }, { "epoch": 0.08785, "grad_norm": 0.04331078752875328, "learning_rate": 4.400201782329833e-06, "loss": 0.0329, "step": 163570 }, { "epoch": 0.0879, "grad_norm": 0.036909881979227066, "learning_rate": 4.3978599175867855e-06, "loss": 0.0324, "step": 163580 }, { "epoch": 0.08795, "grad_norm": 0.038852840662002563, "learning_rate": 4.395518616098513e-06, "loss": 0.0332, "step": 163590 }, { "epoch": 0.088, "grad_norm": 0.04239456355571747, "learning_rate": 4.393177877929022e-06, "loss": 0.033, "step": 163600 }, { "epoch": 0.08805, "grad_norm": 0.036402180790901184, "learning_rate": 4.390837703142298e-06, "loss": 0.0368, "step": 163610 }, { "epoch": 0.0881, "grad_norm": 0.04131398722529411, "learning_rate": 4.388498091802337e-06, "loss": 0.033, "step": 163620 }, { "epoch": 0.08815, "grad_norm": 0.04325365275144577, "learning_rate": 4.386159043973087e-06, "loss": 0.0324, "step": 163630 }, { "epoch": 0.0882, "grad_norm": 0.04191657900810242, "learning_rate": 4.3838205597185186e-06, "loss": 0.0317, "step": 163640 }, { "epoch": 0.08825, "grad_norm": 0.04028930142521858, "learning_rate": 4.381482639102538e-06, "loss": 0.0334, "step": 163650 }, { "epoch": 0.0883, "grad_norm": 0.0353885218501091, "learning_rate": 4.379145282189076e-06, "loss": 0.0324, "step": 163660 }, { "epoch": 0.08835, "grad_norm": 0.037858471274375916, "learning_rate": 4.376808489042042e-06, "loss": 0.0319, "step": 163670 }, { "epoch": 0.0884, "grad_norm": 0.03909361734986305, "learning_rate": 4.374472259725315e-06, "loss": 0.0323, "step": 163680 }, { "epoch": 0.08845, "grad_norm": 0.03300325945019722, "learning_rate": 4.372136594302767e-06, "loss": 0.0315, "step": 163690 }, { "epoch": 0.0885, "grad_norm": 0.04488392919301987, "learning_rate": 4.369801492838249e-06, "loss": 0.032, "step": 163700 }, { "epoch": 0.08855, "grad_norm": 0.04392409324645996, "learning_rate": 4.367466955395616e-06, "loss": 0.0326, "step": 163710 }, { "epoch": 0.0886, "grad_norm": 0.041952718049287796, "learning_rate": 4.3651329820386835e-06, "loss": 0.0325, "step": 163720 }, { "epoch": 0.08865, "grad_norm": 0.035865992307662964, "learning_rate": 4.362799572831258e-06, "loss": 0.0317, "step": 163730 }, { "epoch": 0.0887, "grad_norm": 0.042515747249126434, "learning_rate": 4.360466727837146e-06, "loss": 0.0318, "step": 163740 }, { "epoch": 0.08875, "grad_norm": 0.03912314772605896, "learning_rate": 4.35813444712011e-06, "loss": 0.0339, "step": 163750 }, { "epoch": 0.0888, "grad_norm": 0.041423387825489044, "learning_rate": 4.355802730743932e-06, "loss": 0.0322, "step": 163760 }, { "epoch": 0.08885, "grad_norm": 0.04592496156692505, "learning_rate": 4.3534715787723525e-06, "loss": 0.0325, "step": 163770 }, { "epoch": 0.0889, "grad_norm": 0.04569490626454353, "learning_rate": 4.3511409912690955e-06, "loss": 0.0336, "step": 163780 }, { "epoch": 0.08895, "grad_norm": 0.04563876986503601, "learning_rate": 4.348810968297895e-06, "loss": 0.0333, "step": 163790 }, { "epoch": 0.089, "grad_norm": 0.04631095752120018, "learning_rate": 4.346481509922443e-06, "loss": 0.0334, "step": 163800 }, { "epoch": 0.08905, "grad_norm": 0.04735285043716431, "learning_rate": 4.344152616206426e-06, "loss": 0.0329, "step": 163810 }, { "epoch": 0.0891, "grad_norm": 0.04486202821135521, "learning_rate": 4.341824287213511e-06, "loss": 0.0325, "step": 163820 }, { "epoch": 0.08915, "grad_norm": 0.04026487097144127, "learning_rate": 4.3394965230073665e-06, "loss": 0.0329, "step": 163830 }, { "epoch": 0.0892, "grad_norm": 0.03889838606119156, "learning_rate": 4.337169323651619e-06, "loss": 0.0344, "step": 163840 }, { "epoch": 0.08925, "grad_norm": 0.047240667045116425, "learning_rate": 4.334842689209903e-06, "loss": 0.0336, "step": 163850 }, { "epoch": 0.0893, "grad_norm": 0.04033409059047699, "learning_rate": 4.332516619745828e-06, "loss": 0.0347, "step": 163860 }, { "epoch": 0.08935, "grad_norm": 0.05238153040409088, "learning_rate": 4.330191115322973e-06, "loss": 0.034, "step": 163870 }, { "epoch": 0.0894, "grad_norm": 0.05487070977687836, "learning_rate": 4.327866176004938e-06, "loss": 0.0341, "step": 163880 }, { "epoch": 0.08945, "grad_norm": 0.05616867542266846, "learning_rate": 4.325541801855276e-06, "loss": 0.0332, "step": 163890 }, { "epoch": 0.0895, "grad_norm": 0.040924083441495895, "learning_rate": 4.323217992937531e-06, "loss": 0.037, "step": 163900 }, { "epoch": 0.08955, "grad_norm": 0.04166407510638237, "learning_rate": 4.320894749315235e-06, "loss": 0.0334, "step": 163910 }, { "epoch": 0.0896, "grad_norm": 0.03643600270152092, "learning_rate": 4.3185720710519075e-06, "loss": 0.0338, "step": 163920 }, { "epoch": 0.08965, "grad_norm": 0.03127395734190941, "learning_rate": 4.316249958211061e-06, "loss": 0.0316, "step": 163930 }, { "epoch": 0.0897, "grad_norm": 0.036043621599674225, "learning_rate": 4.313928410856158e-06, "loss": 0.0315, "step": 163940 }, { "epoch": 0.08975, "grad_norm": 0.03948104754090309, "learning_rate": 4.311607429050687e-06, "loss": 0.0323, "step": 163950 }, { "epoch": 0.0898, "grad_norm": 0.03400971367955208, "learning_rate": 4.30928701285809e-06, "loss": 0.0316, "step": 163960 }, { "epoch": 0.08985, "grad_norm": 0.037716370075941086, "learning_rate": 4.306967162341818e-06, "loss": 0.0331, "step": 163970 }, { "epoch": 0.0899, "grad_norm": 0.03730543330311775, "learning_rate": 4.304647877565293e-06, "loss": 0.032, "step": 163980 }, { "epoch": 0.08995, "grad_norm": 0.039050523191690445, "learning_rate": 4.302329158591911e-06, "loss": 0.0309, "step": 163990 }, { "epoch": 0.09, "grad_norm": 0.038181960582733154, "learning_rate": 4.3000110054850826e-06, "loss": 0.0312, "step": 164000 }, { "epoch": 0.09005, "grad_norm": 0.03886967524886131, "learning_rate": 4.297693418308177e-06, "loss": 0.0338, "step": 164010 }, { "epoch": 0.0901, "grad_norm": 0.03755145147442818, "learning_rate": 4.295376397124554e-06, "loss": 0.0316, "step": 164020 }, { "epoch": 0.09015, "grad_norm": 0.03620903566479683, "learning_rate": 4.293059941997557e-06, "loss": 0.0324, "step": 164030 }, { "epoch": 0.0902, "grad_norm": 0.04417108744382858, "learning_rate": 4.29074405299052e-06, "loss": 0.0318, "step": 164040 }, { "epoch": 0.09025, "grad_norm": 0.04349370300769806, "learning_rate": 4.288428730166768e-06, "loss": 0.0335, "step": 164050 }, { "epoch": 0.0903, "grad_norm": 0.04132212698459625, "learning_rate": 4.286113973589595e-06, "loss": 0.0329, "step": 164060 }, { "epoch": 0.09035, "grad_norm": 0.04030095413327217, "learning_rate": 4.283799783322282e-06, "loss": 0.0328, "step": 164070 }, { "epoch": 0.0904, "grad_norm": 0.044824227690696716, "learning_rate": 4.2814861594280946e-06, "loss": 0.0339, "step": 164080 }, { "epoch": 0.09045, "grad_norm": 0.04444180056452751, "learning_rate": 4.279173101970296e-06, "loss": 0.0331, "step": 164090 }, { "epoch": 0.0905, "grad_norm": 0.03922867774963379, "learning_rate": 4.276860611012124e-06, "loss": 0.0316, "step": 164100 }, { "epoch": 0.09055, "grad_norm": 0.03677331656217575, "learning_rate": 4.274548686616789e-06, "loss": 0.0335, "step": 164110 }, { "epoch": 0.0906, "grad_norm": 0.036875706166028976, "learning_rate": 4.272237328847514e-06, "loss": 0.0321, "step": 164120 }, { "epoch": 0.09065, "grad_norm": 0.035596031695604324, "learning_rate": 4.269926537767477e-06, "loss": 0.0327, "step": 164130 }, { "epoch": 0.0907, "grad_norm": 0.04016328975558281, "learning_rate": 4.267616313439873e-06, "loss": 0.0322, "step": 164140 }, { "epoch": 0.09075, "grad_norm": 0.05132593587040901, "learning_rate": 4.26530665592784e-06, "loss": 0.0347, "step": 164150 }, { "epoch": 0.0908, "grad_norm": 0.041420597583055496, "learning_rate": 4.2629975652945295e-06, "loss": 0.032, "step": 164160 }, { "epoch": 0.09085, "grad_norm": 0.047631099820137024, "learning_rate": 4.260689041603083e-06, "loss": 0.0325, "step": 164170 }, { "epoch": 0.0909, "grad_norm": 0.044410839676856995, "learning_rate": 4.2583810849166076e-06, "loss": 0.0316, "step": 164180 }, { "epoch": 0.09095, "grad_norm": 0.054925624281167984, "learning_rate": 4.2560736952981986e-06, "loss": 0.0329, "step": 164190 }, { "epoch": 0.091, "grad_norm": 0.04730357602238655, "learning_rate": 4.253766872810938e-06, "loss": 0.0339, "step": 164200 }, { "epoch": 0.09105, "grad_norm": 0.04951634630560875, "learning_rate": 4.251460617517903e-06, "loss": 0.0336, "step": 164210 }, { "epoch": 0.0911, "grad_norm": 0.05603445693850517, "learning_rate": 4.249154929482138e-06, "loss": 0.0339, "step": 164220 }, { "epoch": 0.09115, "grad_norm": 0.045728061348199844, "learning_rate": 4.246849808766676e-06, "loss": 0.0332, "step": 164230 }, { "epoch": 0.0912, "grad_norm": 0.040378279983997345, "learning_rate": 4.244545255434551e-06, "loss": 0.0335, "step": 164240 }, { "epoch": 0.09125, "grad_norm": 0.043054450303316116, "learning_rate": 4.242241269548752e-06, "loss": 0.0325, "step": 164250 }, { "epoch": 0.0913, "grad_norm": 0.03699095547199249, "learning_rate": 4.239937851172287e-06, "loss": 0.0326, "step": 164260 }, { "epoch": 0.09135, "grad_norm": 0.044853974133729935, "learning_rate": 4.237635000368123e-06, "loss": 0.0333, "step": 164270 }, { "epoch": 0.0914, "grad_norm": 0.037403397262096405, "learning_rate": 4.235332717199217e-06, "loss": 0.0327, "step": 164280 }, { "epoch": 0.09145, "grad_norm": 0.04466768726706505, "learning_rate": 4.233031001728508e-06, "loss": 0.0342, "step": 164290 }, { "epoch": 0.0915, "grad_norm": 0.04541900008916855, "learning_rate": 4.230729854018933e-06, "loss": 0.0344, "step": 164300 }, { "epoch": 0.09155, "grad_norm": 0.04904589429497719, "learning_rate": 4.228429274133403e-06, "loss": 0.0351, "step": 164310 }, { "epoch": 0.0916, "grad_norm": 0.03982043266296387, "learning_rate": 4.226129262134807e-06, "loss": 0.0335, "step": 164320 }, { "epoch": 0.09165, "grad_norm": 0.04510324448347092, "learning_rate": 4.2238298180860396e-06, "loss": 0.033, "step": 164330 }, { "epoch": 0.0917, "grad_norm": 0.05479760095477104, "learning_rate": 4.221530942049953e-06, "loss": 0.0336, "step": 164340 }, { "epoch": 0.09175, "grad_norm": 0.04232299327850342, "learning_rate": 4.219232634089415e-06, "loss": 0.0324, "step": 164350 }, { "epoch": 0.0918, "grad_norm": 0.03701121360063553, "learning_rate": 4.2169348942672406e-06, "loss": 0.0317, "step": 164360 }, { "epoch": 0.09185, "grad_norm": 0.04614982753992081, "learning_rate": 4.214637722646256e-06, "loss": 0.0331, "step": 164370 }, { "epoch": 0.0919, "grad_norm": 0.04048416391015053, "learning_rate": 4.212341119289273e-06, "loss": 0.0336, "step": 164380 }, { "epoch": 0.09195, "grad_norm": 0.040577538311481476, "learning_rate": 4.210045084259076e-06, "loss": 0.0324, "step": 164390 }, { "epoch": 0.092, "grad_norm": 0.040321774780750275, "learning_rate": 4.207749617618437e-06, "loss": 0.0336, "step": 164400 }, { "epoch": 0.09205, "grad_norm": 0.04228787124156952, "learning_rate": 4.205454719430105e-06, "loss": 0.0325, "step": 164410 }, { "epoch": 0.0921, "grad_norm": 0.036766789853572845, "learning_rate": 4.203160389756827e-06, "loss": 0.0332, "step": 164420 }, { "epoch": 0.09215, "grad_norm": 0.03712713345885277, "learning_rate": 4.200866628661346e-06, "loss": 0.032, "step": 164430 }, { "epoch": 0.0922, "grad_norm": 0.03988231346011162, "learning_rate": 4.198573436206344e-06, "loss": 0.0325, "step": 164440 }, { "epoch": 0.09225, "grad_norm": 0.03686943277716637, "learning_rate": 4.196280812454534e-06, "loss": 0.0318, "step": 164450 }, { "epoch": 0.0923, "grad_norm": 0.04354991391301155, "learning_rate": 4.193988757468587e-06, "loss": 0.0316, "step": 164460 }, { "epoch": 0.09235, "grad_norm": 0.043063972145318985, "learning_rate": 4.191697271311176e-06, "loss": 0.0329, "step": 164470 }, { "epoch": 0.0924, "grad_norm": 0.045166417956352234, "learning_rate": 4.1894063540449445e-06, "loss": 0.0311, "step": 164480 }, { "epoch": 0.09245, "grad_norm": 0.041872259229421616, "learning_rate": 4.18711600573252e-06, "loss": 0.0324, "step": 164490 }, { "epoch": 0.0925, "grad_norm": 0.04205867648124695, "learning_rate": 4.184826226436528e-06, "loss": 0.034, "step": 164500 }, { "epoch": 0.09255, "grad_norm": 0.04708458483219147, "learning_rate": 4.18253701621957e-06, "loss": 0.0329, "step": 164510 }, { "epoch": 0.0926, "grad_norm": 0.0457821786403656, "learning_rate": 4.180248375144227e-06, "loss": 0.0333, "step": 164520 }, { "epoch": 0.09265, "grad_norm": 0.04448466747999191, "learning_rate": 4.177960303273068e-06, "loss": 0.0333, "step": 164530 }, { "epoch": 0.0927, "grad_norm": 0.03960685804486275, "learning_rate": 4.175672800668656e-06, "loss": 0.0324, "step": 164540 }, { "epoch": 0.09275, "grad_norm": 0.04607471451163292, "learning_rate": 4.173385867393522e-06, "loss": 0.0322, "step": 164550 }, { "epoch": 0.0928, "grad_norm": 0.053429655730724335, "learning_rate": 4.171099503510198e-06, "loss": 0.0347, "step": 164560 }, { "epoch": 0.09285, "grad_norm": 0.055426646023988724, "learning_rate": 4.16881370908119e-06, "loss": 0.0339, "step": 164570 }, { "epoch": 0.0929, "grad_norm": 0.039910197257995605, "learning_rate": 4.166528484168986e-06, "loss": 0.0325, "step": 164580 }, { "epoch": 0.09295, "grad_norm": 0.05889817699790001, "learning_rate": 4.164243828836067e-06, "loss": 0.0335, "step": 164590 }, { "epoch": 0.093, "grad_norm": 0.04975072294473648, "learning_rate": 4.161959743144897e-06, "loss": 0.0321, "step": 164600 }, { "epoch": 0.09305, "grad_norm": 0.04804937541484833, "learning_rate": 4.159676227157913e-06, "loss": 0.033, "step": 164610 }, { "epoch": 0.0931, "grad_norm": 0.042053185403347015, "learning_rate": 4.1573932809375574e-06, "loss": 0.0335, "step": 164620 }, { "epoch": 0.09315, "grad_norm": 0.04230290278792381, "learning_rate": 4.155110904546233e-06, "loss": 0.0317, "step": 164630 }, { "epoch": 0.0932, "grad_norm": 0.036810606718063354, "learning_rate": 4.1528290980463596e-06, "loss": 0.035, "step": 164640 }, { "epoch": 0.09325, "grad_norm": 0.04115741327404976, "learning_rate": 4.150547861500293e-06, "loss": 0.0315, "step": 164650 }, { "epoch": 0.0933, "grad_norm": 0.04060962423682213, "learning_rate": 4.14826719497042e-06, "loss": 0.0321, "step": 164660 }, { "epoch": 0.09335, "grad_norm": 0.04310805723071098, "learning_rate": 4.145987098519083e-06, "loss": 0.0322, "step": 164670 }, { "epoch": 0.0934, "grad_norm": 0.0482821948826313, "learning_rate": 4.14370757220863e-06, "loss": 0.0338, "step": 164680 }, { "epoch": 0.09345, "grad_norm": 0.03712339699268341, "learning_rate": 4.141428616101378e-06, "loss": 0.0316, "step": 164690 }, { "epoch": 0.0935, "grad_norm": 0.033921223133802414, "learning_rate": 4.139150230259625e-06, "loss": 0.0326, "step": 164700 }, { "epoch": 0.09355, "grad_norm": 0.03595450893044472, "learning_rate": 4.136872414745674e-06, "loss": 0.0323, "step": 164710 }, { "epoch": 0.0936, "grad_norm": 0.0412093885242939, "learning_rate": 4.134595169621791e-06, "loss": 0.0317, "step": 164720 }, { "epoch": 0.09365, "grad_norm": 0.04304290562868118, "learning_rate": 4.132318494950241e-06, "loss": 0.0327, "step": 164730 }, { "epoch": 0.0937, "grad_norm": 0.03768237680196762, "learning_rate": 4.130042390793254e-06, "loss": 0.032, "step": 164740 }, { "epoch": 0.09375, "grad_norm": 0.0378873385488987, "learning_rate": 4.1277668572130695e-06, "loss": 0.031, "step": 164750 }, { "epoch": 0.0938, "grad_norm": 0.03945872187614441, "learning_rate": 4.125491894271902e-06, "loss": 0.0328, "step": 164760 }, { "epoch": 0.09385, "grad_norm": 0.04004974663257599, "learning_rate": 4.123217502031945e-06, "loss": 0.0328, "step": 164770 }, { "epoch": 0.0939, "grad_norm": 0.042620837688446045, "learning_rate": 4.120943680555381e-06, "loss": 0.0327, "step": 164780 }, { "epoch": 0.09395, "grad_norm": 0.03914150223135948, "learning_rate": 4.118670429904365e-06, "loss": 0.0335, "step": 164790 }, { "epoch": 0.094, "grad_norm": 0.03904701769351959, "learning_rate": 4.11639775014106e-06, "loss": 0.0345, "step": 164800 }, { "epoch": 0.09405, "grad_norm": 0.03798987716436386, "learning_rate": 4.114125641327593e-06, "loss": 0.0338, "step": 164810 }, { "epoch": 0.0941, "grad_norm": 0.03474852442741394, "learning_rate": 4.111854103526083e-06, "loss": 0.0338, "step": 164820 }, { "epoch": 0.09415, "grad_norm": 0.03531309589743614, "learning_rate": 4.109583136798636e-06, "loss": 0.034, "step": 164830 }, { "epoch": 0.0942, "grad_norm": 0.042505595833063126, "learning_rate": 4.107312741207337e-06, "loss": 0.0344, "step": 164840 }, { "epoch": 0.09425, "grad_norm": 0.03951616212725639, "learning_rate": 4.105042916814267e-06, "loss": 0.0344, "step": 164850 }, { "epoch": 0.0943, "grad_norm": 0.04498439282178879, "learning_rate": 4.1027736636814615e-06, "loss": 0.0337, "step": 164860 }, { "epoch": 0.09435, "grad_norm": 0.05997840687632561, "learning_rate": 4.100504981870975e-06, "loss": 0.0355, "step": 164870 }, { "epoch": 0.0944, "grad_norm": 0.0440804548561573, "learning_rate": 4.098236871444836e-06, "loss": 0.0325, "step": 164880 }, { "epoch": 0.09445, "grad_norm": 0.04230072349309921, "learning_rate": 4.095969332465047e-06, "loss": 0.0326, "step": 164890 }, { "epoch": 0.0945, "grad_norm": 0.04409044608473778, "learning_rate": 4.093702364993607e-06, "loss": 0.0338, "step": 164900 }, { "epoch": 0.09455, "grad_norm": 0.03817106410861015, "learning_rate": 4.091435969092481e-06, "loss": 0.0336, "step": 164910 }, { "epoch": 0.0946, "grad_norm": 0.03945619985461235, "learning_rate": 4.089170144823648e-06, "loss": 0.0332, "step": 164920 }, { "epoch": 0.09465, "grad_norm": 0.0455905981361866, "learning_rate": 4.0869048922490465e-06, "loss": 0.0328, "step": 164930 }, { "epoch": 0.0947, "grad_norm": 0.0419369712471962, "learning_rate": 4.084640211430601e-06, "loss": 0.0331, "step": 164940 }, { "epoch": 0.09475, "grad_norm": 0.03600061684846878, "learning_rate": 4.082376102430244e-06, "loss": 0.033, "step": 164950 }, { "epoch": 0.0948, "grad_norm": 0.045839328318834305, "learning_rate": 4.08011256530986e-06, "loss": 0.0344, "step": 164960 }, { "epoch": 0.09485, "grad_norm": 0.04063069820404053, "learning_rate": 4.077849600131342e-06, "loss": 0.0334, "step": 164970 }, { "epoch": 0.0949, "grad_norm": 0.04238218441605568, "learning_rate": 4.075587206956558e-06, "loss": 0.036, "step": 164980 }, { "epoch": 0.09495, "grad_norm": 0.04314802214503288, "learning_rate": 4.0733253858473545e-06, "loss": 0.0333, "step": 164990 }, { "epoch": 0.095, "grad_norm": 0.03600018471479416, "learning_rate": 4.071064136865576e-06, "loss": 0.0328, "step": 165000 }, { "epoch": 0.09505, "grad_norm": 0.039877258241176605, "learning_rate": 4.068803460073042e-06, "loss": 0.0325, "step": 165010 }, { "epoch": 0.0951, "grad_norm": 0.037514206022024155, "learning_rate": 4.066543355531557e-06, "loss": 0.0326, "step": 165020 }, { "epoch": 0.09515, "grad_norm": 0.04040644317865372, "learning_rate": 4.064283823302909e-06, "loss": 0.0335, "step": 165030 }, { "epoch": 0.0952, "grad_norm": 0.04327954351902008, "learning_rate": 4.062024863448882e-06, "loss": 0.038, "step": 165040 }, { "epoch": 0.09525, "grad_norm": 0.04183371737599373, "learning_rate": 4.059766476031221e-06, "loss": 0.0336, "step": 165050 }, { "epoch": 0.0953, "grad_norm": 0.03946535289287567, "learning_rate": 4.057508661111686e-06, "loss": 0.0343, "step": 165060 }, { "epoch": 0.09535, "grad_norm": 0.041227441281080246, "learning_rate": 4.055251418751993e-06, "loss": 0.0345, "step": 165070 }, { "epoch": 0.0954, "grad_norm": 0.044941313564777374, "learning_rate": 4.052994749013855e-06, "loss": 0.0335, "step": 165080 }, { "epoch": 0.09545, "grad_norm": 0.06171907112002373, "learning_rate": 4.0507386519589766e-06, "loss": 0.0336, "step": 165090 }, { "epoch": 0.0955, "grad_norm": 0.05504198744893074, "learning_rate": 4.048483127649033e-06, "loss": 0.0379, "step": 165100 }, { "epoch": 0.09555, "grad_norm": 0.04573937878012657, "learning_rate": 4.046228176145689e-06, "loss": 0.0339, "step": 165110 }, { "epoch": 0.0956, "grad_norm": 0.044137127697467804, "learning_rate": 4.043973797510589e-06, "loss": 0.033, "step": 165120 }, { "epoch": 0.09565, "grad_norm": 0.043332889676094055, "learning_rate": 4.041719991805371e-06, "loss": 0.0365, "step": 165130 }, { "epoch": 0.0957, "grad_norm": 0.06066054478287697, "learning_rate": 4.039466759091667e-06, "loss": 0.034, "step": 165140 }, { "epoch": 0.09575, "grad_norm": 0.04395994171500206, "learning_rate": 4.037214099431058e-06, "loss": 0.0329, "step": 165150 }, { "epoch": 0.0958, "grad_norm": 0.03807613253593445, "learning_rate": 4.034962012885144e-06, "loss": 0.0336, "step": 165160 }, { "epoch": 0.09585, "grad_norm": 0.038211889564991, "learning_rate": 4.032710499515488e-06, "loss": 0.0341, "step": 165170 }, { "epoch": 0.0959, "grad_norm": 0.036341212689876556, "learning_rate": 4.0304595593836536e-06, "loss": 0.0331, "step": 165180 }, { "epoch": 0.09595, "grad_norm": 0.035370923578739166, "learning_rate": 4.02820919255118e-06, "loss": 0.0329, "step": 165190 }, { "epoch": 0.096, "grad_norm": 0.038010936230421066, "learning_rate": 4.0259593990795795e-06, "loss": 0.0329, "step": 165200 }, { "epoch": 0.09605, "grad_norm": 0.03992505371570587, "learning_rate": 4.023710179030377e-06, "loss": 0.034, "step": 165210 }, { "epoch": 0.0961, "grad_norm": 0.049580223858356476, "learning_rate": 4.021461532465057e-06, "loss": 0.0352, "step": 165220 }, { "epoch": 0.09615, "grad_norm": 0.04979586601257324, "learning_rate": 4.019213459445098e-06, "loss": 0.0331, "step": 165230 }, { "epoch": 0.0962, "grad_norm": 0.05107463523745537, "learning_rate": 4.016965960031954e-06, "loss": 0.0327, "step": 165240 }, { "epoch": 0.09625, "grad_norm": 0.04189491271972656, "learning_rate": 4.014719034287079e-06, "loss": 0.0324, "step": 165250 }, { "epoch": 0.0963, "grad_norm": 0.048803169280290604, "learning_rate": 4.012472682271906e-06, "loss": 0.0329, "step": 165260 }, { "epoch": 0.09635, "grad_norm": 0.04566030949354172, "learning_rate": 4.0102269040478475e-06, "loss": 0.0339, "step": 165270 }, { "epoch": 0.0964, "grad_norm": 0.04436716064810753, "learning_rate": 4.0079816996763e-06, "loss": 0.0341, "step": 165280 }, { "epoch": 0.09645, "grad_norm": 0.037450190633535385, "learning_rate": 4.005737069218637e-06, "loss": 0.0321, "step": 165290 }, { "epoch": 0.0965, "grad_norm": 0.041611459106206894, "learning_rate": 4.003493012736246e-06, "loss": 0.0329, "step": 165300 }, { "epoch": 0.09655, "grad_norm": 0.038101986050605774, "learning_rate": 4.001249530290466e-06, "loss": 0.0324, "step": 165310 }, { "epoch": 0.0966, "grad_norm": 0.04389223828911781, "learning_rate": 3.999006621942628e-06, "loss": 0.0321, "step": 165320 }, { "epoch": 0.09665, "grad_norm": 0.048132430762052536, "learning_rate": 3.996764287754065e-06, "loss": 0.0355, "step": 165330 }, { "epoch": 0.0967, "grad_norm": 0.03650198504328728, "learning_rate": 3.994522527786071e-06, "loss": 0.032, "step": 165340 }, { "epoch": 0.09675, "grad_norm": 0.03952125832438469, "learning_rate": 3.992281342099952e-06, "loss": 0.0339, "step": 165350 }, { "epoch": 0.0968, "grad_norm": 0.044009458273649216, "learning_rate": 3.990040730756955e-06, "loss": 0.034, "step": 165360 }, { "epoch": 0.09685, "grad_norm": 0.04283880069851875, "learning_rate": 3.9878006938183525e-06, "loss": 0.0331, "step": 165370 }, { "epoch": 0.0969, "grad_norm": 0.04398813471198082, "learning_rate": 3.985561231345391e-06, "loss": 0.0327, "step": 165380 }, { "epoch": 0.09695, "grad_norm": 0.041674233973026276, "learning_rate": 3.983322343399293e-06, "loss": 0.0329, "step": 165390 }, { "epoch": 0.097, "grad_norm": 0.03643820062279701, "learning_rate": 3.981084030041263e-06, "loss": 0.032, "step": 165400 }, { "epoch": 0.09705, "grad_norm": 0.045983344316482544, "learning_rate": 3.9788462913324945e-06, "loss": 0.0323, "step": 165410 }, { "epoch": 0.0971, "grad_norm": 0.042985949665308, "learning_rate": 3.976609127334177e-06, "loss": 0.0317, "step": 165420 }, { "epoch": 0.09715, "grad_norm": 0.04276910051703453, "learning_rate": 3.974372538107468e-06, "loss": 0.0332, "step": 165430 }, { "epoch": 0.0972, "grad_norm": 0.037295885384082794, "learning_rate": 3.97213652371351e-06, "loss": 0.0324, "step": 165440 }, { "epoch": 0.09725, "grad_norm": 0.042068447917699814, "learning_rate": 3.9699010842134455e-06, "loss": 0.0331, "step": 165450 }, { "epoch": 0.0973, "grad_norm": 0.042526569217443466, "learning_rate": 3.967666219668376e-06, "loss": 0.0345, "step": 165460 }, { "epoch": 0.09735, "grad_norm": 0.04848969727754593, "learning_rate": 3.965431930139418e-06, "loss": 0.0336, "step": 165470 }, { "epoch": 0.0974, "grad_norm": 0.03859826177358627, "learning_rate": 3.96319821568765e-06, "loss": 0.0324, "step": 165480 }, { "epoch": 0.09745, "grad_norm": 0.04887937381863594, "learning_rate": 3.960965076374138e-06, "loss": 0.0331, "step": 165490 }, { "epoch": 0.0975, "grad_norm": 0.04081129655241966, "learning_rate": 3.9587325122599325e-06, "loss": 0.0326, "step": 165500 }, { "epoch": 0.09755, "grad_norm": 0.03952052816748619, "learning_rate": 3.956500523406079e-06, "loss": 0.0355, "step": 165510 }, { "epoch": 0.0976, "grad_norm": 0.04575337842106819, "learning_rate": 3.9542691098735985e-06, "loss": 0.0327, "step": 165520 }, { "epoch": 0.09765, "grad_norm": 0.04624428600072861, "learning_rate": 3.952038271723485e-06, "loss": 0.032, "step": 165530 }, { "epoch": 0.0977, "grad_norm": 0.03500010445713997, "learning_rate": 3.949808009016745e-06, "loss": 0.0315, "step": 165540 }, { "epoch": 0.09775, "grad_norm": 0.03959944471716881, "learning_rate": 3.947578321814341e-06, "loss": 0.0327, "step": 165550 }, { "epoch": 0.0978, "grad_norm": 0.04318464919924736, "learning_rate": 3.945349210177249e-06, "loss": 0.032, "step": 165560 }, { "epoch": 0.09785, "grad_norm": 0.039310380816459656, "learning_rate": 3.943120674166384e-06, "loss": 0.0328, "step": 165570 }, { "epoch": 0.0979, "grad_norm": 0.048084914684295654, "learning_rate": 3.940892713842692e-06, "loss": 0.0328, "step": 165580 }, { "epoch": 0.09795, "grad_norm": 0.05158539488911629, "learning_rate": 3.938665329267088e-06, "loss": 0.0325, "step": 165590 }, { "epoch": 0.098, "grad_norm": 0.047085583209991455, "learning_rate": 3.93643852050046e-06, "loss": 0.0342, "step": 165600 }, { "epoch": 0.09805, "grad_norm": 0.04157368093729019, "learning_rate": 3.9342122876036894e-06, "loss": 0.0322, "step": 165610 }, { "epoch": 0.0981, "grad_norm": 0.046697210520505905, "learning_rate": 3.931986630637635e-06, "loss": 0.0346, "step": 165620 }, { "epoch": 0.09815, "grad_norm": 0.0374649278819561, "learning_rate": 3.9297615496631525e-06, "loss": 0.0343, "step": 165630 }, { "epoch": 0.0982, "grad_norm": 0.042161110788583755, "learning_rate": 3.927537044741086e-06, "loss": 0.0334, "step": 165640 }, { "epoch": 0.09825, "grad_norm": 0.0364234521985054, "learning_rate": 3.925313115932227e-06, "loss": 0.0333, "step": 165650 }, { "epoch": 0.0983, "grad_norm": 0.04255906492471695, "learning_rate": 3.923089763297397e-06, "loss": 0.0343, "step": 165660 }, { "epoch": 0.09835, "grad_norm": 0.03991887718439102, "learning_rate": 3.920866986897367e-06, "loss": 0.0336, "step": 165670 }, { "epoch": 0.0984, "grad_norm": 0.041006337851285934, "learning_rate": 3.918644786792922e-06, "loss": 0.0336, "step": 165680 }, { "epoch": 0.09845, "grad_norm": 0.0471954271197319, "learning_rate": 3.916423163044808e-06, "loss": 0.033, "step": 165690 }, { "epoch": 0.0985, "grad_norm": 0.03990200161933899, "learning_rate": 3.914202115713756e-06, "loss": 0.034, "step": 165700 }, { "epoch": 0.09855, "grad_norm": 0.04307753965258598, "learning_rate": 3.911981644860505e-06, "loss": 0.0346, "step": 165710 }, { "epoch": 0.0986, "grad_norm": 0.03951175883412361, "learning_rate": 3.909761750545754e-06, "loss": 0.0348, "step": 165720 }, { "epoch": 0.09865, "grad_norm": 0.041647281497716904, "learning_rate": 3.9075424328301914e-06, "loss": 0.0346, "step": 165730 }, { "epoch": 0.0987, "grad_norm": 0.04150126501917839, "learning_rate": 3.90532369177449e-06, "loss": 0.0322, "step": 165740 }, { "epoch": 0.09875, "grad_norm": 0.04288269206881523, "learning_rate": 3.903105527439319e-06, "loss": 0.0326, "step": 165750 }, { "epoch": 0.0988, "grad_norm": 0.038329627364873886, "learning_rate": 3.900887939885312e-06, "loss": 0.0316, "step": 165760 }, { "epoch": 0.09885, "grad_norm": 0.03365486487746239, "learning_rate": 3.898670929173107e-06, "loss": 0.0316, "step": 165770 }, { "epoch": 0.0989, "grad_norm": 0.04152948781847954, "learning_rate": 3.896454495363313e-06, "loss": 0.0327, "step": 165780 }, { "epoch": 0.09895, "grad_norm": 0.036098282784223557, "learning_rate": 3.894238638516518e-06, "loss": 0.0335, "step": 165790 }, { "epoch": 0.099, "grad_norm": 0.0432005412876606, "learning_rate": 3.892023358693317e-06, "loss": 0.0344, "step": 165800 }, { "epoch": 0.09905, "grad_norm": 0.036867767572402954, "learning_rate": 3.889808655954263e-06, "loss": 0.0334, "step": 165810 }, { "epoch": 0.0991, "grad_norm": 0.03740301728248596, "learning_rate": 3.887594530359909e-06, "loss": 0.0328, "step": 165820 }, { "epoch": 0.09915, "grad_norm": 0.03603968769311905, "learning_rate": 3.885380981970793e-06, "loss": 0.0324, "step": 165830 }, { "epoch": 0.0992, "grad_norm": 0.03937996178865433, "learning_rate": 3.883168010847421e-06, "loss": 0.0343, "step": 165840 }, { "epoch": 0.09925, "grad_norm": 0.038710664957761765, "learning_rate": 3.880955617050316e-06, "loss": 0.0332, "step": 165850 }, { "epoch": 0.0993, "grad_norm": 0.04181608557701111, "learning_rate": 3.878743800639939e-06, "loss": 0.0327, "step": 165860 }, { "epoch": 0.09935, "grad_norm": 0.039431218057870865, "learning_rate": 3.876532561676777e-06, "loss": 0.0338, "step": 165870 }, { "epoch": 0.0994, "grad_norm": 0.044270072132349014, "learning_rate": 3.874321900221273e-06, "loss": 0.0326, "step": 165880 }, { "epoch": 0.09945, "grad_norm": 0.0367770753800869, "learning_rate": 3.872111816333876e-06, "loss": 0.0324, "step": 165890 }, { "epoch": 0.0995, "grad_norm": 0.04619767516851425, "learning_rate": 3.869902310075005e-06, "loss": 0.0328, "step": 165900 }, { "epoch": 0.09955, "grad_norm": 0.041448675096035004, "learning_rate": 3.867693381505064e-06, "loss": 0.0326, "step": 165910 }, { "epoch": 0.0996, "grad_norm": 0.042945943772792816, "learning_rate": 3.865485030684449e-06, "loss": 0.0317, "step": 165920 }, { "epoch": 0.09965, "grad_norm": 0.039436765015125275, "learning_rate": 3.863277257673533e-06, "loss": 0.0326, "step": 165930 }, { "epoch": 0.0997, "grad_norm": 0.04146011918783188, "learning_rate": 3.861070062532679e-06, "loss": 0.0317, "step": 165940 }, { "epoch": 0.09975, "grad_norm": 0.04633131995797157, "learning_rate": 3.858863445322222e-06, "loss": 0.0316, "step": 165950 }, { "epoch": 0.0998, "grad_norm": 0.03417607769370079, "learning_rate": 3.856657406102496e-06, "loss": 0.032, "step": 165960 }, { "epoch": 0.09985, "grad_norm": 0.04359418526291847, "learning_rate": 3.854451944933818e-06, "loss": 0.033, "step": 165970 }, { "epoch": 0.0999, "grad_norm": 0.04120621457695961, "learning_rate": 3.85224706187648e-06, "loss": 0.0327, "step": 165980 }, { "epoch": 0.09995, "grad_norm": 0.0409977026283741, "learning_rate": 3.850042756990763e-06, "loss": 0.0334, "step": 165990 }, { "epoch": 0.1, "grad_norm": 0.03260594978928566, "learning_rate": 3.847839030336925e-06, "loss": 0.0315, "step": 166000 }, { "epoch": 0.10005, "grad_norm": 0.03696081042289734, "learning_rate": 3.845635881975226e-06, "loss": 0.033, "step": 166010 }, { "epoch": 0.1001, "grad_norm": 0.03451487794518471, "learning_rate": 3.843433311965897e-06, "loss": 0.0317, "step": 166020 }, { "epoch": 0.10015, "grad_norm": 0.03411698341369629, "learning_rate": 3.841231320369146e-06, "loss": 0.0316, "step": 166030 }, { "epoch": 0.1002, "grad_norm": 0.03840222954750061, "learning_rate": 3.8390299072451866e-06, "loss": 0.0333, "step": 166040 }, { "epoch": 0.10025, "grad_norm": 0.04946882650256157, "learning_rate": 3.836829072654196e-06, "loss": 0.0332, "step": 166050 }, { "epoch": 0.1003, "grad_norm": 0.050360143184661865, "learning_rate": 3.834628816656357e-06, "loss": 0.0337, "step": 166060 }, { "epoch": 0.10035, "grad_norm": 0.04284515231847763, "learning_rate": 3.832429139311805e-06, "loss": 0.0329, "step": 166070 }, { "epoch": 0.1004, "grad_norm": 0.03992254287004471, "learning_rate": 3.830230040680688e-06, "loss": 0.0356, "step": 166080 }, { "epoch": 0.10045, "grad_norm": 0.044528309255838394, "learning_rate": 3.828031520823136e-06, "loss": 0.0326, "step": 166090 }, { "epoch": 0.1005, "grad_norm": 0.04010489583015442, "learning_rate": 3.825833579799246e-06, "loss": 0.0335, "step": 166100 }, { "epoch": 0.10055, "grad_norm": 0.04283710569143295, "learning_rate": 3.823636217669111e-06, "loss": 0.0328, "step": 166110 }, { "epoch": 0.1006, "grad_norm": 0.0381898432970047, "learning_rate": 3.821439434492802e-06, "loss": 0.032, "step": 166120 }, { "epoch": 0.10065, "grad_norm": 0.0465182326734066, "learning_rate": 3.819243230330385e-06, "loss": 0.0333, "step": 166130 }, { "epoch": 0.1007, "grad_norm": 0.04123750701546669, "learning_rate": 3.817047605241905e-06, "loss": 0.0327, "step": 166140 }, { "epoch": 0.10075, "grad_norm": 0.042871516197919846, "learning_rate": 3.814852559287377e-06, "loss": 0.0318, "step": 166150 }, { "epoch": 0.1008, "grad_norm": 0.05040085315704346, "learning_rate": 3.8126580925268273e-06, "loss": 0.0323, "step": 166160 }, { "epoch": 0.10085, "grad_norm": 0.05391421169042587, "learning_rate": 3.8104642050202393e-06, "loss": 0.0334, "step": 166170 }, { "epoch": 0.1009, "grad_norm": 0.04254022613167763, "learning_rate": 3.8082708968276066e-06, "loss": 0.0328, "step": 166180 }, { "epoch": 0.10095, "grad_norm": 0.05270838364958763, "learning_rate": 3.8060781680088865e-06, "loss": 0.0339, "step": 166190 }, { "epoch": 0.101, "grad_norm": 0.039037276059389114, "learning_rate": 3.8038860186240198e-06, "loss": 0.0323, "step": 166200 }, { "epoch": 0.10105, "grad_norm": 0.03755092993378639, "learning_rate": 3.801694448732954e-06, "loss": 0.0329, "step": 166210 }, { "epoch": 0.1011, "grad_norm": 0.037635862827301025, "learning_rate": 3.799503458395598e-06, "loss": 0.0328, "step": 166220 }, { "epoch": 0.10115, "grad_norm": 0.03773429989814758, "learning_rate": 3.7973130476718492e-06, "loss": 0.0325, "step": 166230 }, { "epoch": 0.1012, "grad_norm": 0.04328387975692749, "learning_rate": 3.7951232166215933e-06, "loss": 0.0341, "step": 166240 }, { "epoch": 0.10125, "grad_norm": 0.04198687896132469, "learning_rate": 3.7929339653047095e-06, "loss": 0.0335, "step": 166250 }, { "epoch": 0.1013, "grad_norm": 0.04273358732461929, "learning_rate": 3.7907452937810366e-06, "loss": 0.0343, "step": 166260 }, { "epoch": 0.10135, "grad_norm": 0.0433444119989872, "learning_rate": 3.788557202110424e-06, "loss": 0.0326, "step": 166270 }, { "epoch": 0.1014, "grad_norm": 0.039447490125894547, "learning_rate": 3.7863696903526895e-06, "loss": 0.0338, "step": 166280 }, { "epoch": 0.10145, "grad_norm": 0.049982041120529175, "learning_rate": 3.7841827585676337e-06, "loss": 0.0336, "step": 166290 }, { "epoch": 0.1015, "grad_norm": 0.047276243567466736, "learning_rate": 3.7819964068150556e-06, "loss": 0.033, "step": 166300 }, { "epoch": 0.10155, "grad_norm": 0.04317307844758034, "learning_rate": 3.7798106351547236e-06, "loss": 0.0329, "step": 166310 }, { "epoch": 0.1016, "grad_norm": 0.05367967113852501, "learning_rate": 3.7776254436463985e-06, "loss": 0.0339, "step": 166320 }, { "epoch": 0.10165, "grad_norm": 0.05572677403688431, "learning_rate": 3.775440832349814e-06, "loss": 0.0334, "step": 166330 }, { "epoch": 0.1017, "grad_norm": 0.05006201192736626, "learning_rate": 3.773256801324704e-06, "loss": 0.0323, "step": 166340 }, { "epoch": 0.10175, "grad_norm": 0.04554805904626846, "learning_rate": 3.7710733506307883e-06, "loss": 0.0324, "step": 166350 }, { "epoch": 0.1018, "grad_norm": 0.04822637140750885, "learning_rate": 3.7688904803277414e-06, "loss": 0.0328, "step": 166360 }, { "epoch": 0.10185, "grad_norm": 0.05482174828648567, "learning_rate": 3.7667081904752597e-06, "loss": 0.0331, "step": 166370 }, { "epoch": 0.1019, "grad_norm": 0.038746532052755356, "learning_rate": 3.7645264811329934e-06, "loss": 0.034, "step": 166380 }, { "epoch": 0.10195, "grad_norm": 0.04358692839741707, "learning_rate": 3.7623453523605994e-06, "loss": 0.032, "step": 166390 }, { "epoch": 0.102, "grad_norm": 0.04856157302856445, "learning_rate": 3.7601648042177055e-06, "loss": 0.0343, "step": 166400 }, { "epoch": 0.10205, "grad_norm": 0.042550262063741684, "learning_rate": 3.75798483676392e-06, "loss": 0.0333, "step": 166410 }, { "epoch": 0.1021, "grad_norm": 0.05196889862418175, "learning_rate": 3.755805450058855e-06, "loss": 0.0333, "step": 166420 }, { "epoch": 0.10215, "grad_norm": 0.04230838268995285, "learning_rate": 3.753626644162089e-06, "loss": 0.0324, "step": 166430 }, { "epoch": 0.1022, "grad_norm": 0.0412224642932415, "learning_rate": 3.7514484191331885e-06, "loss": 0.0325, "step": 166440 }, { "epoch": 0.10225, "grad_norm": 0.04116053506731987, "learning_rate": 3.749270775031699e-06, "loss": 0.0313, "step": 166450 }, { "epoch": 0.1023, "grad_norm": 0.036984339356422424, "learning_rate": 3.747093711917163e-06, "loss": 0.0318, "step": 166460 }, { "epoch": 0.10235, "grad_norm": 0.03305768221616745, "learning_rate": 3.744917229849107e-06, "loss": 0.0312, "step": 166470 }, { "epoch": 0.1024, "grad_norm": 0.04337235167622566, "learning_rate": 3.7427413288870283e-06, "loss": 0.0331, "step": 166480 }, { "epoch": 0.10245, "grad_norm": 0.034650687128305435, "learning_rate": 3.7405660090904153e-06, "loss": 0.0328, "step": 166490 }, { "epoch": 0.1025, "grad_norm": 0.0390767827630043, "learning_rate": 3.738391270518735e-06, "loss": 0.0314, "step": 166500 }, { "epoch": 0.10255, "grad_norm": 0.03500323370099068, "learning_rate": 3.7362171132314548e-06, "loss": 0.0313, "step": 166510 }, { "epoch": 0.1026, "grad_norm": 0.04510362446308136, "learning_rate": 3.7340435372880124e-06, "loss": 0.0322, "step": 166520 }, { "epoch": 0.10265, "grad_norm": 0.044195231050252914, "learning_rate": 3.731870542747823e-06, "loss": 0.0322, "step": 166530 }, { "epoch": 0.1027, "grad_norm": 0.04079239070415497, "learning_rate": 3.7296981296703088e-06, "loss": 0.0323, "step": 166540 }, { "epoch": 0.10275, "grad_norm": 0.06017296016216278, "learning_rate": 3.727526298114853e-06, "loss": 0.0323, "step": 166550 }, { "epoch": 0.1028, "grad_norm": 0.04101934656500816, "learning_rate": 3.7253550481408467e-06, "loss": 0.0331, "step": 166560 }, { "epoch": 0.10285, "grad_norm": 0.034680336713790894, "learning_rate": 3.723184379807629e-06, "loss": 0.0312, "step": 166570 }, { "epoch": 0.1029, "grad_norm": 0.045201726257801056, "learning_rate": 3.7210142931745575e-06, "loss": 0.0326, "step": 166580 }, { "epoch": 0.10295, "grad_norm": 0.037214379757642746, "learning_rate": 3.7188447883009653e-06, "loss": 0.0306, "step": 166590 }, { "epoch": 0.103, "grad_norm": 0.03503154218196869, "learning_rate": 3.716675865246164e-06, "loss": 0.0324, "step": 166600 }, { "epoch": 0.10305, "grad_norm": 0.04241985082626343, "learning_rate": 3.7145075240694465e-06, "loss": 0.0329, "step": 166610 }, { "epoch": 0.1031, "grad_norm": 0.037117183208465576, "learning_rate": 3.7123397648300917e-06, "loss": 0.0328, "step": 166620 }, { "epoch": 0.10315, "grad_norm": 0.06032340228557587, "learning_rate": 3.7101725875873765e-06, "loss": 0.0337, "step": 166630 }, { "epoch": 0.1032, "grad_norm": 0.04760419949889183, "learning_rate": 3.7080059924005454e-06, "loss": 0.032, "step": 166640 }, { "epoch": 0.10325, "grad_norm": 0.04192169010639191, "learning_rate": 3.7058399793288263e-06, "loss": 0.0334, "step": 166650 }, { "epoch": 0.1033, "grad_norm": 0.0428856760263443, "learning_rate": 3.703674548431446e-06, "loss": 0.0321, "step": 166660 }, { "epoch": 0.10335, "grad_norm": 0.03872525319457054, "learning_rate": 3.7015096997675967e-06, "loss": 0.0325, "step": 166670 }, { "epoch": 0.1034, "grad_norm": 0.04189387336373329, "learning_rate": 3.699345433396478e-06, "loss": 0.032, "step": 166680 }, { "epoch": 0.10345, "grad_norm": 0.03546981140971184, "learning_rate": 3.6971817493772517e-06, "loss": 0.0335, "step": 166690 }, { "epoch": 0.1035, "grad_norm": 0.03853558376431465, "learning_rate": 3.6950186477690748e-06, "loss": 0.0317, "step": 166700 }, { "epoch": 0.10355, "grad_norm": 0.03602823242545128, "learning_rate": 3.692856128631078e-06, "loss": 0.0323, "step": 166710 }, { "epoch": 0.1036, "grad_norm": 0.0376969650387764, "learning_rate": 3.6906941920223953e-06, "loss": 0.0313, "step": 166720 }, { "epoch": 0.10365, "grad_norm": 0.03826133906841278, "learning_rate": 3.688532838002129e-06, "loss": 0.0318, "step": 166730 }, { "epoch": 0.1037, "grad_norm": 0.045628514140844345, "learning_rate": 3.6863720666293595e-06, "loss": 0.0333, "step": 166740 }, { "epoch": 0.10375, "grad_norm": 0.03853847086429596, "learning_rate": 3.6842118779631785e-06, "loss": 0.0326, "step": 166750 }, { "epoch": 0.1038, "grad_norm": 0.036480020731687546, "learning_rate": 3.6820522720626304e-06, "loss": 0.0327, "step": 166760 }, { "epoch": 0.10385, "grad_norm": 0.03850627318024635, "learning_rate": 3.679893248986779e-06, "loss": 0.0328, "step": 166770 }, { "epoch": 0.1039, "grad_norm": 0.04077065363526344, "learning_rate": 3.6777348087946224e-06, "loss": 0.0325, "step": 166780 }, { "epoch": 0.10395, "grad_norm": 0.04662526771426201, "learning_rate": 3.6755769515451842e-06, "loss": 0.0333, "step": 166790 }, { "epoch": 0.104, "grad_norm": 0.03916983678936958, "learning_rate": 3.673419677297468e-06, "loss": 0.0336, "step": 166800 }, { "epoch": 0.10405, "grad_norm": 0.039329130202531815, "learning_rate": 3.6712629861104464e-06, "loss": 0.0325, "step": 166810 }, { "epoch": 0.1041, "grad_norm": 0.04693764075636864, "learning_rate": 3.6691068780430825e-06, "loss": 0.034, "step": 166820 }, { "epoch": 0.10415, "grad_norm": 0.03850613906979561, "learning_rate": 3.666951353154316e-06, "loss": 0.0327, "step": 166830 }, { "epoch": 0.1042, "grad_norm": 0.04074549302458763, "learning_rate": 3.6647964115030853e-06, "loss": 0.0328, "step": 166840 }, { "epoch": 0.10425, "grad_norm": 0.04204528406262398, "learning_rate": 3.6626420531483187e-06, "loss": 0.0333, "step": 166850 }, { "epoch": 0.1043, "grad_norm": 0.0419759526848793, "learning_rate": 3.660488278148888e-06, "loss": 0.0325, "step": 166860 }, { "epoch": 0.10435, "grad_norm": 0.03967071697115898, "learning_rate": 3.658335086563697e-06, "loss": 0.0324, "step": 166870 }, { "epoch": 0.1044, "grad_norm": 0.03214019164443016, "learning_rate": 3.656182478451603e-06, "loss": 0.0323, "step": 166880 }, { "epoch": 0.10445, "grad_norm": 0.03751469403505325, "learning_rate": 3.6540304538714655e-06, "loss": 0.0317, "step": 166890 }, { "epoch": 0.1045, "grad_norm": 0.038455113768577576, "learning_rate": 3.6518790128821173e-06, "loss": 0.0321, "step": 166900 }, { "epoch": 0.10455, "grad_norm": 0.04511810466647148, "learning_rate": 3.64972815554237e-06, "loss": 0.0329, "step": 166910 }, { "epoch": 0.1046, "grad_norm": 0.04041874781250954, "learning_rate": 3.647577881911041e-06, "loss": 0.0337, "step": 166920 }, { "epoch": 0.10465, "grad_norm": 0.032743386924266815, "learning_rate": 3.6454281920469126e-06, "loss": 0.035, "step": 166930 }, { "epoch": 0.1047, "grad_norm": 0.045992206782102585, "learning_rate": 3.6432790860087525e-06, "loss": 0.0324, "step": 166940 }, { "epoch": 0.10475, "grad_norm": 0.07870320975780487, "learning_rate": 3.6411305638553133e-06, "loss": 0.0363, "step": 166950 }, { "epoch": 0.1048, "grad_norm": 0.043390434235334396, "learning_rate": 3.6389826256453457e-06, "loss": 0.0345, "step": 166960 }, { "epoch": 0.10485, "grad_norm": 0.04987112060189247, "learning_rate": 3.636835271437561e-06, "loss": 0.0332, "step": 166970 }, { "epoch": 0.1049, "grad_norm": 0.048528384417295456, "learning_rate": 3.634688501290684e-06, "loss": 0.0364, "step": 166980 }, { "epoch": 0.10495, "grad_norm": 0.05022319406270981, "learning_rate": 3.632542315263393e-06, "loss": 0.0315, "step": 166990 }, { "epoch": 0.105, "grad_norm": 0.04441463574767113, "learning_rate": 3.6303967134143637e-06, "loss": 0.0348, "step": 167000 }, { "epoch": 0.10505, "grad_norm": 0.05608077347278595, "learning_rate": 3.628251695802265e-06, "loss": 0.0337, "step": 167010 }, { "epoch": 0.1051, "grad_norm": 0.04835597053170204, "learning_rate": 3.6261072624857367e-06, "loss": 0.033, "step": 167020 }, { "epoch": 0.10515, "grad_norm": 0.03941258788108826, "learning_rate": 3.6239634135234012e-06, "loss": 0.0323, "step": 167030 }, { "epoch": 0.1052, "grad_norm": 0.052929267287254333, "learning_rate": 3.6218201489738783e-06, "loss": 0.0332, "step": 167040 }, { "epoch": 0.10525, "grad_norm": 0.032524507492780685, "learning_rate": 3.6196774688957575e-06, "loss": 0.0327, "step": 167050 }, { "epoch": 0.1053, "grad_norm": 0.04127586632966995, "learning_rate": 3.617535373347636e-06, "loss": 0.0318, "step": 167060 }, { "epoch": 0.10535, "grad_norm": 0.04529045522212982, "learning_rate": 3.61539386238805e-06, "loss": 0.0331, "step": 167070 }, { "epoch": 0.1054, "grad_norm": 0.038415782153606415, "learning_rate": 3.6132529360755674e-06, "loss": 0.0311, "step": 167080 }, { "epoch": 0.10545, "grad_norm": 0.03427970036864281, "learning_rate": 3.61111259446871e-06, "loss": 0.0315, "step": 167090 }, { "epoch": 0.1055, "grad_norm": 0.033244382590055466, "learning_rate": 3.608972837626004e-06, "loss": 0.0315, "step": 167100 }, { "epoch": 0.10555, "grad_norm": 0.038616396486759186, "learning_rate": 3.6068336656059466e-06, "loss": 0.032, "step": 167110 }, { "epoch": 0.1056, "grad_norm": 0.039937783032655716, "learning_rate": 3.6046950784670105e-06, "loss": 0.0322, "step": 167120 }, { "epoch": 0.10565, "grad_norm": 0.04301230236887932, "learning_rate": 3.602557076267682e-06, "loss": 0.0322, "step": 167130 }, { "epoch": 0.1057, "grad_norm": 0.035067368298769, "learning_rate": 3.6004196590664037e-06, "loss": 0.0309, "step": 167140 }, { "epoch": 0.10575, "grad_norm": 0.039692506194114685, "learning_rate": 3.5982828269216117e-06, "loss": 0.0325, "step": 167150 }, { "epoch": 0.1058, "grad_norm": 0.045233264565467834, "learning_rate": 3.596146579891721e-06, "loss": 0.0328, "step": 167160 }, { "epoch": 0.10585, "grad_norm": 0.04296007379889488, "learning_rate": 3.594010918035143e-06, "loss": 0.0328, "step": 167170 }, { "epoch": 0.1059, "grad_norm": 0.04041620343923569, "learning_rate": 3.5918758414102695e-06, "loss": 0.032, "step": 167180 }, { "epoch": 0.10595, "grad_norm": 0.041898369789123535, "learning_rate": 3.589741350075465e-06, "loss": 0.0313, "step": 167190 }, { "epoch": 0.106, "grad_norm": 0.0341411828994751, "learning_rate": 3.587607444089092e-06, "loss": 0.0326, "step": 167200 }, { "epoch": 0.10605, "grad_norm": 0.03707229718565941, "learning_rate": 3.585474123509483e-06, "loss": 0.0325, "step": 167210 }, { "epoch": 0.1061, "grad_norm": 0.037575431168079376, "learning_rate": 3.5833413883949675e-06, "loss": 0.0324, "step": 167220 }, { "epoch": 0.10615, "grad_norm": 0.034515380859375, "learning_rate": 3.5812092388038567e-06, "loss": 0.0329, "step": 167230 }, { "epoch": 0.1062, "grad_norm": 0.03806985169649124, "learning_rate": 3.5790776747944316e-06, "loss": 0.033, "step": 167240 }, { "epoch": 0.10625, "grad_norm": 0.03956698253750801, "learning_rate": 3.5769466964249793e-06, "loss": 0.0326, "step": 167250 }, { "epoch": 0.1063, "grad_norm": 0.04012298211455345, "learning_rate": 3.574816303753753e-06, "loss": 0.0332, "step": 167260 }, { "epoch": 0.10635, "grad_norm": 0.03527490049600601, "learning_rate": 3.572686496839009e-06, "loss": 0.0331, "step": 167270 }, { "epoch": 0.1064, "grad_norm": 0.03796212002635002, "learning_rate": 3.570557275738956e-06, "loss": 0.0335, "step": 167280 }, { "epoch": 0.10645, "grad_norm": 0.03673234581947327, "learning_rate": 3.5684286405118173e-06, "loss": 0.0331, "step": 167290 }, { "epoch": 0.1065, "grad_norm": 0.03789430111646652, "learning_rate": 3.5663005912157933e-06, "loss": 0.0327, "step": 167300 }, { "epoch": 0.10655, "grad_norm": 0.033841654658317566, "learning_rate": 3.5641731279090596e-06, "loss": 0.0336, "step": 167310 }, { "epoch": 0.1066, "grad_norm": 0.03266311064362526, "learning_rate": 3.5620462506497782e-06, "loss": 0.0335, "step": 167320 }, { "epoch": 0.10665, "grad_norm": 0.03473520651459694, "learning_rate": 3.559919959496091e-06, "loss": 0.0328, "step": 167330 }, { "epoch": 0.1067, "grad_norm": 0.03881165385246277, "learning_rate": 3.5577942545061473e-06, "loss": 0.0338, "step": 167340 }, { "epoch": 0.10675, "grad_norm": 0.03510915860533714, "learning_rate": 3.555669135738049e-06, "loss": 0.0331, "step": 167350 }, { "epoch": 0.1068, "grad_norm": 0.04461480677127838, "learning_rate": 3.5535446032498977e-06, "loss": 0.0335, "step": 167360 }, { "epoch": 0.10685, "grad_norm": 0.046035848557949066, "learning_rate": 3.5514206570997854e-06, "loss": 0.0329, "step": 167370 }, { "epoch": 0.1069, "grad_norm": 0.03814137354493141, "learning_rate": 3.549297297345766e-06, "loss": 0.0337, "step": 167380 }, { "epoch": 0.10695, "grad_norm": 0.03890189900994301, "learning_rate": 3.5471745240459096e-06, "loss": 0.0333, "step": 167390 }, { "epoch": 0.107, "grad_norm": 0.04086184874176979, "learning_rate": 3.5450523372582395e-06, "loss": 0.0327, "step": 167400 }, { "epoch": 0.10705, "grad_norm": 0.03729187697172165, "learning_rate": 3.5429307370407728e-06, "loss": 0.0335, "step": 167410 }, { "epoch": 0.1071, "grad_norm": 0.03804780915379524, "learning_rate": 3.5408097234515243e-06, "loss": 0.0313, "step": 167420 }, { "epoch": 0.10715, "grad_norm": 0.04238668456673622, "learning_rate": 3.538689296548478e-06, "loss": 0.0331, "step": 167430 }, { "epoch": 0.1072, "grad_norm": 0.042996007949113846, "learning_rate": 3.5365694563896016e-06, "loss": 0.0316, "step": 167440 }, { "epoch": 0.10725, "grad_norm": 0.03444478660821915, "learning_rate": 3.5344502030328463e-06, "loss": 0.0346, "step": 167450 }, { "epoch": 0.1073, "grad_norm": 0.043042588979005814, "learning_rate": 3.532331536536165e-06, "loss": 0.0328, "step": 167460 }, { "epoch": 0.10735, "grad_norm": 0.03950345143675804, "learning_rate": 3.5302134569574706e-06, "loss": 0.0335, "step": 167470 }, { "epoch": 0.1074, "grad_norm": 0.041202180087566376, "learning_rate": 3.52809596435468e-06, "loss": 0.0344, "step": 167480 }, { "epoch": 0.10745, "grad_norm": 0.035815853625535965, "learning_rate": 3.525979058785678e-06, "loss": 0.0324, "step": 167490 }, { "epoch": 0.1075, "grad_norm": 0.036990031599998474, "learning_rate": 3.523862740308334e-06, "loss": 0.0319, "step": 167500 }, { "epoch": 0.10755, "grad_norm": 0.03876454383134842, "learning_rate": 3.5217470089805223e-06, "loss": 0.0328, "step": 167510 }, { "epoch": 0.1076, "grad_norm": 0.038854099810123444, "learning_rate": 3.519631864860076e-06, "loss": 0.0326, "step": 167520 }, { "epoch": 0.10765, "grad_norm": 0.037550996989011765, "learning_rate": 3.517517308004828e-06, "loss": 0.032, "step": 167530 }, { "epoch": 0.1077, "grad_norm": 0.035221610218286514, "learning_rate": 3.515403338472578e-06, "loss": 0.0344, "step": 167540 }, { "epoch": 0.10775, "grad_norm": 0.03795564919710159, "learning_rate": 3.513289956321131e-06, "loss": 0.0326, "step": 167550 }, { "epoch": 0.1078, "grad_norm": 0.03376448526978493, "learning_rate": 3.511177161608273e-06, "loss": 0.0326, "step": 167560 }, { "epoch": 0.10785, "grad_norm": 0.04067350551486015, "learning_rate": 3.50906495439175e-06, "loss": 0.033, "step": 167570 }, { "epoch": 0.1079, "grad_norm": 0.0399472676217556, "learning_rate": 3.506953334729321e-06, "loss": 0.0325, "step": 167580 }, { "epoch": 0.10795, "grad_norm": 0.03734531253576279, "learning_rate": 3.5048423026787095e-06, "loss": 0.0328, "step": 167590 }, { "epoch": 0.108, "grad_norm": 0.04117441922426224, "learning_rate": 3.5027318582976394e-06, "loss": 0.0323, "step": 167600 }, { "epoch": 0.10805, "grad_norm": 0.0442042350769043, "learning_rate": 3.5006220016438023e-06, "loss": 0.0317, "step": 167610 }, { "epoch": 0.1081, "grad_norm": 0.039410967379808426, "learning_rate": 3.498512732774878e-06, "loss": 0.0324, "step": 167620 }, { "epoch": 0.10815, "grad_norm": 0.0426187664270401, "learning_rate": 3.4964040517485447e-06, "loss": 0.0332, "step": 167630 }, { "epoch": 0.1082, "grad_norm": 0.0367325022816658, "learning_rate": 3.4942959586224457e-06, "loss": 0.0317, "step": 167640 }, { "epoch": 0.10825, "grad_norm": 0.037765733897686005, "learning_rate": 3.4921884534542148e-06, "loss": 0.0327, "step": 167650 }, { "epoch": 0.1083, "grad_norm": 0.040944699198007584, "learning_rate": 3.4900815363014677e-06, "loss": 0.032, "step": 167660 }, { "epoch": 0.10835, "grad_norm": 0.03851859271526337, "learning_rate": 3.487975207221808e-06, "loss": 0.0334, "step": 167670 }, { "epoch": 0.1084, "grad_norm": 0.038580868393182755, "learning_rate": 3.4858694662728314e-06, "loss": 0.0316, "step": 167680 }, { "epoch": 0.10845, "grad_norm": 0.04467671737074852, "learning_rate": 3.4837643135121e-06, "loss": 0.0335, "step": 167690 }, { "epoch": 0.1085, "grad_norm": 0.03394944965839386, "learning_rate": 3.481659748997171e-06, "loss": 0.0323, "step": 167700 }, { "epoch": 0.10855, "grad_norm": 0.03451866656541824, "learning_rate": 3.4795557727855754e-06, "loss": 0.0311, "step": 167710 }, { "epoch": 0.1086, "grad_norm": 0.038766369223594666, "learning_rate": 3.477452384934843e-06, "loss": 0.0325, "step": 167720 }, { "epoch": 0.10865, "grad_norm": 0.0374983511865139, "learning_rate": 3.475349585502477e-06, "loss": 0.0339, "step": 167730 }, { "epoch": 0.1087, "grad_norm": 0.04100382328033447, "learning_rate": 3.4732473745459625e-06, "loss": 0.0329, "step": 167740 }, { "epoch": 0.10875, "grad_norm": 0.04044672101736069, "learning_rate": 3.4711457521227843e-06, "loss": 0.0335, "step": 167750 }, { "epoch": 0.1088, "grad_norm": 0.035386454313993454, "learning_rate": 3.4690447182903844e-06, "loss": 0.033, "step": 167760 }, { "epoch": 0.10885, "grad_norm": 0.04271038994193077, "learning_rate": 3.466944273106226e-06, "loss": 0.0326, "step": 167770 }, { "epoch": 0.1089, "grad_norm": 0.03882293403148651, "learning_rate": 3.4648444166277107e-06, "loss": 0.0343, "step": 167780 }, { "epoch": 0.10895, "grad_norm": 0.03273117542266846, "learning_rate": 3.462745148912258e-06, "loss": 0.0312, "step": 167790 }, { "epoch": 0.109, "grad_norm": 0.03577882796525955, "learning_rate": 3.4606464700172703e-06, "loss": 0.0333, "step": 167800 }, { "epoch": 0.10905, "grad_norm": 0.03669364005327225, "learning_rate": 3.4585483800001124e-06, "loss": 0.0333, "step": 167810 }, { "epoch": 0.1091, "grad_norm": 0.0384441576898098, "learning_rate": 3.456450878918149e-06, "loss": 0.0328, "step": 167820 }, { "epoch": 0.10915, "grad_norm": 0.037700068205595016, "learning_rate": 3.4543539668287218e-06, "loss": 0.0344, "step": 167830 }, { "epoch": 0.1092, "grad_norm": 0.03969224914908409, "learning_rate": 3.4522576437891668e-06, "loss": 0.0331, "step": 167840 }, { "epoch": 0.10925, "grad_norm": 0.04010225459933281, "learning_rate": 3.4501619098567944e-06, "loss": 0.0321, "step": 167850 }, { "epoch": 0.1093, "grad_norm": 0.03746872395277023, "learning_rate": 3.448066765088892e-06, "loss": 0.0331, "step": 167860 }, { "epoch": 0.10935, "grad_norm": 0.04125736281275749, "learning_rate": 3.4459722095427554e-06, "loss": 0.0333, "step": 167870 }, { "epoch": 0.1094, "grad_norm": 0.036100760102272034, "learning_rate": 3.4438782432756336e-06, "loss": 0.032, "step": 167880 }, { "epoch": 0.10945, "grad_norm": 0.040355175733566284, "learning_rate": 3.4417848663447883e-06, "loss": 0.0341, "step": 167890 }, { "epoch": 0.1095, "grad_norm": 0.032878391444683075, "learning_rate": 3.4396920788074436e-06, "loss": 0.0348, "step": 167900 }, { "epoch": 0.10955, "grad_norm": 0.0392109714448452, "learning_rate": 3.437599880720821e-06, "loss": 0.0326, "step": 167910 }, { "epoch": 0.1096, "grad_norm": 0.03616342693567276, "learning_rate": 3.43550827214211e-06, "loss": 0.0338, "step": 167920 }, { "epoch": 0.10965, "grad_norm": 0.04273064434528351, "learning_rate": 3.433417253128507e-06, "loss": 0.034, "step": 167930 }, { "epoch": 0.1097, "grad_norm": 0.050068605691194534, "learning_rate": 3.431326823737174e-06, "loss": 0.0337, "step": 167940 }, { "epoch": 0.10975, "grad_norm": 0.03763910382986069, "learning_rate": 3.429236984025258e-06, "loss": 0.0321, "step": 167950 }, { "epoch": 0.1098, "grad_norm": 0.03633598983287811, "learning_rate": 3.427147734049904e-06, "loss": 0.0331, "step": 167960 }, { "epoch": 0.10985, "grad_norm": 0.03945426270365715, "learning_rate": 3.4250590738682224e-06, "loss": 0.0322, "step": 167970 }, { "epoch": 0.1099, "grad_norm": 0.03770596906542778, "learning_rate": 3.422971003537323e-06, "loss": 0.0326, "step": 167980 }, { "epoch": 0.10995, "grad_norm": 0.036587655544281006, "learning_rate": 3.4208835231142933e-06, "loss": 0.0326, "step": 167990 }, { "epoch": 0.11, "grad_norm": 0.038484178483486176, "learning_rate": 3.4187966326561933e-06, "loss": 0.0349, "step": 168000 }, { "epoch": 0.11005, "grad_norm": 0.040012579411268234, "learning_rate": 3.416710332220094e-06, "loss": 0.0325, "step": 168010 }, { "epoch": 0.1101, "grad_norm": 0.03485150635242462, "learning_rate": 3.4146246218630217e-06, "loss": 0.0325, "step": 168020 }, { "epoch": 0.11015, "grad_norm": 0.0435153990983963, "learning_rate": 3.4125395016420065e-06, "loss": 0.032, "step": 168030 }, { "epoch": 0.1102, "grad_norm": 0.032235775142908096, "learning_rate": 3.410454971614044e-06, "loss": 0.031, "step": 168040 }, { "epoch": 0.11025, "grad_norm": 0.03393120318651199, "learning_rate": 3.408371031836133e-06, "loss": 0.0325, "step": 168050 }, { "epoch": 0.1103, "grad_norm": 0.03405354917049408, "learning_rate": 3.4062876823652557e-06, "loss": 0.0321, "step": 168060 }, { "epoch": 0.11035, "grad_norm": 0.038526881486177444, "learning_rate": 3.4042049232583503e-06, "loss": 0.0319, "step": 168070 }, { "epoch": 0.1104, "grad_norm": 0.03656435385346413, "learning_rate": 3.402122754572376e-06, "loss": 0.0315, "step": 168080 }, { "epoch": 0.11045, "grad_norm": 0.04226868599653244, "learning_rate": 3.4000411763642436e-06, "loss": 0.0326, "step": 168090 }, { "epoch": 0.1105, "grad_norm": 0.03305479511618614, "learning_rate": 3.397960188690877e-06, "loss": 0.0316, "step": 168100 }, { "epoch": 0.11055, "grad_norm": 0.03760003671050072, "learning_rate": 3.3958797916091662e-06, "loss": 0.0319, "step": 168110 }, { "epoch": 0.1106, "grad_norm": 0.03340890631079674, "learning_rate": 3.3937999851759773e-06, "loss": 0.0333, "step": 168120 }, { "epoch": 0.11065, "grad_norm": 0.033915311098098755, "learning_rate": 3.3917207694481866e-06, "loss": 0.0321, "step": 168130 }, { "epoch": 0.1107, "grad_norm": 0.03152468055486679, "learning_rate": 3.3896421444826297e-06, "loss": 0.0321, "step": 168140 }, { "epoch": 0.11075, "grad_norm": 0.03944316506385803, "learning_rate": 3.3875641103361417e-06, "loss": 0.0325, "step": 168150 }, { "epoch": 0.1108, "grad_norm": 0.03829282894730568, "learning_rate": 3.385486667065524e-06, "loss": 0.0326, "step": 168160 }, { "epoch": 0.11085, "grad_norm": 0.04128112271428108, "learning_rate": 3.383409814727584e-06, "loss": 0.0326, "step": 168170 }, { "epoch": 0.1109, "grad_norm": 0.03964536637067795, "learning_rate": 3.3813335533790957e-06, "loss": 0.0329, "step": 168180 }, { "epoch": 0.11095, "grad_norm": 0.03742802515625954, "learning_rate": 3.3792578830768333e-06, "loss": 0.0347, "step": 168190 }, { "epoch": 0.111, "grad_norm": 0.04376475140452385, "learning_rate": 3.377182803877535e-06, "loss": 0.0351, "step": 168200 }, { "epoch": 0.11105, "grad_norm": 0.03592308238148689, "learning_rate": 3.3751083158379298e-06, "loss": 0.0347, "step": 168210 }, { "epoch": 0.1111, "grad_norm": 0.03608611598610878, "learning_rate": 3.373034419014748e-06, "loss": 0.0345, "step": 168220 }, { "epoch": 0.11115, "grad_norm": 0.0484461635351181, "learning_rate": 3.3709611134646766e-06, "loss": 0.0336, "step": 168230 }, { "epoch": 0.1112, "grad_norm": 0.044795531779527664, "learning_rate": 3.368888399244399e-06, "loss": 0.0338, "step": 168240 }, { "epoch": 0.11125, "grad_norm": 0.03537564352154732, "learning_rate": 3.366816276410592e-06, "loss": 0.0322, "step": 168250 }, { "epoch": 0.1113, "grad_norm": 0.03821335732936859, "learning_rate": 3.364744745019893e-06, "loss": 0.0332, "step": 168260 }, { "epoch": 0.11135, "grad_norm": 0.03966094180941582, "learning_rate": 3.36267380512896e-06, "loss": 0.0325, "step": 168270 }, { "epoch": 0.1114, "grad_norm": 0.036077141761779785, "learning_rate": 3.3606034567943813e-06, "loss": 0.0339, "step": 168280 }, { "epoch": 0.11145, "grad_norm": 0.042035218328237534, "learning_rate": 3.358533700072783e-06, "loss": 0.0325, "step": 168290 }, { "epoch": 0.1115, "grad_norm": 0.04042064771056175, "learning_rate": 3.3564645350207343e-06, "loss": 0.0327, "step": 168300 }, { "epoch": 0.11155, "grad_norm": 0.03909675404429436, "learning_rate": 3.354395961694823e-06, "loss": 0.0333, "step": 168310 }, { "epoch": 0.1116, "grad_norm": 0.03649524971842766, "learning_rate": 3.3523279801515926e-06, "loss": 0.0335, "step": 168320 }, { "epoch": 0.11165, "grad_norm": 0.035712581127882004, "learning_rate": 3.3502605904475763e-06, "loss": 0.0327, "step": 168330 }, { "epoch": 0.1117, "grad_norm": 0.055029258131980896, "learning_rate": 3.348193792639309e-06, "loss": 0.0334, "step": 168340 }, { "epoch": 0.11175, "grad_norm": 0.042117055505514145, "learning_rate": 3.3461275867832877e-06, "loss": 0.0319, "step": 168350 }, { "epoch": 0.1118, "grad_norm": 0.038168080151081085, "learning_rate": 3.3440619729360053e-06, "loss": 0.0313, "step": 168360 }, { "epoch": 0.11185, "grad_norm": 0.042006079107522964, "learning_rate": 3.341996951153925e-06, "loss": 0.0344, "step": 168370 }, { "epoch": 0.1119, "grad_norm": 0.04365299269556999, "learning_rate": 3.3399325214935133e-06, "loss": 0.0318, "step": 168380 }, { "epoch": 0.11195, "grad_norm": 0.03829650580883026, "learning_rate": 3.337868684011214e-06, "loss": 0.0333, "step": 168390 }, { "epoch": 0.112, "grad_norm": 0.05951496213674545, "learning_rate": 3.335805438763445e-06, "loss": 0.0333, "step": 168400 }, { "epoch": 0.11205, "grad_norm": 0.044234320521354675, "learning_rate": 3.3337427858066177e-06, "loss": 0.0332, "step": 168410 }, { "epoch": 0.1121, "grad_norm": 0.05862995237112045, "learning_rate": 3.33168072519712e-06, "loss": 0.0341, "step": 168420 }, { "epoch": 0.11215, "grad_norm": 0.05722839757800102, "learning_rate": 3.329619256991334e-06, "loss": 0.0338, "step": 168430 }, { "epoch": 0.1122, "grad_norm": 0.04409719631075859, "learning_rate": 3.3275583812456157e-06, "loss": 0.0338, "step": 168440 }, { "epoch": 0.11225, "grad_norm": 0.045837193727493286, "learning_rate": 3.3254980980163052e-06, "loss": 0.0338, "step": 168450 }, { "epoch": 0.1123, "grad_norm": 0.04249592497944832, "learning_rate": 3.3234384073597386e-06, "loss": 0.0333, "step": 168460 }, { "epoch": 0.11235, "grad_norm": 0.04318466782569885, "learning_rate": 3.3213793093322176e-06, "loss": 0.0352, "step": 168470 }, { "epoch": 0.1124, "grad_norm": 0.049135416746139526, "learning_rate": 3.319320803990053e-06, "loss": 0.0338, "step": 168480 }, { "epoch": 0.11245, "grad_norm": 0.03898349404335022, "learning_rate": 3.3172628913894998e-06, "loss": 0.0377, "step": 168490 }, { "epoch": 0.1125, "grad_norm": 0.0410807728767395, "learning_rate": 3.315205571586835e-06, "loss": 0.035, "step": 168500 }, { "epoch": 0.11255, "grad_norm": 0.04127427935600281, "learning_rate": 3.3131488446383086e-06, "loss": 0.0344, "step": 168510 }, { "epoch": 0.1126, "grad_norm": 0.03689667582511902, "learning_rate": 3.311092710600147e-06, "loss": 0.0335, "step": 168520 }, { "epoch": 0.11265, "grad_norm": 0.04671632871031761, "learning_rate": 3.3090371695285617e-06, "loss": 0.0351, "step": 168530 }, { "epoch": 0.1127, "grad_norm": 0.05275014415383339, "learning_rate": 3.306982221479743e-06, "loss": 0.0346, "step": 168540 }, { "epoch": 0.11275, "grad_norm": 0.037315089255571365, "learning_rate": 3.304927866509888e-06, "loss": 0.0332, "step": 168550 }, { "epoch": 0.1128, "grad_norm": 0.04311274737119675, "learning_rate": 3.302874104675155e-06, "loss": 0.0328, "step": 168560 }, { "epoch": 0.11285, "grad_norm": 0.03334786370396614, "learning_rate": 3.300820936031687e-06, "loss": 0.0333, "step": 168570 }, { "epoch": 0.1129, "grad_norm": 0.03692523390054703, "learning_rate": 3.298768360635629e-06, "loss": 0.033, "step": 168580 }, { "epoch": 0.11295, "grad_norm": 0.03456854075193405, "learning_rate": 3.2967163785430854e-06, "loss": 0.0323, "step": 168590 }, { "epoch": 0.113, "grad_norm": 0.03847216069698334, "learning_rate": 3.294664989810167e-06, "loss": 0.0323, "step": 168600 }, { "epoch": 0.11305, "grad_norm": 0.03631163015961647, "learning_rate": 3.2926141944929546e-06, "loss": 0.0317, "step": 168610 }, { "epoch": 0.1131, "grad_norm": 0.03581319749355316, "learning_rate": 3.290563992647508e-06, "loss": 0.031, "step": 168620 }, { "epoch": 0.11315, "grad_norm": 0.03114577941596508, "learning_rate": 3.288514384329894e-06, "loss": 0.0315, "step": 168630 }, { "epoch": 0.1132, "grad_norm": 0.04888668656349182, "learning_rate": 3.2864653695961366e-06, "loss": 0.0352, "step": 168640 }, { "epoch": 0.11325, "grad_norm": 0.04437430948019028, "learning_rate": 3.2844169485022612e-06, "loss": 0.0316, "step": 168650 }, { "epoch": 0.1133, "grad_norm": 0.045641057193279266, "learning_rate": 3.282369121104262e-06, "loss": 0.0328, "step": 168660 }, { "epoch": 0.11335, "grad_norm": 0.03782769665122032, "learning_rate": 3.2803218874581377e-06, "loss": 0.0308, "step": 168670 }, { "epoch": 0.1134, "grad_norm": 0.046148475259542465, "learning_rate": 3.278275247619847e-06, "loss": 0.0335, "step": 168680 }, { "epoch": 0.11345, "grad_norm": 0.03975826874375343, "learning_rate": 3.2762292016453587e-06, "loss": 0.0339, "step": 168690 }, { "epoch": 0.1135, "grad_norm": 0.04578711465001106, "learning_rate": 3.2741837495906007e-06, "loss": 0.032, "step": 168700 }, { "epoch": 0.11355, "grad_norm": 0.043910104781389236, "learning_rate": 3.272138891511492e-06, "loss": 0.0325, "step": 168710 }, { "epoch": 0.1136, "grad_norm": 0.03680054470896721, "learning_rate": 3.2700946274639487e-06, "loss": 0.0337, "step": 168720 }, { "epoch": 0.11365, "grad_norm": 0.040630705654621124, "learning_rate": 3.268050957503854e-06, "loss": 0.0325, "step": 168730 }, { "epoch": 0.1137, "grad_norm": 0.05036550760269165, "learning_rate": 3.2660078816870804e-06, "loss": 0.0338, "step": 168740 }, { "epoch": 0.11375, "grad_norm": 0.045405495911836624, "learning_rate": 3.2639654000694854e-06, "loss": 0.0322, "step": 168750 }, { "epoch": 0.1138, "grad_norm": 0.04269585758447647, "learning_rate": 3.2619235127069077e-06, "loss": 0.0331, "step": 168760 }, { "epoch": 0.11385, "grad_norm": 0.04223855957388878, "learning_rate": 3.2598822196551833e-06, "loss": 0.0347, "step": 168770 }, { "epoch": 0.1139, "grad_norm": 0.04172206297516823, "learning_rate": 3.2578415209701037e-06, "loss": 0.0336, "step": 168780 }, { "epoch": 0.11395, "grad_norm": 0.04562797024846077, "learning_rate": 3.255801416707474e-06, "loss": 0.0319, "step": 168790 }, { "epoch": 0.114, "grad_norm": 0.040666863322257996, "learning_rate": 3.2537619069230586e-06, "loss": 0.0317, "step": 168800 }, { "epoch": 0.11405, "grad_norm": 0.03749188035726547, "learning_rate": 3.2517229916726287e-06, "loss": 0.0325, "step": 168810 }, { "epoch": 0.1141, "grad_norm": 0.03828401491045952, "learning_rate": 3.249684671011921e-06, "loss": 0.033, "step": 168820 }, { "epoch": 0.11415, "grad_norm": 0.03732697665691376, "learning_rate": 3.247646944996657e-06, "loss": 0.0319, "step": 168830 }, { "epoch": 0.1142, "grad_norm": 0.04065719619393349, "learning_rate": 3.2456098136825623e-06, "loss": 0.0334, "step": 168840 }, { "epoch": 0.11425, "grad_norm": 0.043520860373973846, "learning_rate": 3.2435732771253195e-06, "loss": 0.0326, "step": 168850 }, { "epoch": 0.1143, "grad_norm": 0.056450847536325455, "learning_rate": 3.2415373353806124e-06, "loss": 0.0325, "step": 168860 }, { "epoch": 0.11435, "grad_norm": 0.03849427029490471, "learning_rate": 3.2395019885040927e-06, "loss": 0.0319, "step": 168870 }, { "epoch": 0.1144, "grad_norm": 0.03648701682686806, "learning_rate": 3.2374672365514143e-06, "loss": 0.0315, "step": 168880 }, { "epoch": 0.11445, "grad_norm": 0.04408055916428566, "learning_rate": 3.2354330795782095e-06, "loss": 0.0325, "step": 168890 }, { "epoch": 0.1145, "grad_norm": 0.03254362940788269, "learning_rate": 3.23339951764009e-06, "loss": 0.032, "step": 168900 }, { "epoch": 0.11455, "grad_norm": 0.04348884895443916, "learning_rate": 3.23136655079265e-06, "loss": 0.0316, "step": 168910 }, { "epoch": 0.1146, "grad_norm": 0.03716294839978218, "learning_rate": 3.2293341790914645e-06, "loss": 0.0322, "step": 168920 }, { "epoch": 0.11465, "grad_norm": 0.034980643540620804, "learning_rate": 3.2273024025921082e-06, "loss": 0.033, "step": 168930 }, { "epoch": 0.1147, "grad_norm": 0.03570462390780449, "learning_rate": 3.225271221350126e-06, "loss": 0.032, "step": 168940 }, { "epoch": 0.11475, "grad_norm": 0.03555014356970787, "learning_rate": 3.223240635421043e-06, "loss": 0.035, "step": 168950 }, { "epoch": 0.1148, "grad_norm": 0.0335003100335598, "learning_rate": 3.221210644860384e-06, "loss": 0.0329, "step": 168960 }, { "epoch": 0.11485, "grad_norm": 0.035364456474781036, "learning_rate": 3.2191812497236407e-06, "loss": 0.0344, "step": 168970 }, { "epoch": 0.1149, "grad_norm": 0.04259192943572998, "learning_rate": 3.2171524500663074e-06, "loss": 0.0341, "step": 168980 }, { "epoch": 0.11495, "grad_norm": 0.041103146970272064, "learning_rate": 3.2151242459438345e-06, "loss": 0.0335, "step": 168990 }, { "epoch": 0.115, "grad_norm": 0.038075946271419525, "learning_rate": 3.2130966374116806e-06, "loss": 0.033, "step": 169000 }, { "epoch": 0.11505, "grad_norm": 0.03837242349982262, "learning_rate": 3.211069624525284e-06, "loss": 0.0329, "step": 169010 }, { "epoch": 0.1151, "grad_norm": 0.03916725143790245, "learning_rate": 3.209043207340057e-06, "loss": 0.0324, "step": 169020 }, { "epoch": 0.11515, "grad_norm": 0.0411614254117012, "learning_rate": 3.207017385911404e-06, "loss": 0.0322, "step": 169030 }, { "epoch": 0.1152, "grad_norm": 0.03601289540529251, "learning_rate": 3.2049921602947007e-06, "loss": 0.0313, "step": 169040 }, { "epoch": 0.11525, "grad_norm": 0.04232946038246155, "learning_rate": 3.202967530545331e-06, "loss": 0.0347, "step": 169050 }, { "epoch": 0.1153, "grad_norm": 0.041129209101200104, "learning_rate": 3.2009434967186418e-06, "loss": 0.0323, "step": 169060 }, { "epoch": 0.11535, "grad_norm": 0.04336433857679367, "learning_rate": 3.1989200588699584e-06, "loss": 0.0331, "step": 169070 }, { "epoch": 0.1154, "grad_norm": 0.034335821866989136, "learning_rate": 3.1968972170546203e-06, "loss": 0.0324, "step": 169080 }, { "epoch": 0.11545, "grad_norm": 0.03699544072151184, "learning_rate": 3.194874971327913e-06, "loss": 0.0323, "step": 169090 }, { "epoch": 0.1155, "grad_norm": 0.03071075677871704, "learning_rate": 3.1928533217451374e-06, "loss": 0.0317, "step": 169100 }, { "epoch": 0.11555, "grad_norm": 0.038801129907369614, "learning_rate": 3.19083226836156e-06, "loss": 0.0307, "step": 169110 }, { "epoch": 0.1156, "grad_norm": 0.036969684064388275, "learning_rate": 3.188811811232434e-06, "loss": 0.0324, "step": 169120 }, { "epoch": 0.11565, "grad_norm": 0.04446541890501976, "learning_rate": 3.1867919504129954e-06, "loss": 0.0315, "step": 169130 }, { "epoch": 0.1157, "grad_norm": 0.03564651310443878, "learning_rate": 3.1847726859584757e-06, "loss": 0.0322, "step": 169140 }, { "epoch": 0.11575, "grad_norm": 0.036352917551994324, "learning_rate": 3.182754017924075e-06, "loss": 0.0334, "step": 169150 }, { "epoch": 0.1158, "grad_norm": 0.048761919140815735, "learning_rate": 3.180735946364977e-06, "loss": 0.0319, "step": 169160 }, { "epoch": 0.11585, "grad_norm": 0.048298221081495285, "learning_rate": 3.1787184713363643e-06, "loss": 0.0323, "step": 169170 }, { "epoch": 0.1159, "grad_norm": 0.04164647310972214, "learning_rate": 3.1767015928933884e-06, "loss": 0.0309, "step": 169180 }, { "epoch": 0.11595, "grad_norm": 0.039654441177845, "learning_rate": 3.1746853110911967e-06, "loss": 0.0317, "step": 169190 }, { "epoch": 0.116, "grad_norm": 0.03504246473312378, "learning_rate": 3.1726696259849087e-06, "loss": 0.0323, "step": 169200 }, { "epoch": 0.11605, "grad_norm": 0.03481772914528847, "learning_rate": 3.1706545376296277e-06, "loss": 0.0321, "step": 169210 }, { "epoch": 0.1161, "grad_norm": 0.039279524236917496, "learning_rate": 3.1686400460804565e-06, "loss": 0.0324, "step": 169220 }, { "epoch": 0.11615, "grad_norm": 0.041676539927721024, "learning_rate": 3.1666261513924655e-06, "loss": 0.0331, "step": 169230 }, { "epoch": 0.1162, "grad_norm": 0.04180776700377464, "learning_rate": 3.164612853620713e-06, "loss": 0.0316, "step": 169240 }, { "epoch": 0.11625, "grad_norm": 0.03256388381123543, "learning_rate": 3.162600152820236e-06, "loss": 0.0325, "step": 169250 }, { "epoch": 0.1163, "grad_norm": 0.03377092257142067, "learning_rate": 3.1605880490460676e-06, "loss": 0.032, "step": 169260 }, { "epoch": 0.11635, "grad_norm": 0.035713210701942444, "learning_rate": 3.1585765423532284e-06, "loss": 0.0317, "step": 169270 }, { "epoch": 0.1164, "grad_norm": 0.03613951802253723, "learning_rate": 3.1565656327966877e-06, "loss": 0.0332, "step": 169280 }, { "epoch": 0.11645, "grad_norm": 0.034064631909132004, "learning_rate": 3.1545553204314434e-06, "loss": 0.0332, "step": 169290 }, { "epoch": 0.1165, "grad_norm": 0.04030144587159157, "learning_rate": 3.152545605312443e-06, "loss": 0.0335, "step": 169300 }, { "epoch": 0.11655, "grad_norm": 0.03883133456110954, "learning_rate": 3.1505364874946457e-06, "loss": 0.0341, "step": 169310 }, { "epoch": 0.1166, "grad_norm": 0.042704951018095016, "learning_rate": 3.1485279670329715e-06, "loss": 0.0342, "step": 169320 }, { "epoch": 0.11665, "grad_norm": 0.038515184074640274, "learning_rate": 3.146520043982329e-06, "loss": 0.0345, "step": 169330 }, { "epoch": 0.1167, "grad_norm": 0.03892954811453819, "learning_rate": 3.1445127183976213e-06, "loss": 0.0348, "step": 169340 }, { "epoch": 0.11675, "grad_norm": 0.03511323407292366, "learning_rate": 3.142505990333727e-06, "loss": 0.0324, "step": 169350 }, { "epoch": 0.1168, "grad_norm": 0.04509086534380913, "learning_rate": 3.140499859845508e-06, "loss": 0.0341, "step": 169360 }, { "epoch": 0.11685, "grad_norm": 0.037586696445941925, "learning_rate": 3.1384943269878035e-06, "loss": 0.0329, "step": 169370 }, { "epoch": 0.1169, "grad_norm": 0.033918607980012894, "learning_rate": 3.1364893918154586e-06, "loss": 0.0319, "step": 169380 }, { "epoch": 0.11695, "grad_norm": 0.036432284861803055, "learning_rate": 3.134485054383271e-06, "loss": 0.0328, "step": 169390 }, { "epoch": 0.117, "grad_norm": 0.035951100289821625, "learning_rate": 3.132481314746055e-06, "loss": 0.0327, "step": 169400 }, { "epoch": 0.11705, "grad_norm": 0.04071575775742531, "learning_rate": 3.1304781729585867e-06, "loss": 0.0319, "step": 169410 }, { "epoch": 0.1171, "grad_norm": 0.03830753266811371, "learning_rate": 3.1284756290756225e-06, "loss": 0.0328, "step": 169420 }, { "epoch": 0.11715, "grad_norm": 0.03831455111503601, "learning_rate": 3.1264736831519204e-06, "loss": 0.0326, "step": 169430 }, { "epoch": 0.1172, "grad_norm": 0.03554108738899231, "learning_rate": 3.124472335242215e-06, "loss": 0.0333, "step": 169440 }, { "epoch": 0.11725, "grad_norm": 0.047145262360572815, "learning_rate": 3.1224715854012098e-06, "loss": 0.0326, "step": 169450 }, { "epoch": 0.1173, "grad_norm": 0.05718984082341194, "learning_rate": 3.1204714336836166e-06, "loss": 0.0325, "step": 169460 }, { "epoch": 0.11735, "grad_norm": 0.06556253880262375, "learning_rate": 3.118471880144111e-06, "loss": 0.0337, "step": 169470 }, { "epoch": 0.1174, "grad_norm": 0.04458180442452431, "learning_rate": 3.116472924837374e-06, "loss": 0.0321, "step": 169480 }, { "epoch": 0.11745, "grad_norm": 0.04007299244403839, "learning_rate": 3.114474567818035e-06, "loss": 0.0321, "step": 169490 }, { "epoch": 0.1175, "grad_norm": 0.03834760934114456, "learning_rate": 3.1124768091407463e-06, "loss": 0.031, "step": 169500 }, { "epoch": 0.11755, "grad_norm": 0.05196467414498329, "learning_rate": 3.1104796488601094e-06, "loss": 0.0336, "step": 169510 }, { "epoch": 0.1176, "grad_norm": 0.04448392242193222, "learning_rate": 3.1084830870307445e-06, "loss": 0.0328, "step": 169520 }, { "epoch": 0.11765, "grad_norm": 0.0343671552836895, "learning_rate": 3.1064871237072274e-06, "loss": 0.0338, "step": 169530 }, { "epoch": 0.1177, "grad_norm": 0.044008392840623856, "learning_rate": 3.1044917589441195e-06, "loss": 0.0322, "step": 169540 }, { "epoch": 0.11775, "grad_norm": 0.038599561899900436, "learning_rate": 3.102496992795989e-06, "loss": 0.0335, "step": 169550 }, { "epoch": 0.1178, "grad_norm": 0.04106726497411728, "learning_rate": 3.100502825317364e-06, "loss": 0.0321, "step": 169560 }, { "epoch": 0.11785, "grad_norm": 0.040985990315675735, "learning_rate": 3.098509256562765e-06, "loss": 0.0331, "step": 169570 }, { "epoch": 0.1179, "grad_norm": 0.0400564968585968, "learning_rate": 3.096516286586687e-06, "loss": 0.0329, "step": 169580 }, { "epoch": 0.11795, "grad_norm": 0.03802260756492615, "learning_rate": 3.0945239154436282e-06, "loss": 0.0326, "step": 169590 }, { "epoch": 0.118, "grad_norm": 0.04563620686531067, "learning_rate": 3.0925321431880594e-06, "loss": 0.0328, "step": 169600 }, { "epoch": 0.11805, "grad_norm": 0.037531446665525436, "learning_rate": 3.090540969874431e-06, "loss": 0.0317, "step": 169610 }, { "epoch": 0.1181, "grad_norm": 0.045137230306863785, "learning_rate": 3.0885503955571826e-06, "loss": 0.0317, "step": 169620 }, { "epoch": 0.11815, "grad_norm": 0.03471638262271881, "learning_rate": 3.0865604202907295e-06, "loss": 0.0319, "step": 169630 }, { "epoch": 0.1182, "grad_norm": 0.03710561618208885, "learning_rate": 3.084571044129486e-06, "loss": 0.0309, "step": 169640 }, { "epoch": 0.11825, "grad_norm": 0.0388769693672657, "learning_rate": 3.08258226712784e-06, "loss": 0.0331, "step": 169650 }, { "epoch": 0.1183, "grad_norm": 0.04335852339863777, "learning_rate": 3.0805940893401526e-06, "loss": 0.0324, "step": 169660 }, { "epoch": 0.11835, "grad_norm": 0.04328613728284836, "learning_rate": 3.0786065108207946e-06, "loss": 0.0338, "step": 169670 }, { "epoch": 0.1184, "grad_norm": 0.05562340095639229, "learning_rate": 3.076619531624092e-06, "loss": 0.0339, "step": 169680 }, { "epoch": 0.11845, "grad_norm": 0.04683903604745865, "learning_rate": 3.0746331518043876e-06, "loss": 0.0317, "step": 169690 }, { "epoch": 0.1185, "grad_norm": 0.047938767820596695, "learning_rate": 3.072647371415965e-06, "loss": 0.0342, "step": 169700 }, { "epoch": 0.11855, "grad_norm": 0.047772448509931564, "learning_rate": 3.070662190513124e-06, "loss": 0.0323, "step": 169710 }, { "epoch": 0.1186, "grad_norm": 0.05084957182407379, "learning_rate": 3.0686776091501475e-06, "loss": 0.031, "step": 169720 }, { "epoch": 0.11865, "grad_norm": 0.11979934573173523, "learning_rate": 3.066693627381284e-06, "loss": 0.0334, "step": 169730 }, { "epoch": 0.1187, "grad_norm": 0.06551992148160934, "learning_rate": 3.0647102452607797e-06, "loss": 0.0311, "step": 169740 }, { "epoch": 0.11875, "grad_norm": 0.05387571081519127, "learning_rate": 3.062727462842849e-06, "loss": 0.0333, "step": 169750 }, { "epoch": 0.1188, "grad_norm": 0.05842439830303192, "learning_rate": 3.060745280181715e-06, "loss": 0.0332, "step": 169760 }, { "epoch": 0.11885, "grad_norm": 0.038283172994852066, "learning_rate": 3.05876369733156e-06, "loss": 0.0325, "step": 169770 }, { "epoch": 0.1189, "grad_norm": 0.040976520627737045, "learning_rate": 3.0567827143465596e-06, "loss": 0.0329, "step": 169780 }, { "epoch": 0.11895, "grad_norm": 0.03876696154475212, "learning_rate": 3.0548023312808817e-06, "loss": 0.0316, "step": 169790 }, { "epoch": 0.119, "grad_norm": 0.03440241515636444, "learning_rate": 3.052822548188658e-06, "loss": 0.0326, "step": 169800 }, { "epoch": 0.11905, "grad_norm": 0.037959929555654526, "learning_rate": 3.050843365124026e-06, "loss": 0.0329, "step": 169810 }, { "epoch": 0.1191, "grad_norm": 0.03727288544178009, "learning_rate": 3.048864782141089e-06, "loss": 0.0317, "step": 169820 }, { "epoch": 0.11915, "grad_norm": 0.036096200346946716, "learning_rate": 3.0468867992939383e-06, "loss": 0.0328, "step": 169830 }, { "epoch": 0.1192, "grad_norm": 0.03416402265429497, "learning_rate": 3.0449094166366597e-06, "loss": 0.0337, "step": 169840 }, { "epoch": 0.11925, "grad_norm": 0.036379892379045486, "learning_rate": 3.0429326342233085e-06, "loss": 0.033, "step": 169850 }, { "epoch": 0.1193, "grad_norm": 0.037294864654541016, "learning_rate": 3.040956452107932e-06, "loss": 0.0336, "step": 169860 }, { "epoch": 0.11935, "grad_norm": 0.0385604128241539, "learning_rate": 3.0389808703445517e-06, "loss": 0.0363, "step": 169870 }, { "epoch": 0.1194, "grad_norm": 0.03919385373592377, "learning_rate": 3.0370058889871878e-06, "loss": 0.0331, "step": 169880 }, { "epoch": 0.11945, "grad_norm": 0.03545527160167694, "learning_rate": 3.035031508089828e-06, "loss": 0.0339, "step": 169890 }, { "epoch": 0.1195, "grad_norm": 0.03262300789356232, "learning_rate": 3.0330577277064563e-06, "loss": 0.0337, "step": 169900 }, { "epoch": 0.11955, "grad_norm": 0.035723332315683365, "learning_rate": 3.031084547891039e-06, "loss": 0.0334, "step": 169910 }, { "epoch": 0.1196, "grad_norm": 0.03940999507904053, "learning_rate": 3.0291119686975093e-06, "loss": 0.0344, "step": 169920 }, { "epoch": 0.11965, "grad_norm": 0.038978319615125656, "learning_rate": 3.027139990179809e-06, "loss": 0.0338, "step": 169930 }, { "epoch": 0.1197, "grad_norm": 0.03864939138293266, "learning_rate": 3.025168612391846e-06, "loss": 0.0338, "step": 169940 }, { "epoch": 0.11975, "grad_norm": 0.03584860637784004, "learning_rate": 3.023197835387517e-06, "loss": 0.0328, "step": 169950 }, { "epoch": 0.1198, "grad_norm": 0.04506705328822136, "learning_rate": 3.0212276592207017e-06, "loss": 0.0342, "step": 169960 }, { "epoch": 0.11985, "grad_norm": 0.03933991119265556, "learning_rate": 3.019258083945262e-06, "loss": 0.0339, "step": 169970 }, { "epoch": 0.1199, "grad_norm": 0.04669029638171196, "learning_rate": 3.0172891096150595e-06, "loss": 0.0354, "step": 169980 }, { "epoch": 0.11995, "grad_norm": 0.03818805143237114, "learning_rate": 3.0153207362839026e-06, "loss": 0.0338, "step": 169990 }, { "epoch": 0.12, "grad_norm": 0.03293650597333908, "learning_rate": 3.013352964005625e-06, "loss": 0.0329, "step": 170000 }, { "epoch": 0.12005, "grad_norm": 0.03547142818570137, "learning_rate": 3.011385792834012e-06, "loss": 0.0339, "step": 170010 }, { "epoch": 0.1201, "grad_norm": 0.0389530323445797, "learning_rate": 3.009419222822854e-06, "loss": 0.0333, "step": 170020 }, { "epoch": 0.12015, "grad_norm": 0.04693255200982094, "learning_rate": 3.0074532540259133e-06, "loss": 0.0333, "step": 170030 }, { "epoch": 0.1202, "grad_norm": 0.0418180488049984, "learning_rate": 3.0054878864969353e-06, "loss": 0.0343, "step": 170040 }, { "epoch": 0.12025, "grad_norm": 0.03619196265935898, "learning_rate": 3.0035231202896587e-06, "loss": 0.0339, "step": 170050 }, { "epoch": 0.1203, "grad_norm": 0.03492094203829765, "learning_rate": 3.0015589554577977e-06, "loss": 0.0331, "step": 170060 }, { "epoch": 0.12035, "grad_norm": 0.035028375685214996, "learning_rate": 2.999595392055049e-06, "loss": 0.0324, "step": 170070 }, { "epoch": 0.1204, "grad_norm": 0.03487279638648033, "learning_rate": 2.9976324301350916e-06, "loss": 0.0329, "step": 170080 }, { "epoch": 0.12045, "grad_norm": 0.033144522458314896, "learning_rate": 2.9956700697515996e-06, "loss": 0.0316, "step": 170090 }, { "epoch": 0.1205, "grad_norm": 0.03214790299534798, "learning_rate": 2.9937083109582265e-06, "loss": 0.0316, "step": 170100 }, { "epoch": 0.12055, "grad_norm": 0.029983768239617348, "learning_rate": 2.9917471538085996e-06, "loss": 0.0321, "step": 170110 }, { "epoch": 0.1206, "grad_norm": 0.03648291155695915, "learning_rate": 2.9897865983563366e-06, "loss": 0.0321, "step": 170120 }, { "epoch": 0.12065, "grad_norm": 0.03465743735432625, "learning_rate": 2.987826644655034e-06, "loss": 0.0329, "step": 170130 }, { "epoch": 0.1207, "grad_norm": 0.039328765124082565, "learning_rate": 2.9858672927582876e-06, "loss": 0.0324, "step": 170140 }, { "epoch": 0.12075, "grad_norm": 0.039982233196496964, "learning_rate": 2.98390854271966e-06, "loss": 0.0349, "step": 170150 }, { "epoch": 0.1208, "grad_norm": 0.04412681609392166, "learning_rate": 2.9819503945926946e-06, "loss": 0.0323, "step": 170160 }, { "epoch": 0.12085, "grad_norm": 0.0323239229619503, "learning_rate": 2.9799928484309405e-06, "loss": 0.0316, "step": 170170 }, { "epoch": 0.1209, "grad_norm": 0.03849190101027489, "learning_rate": 2.9780359042879043e-06, "loss": 0.0327, "step": 170180 }, { "epoch": 0.12095, "grad_norm": 0.03475498780608177, "learning_rate": 2.9760795622171017e-06, "loss": 0.0334, "step": 170190 }, { "epoch": 0.121, "grad_norm": 0.0383419468998909, "learning_rate": 2.974123822272001e-06, "loss": 0.034, "step": 170200 }, { "epoch": 0.12105, "grad_norm": 0.03523703292012215, "learning_rate": 2.9721686845060797e-06, "loss": 0.032, "step": 170210 }, { "epoch": 0.1211, "grad_norm": 0.03748098015785217, "learning_rate": 2.970214148972797e-06, "loss": 0.0325, "step": 170220 }, { "epoch": 0.12115, "grad_norm": 0.038272153586149216, "learning_rate": 2.968260215725582e-06, "loss": 0.034, "step": 170230 }, { "epoch": 0.1212, "grad_norm": 0.03523880988359451, "learning_rate": 2.966306884817857e-06, "loss": 0.033, "step": 170240 }, { "epoch": 0.12125, "grad_norm": 0.045698393136262894, "learning_rate": 2.9643541563030177e-06, "loss": 0.0343, "step": 170250 }, { "epoch": 0.1213, "grad_norm": 0.03664017468690872, "learning_rate": 2.9624020302344623e-06, "loss": 0.0329, "step": 170260 }, { "epoch": 0.12135, "grad_norm": 0.03602268174290657, "learning_rate": 2.9604505066655575e-06, "loss": 0.0337, "step": 170270 }, { "epoch": 0.1214, "grad_norm": 0.03589862212538719, "learning_rate": 2.9584995856496516e-06, "loss": 0.0332, "step": 170280 }, { "epoch": 0.12145, "grad_norm": 0.03870958834886551, "learning_rate": 2.9565492672400886e-06, "loss": 0.0327, "step": 170290 }, { "epoch": 0.1215, "grad_norm": 0.039397165179252625, "learning_rate": 2.9545995514901837e-06, "loss": 0.0348, "step": 170300 }, { "epoch": 0.12155, "grad_norm": 0.036325667053461075, "learning_rate": 2.9526504384532525e-06, "loss": 0.0335, "step": 170310 }, { "epoch": 0.1216, "grad_norm": 0.03910687193274498, "learning_rate": 2.9507019281825726e-06, "loss": 0.0338, "step": 170320 }, { "epoch": 0.12165, "grad_norm": 0.03571851924061775, "learning_rate": 2.9487540207314203e-06, "loss": 0.0341, "step": 170330 }, { "epoch": 0.1217, "grad_norm": 0.033882223069667816, "learning_rate": 2.9468067161530416e-06, "loss": 0.0337, "step": 170340 }, { "epoch": 0.12175, "grad_norm": 0.029706673696637154, "learning_rate": 2.9448600145006888e-06, "loss": 0.0343, "step": 170350 }, { "epoch": 0.1218, "grad_norm": 0.03674902021884918, "learning_rate": 2.9429139158275774e-06, "loss": 0.0339, "step": 170360 }, { "epoch": 0.12185, "grad_norm": 0.03714902698993683, "learning_rate": 2.940968420186907e-06, "loss": 0.0334, "step": 170370 }, { "epoch": 0.1219, "grad_norm": 0.038870666176080704, "learning_rate": 2.939023527631879e-06, "loss": 0.0346, "step": 170380 }, { "epoch": 0.12195, "grad_norm": 0.03690927103161812, "learning_rate": 2.937079238215654e-06, "loss": 0.0323, "step": 170390 }, { "epoch": 0.122, "grad_norm": 0.03617848455905914, "learning_rate": 2.9351355519914e-06, "loss": 0.0333, "step": 170400 }, { "epoch": 0.12205, "grad_norm": 0.03987789526581764, "learning_rate": 2.9331924690122496e-06, "loss": 0.0331, "step": 170410 }, { "epoch": 0.1221, "grad_norm": 0.03884678706526756, "learning_rate": 2.9312499893313224e-06, "loss": 0.0333, "step": 170420 }, { "epoch": 0.12215, "grad_norm": 0.03599599748849869, "learning_rate": 2.929308113001733e-06, "loss": 0.0343, "step": 170430 }, { "epoch": 0.1222, "grad_norm": 0.034484054893255234, "learning_rate": 2.92736684007657e-06, "loss": 0.0327, "step": 170440 }, { "epoch": 0.12225, "grad_norm": 0.04852869361639023, "learning_rate": 2.925426170608905e-06, "loss": 0.0321, "step": 170450 }, { "epoch": 0.1223, "grad_norm": 0.037036336958408356, "learning_rate": 2.92348610465179e-06, "loss": 0.0341, "step": 170460 }, { "epoch": 0.12235, "grad_norm": 0.04164846986532211, "learning_rate": 2.9215466422582717e-06, "loss": 0.0325, "step": 170470 }, { "epoch": 0.1224, "grad_norm": 0.038174912333488464, "learning_rate": 2.9196077834813844e-06, "loss": 0.0326, "step": 170480 }, { "epoch": 0.12245, "grad_norm": 0.03971322998404503, "learning_rate": 2.9176695283741146e-06, "loss": 0.0333, "step": 170490 }, { "epoch": 0.1225, "grad_norm": 0.044817980378866196, "learning_rate": 2.915731876989469e-06, "loss": 0.0341, "step": 170500 }, { "epoch": 0.12255, "grad_norm": 0.049287863075733185, "learning_rate": 2.913794829380412e-06, "loss": 0.0328, "step": 170510 }, { "epoch": 0.1226, "grad_norm": 0.046644117683172226, "learning_rate": 2.911858385599911e-06, "loss": 0.0333, "step": 170520 }, { "epoch": 0.12265, "grad_norm": 0.03821752965450287, "learning_rate": 2.9099225457009047e-06, "loss": 0.0324, "step": 170530 }, { "epoch": 0.1227, "grad_norm": 0.04043368250131607, "learning_rate": 2.9079873097363126e-06, "loss": 0.0326, "step": 170540 }, { "epoch": 0.12275, "grad_norm": 0.0473080575466156, "learning_rate": 2.9060526777590522e-06, "loss": 0.0323, "step": 170550 }, { "epoch": 0.1228, "grad_norm": 0.03708431124687195, "learning_rate": 2.9041186498220104e-06, "loss": 0.0325, "step": 170560 }, { "epoch": 0.12285, "grad_norm": 0.04041951522231102, "learning_rate": 2.9021852259780656e-06, "loss": 0.0368, "step": 170570 }, { "epoch": 0.1229, "grad_norm": 0.03459900617599487, "learning_rate": 2.9002524062800684e-06, "loss": 0.0328, "step": 170580 }, { "epoch": 0.12295, "grad_norm": 0.0452764630317688, "learning_rate": 2.8983201907808734e-06, "loss": 0.0356, "step": 170590 }, { "epoch": 0.123, "grad_norm": 0.03793106600642204, "learning_rate": 2.8963885795332963e-06, "loss": 0.0335, "step": 170600 }, { "epoch": 0.12305, "grad_norm": 0.0353790745139122, "learning_rate": 2.8944575725901565e-06, "loss": 0.0326, "step": 170610 }, { "epoch": 0.1231, "grad_norm": 0.04083758220076561, "learning_rate": 2.892527170004242e-06, "loss": 0.0348, "step": 170620 }, { "epoch": 0.12315, "grad_norm": 0.04061799496412277, "learning_rate": 2.8905973718283243e-06, "loss": 0.0329, "step": 170630 }, { "epoch": 0.1232, "grad_norm": 0.041940487921237946, "learning_rate": 2.8886681781151727e-06, "loss": 0.0324, "step": 170640 }, { "epoch": 0.12325, "grad_norm": 0.036815203726291656, "learning_rate": 2.886739588917528e-06, "loss": 0.0336, "step": 170650 }, { "epoch": 0.1233, "grad_norm": 0.03948887810111046, "learning_rate": 2.8848116042881094e-06, "loss": 0.0338, "step": 170660 }, { "epoch": 0.12335, "grad_norm": 0.033183470368385315, "learning_rate": 2.882884224279636e-06, "loss": 0.0342, "step": 170670 }, { "epoch": 0.1234, "grad_norm": 0.030717089772224426, "learning_rate": 2.880957448944796e-06, "loss": 0.0327, "step": 170680 }, { "epoch": 0.12345, "grad_norm": 0.03915680572390556, "learning_rate": 2.8790312783362806e-06, "loss": 0.0334, "step": 170690 }, { "epoch": 0.1235, "grad_norm": 0.03605576604604721, "learning_rate": 2.877105712506728e-06, "loss": 0.0336, "step": 170700 }, { "epoch": 0.12355, "grad_norm": 0.039679888635873795, "learning_rate": 2.8751807515087993e-06, "loss": 0.0322, "step": 170710 }, { "epoch": 0.1236, "grad_norm": 0.033951517194509506, "learning_rate": 2.873256395395113e-06, "loss": 0.0329, "step": 170720 }, { "epoch": 0.12365, "grad_norm": 0.04974079132080078, "learning_rate": 2.8713326442182886e-06, "loss": 0.0329, "step": 170730 }, { "epoch": 0.1237, "grad_norm": 0.04222653806209564, "learning_rate": 2.869409498030917e-06, "loss": 0.0333, "step": 170740 }, { "epoch": 0.12375, "grad_norm": 0.036745402961969376, "learning_rate": 2.8674869568855676e-06, "loss": 0.0324, "step": 170750 }, { "epoch": 0.1238, "grad_norm": 0.030236944556236267, "learning_rate": 2.8655650208348178e-06, "loss": 0.0321, "step": 170760 }, { "epoch": 0.12385, "grad_norm": 0.033882755786180496, "learning_rate": 2.863643689931206e-06, "loss": 0.0334, "step": 170770 }, { "epoch": 0.1239, "grad_norm": 0.0467345230281353, "learning_rate": 2.8617229642272563e-06, "loss": 0.0339, "step": 170780 }, { "epoch": 0.12395, "grad_norm": 0.03583429381251335, "learning_rate": 2.8598028437754802e-06, "loss": 0.0344, "step": 170790 }, { "epoch": 0.124, "grad_norm": 0.033289577811956406, "learning_rate": 2.857883328628377e-06, "loss": 0.0325, "step": 170800 }, { "epoch": 0.12405, "grad_norm": 0.03858331963419914, "learning_rate": 2.8559644188384306e-06, "loss": 0.0328, "step": 170810 }, { "epoch": 0.1241, "grad_norm": 0.03308607637882233, "learning_rate": 2.8540461144580978e-06, "loss": 0.0331, "step": 170820 }, { "epoch": 0.12415, "grad_norm": 0.04164617508649826, "learning_rate": 2.852128415539823e-06, "loss": 0.0345, "step": 170830 }, { "epoch": 0.1242, "grad_norm": 0.0402013398706913, "learning_rate": 2.8502113221360314e-06, "loss": 0.0336, "step": 170840 }, { "epoch": 0.12425, "grad_norm": 0.036746375262737274, "learning_rate": 2.848294834299148e-06, "loss": 0.0352, "step": 170850 }, { "epoch": 0.1243, "grad_norm": 0.034505799412727356, "learning_rate": 2.8463789520815602e-06, "loss": 0.0345, "step": 170860 }, { "epoch": 0.12435, "grad_norm": 0.036910947412252426, "learning_rate": 2.8444636755356442e-06, "loss": 0.0349, "step": 170870 }, { "epoch": 0.1244, "grad_norm": 0.05080193281173706, "learning_rate": 2.8425490047137737e-06, "loss": 0.0341, "step": 170880 }, { "epoch": 0.12445, "grad_norm": 0.041805725544691086, "learning_rate": 2.840634939668285e-06, "loss": 0.0332, "step": 170890 }, { "epoch": 0.1245, "grad_norm": 0.0434776172041893, "learning_rate": 2.83872148045152e-06, "loss": 0.0347, "step": 170900 }, { "epoch": 0.12455, "grad_norm": 0.035198379307985306, "learning_rate": 2.836808627115775e-06, "loss": 0.0343, "step": 170910 }, { "epoch": 0.1246, "grad_norm": 0.03682961314916611, "learning_rate": 2.834896379713356e-06, "loss": 0.0321, "step": 170920 }, { "epoch": 0.12465, "grad_norm": 0.038385991007089615, "learning_rate": 2.8329847382965485e-06, "loss": 0.0329, "step": 170930 }, { "epoch": 0.1247, "grad_norm": 0.038753096014261246, "learning_rate": 2.831073702917611e-06, "loss": 0.0324, "step": 170940 }, { "epoch": 0.12475, "grad_norm": 0.03721785545349121, "learning_rate": 2.8291632736287877e-06, "loss": 0.0323, "step": 170950 }, { "epoch": 0.1248, "grad_norm": 0.04255424067378044, "learning_rate": 2.827253450482309e-06, "loss": 0.0337, "step": 170960 }, { "epoch": 0.12485, "grad_norm": 0.03699033707380295, "learning_rate": 2.8253442335303944e-06, "loss": 0.0315, "step": 170970 }, { "epoch": 0.1249, "grad_norm": 0.0359467975795269, "learning_rate": 2.8234356228252377e-06, "loss": 0.0328, "step": 170980 }, { "epoch": 0.12495, "grad_norm": 0.03315534070134163, "learning_rate": 2.821527618419015e-06, "loss": 0.0319, "step": 170990 }, { "epoch": 0.125, "grad_norm": 0.04848819971084595, "learning_rate": 2.8196202203639e-06, "loss": 0.0341, "step": 171000 }, { "epoch": 0.12505, "grad_norm": 0.03591757267713547, "learning_rate": 2.817713428712029e-06, "loss": 0.0317, "step": 171010 }, { "epoch": 0.1251, "grad_norm": 0.04786156117916107, "learning_rate": 2.8158072435155474e-06, "loss": 0.0351, "step": 171020 }, { "epoch": 0.12515, "grad_norm": 0.04991556331515312, "learning_rate": 2.8139016648265597e-06, "loss": 0.0329, "step": 171030 }, { "epoch": 0.1252, "grad_norm": 0.036221716552972794, "learning_rate": 2.811996692697161e-06, "loss": 0.0332, "step": 171040 }, { "epoch": 0.12525, "grad_norm": 0.03783911094069481, "learning_rate": 2.810092327179442e-06, "loss": 0.0318, "step": 171050 }, { "epoch": 0.1253, "grad_norm": 0.038371000438928604, "learning_rate": 2.8081885683254623e-06, "loss": 0.033, "step": 171060 }, { "epoch": 0.12535, "grad_norm": 0.04190398380160332, "learning_rate": 2.806285416187271e-06, "loss": 0.0322, "step": 171070 }, { "epoch": 0.1254, "grad_norm": 0.038543831557035446, "learning_rate": 2.8043828708168935e-06, "loss": 0.0321, "step": 171080 }, { "epoch": 0.12545, "grad_norm": 0.03184407949447632, "learning_rate": 2.8024809322663547e-06, "loss": 0.0319, "step": 171090 }, { "epoch": 0.1255, "grad_norm": 0.038300253450870514, "learning_rate": 2.800579600587641e-06, "loss": 0.0327, "step": 171100 }, { "epoch": 0.12555, "grad_norm": 0.04688066616654396, "learning_rate": 2.798678875832747e-06, "loss": 0.0366, "step": 171110 }, { "epoch": 0.1256, "grad_norm": 0.03489735722541809, "learning_rate": 2.7967787580536336e-06, "loss": 0.031, "step": 171120 }, { "epoch": 0.12565, "grad_norm": 0.04473022371530533, "learning_rate": 2.79487924730224e-06, "loss": 0.0334, "step": 171130 }, { "epoch": 0.1257, "grad_norm": 0.037319961935281754, "learning_rate": 2.7929803436305137e-06, "loss": 0.0329, "step": 171140 }, { "epoch": 0.12575, "grad_norm": 0.0370020754635334, "learning_rate": 2.79108204709036e-06, "loss": 0.0329, "step": 171150 }, { "epoch": 0.1258, "grad_norm": 0.03408210724592209, "learning_rate": 2.789184357733679e-06, "loss": 0.0329, "step": 171160 }, { "epoch": 0.12585, "grad_norm": 0.03917255252599716, "learning_rate": 2.787287275612349e-06, "loss": 0.0318, "step": 171170 }, { "epoch": 0.1259, "grad_norm": 0.03344712778925896, "learning_rate": 2.7853908007782426e-06, "loss": 0.0323, "step": 171180 }, { "epoch": 0.12595, "grad_norm": 0.036113545298576355, "learning_rate": 2.783494933283212e-06, "loss": 0.0353, "step": 171190 }, { "epoch": 0.126, "grad_norm": 0.042464982718229294, "learning_rate": 2.7815996731790778e-06, "loss": 0.0347, "step": 171200 }, { "epoch": 0.12605, "grad_norm": 0.03898061439394951, "learning_rate": 2.7797050205176644e-06, "loss": 0.033, "step": 171210 }, { "epoch": 0.1261, "grad_norm": 0.03752130642533302, "learning_rate": 2.7778109753507614e-06, "loss": 0.0324, "step": 171220 }, { "epoch": 0.12615, "grad_norm": 0.033893827348947525, "learning_rate": 2.775917537730163e-06, "loss": 0.032, "step": 171230 }, { "epoch": 0.1262, "grad_norm": 0.03558017686009407, "learning_rate": 2.7740247077076316e-06, "loss": 0.0316, "step": 171240 }, { "epoch": 0.12625, "grad_norm": 0.039208702743053436, "learning_rate": 2.7721324853349106e-06, "loss": 0.0301, "step": 171250 }, { "epoch": 0.1263, "grad_norm": 0.036127764731645584, "learning_rate": 2.77024087066374e-06, "loss": 0.0318, "step": 171260 }, { "epoch": 0.12635, "grad_norm": 0.03924562782049179, "learning_rate": 2.7683498637458336e-06, "loss": 0.0316, "step": 171270 }, { "epoch": 0.1264, "grad_norm": 0.03675839304924011, "learning_rate": 2.7664594646328895e-06, "loss": 0.032, "step": 171280 }, { "epoch": 0.12645, "grad_norm": 0.03594402223825455, "learning_rate": 2.764569673376585e-06, "loss": 0.0315, "step": 171290 }, { "epoch": 0.1265, "grad_norm": 0.03455478325486183, "learning_rate": 2.7626804900285937e-06, "loss": 0.031, "step": 171300 }, { "epoch": 0.12655, "grad_norm": 0.03500540927052498, "learning_rate": 2.7607919146405674e-06, "loss": 0.033, "step": 171310 }, { "epoch": 0.1266, "grad_norm": 0.03529797121882439, "learning_rate": 2.7589039472641354e-06, "loss": 0.0318, "step": 171320 }, { "epoch": 0.12665, "grad_norm": 0.034966934472322464, "learning_rate": 2.757016587950914e-06, "loss": 0.0302, "step": 171330 }, { "epoch": 0.1267, "grad_norm": 0.031065698713064194, "learning_rate": 2.7551298367524985e-06, "loss": 0.0317, "step": 171340 }, { "epoch": 0.12675, "grad_norm": 0.03381740301847458, "learning_rate": 2.753243693720481e-06, "loss": 0.0325, "step": 171350 }, { "epoch": 0.1268, "grad_norm": 0.03633000701665878, "learning_rate": 2.7513581589064223e-06, "loss": 0.0316, "step": 171360 }, { "epoch": 0.12685, "grad_norm": 0.041332636028528214, "learning_rate": 2.749473232361871e-06, "loss": 0.0312, "step": 171370 }, { "epoch": 0.1269, "grad_norm": 0.03545716404914856, "learning_rate": 2.7475889141383632e-06, "loss": 0.032, "step": 171380 }, { "epoch": 0.12695, "grad_norm": 0.03463119640946388, "learning_rate": 2.745705204287413e-06, "loss": 0.0335, "step": 171390 }, { "epoch": 0.127, "grad_norm": 0.03852127864956856, "learning_rate": 2.74382210286053e-06, "loss": 0.0338, "step": 171400 }, { "epoch": 0.12705, "grad_norm": 0.03509530797600746, "learning_rate": 2.7419396099091806e-06, "loss": 0.0329, "step": 171410 }, { "epoch": 0.1271, "grad_norm": 0.032109301537275314, "learning_rate": 2.7400577254848405e-06, "loss": 0.0336, "step": 171420 }, { "epoch": 0.12715, "grad_norm": 0.03659934550523758, "learning_rate": 2.7381764496389626e-06, "loss": 0.0333, "step": 171430 }, { "epoch": 0.1272, "grad_norm": 0.03949648514389992, "learning_rate": 2.736295782422979e-06, "loss": 0.0351, "step": 171440 }, { "epoch": 0.12725, "grad_norm": 0.03775806725025177, "learning_rate": 2.7344157238883034e-06, "loss": 0.0332, "step": 171450 }, { "epoch": 0.1273, "grad_norm": 0.03968925401568413, "learning_rate": 2.7325362740863336e-06, "loss": 0.0354, "step": 171460 }, { "epoch": 0.12735, "grad_norm": 0.038549553602933884, "learning_rate": 2.7306574330684593e-06, "loss": 0.0326, "step": 171470 }, { "epoch": 0.1274, "grad_norm": 0.03686949238181114, "learning_rate": 2.7287792008860472e-06, "loss": 0.0349, "step": 171480 }, { "epoch": 0.12745, "grad_norm": 0.03963692486286163, "learning_rate": 2.7269015775904377e-06, "loss": 0.0329, "step": 171490 }, { "epoch": 0.1275, "grad_norm": 0.03710221126675606, "learning_rate": 2.725024563232978e-06, "loss": 0.0349, "step": 171500 }, { "epoch": 0.12755, "grad_norm": 0.039931006729602814, "learning_rate": 2.7231481578649743e-06, "loss": 0.0343, "step": 171510 }, { "epoch": 0.1276, "grad_norm": 0.03956283628940582, "learning_rate": 2.7212723615377326e-06, "loss": 0.035, "step": 171520 }, { "epoch": 0.12765, "grad_norm": 0.03812943771481514, "learning_rate": 2.7193971743025376e-06, "loss": 0.0352, "step": 171530 }, { "epoch": 0.1277, "grad_norm": 0.03685388341546059, "learning_rate": 2.7175225962106533e-06, "loss": 0.033, "step": 171540 }, { "epoch": 0.12775, "grad_norm": 0.040714338421821594, "learning_rate": 2.715648627313322e-06, "loss": 0.0321, "step": 171550 }, { "epoch": 0.1278, "grad_norm": 0.0388384610414505, "learning_rate": 2.713775267661789e-06, "loss": 0.0326, "step": 171560 }, { "epoch": 0.12785, "grad_norm": 0.03131807595491409, "learning_rate": 2.7119025173072772e-06, "loss": 0.033, "step": 171570 }, { "epoch": 0.1279, "grad_norm": 0.03585648536682129, "learning_rate": 2.7100303763009647e-06, "loss": 0.0328, "step": 171580 }, { "epoch": 0.12795, "grad_norm": 0.03749939799308777, "learning_rate": 2.708158844694056e-06, "loss": 0.0383, "step": 171590 }, { "epoch": 0.128, "grad_norm": 0.037364646792411804, "learning_rate": 2.7062879225377006e-06, "loss": 0.0366, "step": 171600 }, { "epoch": 0.12805, "grad_norm": 0.04412858933210373, "learning_rate": 2.704417609883067e-06, "loss": 0.0348, "step": 171610 }, { "epoch": 0.1281, "grad_norm": 0.039236828684806824, "learning_rate": 2.7025479067812777e-06, "loss": 0.0332, "step": 171620 }, { "epoch": 0.12815, "grad_norm": 0.04682591184973717, "learning_rate": 2.7006788132834498e-06, "loss": 0.033, "step": 171630 }, { "epoch": 0.1282, "grad_norm": 0.04099787026643753, "learning_rate": 2.6988103294406875e-06, "loss": 0.0323, "step": 171640 }, { "epoch": 0.12825, "grad_norm": 0.036896705627441406, "learning_rate": 2.6969424553040746e-06, "loss": 0.0331, "step": 171650 }, { "epoch": 0.1283, "grad_norm": 0.038666076958179474, "learning_rate": 2.6950751909246756e-06, "loss": 0.0334, "step": 171660 }, { "epoch": 0.12835, "grad_norm": 0.03882349282503128, "learning_rate": 2.6932085363535362e-06, "loss": 0.0328, "step": 171670 }, { "epoch": 0.1284, "grad_norm": 0.04096510261297226, "learning_rate": 2.6913424916416936e-06, "loss": 0.0325, "step": 171680 }, { "epoch": 0.12845, "grad_norm": 0.03649698942899704, "learning_rate": 2.689477056840173e-06, "loss": 0.0333, "step": 171690 }, { "epoch": 0.1285, "grad_norm": 0.042621366679668427, "learning_rate": 2.6876122319999668e-06, "loss": 0.0331, "step": 171700 }, { "epoch": 0.12855, "grad_norm": 0.03946394473314285, "learning_rate": 2.685748017172063e-06, "loss": 0.0326, "step": 171710 }, { "epoch": 0.1286, "grad_norm": 0.034220434725284576, "learning_rate": 2.683884412407417e-06, "loss": 0.0328, "step": 171720 }, { "epoch": 0.12865, "grad_norm": 0.039184972643852234, "learning_rate": 2.682021417756994e-06, "loss": 0.0341, "step": 171730 }, { "epoch": 0.1287, "grad_norm": 0.03642386570572853, "learning_rate": 2.6801590332717203e-06, "loss": 0.0324, "step": 171740 }, { "epoch": 0.12875, "grad_norm": 0.03386127948760986, "learning_rate": 2.6782972590025074e-06, "loss": 0.033, "step": 171750 }, { "epoch": 0.1288, "grad_norm": 0.036138761788606644, "learning_rate": 2.6764360950002677e-06, "loss": 0.0336, "step": 171760 }, { "epoch": 0.12885, "grad_norm": 0.036520663648843765, "learning_rate": 2.674575541315874e-06, "loss": 0.0332, "step": 171770 }, { "epoch": 0.1289, "grad_norm": 0.03316280245780945, "learning_rate": 2.6727155980002057e-06, "loss": 0.0339, "step": 171780 }, { "epoch": 0.12895, "grad_norm": 0.04363163560628891, "learning_rate": 2.6708562651040963e-06, "loss": 0.0328, "step": 171790 }, { "epoch": 0.129, "grad_norm": 0.03942793607711792, "learning_rate": 2.6689975426783946e-06, "loss": 0.0343, "step": 171800 }, { "epoch": 0.12905, "grad_norm": 0.038066670298576355, "learning_rate": 2.6671394307739043e-06, "loss": 0.0341, "step": 171810 }, { "epoch": 0.1291, "grad_norm": 0.04269417002797127, "learning_rate": 2.6652819294414377e-06, "loss": 0.0318, "step": 171820 }, { "epoch": 0.12915, "grad_norm": 0.03263647109270096, "learning_rate": 2.6634250387317706e-06, "loss": 0.0336, "step": 171830 }, { "epoch": 0.1292, "grad_norm": 0.04190134257078171, "learning_rate": 2.6615687586956678e-06, "loss": 0.0337, "step": 171840 }, { "epoch": 0.12925, "grad_norm": 0.03180139511823654, "learning_rate": 2.659713089383886e-06, "loss": 0.033, "step": 171850 }, { "epoch": 0.1293, "grad_norm": 0.0395943745970726, "learning_rate": 2.6578580308471575e-06, "loss": 0.0334, "step": 171860 }, { "epoch": 0.12935, "grad_norm": 0.0389542281627655, "learning_rate": 2.6560035831361936e-06, "loss": 0.0329, "step": 171870 }, { "epoch": 0.1294, "grad_norm": 0.041193362325429916, "learning_rate": 2.6541497463016983e-06, "loss": 0.0354, "step": 171880 }, { "epoch": 0.12945, "grad_norm": 0.04503790661692619, "learning_rate": 2.652296520394351e-06, "loss": 0.0344, "step": 171890 }, { "epoch": 0.1295, "grad_norm": 0.04351415857672691, "learning_rate": 2.650443905464828e-06, "loss": 0.0347, "step": 171900 }, { "epoch": 0.12955, "grad_norm": 0.03476227447390556, "learning_rate": 2.6485919015637712e-06, "loss": 0.034, "step": 171910 }, { "epoch": 0.1296, "grad_norm": 0.04246963933110237, "learning_rate": 2.646740508741813e-06, "loss": 0.0343, "step": 171920 }, { "epoch": 0.12965, "grad_norm": 0.03715596720576286, "learning_rate": 2.6448897270495683e-06, "loss": 0.0343, "step": 171930 }, { "epoch": 0.1297, "grad_norm": 0.039296895265579224, "learning_rate": 2.643039556537644e-06, "loss": 0.0356, "step": 171940 }, { "epoch": 0.12975, "grad_norm": 0.03285042941570282, "learning_rate": 2.6411899972566186e-06, "loss": 0.0333, "step": 171950 }, { "epoch": 0.1298, "grad_norm": 0.03390447795391083, "learning_rate": 2.6393410492570546e-06, "loss": 0.0336, "step": 171960 }, { "epoch": 0.12985, "grad_norm": 0.030247660353779793, "learning_rate": 2.637492712589512e-06, "loss": 0.033, "step": 171970 }, { "epoch": 0.1299, "grad_norm": 0.034210652112960815, "learning_rate": 2.6356449873045114e-06, "loss": 0.0332, "step": 171980 }, { "epoch": 0.12995, "grad_norm": 0.031560979783535004, "learning_rate": 2.6337978734525844e-06, "loss": 0.0337, "step": 171990 }, { "epoch": 0.13, "grad_norm": 0.043251339346170425, "learning_rate": 2.6319513710842127e-06, "loss": 0.0336, "step": 172000 }, { "epoch": 0.13005, "grad_norm": 0.03139464929699898, "learning_rate": 2.6301054802498866e-06, "loss": 0.0324, "step": 172010 }, { "epoch": 0.1301, "grad_norm": 0.039893459528684616, "learning_rate": 2.628260201000077e-06, "loss": 0.0324, "step": 172020 }, { "epoch": 0.13015, "grad_norm": 0.033400699496269226, "learning_rate": 2.626415533385229e-06, "loss": 0.0327, "step": 172030 }, { "epoch": 0.1302, "grad_norm": 0.03817545995116234, "learning_rate": 2.6245714774557728e-06, "loss": 0.0342, "step": 172040 }, { "epoch": 0.13025, "grad_norm": 0.03130049630999565, "learning_rate": 2.6227280332621256e-06, "loss": 0.0321, "step": 172050 }, { "epoch": 0.1303, "grad_norm": 0.041443757712841034, "learning_rate": 2.6208852008546836e-06, "loss": 0.0345, "step": 172060 }, { "epoch": 0.13035, "grad_norm": 0.037647463381290436, "learning_rate": 2.6190429802838453e-06, "loss": 0.0334, "step": 172070 }, { "epoch": 0.1304, "grad_norm": 0.038211289793252945, "learning_rate": 2.617201371599953e-06, "loss": 0.0329, "step": 172080 }, { "epoch": 0.13045, "grad_norm": 0.041644494980573654, "learning_rate": 2.6153603748533705e-06, "loss": 0.0333, "step": 172090 }, { "epoch": 0.1305, "grad_norm": 0.03600402921438217, "learning_rate": 2.613519990094421e-06, "loss": 0.0332, "step": 172100 }, { "epoch": 0.13055, "grad_norm": 0.03201232850551605, "learning_rate": 2.61168021737343e-06, "loss": 0.0335, "step": 172110 }, { "epoch": 0.1306, "grad_norm": 0.04101625829935074, "learning_rate": 2.6098410567406916e-06, "loss": 0.0368, "step": 172120 }, { "epoch": 0.13065, "grad_norm": 0.037356119602918625, "learning_rate": 2.608002508246482e-06, "loss": 0.0338, "step": 172130 }, { "epoch": 0.1307, "grad_norm": 0.046279631555080414, "learning_rate": 2.606164571941078e-06, "loss": 0.0332, "step": 172140 }, { "epoch": 0.13075, "grad_norm": 0.03496434912085533, "learning_rate": 2.6043272478747187e-06, "loss": 0.0329, "step": 172150 }, { "epoch": 0.1308, "grad_norm": 0.04666680842638016, "learning_rate": 2.6024905360976405e-06, "loss": 0.0349, "step": 172160 }, { "epoch": 0.13085, "grad_norm": 0.04172470420598984, "learning_rate": 2.6006544366600494e-06, "loss": 0.0326, "step": 172170 }, { "epoch": 0.1309, "grad_norm": 0.035661906003952026, "learning_rate": 2.5988189496121584e-06, "loss": 0.0335, "step": 172180 }, { "epoch": 0.13095, "grad_norm": 0.055430054664611816, "learning_rate": 2.596984075004136e-06, "loss": 0.0338, "step": 172190 }, { "epoch": 0.131, "grad_norm": 0.07867436110973358, "learning_rate": 2.595149812886158e-06, "loss": 0.0327, "step": 172200 }, { "epoch": 0.13105, "grad_norm": 0.05331834405660629, "learning_rate": 2.5933161633083654e-06, "loss": 0.0332, "step": 172210 }, { "epoch": 0.1311, "grad_norm": 0.04124901071190834, "learning_rate": 2.591483126320887e-06, "loss": 0.0325, "step": 172220 }, { "epoch": 0.13115, "grad_norm": 0.04230903834104538, "learning_rate": 2.5896507019738475e-06, "loss": 0.0342, "step": 172230 }, { "epoch": 0.1312, "grad_norm": 0.03651657700538635, "learning_rate": 2.587818890317337e-06, "loss": 0.033, "step": 172240 }, { "epoch": 0.13125, "grad_norm": 0.03932705521583557, "learning_rate": 2.585987691401434e-06, "loss": 0.0342, "step": 172250 }, { "epoch": 0.1313, "grad_norm": 0.039938826113939285, "learning_rate": 2.5841571052762135e-06, "loss": 0.0348, "step": 172260 }, { "epoch": 0.13135, "grad_norm": 0.03369868919253349, "learning_rate": 2.58232713199171e-06, "loss": 0.0329, "step": 172270 }, { "epoch": 0.1314, "grad_norm": 0.03550058230757713, "learning_rate": 2.58049777159797e-06, "loss": 0.0329, "step": 172280 }, { "epoch": 0.13145, "grad_norm": 0.03386114537715912, "learning_rate": 2.578669024144989e-06, "loss": 0.0335, "step": 172290 }, { "epoch": 0.1315, "grad_norm": 0.040460165590047836, "learning_rate": 2.576840889682777e-06, "loss": 0.0336, "step": 172300 }, { "epoch": 0.13155, "grad_norm": 0.0648966133594513, "learning_rate": 2.5750133682613085e-06, "loss": 0.0349, "step": 172310 }, { "epoch": 0.1316, "grad_norm": 0.035546429455280304, "learning_rate": 2.5731864599305545e-06, "loss": 0.0312, "step": 172320 }, { "epoch": 0.13165, "grad_norm": 0.03535797819495201, "learning_rate": 2.571360164740455e-06, "loss": 0.0332, "step": 172330 }, { "epoch": 0.1317, "grad_norm": 0.07054276019334793, "learning_rate": 2.56953448274094e-06, "loss": 0.0331, "step": 172340 }, { "epoch": 0.13175, "grad_norm": 0.04104088246822357, "learning_rate": 2.5677094139819307e-06, "loss": 0.0319, "step": 172350 }, { "epoch": 0.1318, "grad_norm": 0.030941542237997055, "learning_rate": 2.5658849585133178e-06, "loss": 0.0319, "step": 172360 }, { "epoch": 0.13185, "grad_norm": 0.042520422488451004, "learning_rate": 2.564061116384983e-06, "loss": 0.0325, "step": 172370 }, { "epoch": 0.1319, "grad_norm": 0.035734474658966064, "learning_rate": 2.5622378876467818e-06, "loss": 0.0321, "step": 172380 }, { "epoch": 0.13195, "grad_norm": 0.035758309066295624, "learning_rate": 2.560415272348568e-06, "loss": 0.0328, "step": 172390 }, { "epoch": 0.132, "grad_norm": 0.029714185744524002, "learning_rate": 2.5585932705401744e-06, "loss": 0.0318, "step": 172400 }, { "epoch": 0.13205, "grad_norm": 0.045044753700494766, "learning_rate": 2.556771882271411e-06, "loss": 0.0334, "step": 172410 }, { "epoch": 0.1321, "grad_norm": 0.04619543254375458, "learning_rate": 2.5549511075920742e-06, "loss": 0.0322, "step": 172420 }, { "epoch": 0.13215, "grad_norm": 0.03538672253489494, "learning_rate": 2.5531309465519347e-06, "loss": 0.0312, "step": 172430 }, { "epoch": 0.1322, "grad_norm": 0.034619834274053574, "learning_rate": 2.5513113992007675e-06, "loss": 0.0323, "step": 172440 }, { "epoch": 0.13225, "grad_norm": 0.03356612101197243, "learning_rate": 2.549492465588313e-06, "loss": 0.0327, "step": 172450 }, { "epoch": 0.1323, "grad_norm": 0.03605108708143234, "learning_rate": 2.5476741457642976e-06, "loss": 0.0334, "step": 172460 }, { "epoch": 0.13235, "grad_norm": 0.04438095539808273, "learning_rate": 2.545856439778438e-06, "loss": 0.0333, "step": 172470 }, { "epoch": 0.1324, "grad_norm": 0.03273903578519821, "learning_rate": 2.5440393476804243e-06, "loss": 0.0324, "step": 172480 }, { "epoch": 0.13245, "grad_norm": 0.03685907647013664, "learning_rate": 2.542222869519947e-06, "loss": 0.0311, "step": 172490 }, { "epoch": 0.1325, "grad_norm": 0.03596313297748566, "learning_rate": 2.540407005346651e-06, "loss": 0.0321, "step": 172500 }, { "epoch": 0.13255, "grad_norm": 0.030287181958556175, "learning_rate": 2.53859175521019e-06, "loss": 0.0315, "step": 172510 }, { "epoch": 0.1326, "grad_norm": 0.034024789929389954, "learning_rate": 2.536777119160197e-06, "loss": 0.0321, "step": 172520 }, { "epoch": 0.13265, "grad_norm": 0.032543737441301346, "learning_rate": 2.5349630972462792e-06, "loss": 0.0318, "step": 172530 }, { "epoch": 0.1327, "grad_norm": 0.03257584199309349, "learning_rate": 2.5331496895180272e-06, "loss": 0.0311, "step": 172540 }, { "epoch": 0.13275, "grad_norm": 0.03539401665329933, "learning_rate": 2.5313368960250216e-06, "loss": 0.0323, "step": 172550 }, { "epoch": 0.1328, "grad_norm": 0.04302481189370155, "learning_rate": 2.529524716816825e-06, "loss": 0.0334, "step": 172560 }, { "epoch": 0.13285, "grad_norm": 0.03241045027971268, "learning_rate": 2.5277131519429843e-06, "loss": 0.0321, "step": 172570 }, { "epoch": 0.1329, "grad_norm": 0.0356929786503315, "learning_rate": 2.5259022014530174e-06, "loss": 0.0326, "step": 172580 }, { "epoch": 0.13295, "grad_norm": 0.03583104535937309, "learning_rate": 2.5240918653964467e-06, "loss": 0.0323, "step": 172590 }, { "epoch": 0.133, "grad_norm": 0.03354700654745102, "learning_rate": 2.5222821438227545e-06, "loss": 0.0342, "step": 172600 }, { "epoch": 0.13305, "grad_norm": 0.042298246175050735, "learning_rate": 2.520473036781429e-06, "loss": 0.0324, "step": 172610 }, { "epoch": 0.1331, "grad_norm": 0.04196954146027565, "learning_rate": 2.5186645443219274e-06, "loss": 0.0325, "step": 172620 }, { "epoch": 0.13315, "grad_norm": 0.037745051085948944, "learning_rate": 2.516856666493683e-06, "loss": 0.0321, "step": 172630 }, { "epoch": 0.1332, "grad_norm": 0.03741823136806488, "learning_rate": 2.51504940334614e-06, "loss": 0.0338, "step": 172640 }, { "epoch": 0.13325, "grad_norm": 0.03566833585500717, "learning_rate": 2.5132427549286965e-06, "loss": 0.0321, "step": 172650 }, { "epoch": 0.1333, "grad_norm": 0.03631792217493057, "learning_rate": 2.511436721290747e-06, "loss": 0.0321, "step": 172660 }, { "epoch": 0.13335, "grad_norm": 0.03379756212234497, "learning_rate": 2.509631302481666e-06, "loss": 0.0318, "step": 172670 }, { "epoch": 0.1334, "grad_norm": 0.038850437849760056, "learning_rate": 2.5078264985508193e-06, "loss": 0.0338, "step": 172680 }, { "epoch": 0.13345, "grad_norm": 0.0350588895380497, "learning_rate": 2.5060223095475426e-06, "loss": 0.0329, "step": 172690 }, { "epoch": 0.1335, "grad_norm": 0.0503779798746109, "learning_rate": 2.5042187355211717e-06, "loss": 0.0336, "step": 172700 }, { "epoch": 0.13355, "grad_norm": 0.037332683801651, "learning_rate": 2.5024157765210083e-06, "loss": 0.0324, "step": 172710 }, { "epoch": 0.1336, "grad_norm": 0.038964446634054184, "learning_rate": 2.500613432596338e-06, "loss": 0.0342, "step": 172720 }, { "epoch": 0.13365, "grad_norm": 0.03178824484348297, "learning_rate": 2.4988117037964527e-06, "loss": 0.0325, "step": 172730 }, { "epoch": 0.1337, "grad_norm": 0.037879884243011475, "learning_rate": 2.497010590170598e-06, "loss": 0.0334, "step": 172740 }, { "epoch": 0.13375, "grad_norm": 0.04096490517258644, "learning_rate": 2.4952100917680244e-06, "loss": 0.0332, "step": 172750 }, { "epoch": 0.1338, "grad_norm": 0.03576594591140747, "learning_rate": 2.4934102086379445e-06, "loss": 0.0329, "step": 172760 }, { "epoch": 0.13385, "grad_norm": 0.0467950701713562, "learning_rate": 2.491610940829575e-06, "loss": 0.0351, "step": 172770 }, { "epoch": 0.1339, "grad_norm": 0.05011410638689995, "learning_rate": 2.4898122883921152e-06, "loss": 0.0331, "step": 172780 }, { "epoch": 0.13395, "grad_norm": 0.04678485170006752, "learning_rate": 2.4880142513747205e-06, "loss": 0.0328, "step": 172790 }, { "epoch": 0.134, "grad_norm": 0.03232264891266823, "learning_rate": 2.4862168298265652e-06, "loss": 0.033, "step": 172800 }, { "epoch": 0.13405, "grad_norm": 0.04205932840704918, "learning_rate": 2.4844200237967795e-06, "loss": 0.0345, "step": 172810 }, { "epoch": 0.1341, "grad_norm": 0.03812059760093689, "learning_rate": 2.482623833334494e-06, "loss": 0.0316, "step": 172820 }, { "epoch": 0.13415, "grad_norm": 0.04027124494314194, "learning_rate": 2.4808282584888153e-06, "loss": 0.0329, "step": 172830 }, { "epoch": 0.1342, "grad_norm": 0.04119805246591568, "learning_rate": 2.479033299308828e-06, "loss": 0.0315, "step": 172840 }, { "epoch": 0.13425, "grad_norm": 0.04910222068428993, "learning_rate": 2.477238955843611e-06, "loss": 0.0336, "step": 172850 }, { "epoch": 0.1343, "grad_norm": 0.03819654881954193, "learning_rate": 2.475445228142223e-06, "loss": 0.0315, "step": 172860 }, { "epoch": 0.13435, "grad_norm": 0.03676626831293106, "learning_rate": 2.4736521162536996e-06, "loss": 0.0326, "step": 172870 }, { "epoch": 0.1344, "grad_norm": 0.0349026694893837, "learning_rate": 2.4718596202270574e-06, "loss": 0.0327, "step": 172880 }, { "epoch": 0.13445, "grad_norm": 0.034219786524772644, "learning_rate": 2.470067740111312e-06, "loss": 0.0329, "step": 172890 }, { "epoch": 0.1345, "grad_norm": 0.033093199133872986, "learning_rate": 2.4682764759554553e-06, "loss": 0.0319, "step": 172900 }, { "epoch": 0.13455, "grad_norm": 0.036882225424051285, "learning_rate": 2.466485827808454e-06, "loss": 0.034, "step": 172910 }, { "epoch": 0.1346, "grad_norm": 0.03655955195426941, "learning_rate": 2.4646957957192656e-06, "loss": 0.0326, "step": 172920 }, { "epoch": 0.13465, "grad_norm": 0.03251729905605316, "learning_rate": 2.4629063797368235e-06, "loss": 0.0326, "step": 172930 }, { "epoch": 0.1347, "grad_norm": 0.0346374586224556, "learning_rate": 2.4611175799100577e-06, "loss": 0.0321, "step": 172940 }, { "epoch": 0.13475, "grad_norm": 0.030120186507701874, "learning_rate": 2.459329396287871e-06, "loss": 0.032, "step": 172950 }, { "epoch": 0.1348, "grad_norm": 0.03634950518608093, "learning_rate": 2.4575418289191437e-06, "loss": 0.0325, "step": 172960 }, { "epoch": 0.13485, "grad_norm": 0.034011442214250565, "learning_rate": 2.455754877852762e-06, "loss": 0.0342, "step": 172970 }, { "epoch": 0.1349, "grad_norm": 0.03213994950056076, "learning_rate": 2.4539685431375663e-06, "loss": 0.0325, "step": 172980 }, { "epoch": 0.13495, "grad_norm": 0.03639134019613266, "learning_rate": 2.4521828248224075e-06, "loss": 0.033, "step": 172990 }, { "epoch": 0.135, "grad_norm": 0.036853231489658356, "learning_rate": 2.450397722956094e-06, "loss": 0.0339, "step": 173000 }, { "epoch": 0.13505, "grad_norm": 0.0340038537979126, "learning_rate": 2.4486132375874383e-06, "loss": 0.0343, "step": 173010 }, { "epoch": 0.1351, "grad_norm": 0.0397774763405323, "learning_rate": 2.4468293687652187e-06, "loss": 0.0336, "step": 173020 }, { "epoch": 0.13515, "grad_norm": 0.03534870222210884, "learning_rate": 2.445046116538216e-06, "loss": 0.0341, "step": 173030 }, { "epoch": 0.1352, "grad_norm": 0.03766913339495659, "learning_rate": 2.4432634809551796e-06, "loss": 0.0337, "step": 173040 }, { "epoch": 0.13525, "grad_norm": 0.04208486154675484, "learning_rate": 2.441481462064843e-06, "loss": 0.0338, "step": 173050 }, { "epoch": 0.1353, "grad_norm": 0.0469144769012928, "learning_rate": 2.439700059915931e-06, "loss": 0.0351, "step": 173060 }, { "epoch": 0.13535, "grad_norm": 0.03179687634110451, "learning_rate": 2.4379192745571434e-06, "loss": 0.0341, "step": 173070 }, { "epoch": 0.1354, "grad_norm": 0.04159516468644142, "learning_rate": 2.4361391060371606e-06, "loss": 0.0338, "step": 173080 }, { "epoch": 0.13545, "grad_norm": 0.03519073873758316, "learning_rate": 2.4343595544046666e-06, "loss": 0.0322, "step": 173090 }, { "epoch": 0.1355, "grad_norm": 0.033475492149591446, "learning_rate": 2.432580619708297e-06, "loss": 0.0328, "step": 173100 }, { "epoch": 0.13555, "grad_norm": 0.031248176470398903, "learning_rate": 2.4308023019967014e-06, "loss": 0.0317, "step": 173110 }, { "epoch": 0.1356, "grad_norm": 0.04228196665644646, "learning_rate": 2.4290246013184913e-06, "loss": 0.0328, "step": 173120 }, { "epoch": 0.13565, "grad_norm": 0.037378840148448944, "learning_rate": 2.4272475177222698e-06, "loss": 0.0316, "step": 173130 }, { "epoch": 0.1357, "grad_norm": 0.05218826234340668, "learning_rate": 2.425471051256614e-06, "loss": 0.0351, "step": 173140 }, { "epoch": 0.13575, "grad_norm": 0.037467218935489655, "learning_rate": 2.423695201970105e-06, "loss": 0.0324, "step": 173150 }, { "epoch": 0.1358, "grad_norm": 0.03451351821422577, "learning_rate": 2.4219199699112876e-06, "loss": 0.0325, "step": 173160 }, { "epoch": 0.13585, "grad_norm": 0.03146947920322418, "learning_rate": 2.4201453551286916e-06, "loss": 0.0325, "step": 173170 }, { "epoch": 0.1359, "grad_norm": 0.030889015644788742, "learning_rate": 2.4183713576708427e-06, "loss": 0.0335, "step": 173180 }, { "epoch": 0.13595, "grad_norm": 0.032551318407058716, "learning_rate": 2.4165979775862354e-06, "loss": 0.0339, "step": 173190 }, { "epoch": 0.136, "grad_norm": 0.04587157815694809, "learning_rate": 2.4148252149233607e-06, "loss": 0.0341, "step": 173200 }, { "epoch": 0.13605, "grad_norm": 0.03541712835431099, "learning_rate": 2.4130530697306724e-06, "loss": 0.0331, "step": 173210 }, { "epoch": 0.1361, "grad_norm": 0.03498711436986923, "learning_rate": 2.4112815420566287e-06, "loss": 0.0335, "step": 173220 }, { "epoch": 0.13615, "grad_norm": 0.03785178065299988, "learning_rate": 2.409510631949666e-06, "loss": 0.0349, "step": 173230 }, { "epoch": 0.1362, "grad_norm": 0.0386972650885582, "learning_rate": 2.407740339458192e-06, "loss": 0.0334, "step": 173240 }, { "epoch": 0.13625, "grad_norm": 0.03768829628825188, "learning_rate": 2.4059706646306133e-06, "loss": 0.0339, "step": 173250 }, { "epoch": 0.1363, "grad_norm": 0.034488171339035034, "learning_rate": 2.4042016075153024e-06, "loss": 0.0342, "step": 173260 }, { "epoch": 0.13635, "grad_norm": 0.04211435094475746, "learning_rate": 2.402433168160631e-06, "loss": 0.0339, "step": 173270 }, { "epoch": 0.1364, "grad_norm": 0.03679462522268295, "learning_rate": 2.4006653466149582e-06, "loss": 0.0335, "step": 173280 }, { "epoch": 0.13645, "grad_norm": 0.042969267815351486, "learning_rate": 2.398898142926592e-06, "loss": 0.0335, "step": 173290 }, { "epoch": 0.1365, "grad_norm": 0.03657511621713638, "learning_rate": 2.3971315571438668e-06, "loss": 0.0334, "step": 173300 }, { "epoch": 0.13655, "grad_norm": 0.03540303185582161, "learning_rate": 2.3953655893150683e-06, "loss": 0.0324, "step": 173310 }, { "epoch": 0.1366, "grad_norm": 0.03584011644124985, "learning_rate": 2.393600239488486e-06, "loss": 0.0345, "step": 173320 }, { "epoch": 0.13665, "grad_norm": 0.03523749113082886, "learning_rate": 2.3918355077123812e-06, "loss": 0.0331, "step": 173330 }, { "epoch": 0.1367, "grad_norm": 0.036041587591171265, "learning_rate": 2.3900713940349956e-06, "loss": 0.0326, "step": 173340 }, { "epoch": 0.13675, "grad_norm": 0.036619171500205994, "learning_rate": 2.3883078985045688e-06, "loss": 0.0339, "step": 173350 }, { "epoch": 0.1368, "grad_norm": 0.03686472028493881, "learning_rate": 2.3865450211693093e-06, "loss": 0.0316, "step": 173360 }, { "epoch": 0.13685, "grad_norm": 0.0400625579059124, "learning_rate": 2.3847827620774116e-06, "loss": 0.0338, "step": 173370 }, { "epoch": 0.1369, "grad_norm": 0.04308782517910004, "learning_rate": 2.383021121277054e-06, "loss": 0.0326, "step": 173380 }, { "epoch": 0.13695, "grad_norm": 0.041462741792201996, "learning_rate": 2.381260098816407e-06, "loss": 0.0334, "step": 173390 }, { "epoch": 0.137, "grad_norm": 0.04413997381925583, "learning_rate": 2.379499694743606e-06, "loss": 0.035, "step": 173400 }, { "epoch": 0.13705, "grad_norm": 0.03597474470734596, "learning_rate": 2.377739909106791e-06, "loss": 0.0327, "step": 173410 }, { "epoch": 0.1371, "grad_norm": 0.0335271991789341, "learning_rate": 2.3759807419540675e-06, "loss": 0.0327, "step": 173420 }, { "epoch": 0.13715, "grad_norm": 0.03556426987051964, "learning_rate": 2.374222193333525e-06, "loss": 0.0324, "step": 173430 }, { "epoch": 0.1372, "grad_norm": 0.03495519608259201, "learning_rate": 2.372464263293253e-06, "loss": 0.0331, "step": 173440 }, { "epoch": 0.13725, "grad_norm": 0.03677154332399368, "learning_rate": 2.3707069518813072e-06, "loss": 0.034, "step": 173450 }, { "epoch": 0.1373, "grad_norm": 0.03947559744119644, "learning_rate": 2.3689502591457276e-06, "loss": 0.0332, "step": 173460 }, { "epoch": 0.13735, "grad_norm": 0.0355621762573719, "learning_rate": 2.3671941851345524e-06, "loss": 0.0338, "step": 173470 }, { "epoch": 0.1374, "grad_norm": 0.03790060058236122, "learning_rate": 2.3654387298957776e-06, "loss": 0.0322, "step": 173480 }, { "epoch": 0.13745, "grad_norm": 0.03127957880496979, "learning_rate": 2.363683893477417e-06, "loss": 0.0332, "step": 173490 }, { "epoch": 0.1375, "grad_norm": 0.03315692022442818, "learning_rate": 2.361929675927424e-06, "loss": 0.0326, "step": 173500 }, { "epoch": 0.13755, "grad_norm": 0.034898389130830765, "learning_rate": 2.3601760772937716e-06, "loss": 0.0335, "step": 173510 }, { "epoch": 0.1376, "grad_norm": 0.03737428039312363, "learning_rate": 2.358423097624396e-06, "loss": 0.0339, "step": 173520 }, { "epoch": 0.13765, "grad_norm": 0.03121230937540531, "learning_rate": 2.356670736967234e-06, "loss": 0.0325, "step": 173530 }, { "epoch": 0.1377, "grad_norm": 0.03580779209733009, "learning_rate": 2.354918995370184e-06, "loss": 0.0317, "step": 173540 }, { "epoch": 0.13775, "grad_norm": 0.035638175904750824, "learning_rate": 2.35316787288114e-06, "loss": 0.0328, "step": 173550 }, { "epoch": 0.1378, "grad_norm": 0.032858967781066895, "learning_rate": 2.3514173695479815e-06, "loss": 0.0324, "step": 173560 }, { "epoch": 0.13785, "grad_norm": 0.03643010929226875, "learning_rate": 2.3496674854185637e-06, "loss": 0.0324, "step": 173570 }, { "epoch": 0.1379, "grad_norm": 0.036192405968904495, "learning_rate": 2.3479182205407264e-06, "loss": 0.0311, "step": 173580 }, { "epoch": 0.13795, "grad_norm": 0.033632777631282806, "learning_rate": 2.3461695749622896e-06, "loss": 0.0329, "step": 173590 }, { "epoch": 0.138, "grad_norm": 0.03389897570014, "learning_rate": 2.3444215487310684e-06, "loss": 0.0314, "step": 173600 }, { "epoch": 0.13805, "grad_norm": 0.03711351007223129, "learning_rate": 2.3426741418948545e-06, "loss": 0.033, "step": 173610 }, { "epoch": 0.1381, "grad_norm": 0.042397286742925644, "learning_rate": 2.3409273545014183e-06, "loss": 0.0336, "step": 173620 }, { "epoch": 0.13815, "grad_norm": 0.037497516721487045, "learning_rate": 2.339181186598513e-06, "loss": 0.0325, "step": 173630 }, { "epoch": 0.1382, "grad_norm": 0.0302271768450737, "learning_rate": 2.3374356382338787e-06, "loss": 0.0313, "step": 173640 }, { "epoch": 0.13825, "grad_norm": 0.032058198004961014, "learning_rate": 2.3356907094552434e-06, "loss": 0.0313, "step": 173650 }, { "epoch": 0.1383, "grad_norm": 0.033630333840847015, "learning_rate": 2.3339464003103105e-06, "loss": 0.0309, "step": 173660 }, { "epoch": 0.13835, "grad_norm": 0.031055064871907234, "learning_rate": 2.332202710846762e-06, "loss": 0.0323, "step": 173670 }, { "epoch": 0.1384, "grad_norm": 0.043439898639917374, "learning_rate": 2.3304596411122814e-06, "loss": 0.0322, "step": 173680 }, { "epoch": 0.13845, "grad_norm": 0.03847711905837059, "learning_rate": 2.3287171911545116e-06, "loss": 0.032, "step": 173690 }, { "epoch": 0.1385, "grad_norm": 0.03554611653089523, "learning_rate": 2.3269753610211083e-06, "loss": 0.0332, "step": 173700 }, { "epoch": 0.13855, "grad_norm": 0.04088981822133064, "learning_rate": 2.3252341507596697e-06, "loss": 0.0334, "step": 173710 }, { "epoch": 0.1386, "grad_norm": 0.041101280599832535, "learning_rate": 2.323493560417811e-06, "loss": 0.0337, "step": 173720 }, { "epoch": 0.13865, "grad_norm": 0.040162887424230576, "learning_rate": 2.321753590043124e-06, "loss": 0.0325, "step": 173730 }, { "epoch": 0.1387, "grad_norm": 0.033348675817251205, "learning_rate": 2.3200142396831743e-06, "loss": 0.0332, "step": 173740 }, { "epoch": 0.13875, "grad_norm": 0.037173010408878326, "learning_rate": 2.3182755093855146e-06, "loss": 0.0331, "step": 173750 }, { "epoch": 0.1388, "grad_norm": 0.03944842144846916, "learning_rate": 2.3165373991976767e-06, "loss": 0.034, "step": 173760 }, { "epoch": 0.13885, "grad_norm": 0.047941941767930984, "learning_rate": 2.3147999091671897e-06, "loss": 0.0388, "step": 173770 }, { "epoch": 0.1389, "grad_norm": 0.05058452859520912, "learning_rate": 2.313063039341548e-06, "loss": 0.0354, "step": 173780 }, { "epoch": 0.13895, "grad_norm": 0.03964913263916969, "learning_rate": 2.3113267897682393e-06, "loss": 0.0329, "step": 173790 }, { "epoch": 0.139, "grad_norm": 0.046650372445583344, "learning_rate": 2.309591160494734e-06, "loss": 0.0338, "step": 173800 }, { "epoch": 0.13905, "grad_norm": 0.032496377825737, "learning_rate": 2.307856151568477e-06, "loss": 0.0325, "step": 173810 }, { "epoch": 0.1391, "grad_norm": 0.0319688655436039, "learning_rate": 2.3061217630369142e-06, "loss": 0.0325, "step": 173820 }, { "epoch": 0.13915, "grad_norm": 0.03517069295048714, "learning_rate": 2.3043879949474574e-06, "loss": 0.0324, "step": 173830 }, { "epoch": 0.1392, "grad_norm": 0.03652153164148331, "learning_rate": 2.302654847347499e-06, "loss": 0.0318, "step": 173840 }, { "epoch": 0.13925, "grad_norm": 0.03405037149786949, "learning_rate": 2.300922320284438e-06, "loss": 0.0329, "step": 173850 }, { "epoch": 0.1393, "grad_norm": 0.041154876351356506, "learning_rate": 2.2991904138056323e-06, "loss": 0.0325, "step": 173860 }, { "epoch": 0.13935, "grad_norm": 0.029424719512462616, "learning_rate": 2.297459127958432e-06, "loss": 0.0334, "step": 173870 }, { "epoch": 0.1394, "grad_norm": 0.03557342290878296, "learning_rate": 2.2957284627901644e-06, "loss": 0.0334, "step": 173880 }, { "epoch": 0.13945, "grad_norm": 0.033025480806827545, "learning_rate": 2.293998418348156e-06, "loss": 0.0333, "step": 173890 }, { "epoch": 0.1395, "grad_norm": 0.03407977148890495, "learning_rate": 2.292268994679697e-06, "loss": 0.0319, "step": 173900 }, { "epoch": 0.13955, "grad_norm": 0.03547189012169838, "learning_rate": 2.2905401918320745e-06, "loss": 0.0316, "step": 173910 }, { "epoch": 0.1396, "grad_norm": 0.03738795220851898, "learning_rate": 2.2888120098525563e-06, "loss": 0.0326, "step": 173920 }, { "epoch": 0.13965, "grad_norm": 0.03844847157597542, "learning_rate": 2.2870844487883763e-06, "loss": 0.0337, "step": 173930 }, { "epoch": 0.1397, "grad_norm": 0.0328495129942894, "learning_rate": 2.2853575086867834e-06, "loss": 0.0328, "step": 173940 }, { "epoch": 0.13975, "grad_norm": 0.03362671658396721, "learning_rate": 2.2836311895949813e-06, "loss": 0.0344, "step": 173950 }, { "epoch": 0.1398, "grad_norm": 0.034235890954732895, "learning_rate": 2.2819054915601656e-06, "loss": 0.0341, "step": 173960 }, { "epoch": 0.13985, "grad_norm": 0.04000657796859741, "learning_rate": 2.280180414629518e-06, "loss": 0.0345, "step": 173970 }, { "epoch": 0.1399, "grad_norm": 0.03184925392270088, "learning_rate": 2.278455958850201e-06, "loss": 0.0334, "step": 173980 }, { "epoch": 0.13995, "grad_norm": 0.036327533423900604, "learning_rate": 2.2767321242693707e-06, "loss": 0.0339, "step": 173990 }, { "epoch": 0.14, "grad_norm": 0.03221013396978378, "learning_rate": 2.27500891093414e-06, "loss": 0.0334, "step": 174000 }, { "epoch": 0.14005, "grad_norm": 0.038224413990974426, "learning_rate": 2.2732863188916293e-06, "loss": 0.0345, "step": 174010 }, { "epoch": 0.1401, "grad_norm": 0.03446537256240845, "learning_rate": 2.2715643481889314e-06, "loss": 0.0339, "step": 174020 }, { "epoch": 0.14015, "grad_norm": 0.03583173453807831, "learning_rate": 2.2698429988731285e-06, "loss": 0.0344, "step": 174030 }, { "epoch": 0.1402, "grad_norm": 0.039286285638809204, "learning_rate": 2.2681222709912797e-06, "loss": 0.0339, "step": 174040 }, { "epoch": 0.14025, "grad_norm": 0.038437262177467346, "learning_rate": 2.2664021645904224e-06, "loss": 0.0353, "step": 174050 }, { "epoch": 0.1403, "grad_norm": 0.03841080889105797, "learning_rate": 2.2646826797175945e-06, "loss": 0.0344, "step": 174060 }, { "epoch": 0.14035, "grad_norm": 0.043520137667655945, "learning_rate": 2.2629638164198025e-06, "loss": 0.0336, "step": 174070 }, { "epoch": 0.1404, "grad_norm": 0.041831016540527344, "learning_rate": 2.2612455747440363e-06, "loss": 0.034, "step": 174080 }, { "epoch": 0.14045, "grad_norm": 0.03334524482488632, "learning_rate": 2.2595279547372673e-06, "loss": 0.035, "step": 174090 }, { "epoch": 0.1405, "grad_norm": 0.03944189473986626, "learning_rate": 2.2578109564464627e-06, "loss": 0.0371, "step": 174100 }, { "epoch": 0.14055, "grad_norm": 0.03718682378530502, "learning_rate": 2.2560945799185664e-06, "loss": 0.0337, "step": 174110 }, { "epoch": 0.1406, "grad_norm": 0.03292892500758171, "learning_rate": 2.254378825200501e-06, "loss": 0.0332, "step": 174120 }, { "epoch": 0.14065, "grad_norm": 0.03702680766582489, "learning_rate": 2.252663692339174e-06, "loss": 0.033, "step": 174130 }, { "epoch": 0.1407, "grad_norm": 0.03774098679423332, "learning_rate": 2.2509491813814676e-06, "loss": 0.0322, "step": 174140 }, { "epoch": 0.14075, "grad_norm": 0.038331057876348495, "learning_rate": 2.2492352923742714e-06, "loss": 0.035, "step": 174150 }, { "epoch": 0.1408, "grad_norm": 0.036840472370386124, "learning_rate": 2.2475220253644374e-06, "loss": 0.0352, "step": 174160 }, { "epoch": 0.14085, "grad_norm": 0.04416101798415184, "learning_rate": 2.2458093803987944e-06, "loss": 0.0315, "step": 174170 }, { "epoch": 0.1409, "grad_norm": 0.03244810178875923, "learning_rate": 2.2440973575241832e-06, "loss": 0.033, "step": 174180 }, { "epoch": 0.14095, "grad_norm": 0.04091142490506172, "learning_rate": 2.2423859567873936e-06, "loss": 0.0331, "step": 174190 }, { "epoch": 0.141, "grad_norm": 0.05770982801914215, "learning_rate": 2.2406751782352324e-06, "loss": 0.0325, "step": 174200 }, { "epoch": 0.14105, "grad_norm": 0.04321189224720001, "learning_rate": 2.238965021914452e-06, "loss": 0.0341, "step": 174210 }, { "epoch": 0.1411, "grad_norm": 0.03503815457224846, "learning_rate": 2.23725548787182e-06, "loss": 0.0326, "step": 174220 }, { "epoch": 0.14115, "grad_norm": 0.041832130402326584, "learning_rate": 2.235546576154071e-06, "loss": 0.0323, "step": 174230 }, { "epoch": 0.1412, "grad_norm": 0.06155143678188324, "learning_rate": 2.2338382868079295e-06, "loss": 0.0326, "step": 174240 }, { "epoch": 0.14125, "grad_norm": 0.04963129758834839, "learning_rate": 2.232130619880096e-06, "loss": 0.034, "step": 174250 }, { "epoch": 0.1413, "grad_norm": 0.03338255733251572, "learning_rate": 2.230423575417254e-06, "loss": 0.0317, "step": 174260 }, { "epoch": 0.14135, "grad_norm": 0.03729334473609924, "learning_rate": 2.228717153466081e-06, "loss": 0.0316, "step": 174270 }, { "epoch": 0.1414, "grad_norm": 0.03284437581896782, "learning_rate": 2.227011354073227e-06, "loss": 0.0322, "step": 174280 }, { "epoch": 0.14145, "grad_norm": 0.04066002741456032, "learning_rate": 2.2253061772853217e-06, "loss": 0.0326, "step": 174290 }, { "epoch": 0.1415, "grad_norm": 0.046504199504852295, "learning_rate": 2.2236016231489938e-06, "loss": 0.033, "step": 174300 }, { "epoch": 0.14155, "grad_norm": 0.03390634059906006, "learning_rate": 2.2218976917108365e-06, "loss": 0.0324, "step": 174310 }, { "epoch": 0.1416, "grad_norm": 0.042621005326509476, "learning_rate": 2.2201943830174434e-06, "loss": 0.0344, "step": 174320 }, { "epoch": 0.14165, "grad_norm": 0.03581277281045914, "learning_rate": 2.2184916971153797e-06, "loss": 0.0332, "step": 174330 }, { "epoch": 0.1417, "grad_norm": 0.0325859859585762, "learning_rate": 2.2167896340511917e-06, "loss": 0.0325, "step": 174340 }, { "epoch": 0.14175, "grad_norm": 0.03300931304693222, "learning_rate": 2.215088193871412e-06, "loss": 0.0345, "step": 174350 }, { "epoch": 0.1418, "grad_norm": 0.036072760820388794, "learning_rate": 2.2133873766225665e-06, "loss": 0.0338, "step": 174360 }, { "epoch": 0.14185, "grad_norm": 0.03882085159420967, "learning_rate": 2.211687182351149e-06, "loss": 0.0322, "step": 174370 }, { "epoch": 0.1419, "grad_norm": 0.03754011169075966, "learning_rate": 2.2099876111036392e-06, "loss": 0.0351, "step": 174380 }, { "epoch": 0.14195, "grad_norm": 0.04201170802116394, "learning_rate": 2.2082886629265107e-06, "loss": 0.0341, "step": 174390 }, { "epoch": 0.142, "grad_norm": 0.03877422213554382, "learning_rate": 2.206590337866202e-06, "loss": 0.0335, "step": 174400 }, { "epoch": 0.14205, "grad_norm": 0.03186605125665665, "learning_rate": 2.2048926359691606e-06, "loss": 0.0337, "step": 174410 }, { "epoch": 0.1421, "grad_norm": 0.03477894142270088, "learning_rate": 2.2031955572817814e-06, "loss": 0.0328, "step": 174420 }, { "epoch": 0.14215, "grad_norm": 0.040825922042131424, "learning_rate": 2.201499101850471e-06, "loss": 0.0334, "step": 174430 }, { "epoch": 0.1422, "grad_norm": 0.03556324541568756, "learning_rate": 2.199803269721615e-06, "loss": 0.0345, "step": 174440 }, { "epoch": 0.14225, "grad_norm": 0.031087305396795273, "learning_rate": 2.1981080609415726e-06, "loss": 0.0325, "step": 174450 }, { "epoch": 0.1423, "grad_norm": 0.04095204919576645, "learning_rate": 2.1964134755566884e-06, "loss": 0.0338, "step": 174460 }, { "epoch": 0.14235, "grad_norm": 0.03513423725962639, "learning_rate": 2.1947195136132886e-06, "loss": 0.0333, "step": 174470 }, { "epoch": 0.1424, "grad_norm": 0.03837606683373451, "learning_rate": 2.1930261751576894e-06, "loss": 0.0329, "step": 174480 }, { "epoch": 0.14245, "grad_norm": 0.03757372498512268, "learning_rate": 2.1913334602361946e-06, "loss": 0.0324, "step": 174490 }, { "epoch": 0.1425, "grad_norm": 0.03373987227678299, "learning_rate": 2.189641368895068e-06, "loss": 0.0325, "step": 174500 }, { "epoch": 0.14255, "grad_norm": 0.042526379227638245, "learning_rate": 2.1879499011805806e-06, "loss": 0.0331, "step": 174510 }, { "epoch": 0.1426, "grad_norm": 0.032664705067873, "learning_rate": 2.1862590571389674e-06, "loss": 0.0328, "step": 174520 }, { "epoch": 0.14265, "grad_norm": 0.03375755250453949, "learning_rate": 2.184568836816464e-06, "loss": 0.032, "step": 174530 }, { "epoch": 0.1427, "grad_norm": 0.03368820250034332, "learning_rate": 2.1828792402592806e-06, "loss": 0.0317, "step": 174540 }, { "epoch": 0.14275, "grad_norm": 0.029644666239619255, "learning_rate": 2.1811902675135996e-06, "loss": 0.0336, "step": 174550 }, { "epoch": 0.1428, "grad_norm": 0.03478449210524559, "learning_rate": 2.1795019186256092e-06, "loss": 0.0319, "step": 174560 }, { "epoch": 0.14285, "grad_norm": 0.032810889184474945, "learning_rate": 2.1778141936414643e-06, "loss": 0.0326, "step": 174570 }, { "epoch": 0.1429, "grad_norm": 0.03178194910287857, "learning_rate": 2.1761270926073025e-06, "loss": 0.0319, "step": 174580 }, { "epoch": 0.14295, "grad_norm": 0.042895201593637466, "learning_rate": 2.174440615569251e-06, "loss": 0.0346, "step": 174590 }, { "epoch": 0.143, "grad_norm": 0.04192337766289711, "learning_rate": 2.1727547625734203e-06, "loss": 0.0329, "step": 174600 }, { "epoch": 0.14305, "grad_norm": 0.0405057892203331, "learning_rate": 2.1710695336658926e-06, "loss": 0.0334, "step": 174610 }, { "epoch": 0.1431, "grad_norm": 0.04190431907773018, "learning_rate": 2.1693849288927536e-06, "loss": 0.0326, "step": 174620 }, { "epoch": 0.14315, "grad_norm": 0.03621964529156685, "learning_rate": 2.1677009483000526e-06, "loss": 0.0316, "step": 174630 }, { "epoch": 0.1432, "grad_norm": 0.03936958685517311, "learning_rate": 2.1660175919338276e-06, "loss": 0.034, "step": 174640 }, { "epoch": 0.14325, "grad_norm": 0.042062707245349884, "learning_rate": 2.164334859840106e-06, "loss": 0.0327, "step": 174650 }, { "epoch": 0.1433, "grad_norm": 0.03852769359946251, "learning_rate": 2.1626527520648922e-06, "loss": 0.0329, "step": 174660 }, { "epoch": 0.14335, "grad_norm": 0.03222566843032837, "learning_rate": 2.160971268654166e-06, "loss": 0.0337, "step": 174670 }, { "epoch": 0.1434, "grad_norm": 0.03322441130876541, "learning_rate": 2.1592904096539108e-06, "loss": 0.0322, "step": 174680 }, { "epoch": 0.14345, "grad_norm": 0.03481338173151016, "learning_rate": 2.15761017511007e-06, "loss": 0.034, "step": 174690 }, { "epoch": 0.1435, "grad_norm": 0.034989435225725174, "learning_rate": 2.1559305650685925e-06, "loss": 0.0345, "step": 174700 }, { "epoch": 0.14355, "grad_norm": 0.040841348469257355, "learning_rate": 2.1542515795753866e-06, "loss": 0.0333, "step": 174710 }, { "epoch": 0.1436, "grad_norm": 0.035034745931625366, "learning_rate": 2.1525732186763596e-06, "loss": 0.0336, "step": 174720 }, { "epoch": 0.14365, "grad_norm": 0.03477044776082039, "learning_rate": 2.150895482417395e-06, "loss": 0.0333, "step": 174730 }, { "epoch": 0.1437, "grad_norm": 0.038191020488739014, "learning_rate": 2.149218370844369e-06, "loss": 0.0332, "step": 174740 }, { "epoch": 0.14375, "grad_norm": 0.0353204570710659, "learning_rate": 2.147541884003129e-06, "loss": 0.0333, "step": 174750 }, { "epoch": 0.1438, "grad_norm": 0.03748689219355583, "learning_rate": 2.1458660219395022e-06, "loss": 0.0329, "step": 174760 }, { "epoch": 0.14385, "grad_norm": 0.036651611328125, "learning_rate": 2.144190784699318e-06, "loss": 0.0333, "step": 174770 }, { "epoch": 0.1439, "grad_norm": 0.036774635314941406, "learning_rate": 2.142516172328371e-06, "loss": 0.0324, "step": 174780 }, { "epoch": 0.14395, "grad_norm": 0.05032498016953468, "learning_rate": 2.1408421848724435e-06, "loss": 0.0336, "step": 174790 }, { "epoch": 0.144, "grad_norm": 0.05237219110131264, "learning_rate": 2.1391688223772994e-06, "loss": 0.0327, "step": 174800 }, { "epoch": 0.14405, "grad_norm": 0.03800968453288078, "learning_rate": 2.1374960848886905e-06, "loss": 0.0329, "step": 174810 }, { "epoch": 0.1441, "grad_norm": 0.05860413610935211, "learning_rate": 2.1358239724523555e-06, "loss": 0.0322, "step": 174820 }, { "epoch": 0.14415, "grad_norm": 0.04500029981136322, "learning_rate": 2.1341524851140023e-06, "loss": 0.0308, "step": 174830 }, { "epoch": 0.1442, "grad_norm": 0.02946821227669716, "learning_rate": 2.13248162291933e-06, "loss": 0.0334, "step": 174840 }, { "epoch": 0.14425, "grad_norm": 0.04804980754852295, "learning_rate": 2.130811385914017e-06, "loss": 0.0333, "step": 174850 }, { "epoch": 0.1443, "grad_norm": 0.04131940379738808, "learning_rate": 2.1291417741437307e-06, "loss": 0.0327, "step": 174860 }, { "epoch": 0.14435, "grad_norm": 0.04331108182668686, "learning_rate": 2.1274727876541166e-06, "loss": 0.0314, "step": 174870 }, { "epoch": 0.1444, "grad_norm": 0.035809844732284546, "learning_rate": 2.1258044264908016e-06, "loss": 0.0309, "step": 174880 }, { "epoch": 0.14445, "grad_norm": 0.03436094522476196, "learning_rate": 2.124136690699402e-06, "loss": 0.033, "step": 174890 }, { "epoch": 0.1445, "grad_norm": 0.029962128028273582, "learning_rate": 2.122469580325506e-06, "loss": 0.0323, "step": 174900 }, { "epoch": 0.14455, "grad_norm": 0.038812559098005295, "learning_rate": 2.1208030954147086e-06, "loss": 0.032, "step": 174910 }, { "epoch": 0.1446, "grad_norm": 0.037531036883592606, "learning_rate": 2.1191372360125498e-06, "loss": 0.0348, "step": 174920 }, { "epoch": 0.14465, "grad_norm": 0.037080105394124985, "learning_rate": 2.1174720021645805e-06, "loss": 0.0316, "step": 174930 }, { "epoch": 0.1447, "grad_norm": 0.03403100371360779, "learning_rate": 2.1158073939163386e-06, "loss": 0.0333, "step": 174940 }, { "epoch": 0.14475, "grad_norm": 0.04135388135910034, "learning_rate": 2.114143411313321e-06, "loss": 0.033, "step": 174950 }, { "epoch": 0.1448, "grad_norm": 0.03619159385561943, "learning_rate": 2.112480054401028e-06, "loss": 0.0323, "step": 174960 }, { "epoch": 0.14485, "grad_norm": 0.03448307514190674, "learning_rate": 2.110817323224926e-06, "loss": 0.0344, "step": 174970 }, { "epoch": 0.1449, "grad_norm": 0.030624793842434883, "learning_rate": 2.109155217830483e-06, "loss": 0.0323, "step": 174980 }, { "epoch": 0.14495, "grad_norm": 0.03290669247508049, "learning_rate": 2.107493738263139e-06, "loss": 0.0344, "step": 174990 }, { "epoch": 0.145, "grad_norm": 0.033714376389980316, "learning_rate": 2.1058328845683096e-06, "loss": 0.0318, "step": 175000 }, { "epoch": 0.14505, "grad_norm": 0.04691474139690399, "learning_rate": 2.1041726567914143e-06, "loss": 0.0331, "step": 175010 }, { "epoch": 0.1451, "grad_norm": 0.03821582719683647, "learning_rate": 2.102513054977831e-06, "loss": 0.0324, "step": 175020 }, { "epoch": 0.14515, "grad_norm": 0.034602485597133636, "learning_rate": 2.1008540791729426e-06, "loss": 0.0357, "step": 175030 }, { "epoch": 0.1452, "grad_norm": 0.031218018382787704, "learning_rate": 2.0991957294221013e-06, "loss": 0.0329, "step": 175040 }, { "epoch": 0.14525, "grad_norm": 0.03488103300333023, "learning_rate": 2.097538005770641e-06, "loss": 0.0325, "step": 175050 }, { "epoch": 0.1453, "grad_norm": 0.03420497477054596, "learning_rate": 2.0958809082638947e-06, "loss": 0.0334, "step": 175060 }, { "epoch": 0.14535, "grad_norm": 0.03363852947950363, "learning_rate": 2.0942244369471564e-06, "loss": 0.0324, "step": 175070 }, { "epoch": 0.1454, "grad_norm": 0.036308009177446365, "learning_rate": 2.092568591865718e-06, "loss": 0.0331, "step": 175080 }, { "epoch": 0.14545, "grad_norm": 0.03419158235192299, "learning_rate": 2.0909133730648428e-06, "loss": 0.0327, "step": 175090 }, { "epoch": 0.1455, "grad_norm": 0.03830074891448021, "learning_rate": 2.089258780589795e-06, "loss": 0.0335, "step": 175100 }, { "epoch": 0.14555, "grad_norm": 0.038069311529397964, "learning_rate": 2.0876048144857997e-06, "loss": 0.0333, "step": 175110 }, { "epoch": 0.1456, "grad_norm": 0.03244706615805626, "learning_rate": 2.0859514747980867e-06, "loss": 0.0356, "step": 175120 }, { "epoch": 0.14565, "grad_norm": 0.038728900253772736, "learning_rate": 2.084298761571851e-06, "loss": 0.0335, "step": 175130 }, { "epoch": 0.1457, "grad_norm": 0.03410856053233147, "learning_rate": 2.0826466748522734e-06, "loss": 0.0336, "step": 175140 }, { "epoch": 0.14575, "grad_norm": 0.030474839732050896, "learning_rate": 2.080995214684531e-06, "loss": 0.0345, "step": 175150 }, { "epoch": 0.1458, "grad_norm": 0.035134457051754, "learning_rate": 2.079344381113768e-06, "loss": 0.0341, "step": 175160 }, { "epoch": 0.14585, "grad_norm": 0.03453769162297249, "learning_rate": 2.0776941741851215e-06, "loss": 0.0326, "step": 175170 }, { "epoch": 0.1459, "grad_norm": 0.036479029804468155, "learning_rate": 2.076044593943699e-06, "loss": 0.0333, "step": 175180 }, { "epoch": 0.14595, "grad_norm": 0.03493402525782585, "learning_rate": 2.074395640434604e-06, "loss": 0.0337, "step": 175190 }, { "epoch": 0.146, "grad_norm": 0.03477869927883148, "learning_rate": 2.0727473137029303e-06, "loss": 0.033, "step": 175200 }, { "epoch": 0.14605, "grad_norm": 0.03139925375580788, "learning_rate": 2.07109961379372e-06, "loss": 0.0331, "step": 175210 }, { "epoch": 0.1461, "grad_norm": 0.031562916934490204, "learning_rate": 2.06945254075204e-06, "loss": 0.032, "step": 175220 }, { "epoch": 0.14615, "grad_norm": 0.032667577266693115, "learning_rate": 2.0678060946229066e-06, "loss": 0.0324, "step": 175230 }, { "epoch": 0.1462, "grad_norm": 0.03880128264427185, "learning_rate": 2.0661602754513426e-06, "loss": 0.0341, "step": 175240 }, { "epoch": 0.14625, "grad_norm": 0.03329472988843918, "learning_rate": 2.0645150832823424e-06, "loss": 0.0316, "step": 175250 }, { "epoch": 0.1463, "grad_norm": 0.030931759625673294, "learning_rate": 2.062870518160878e-06, "loss": 0.0326, "step": 175260 }, { "epoch": 0.14635, "grad_norm": 0.03219376504421234, "learning_rate": 2.0612265801319225e-06, "loss": 0.0333, "step": 175270 }, { "epoch": 0.1464, "grad_norm": 0.032046496868133545, "learning_rate": 2.059583269240414e-06, "loss": 0.0315, "step": 175280 }, { "epoch": 0.14645, "grad_norm": 0.03174709901213646, "learning_rate": 2.0579405855312815e-06, "loss": 0.0324, "step": 175290 }, { "epoch": 0.1465, "grad_norm": 0.02962440438568592, "learning_rate": 2.0562985290494275e-06, "loss": 0.0312, "step": 175300 }, { "epoch": 0.14655, "grad_norm": 0.03580990433692932, "learning_rate": 2.0546570998397547e-06, "loss": 0.0331, "step": 175310 }, { "epoch": 0.1466, "grad_norm": 0.03146926686167717, "learning_rate": 2.0530162979471385e-06, "loss": 0.0316, "step": 175320 }, { "epoch": 0.14665, "grad_norm": 0.03137435391545296, "learning_rate": 2.0513761234164377e-06, "loss": 0.0343, "step": 175330 }, { "epoch": 0.1467, "grad_norm": 0.03477836027741432, "learning_rate": 2.0497365762924938e-06, "loss": 0.0317, "step": 175340 }, { "epoch": 0.14675, "grad_norm": 0.03239109739661217, "learning_rate": 2.0480976566201237e-06, "loss": 0.0327, "step": 175350 }, { "epoch": 0.1468, "grad_norm": 0.03294882923364639, "learning_rate": 2.046459364444145e-06, "loss": 0.0317, "step": 175360 }, { "epoch": 0.14685, "grad_norm": 0.032854992896318436, "learning_rate": 2.0448216998093433e-06, "loss": 0.0321, "step": 175370 }, { "epoch": 0.1469, "grad_norm": 0.03243003040552139, "learning_rate": 2.043184662760489e-06, "loss": 0.0306, "step": 175380 }, { "epoch": 0.14695, "grad_norm": 0.029420357197523117, "learning_rate": 2.0415482533423486e-06, "loss": 0.032, "step": 175390 }, { "epoch": 0.147, "grad_norm": 0.039761677384376526, "learning_rate": 2.039912471599645e-06, "loss": 0.0344, "step": 175400 }, { "epoch": 0.14705, "grad_norm": 0.03798593208193779, "learning_rate": 2.0382773175771198e-06, "loss": 0.0321, "step": 175410 }, { "epoch": 0.1471, "grad_norm": 0.03551090881228447, "learning_rate": 2.036642791319457e-06, "loss": 0.0324, "step": 175420 }, { "epoch": 0.14715, "grad_norm": 0.03543001040816307, "learning_rate": 2.035008892871354e-06, "loss": 0.0323, "step": 175430 }, { "epoch": 0.1472, "grad_norm": 0.040055036544799805, "learning_rate": 2.0333756222774865e-06, "loss": 0.0329, "step": 175440 }, { "epoch": 0.14725, "grad_norm": 0.029678745195269585, "learning_rate": 2.0317429795824987e-06, "loss": 0.032, "step": 175450 }, { "epoch": 0.1473, "grad_norm": 0.036483991891145706, "learning_rate": 2.0301109648310306e-06, "loss": 0.0347, "step": 175460 }, { "epoch": 0.14735, "grad_norm": 0.03847810998558998, "learning_rate": 2.0284795780676936e-06, "loss": 0.0335, "step": 175470 }, { "epoch": 0.1474, "grad_norm": 0.03432368487119675, "learning_rate": 2.0268488193370988e-06, "loss": 0.0328, "step": 175480 }, { "epoch": 0.14745, "grad_norm": 0.03539138287305832, "learning_rate": 2.0252186886838276e-06, "loss": 0.0336, "step": 175490 }, { "epoch": 0.1475, "grad_norm": 0.03191179782152176, "learning_rate": 2.0235891861524413e-06, "loss": 0.0324, "step": 175500 }, { "epoch": 0.14755, "grad_norm": 0.036091215908527374, "learning_rate": 2.0219603117874992e-06, "loss": 0.034, "step": 175510 }, { "epoch": 0.1476, "grad_norm": 0.030849860981106758, "learning_rate": 2.020332065633526e-06, "loss": 0.0342, "step": 175520 }, { "epoch": 0.14765, "grad_norm": 0.03476780652999878, "learning_rate": 2.0187044477350454e-06, "loss": 0.0321, "step": 175530 }, { "epoch": 0.1477, "grad_norm": 0.03580397740006447, "learning_rate": 2.0170774581365513e-06, "loss": 0.0316, "step": 175540 }, { "epoch": 0.14775, "grad_norm": 0.03447788953781128, "learning_rate": 2.015451096882526e-06, "loss": 0.0329, "step": 175550 }, { "epoch": 0.1478, "grad_norm": 0.031128806993365288, "learning_rate": 2.01382536401743e-06, "loss": 0.035, "step": 175560 }, { "epoch": 0.14785, "grad_norm": 0.039009448140859604, "learning_rate": 2.012200259585714e-06, "loss": 0.0334, "step": 175570 }, { "epoch": 0.1479, "grad_norm": 0.0299491286277771, "learning_rate": 2.010575783631807e-06, "loss": 0.0331, "step": 175580 }, { "epoch": 0.14795, "grad_norm": 0.0313674621284008, "learning_rate": 2.008951936200118e-06, "loss": 0.0345, "step": 175590 }, { "epoch": 0.148, "grad_norm": 0.03528376296162605, "learning_rate": 2.0073287173350525e-06, "loss": 0.0323, "step": 175600 }, { "epoch": 0.14805, "grad_norm": 0.029887860640883446, "learning_rate": 2.005706127080975e-06, "loss": 0.0327, "step": 175610 }, { "epoch": 0.1481, "grad_norm": 0.0347294919192791, "learning_rate": 2.0040841654822617e-06, "loss": 0.0342, "step": 175620 }, { "epoch": 0.14815, "grad_norm": 0.034214939922094345, "learning_rate": 2.002462832583241e-06, "loss": 0.0327, "step": 175630 }, { "epoch": 0.1482, "grad_norm": 0.03237602487206459, "learning_rate": 2.0008421284282432e-06, "loss": 0.0328, "step": 175640 }, { "epoch": 0.14825, "grad_norm": 0.03369561582803726, "learning_rate": 1.999222053061589e-06, "loss": 0.0328, "step": 175650 }, { "epoch": 0.1483, "grad_norm": 0.031222902238368988, "learning_rate": 1.9976026065275627e-06, "loss": 0.0338, "step": 175660 }, { "epoch": 0.14835, "grad_norm": 0.037712518125772476, "learning_rate": 1.9959837888704395e-06, "loss": 0.0324, "step": 175670 }, { "epoch": 0.1484, "grad_norm": 0.03585483506321907, "learning_rate": 1.9943656001344708e-06, "loss": 0.0326, "step": 175680 }, { "epoch": 0.14845, "grad_norm": 0.03238583356142044, "learning_rate": 1.9927480403639063e-06, "loss": 0.0325, "step": 175690 }, { "epoch": 0.1485, "grad_norm": 0.03418656438589096, "learning_rate": 1.9911311096029726e-06, "loss": 0.0356, "step": 175700 }, { "epoch": 0.14855, "grad_norm": 0.03242915868759155, "learning_rate": 1.9895148078958646e-06, "loss": 0.0323, "step": 175710 }, { "epoch": 0.1486, "grad_norm": 0.032259389758110046, "learning_rate": 1.9878991352867804e-06, "loss": 0.0327, "step": 175720 }, { "epoch": 0.14865, "grad_norm": 0.029926873743534088, "learning_rate": 1.986284091819884e-06, "loss": 0.0323, "step": 175730 }, { "epoch": 0.1487, "grad_norm": 0.034845590591430664, "learning_rate": 1.9846696775393413e-06, "loss": 0.0314, "step": 175740 }, { "epoch": 0.14875, "grad_norm": 0.030692042782902718, "learning_rate": 1.98305589248928e-06, "loss": 0.0323, "step": 175750 }, { "epoch": 0.1488, "grad_norm": 0.04207862541079521, "learning_rate": 1.9814427367138207e-06, "loss": 0.032, "step": 175760 }, { "epoch": 0.14885, "grad_norm": 0.033570546656847, "learning_rate": 1.9798302102570747e-06, "loss": 0.032, "step": 175770 }, { "epoch": 0.1489, "grad_norm": 0.03618871420621872, "learning_rate": 1.978218313163119e-06, "loss": 0.0347, "step": 175780 }, { "epoch": 0.14895, "grad_norm": 0.03668270260095596, "learning_rate": 1.976607045476028e-06, "loss": 0.0326, "step": 175790 }, { "epoch": 0.149, "grad_norm": 0.03377063572406769, "learning_rate": 1.9749964072398487e-06, "loss": 0.0345, "step": 175800 }, { "epoch": 0.14905, "grad_norm": 0.03670993819832802, "learning_rate": 1.9733863984986195e-06, "loss": 0.0334, "step": 175810 }, { "epoch": 0.1491, "grad_norm": 0.03634348511695862, "learning_rate": 1.9717770192963502e-06, "loss": 0.0338, "step": 175820 }, { "epoch": 0.14915, "grad_norm": 0.037743836641311646, "learning_rate": 1.97016826967705e-06, "loss": 0.0337, "step": 175830 }, { "epoch": 0.1492, "grad_norm": 0.03787601739168167, "learning_rate": 1.9685601496847006e-06, "loss": 0.0341, "step": 175840 }, { "epoch": 0.14925, "grad_norm": 0.03569841384887695, "learning_rate": 1.966952659363258e-06, "loss": 0.0346, "step": 175850 }, { "epoch": 0.1493, "grad_norm": 0.032588619738817215, "learning_rate": 1.965345798756679e-06, "loss": 0.0333, "step": 175860 }, { "epoch": 0.14935, "grad_norm": 0.039640285074710846, "learning_rate": 1.9637395679088956e-06, "loss": 0.0333, "step": 175870 }, { "epoch": 0.1494, "grad_norm": 0.03261444345116615, "learning_rate": 1.9621339668638105e-06, "loss": 0.0331, "step": 175880 }, { "epoch": 0.14945, "grad_norm": 0.03778470307588577, "learning_rate": 1.9605289956653337e-06, "loss": 0.0351, "step": 175890 }, { "epoch": 0.1495, "grad_norm": 0.03554508462548256, "learning_rate": 1.958924654357336e-06, "loss": 0.0336, "step": 175900 }, { "epoch": 0.14955, "grad_norm": 0.03847133368253708, "learning_rate": 1.9573209429836897e-06, "loss": 0.033, "step": 175910 }, { "epoch": 0.1496, "grad_norm": 0.03790833801031113, "learning_rate": 1.9557178615882217e-06, "loss": 0.0327, "step": 175920 }, { "epoch": 0.14965, "grad_norm": 0.05213003233075142, "learning_rate": 1.954115410214777e-06, "loss": 0.034, "step": 175930 }, { "epoch": 0.1497, "grad_norm": 0.03753867745399475, "learning_rate": 1.9525135889071538e-06, "loss": 0.0338, "step": 175940 }, { "epoch": 0.14975, "grad_norm": 0.03490155190229416, "learning_rate": 1.9509123977091536e-06, "loss": 0.0341, "step": 175950 }, { "epoch": 0.1498, "grad_norm": 0.03734445571899414, "learning_rate": 1.9493118366645497e-06, "loss": 0.0337, "step": 175960 }, { "epoch": 0.14985, "grad_norm": 0.03365827724337578, "learning_rate": 1.9477119058170957e-06, "loss": 0.0333, "step": 175970 }, { "epoch": 0.1499, "grad_norm": 0.03264923766255379, "learning_rate": 1.946112605210543e-06, "loss": 0.0324, "step": 175980 }, { "epoch": 0.14995, "grad_norm": 0.03164662420749664, "learning_rate": 1.9445139348886114e-06, "loss": 0.0327, "step": 175990 }, { "epoch": 0.15, "grad_norm": 0.032765284180641174, "learning_rate": 1.942915894895006e-06, "loss": 0.0333, "step": 176000 }, { "epoch": 0.15005, "grad_norm": 0.03914589062333107, "learning_rate": 1.9413184852734163e-06, "loss": 0.0342, "step": 176010 }, { "epoch": 0.1501, "grad_norm": 0.04127218574285507, "learning_rate": 1.9397217060675128e-06, "loss": 0.0325, "step": 176020 }, { "epoch": 0.15015, "grad_norm": 0.03312616050243378, "learning_rate": 1.9381255573209607e-06, "loss": 0.0328, "step": 176030 }, { "epoch": 0.1502, "grad_norm": 0.03647003322839737, "learning_rate": 1.9365300390773915e-06, "loss": 0.0317, "step": 176040 }, { "epoch": 0.15025, "grad_norm": 0.037491753697395325, "learning_rate": 1.9349351513804267e-06, "loss": 0.0317, "step": 176050 }, { "epoch": 0.1503, "grad_norm": 0.04122772812843323, "learning_rate": 1.9333408942736663e-06, "loss": 0.0329, "step": 176060 }, { "epoch": 0.15035, "grad_norm": 0.02853936143219471, "learning_rate": 1.9317472678007016e-06, "loss": 0.0325, "step": 176070 }, { "epoch": 0.1504, "grad_norm": 0.03218531236052513, "learning_rate": 1.9301542720051024e-06, "loss": 0.0352, "step": 176080 }, { "epoch": 0.15045, "grad_norm": 0.035726893693208694, "learning_rate": 1.928561906930415e-06, "loss": 0.0326, "step": 176090 }, { "epoch": 0.1505, "grad_norm": 0.033531490713357925, "learning_rate": 1.9269701726201795e-06, "loss": 0.032, "step": 176100 }, { "epoch": 0.15055, "grad_norm": 0.03677698224782944, "learning_rate": 1.925379069117908e-06, "loss": 0.0334, "step": 176110 }, { "epoch": 0.1506, "grad_norm": 0.031199825927615166, "learning_rate": 1.923788596467113e-06, "loss": 0.0308, "step": 176120 }, { "epoch": 0.15065, "grad_norm": 0.03129459545016289, "learning_rate": 1.9221987547112603e-06, "loss": 0.0317, "step": 176130 }, { "epoch": 0.1507, "grad_norm": 0.030460912734270096, "learning_rate": 1.9206095438938225e-06, "loss": 0.0314, "step": 176140 }, { "epoch": 0.15075, "grad_norm": 0.030488573014736176, "learning_rate": 1.9190209640582547e-06, "loss": 0.0316, "step": 176150 }, { "epoch": 0.1508, "grad_norm": 0.0343417227268219, "learning_rate": 1.9174330152479825e-06, "loss": 0.0325, "step": 176160 }, { "epoch": 0.15085, "grad_norm": 0.033270884305238724, "learning_rate": 1.9158456975064186e-06, "loss": 0.0328, "step": 176170 }, { "epoch": 0.1509, "grad_norm": 0.030955199152231216, "learning_rate": 1.9142590108769593e-06, "loss": 0.032, "step": 176180 }, { "epoch": 0.15095, "grad_norm": 0.03370071202516556, "learning_rate": 1.912672955402986e-06, "loss": 0.0316, "step": 176190 }, { "epoch": 0.151, "grad_norm": 0.03444530814886093, "learning_rate": 1.9110875311278643e-06, "loss": 0.0325, "step": 176200 }, { "epoch": 0.15105, "grad_norm": 0.032148417085409164, "learning_rate": 1.9095027380949288e-06, "loss": 0.0326, "step": 176210 }, { "epoch": 0.1511, "grad_norm": 0.035337161272764206, "learning_rate": 1.90791857634752e-06, "loss": 0.0324, "step": 176220 }, { "epoch": 0.15115, "grad_norm": 0.03444439545273781, "learning_rate": 1.9063350459289364e-06, "loss": 0.033, "step": 176230 }, { "epoch": 0.1512, "grad_norm": 0.031171441078186035, "learning_rate": 1.9047521468824826e-06, "loss": 0.0327, "step": 176240 }, { "epoch": 0.15125, "grad_norm": 0.03326416015625, "learning_rate": 1.9031698792514262e-06, "loss": 0.0326, "step": 176250 }, { "epoch": 0.1513, "grad_norm": 0.03335323929786682, "learning_rate": 1.901588243079025e-06, "loss": 0.0322, "step": 176260 }, { "epoch": 0.15135, "grad_norm": 0.03669118508696556, "learning_rate": 1.9000072384085272e-06, "loss": 0.0338, "step": 176270 }, { "epoch": 0.1514, "grad_norm": 0.039805181324481964, "learning_rate": 1.8984268652831538e-06, "loss": 0.0309, "step": 176280 }, { "epoch": 0.15145, "grad_norm": 0.03643699735403061, "learning_rate": 1.896847123746112e-06, "loss": 0.0319, "step": 176290 }, { "epoch": 0.1515, "grad_norm": 0.033225320279598236, "learning_rate": 1.8952680138405843e-06, "loss": 0.0314, "step": 176300 }, { "epoch": 0.15155, "grad_norm": 0.03185262903571129, "learning_rate": 1.8936895356097551e-06, "loss": 0.0321, "step": 176310 }, { "epoch": 0.1516, "grad_norm": 0.03353235498070717, "learning_rate": 1.8921116890967678e-06, "loss": 0.0333, "step": 176320 }, { "epoch": 0.15165, "grad_norm": 0.036939650774002075, "learning_rate": 1.890534474344771e-06, "loss": 0.0328, "step": 176330 }, { "epoch": 0.1517, "grad_norm": 0.03296613693237305, "learning_rate": 1.8889578913968807e-06, "loss": 0.0306, "step": 176340 }, { "epoch": 0.15175, "grad_norm": 0.03160223364830017, "learning_rate": 1.8873819402961929e-06, "loss": 0.0332, "step": 176350 }, { "epoch": 0.1518, "grad_norm": 0.03642089292407036, "learning_rate": 1.8858066210858061e-06, "loss": 0.0319, "step": 176360 }, { "epoch": 0.15185, "grad_norm": 0.03370288014411926, "learning_rate": 1.8842319338087805e-06, "loss": 0.0324, "step": 176370 }, { "epoch": 0.1519, "grad_norm": 0.03192954510450363, "learning_rate": 1.8826578785081705e-06, "loss": 0.0313, "step": 176380 }, { "epoch": 0.15195, "grad_norm": 0.03451680764555931, "learning_rate": 1.8810844552270058e-06, "loss": 0.0328, "step": 176390 }, { "epoch": 0.152, "grad_norm": 0.03376030921936035, "learning_rate": 1.8795116640083044e-06, "loss": 0.0333, "step": 176400 }, { "epoch": 0.15205, "grad_norm": 0.035202499479055405, "learning_rate": 1.8779395048950794e-06, "loss": 0.0325, "step": 176410 }, { "epoch": 0.1521, "grad_norm": 0.03488573059439659, "learning_rate": 1.8763679779302935e-06, "loss": 0.0318, "step": 176420 }, { "epoch": 0.15215, "grad_norm": 0.03220974653959274, "learning_rate": 1.8747970831569206e-06, "loss": 0.0324, "step": 176430 }, { "epoch": 0.1522, "grad_norm": 0.03496704250574112, "learning_rate": 1.8732268206179072e-06, "loss": 0.0321, "step": 176440 }, { "epoch": 0.15225, "grad_norm": 0.030801020562648773, "learning_rate": 1.8716571903561853e-06, "loss": 0.0321, "step": 176450 }, { "epoch": 0.1523, "grad_norm": 0.03100545145571232, "learning_rate": 1.8700881924146707e-06, "loss": 0.0331, "step": 176460 }, { "epoch": 0.15235, "grad_norm": 0.03517313301563263, "learning_rate": 1.8685198268362486e-06, "loss": 0.0331, "step": 176470 }, { "epoch": 0.1524, "grad_norm": 0.035738781094551086, "learning_rate": 1.8669520936638096e-06, "loss": 0.0333, "step": 176480 }, { "epoch": 0.15245, "grad_norm": 0.040103767067193985, "learning_rate": 1.8653849929402084e-06, "loss": 0.0342, "step": 176490 }, { "epoch": 0.1525, "grad_norm": 0.03443825617432594, "learning_rate": 1.8638185247082912e-06, "loss": 0.0328, "step": 176500 }, { "epoch": 0.15255, "grad_norm": 0.03138561546802521, "learning_rate": 1.8622526890108795e-06, "loss": 0.0328, "step": 176510 }, { "epoch": 0.1526, "grad_norm": 0.03944150358438492, "learning_rate": 1.8606874858907859e-06, "loss": 0.033, "step": 176520 }, { "epoch": 0.15265, "grad_norm": 0.03146817907691002, "learning_rate": 1.859122915390807e-06, "loss": 0.0327, "step": 176530 }, { "epoch": 0.1527, "grad_norm": 0.03628481552004814, "learning_rate": 1.8575589775537166e-06, "loss": 0.0327, "step": 176540 }, { "epoch": 0.15275, "grad_norm": 0.030864441767334938, "learning_rate": 1.8559956724222672e-06, "loss": 0.0332, "step": 176550 }, { "epoch": 0.1528, "grad_norm": 0.03527746722102165, "learning_rate": 1.854433000039199e-06, "loss": 0.0318, "step": 176560 }, { "epoch": 0.15285, "grad_norm": 0.030624518170952797, "learning_rate": 1.8528709604472393e-06, "loss": 0.0325, "step": 176570 }, { "epoch": 0.1529, "grad_norm": 0.034199099987745285, "learning_rate": 1.8513095536890928e-06, "loss": 0.032, "step": 176580 }, { "epoch": 0.15295, "grad_norm": 0.032137248665094376, "learning_rate": 1.849748779807442e-06, "loss": 0.0329, "step": 176590 }, { "epoch": 0.153, "grad_norm": 0.0312294140458107, "learning_rate": 1.8481886388449694e-06, "loss": 0.0314, "step": 176600 }, { "epoch": 0.15305, "grad_norm": 0.03156765550374985, "learning_rate": 1.846629130844313e-06, "loss": 0.0322, "step": 176610 }, { "epoch": 0.1531, "grad_norm": 0.03706734627485275, "learning_rate": 1.8450702558481304e-06, "loss": 0.0305, "step": 176620 }, { "epoch": 0.15315, "grad_norm": 0.03096882998943329, "learning_rate": 1.8435120138990152e-06, "loss": 0.0319, "step": 176630 }, { "epoch": 0.1532, "grad_norm": 0.034606993198394775, "learning_rate": 1.8419544050395865e-06, "loss": 0.0313, "step": 176640 }, { "epoch": 0.15325, "grad_norm": 0.031098688021302223, "learning_rate": 1.8403974293124265e-06, "loss": 0.0318, "step": 176650 }, { "epoch": 0.1533, "grad_norm": 0.03658682480454445, "learning_rate": 1.8388410867600986e-06, "loss": 0.0319, "step": 176660 }, { "epoch": 0.15335, "grad_norm": 0.03677098825573921, "learning_rate": 1.8372853774251548e-06, "loss": 0.0329, "step": 176670 }, { "epoch": 0.1534, "grad_norm": 0.0359707847237587, "learning_rate": 1.8357303013501248e-06, "loss": 0.0323, "step": 176680 }, { "epoch": 0.15345, "grad_norm": 0.03360195830464363, "learning_rate": 1.8341758585775276e-06, "loss": 0.0323, "step": 176690 }, { "epoch": 0.1535, "grad_norm": 0.0349896065890789, "learning_rate": 1.8326220491498624e-06, "loss": 0.0311, "step": 176700 }, { "epoch": 0.15355, "grad_norm": 0.03248278796672821, "learning_rate": 1.831068873109601e-06, "loss": 0.0328, "step": 176710 }, { "epoch": 0.1536, "grad_norm": 0.03948421776294708, "learning_rate": 1.8295163304992147e-06, "loss": 0.0327, "step": 176720 }, { "epoch": 0.15365, "grad_norm": 0.03639651834964752, "learning_rate": 1.8279644213611447e-06, "loss": 0.0326, "step": 176730 }, { "epoch": 0.1537, "grad_norm": 0.03521611541509628, "learning_rate": 1.8264131457378264e-06, "loss": 0.0335, "step": 176740 }, { "epoch": 0.15375, "grad_norm": 0.02922460436820984, "learning_rate": 1.8248625036716676e-06, "loss": 0.0321, "step": 176750 }, { "epoch": 0.1538, "grad_norm": 0.03481606766581535, "learning_rate": 1.8233124952050596e-06, "loss": 0.0363, "step": 176760 }, { "epoch": 0.15385, "grad_norm": 0.03618673235177994, "learning_rate": 1.8217631203803791e-06, "loss": 0.0339, "step": 176770 }, { "epoch": 0.1539, "grad_norm": 0.03688351437449455, "learning_rate": 1.82021437923999e-06, "loss": 0.0325, "step": 176780 }, { "epoch": 0.15395, "grad_norm": 0.030665552243590355, "learning_rate": 1.8186662718262304e-06, "loss": 0.0337, "step": 176790 }, { "epoch": 0.154, "grad_norm": 0.03393794968724251, "learning_rate": 1.8171187981814247e-06, "loss": 0.0323, "step": 176800 }, { "epoch": 0.15405, "grad_norm": 0.03129062056541443, "learning_rate": 1.8155719583478836e-06, "loss": 0.0334, "step": 176810 }, { "epoch": 0.1541, "grad_norm": 0.036447811871767044, "learning_rate": 1.8140257523678928e-06, "loss": 0.033, "step": 176820 }, { "epoch": 0.15415, "grad_norm": 0.03580206260085106, "learning_rate": 1.812480180283735e-06, "loss": 0.0348, "step": 176830 }, { "epoch": 0.1542, "grad_norm": 0.03224453702569008, "learning_rate": 1.8109352421376486e-06, "loss": 0.0325, "step": 176840 }, { "epoch": 0.15425, "grad_norm": 0.03797399252653122, "learning_rate": 1.8093909379718804e-06, "loss": 0.0328, "step": 176850 }, { "epoch": 0.1543, "grad_norm": 0.0390145443379879, "learning_rate": 1.807847267828658e-06, "loss": 0.0334, "step": 176860 }, { "epoch": 0.15435, "grad_norm": 0.03498688340187073, "learning_rate": 1.8063042317501777e-06, "loss": 0.0331, "step": 176870 }, { "epoch": 0.1544, "grad_norm": 0.03264835849404335, "learning_rate": 1.8047618297786257e-06, "loss": 0.0336, "step": 176880 }, { "epoch": 0.15445, "grad_norm": 0.04160667210817337, "learning_rate": 1.803220061956165e-06, "loss": 0.0355, "step": 176890 }, { "epoch": 0.1545, "grad_norm": 0.036346692591905594, "learning_rate": 1.8016789283249568e-06, "loss": 0.0336, "step": 176900 }, { "epoch": 0.15455, "grad_norm": 0.03496379032731056, "learning_rate": 1.8001384289271394e-06, "loss": 0.0331, "step": 176910 }, { "epoch": 0.1546, "grad_norm": 0.03790908679366112, "learning_rate": 1.7985985638048098e-06, "loss": 0.0336, "step": 176920 }, { "epoch": 0.15465, "grad_norm": 0.03414648398756981, "learning_rate": 1.7970593330000867e-06, "loss": 0.0331, "step": 176930 }, { "epoch": 0.1547, "grad_norm": 0.03520461916923523, "learning_rate": 1.7955207365550398e-06, "loss": 0.032, "step": 176940 }, { "epoch": 0.15475, "grad_norm": 0.03670840337872505, "learning_rate": 1.7939827745117433e-06, "loss": 0.0345, "step": 176950 }, { "epoch": 0.1548, "grad_norm": 0.040852636098861694, "learning_rate": 1.7924454469122387e-06, "loss": 0.0339, "step": 176960 }, { "epoch": 0.15485, "grad_norm": 0.04101041704416275, "learning_rate": 1.7909087537985509e-06, "loss": 0.0334, "step": 176970 }, { "epoch": 0.1549, "grad_norm": 0.0339178703725338, "learning_rate": 1.7893726952127043e-06, "loss": 0.0329, "step": 176980 }, { "epoch": 0.15495, "grad_norm": 0.035663388669490814, "learning_rate": 1.7878372711966879e-06, "loss": 0.0333, "step": 176990 }, { "epoch": 0.155, "grad_norm": 0.038831040263175964, "learning_rate": 1.7863024817924816e-06, "loss": 0.0332, "step": 177000 }, { "epoch": 0.15505, "grad_norm": 0.03730317950248718, "learning_rate": 1.7847683270420385e-06, "loss": 0.0335, "step": 177010 }, { "epoch": 0.1551, "grad_norm": 0.03230489417910576, "learning_rate": 1.7832348069873134e-06, "loss": 0.0326, "step": 177020 }, { "epoch": 0.15515, "grad_norm": 0.038198865950107574, "learning_rate": 1.781701921670223e-06, "loss": 0.033, "step": 177030 }, { "epoch": 0.1552, "grad_norm": 0.03649018704891205, "learning_rate": 1.780169671132681e-06, "loss": 0.0317, "step": 177040 }, { "epoch": 0.15525, "grad_norm": 0.03312964364886284, "learning_rate": 1.7786380554165787e-06, "loss": 0.0336, "step": 177050 }, { "epoch": 0.1553, "grad_norm": 0.03336400166153908, "learning_rate": 1.777107074563783e-06, "loss": 0.0325, "step": 177060 }, { "epoch": 0.15535, "grad_norm": 0.0388801284134388, "learning_rate": 1.77557672861616e-06, "loss": 0.033, "step": 177070 }, { "epoch": 0.1554, "grad_norm": 0.0346393957734108, "learning_rate": 1.7740470176155434e-06, "loss": 0.0321, "step": 177080 }, { "epoch": 0.15545, "grad_norm": 0.03467172011733055, "learning_rate": 1.7725179416037546e-06, "loss": 0.0326, "step": 177090 }, { "epoch": 0.1555, "grad_norm": 0.03163683041930199, "learning_rate": 1.7709895006225996e-06, "loss": 0.0321, "step": 177100 }, { "epoch": 0.15555, "grad_norm": 0.03598109260201454, "learning_rate": 1.7694616947138642e-06, "loss": 0.0316, "step": 177110 }, { "epoch": 0.1556, "grad_norm": 0.033809930086135864, "learning_rate": 1.7679345239193234e-06, "loss": 0.0316, "step": 177120 }, { "epoch": 0.15565, "grad_norm": 0.03225461021065712, "learning_rate": 1.766407988280719e-06, "loss": 0.0331, "step": 177130 }, { "epoch": 0.1557, "grad_norm": 0.0304391048848629, "learning_rate": 1.7648820878397948e-06, "loss": 0.0342, "step": 177140 }, { "epoch": 0.15575, "grad_norm": 0.03615155071020126, "learning_rate": 1.7633568226382624e-06, "loss": 0.0339, "step": 177150 }, { "epoch": 0.1558, "grad_norm": 0.030978847295045853, "learning_rate": 1.761832192717827e-06, "loss": 0.0337, "step": 177160 }, { "epoch": 0.15585, "grad_norm": 0.031113559380173683, "learning_rate": 1.7603081981201696e-06, "loss": 0.0332, "step": 177170 }, { "epoch": 0.1559, "grad_norm": 0.03291467949748039, "learning_rate": 1.7587848388869539e-06, "loss": 0.0332, "step": 177180 }, { "epoch": 0.15595, "grad_norm": 0.036255598068237305, "learning_rate": 1.7572621150598323e-06, "loss": 0.0329, "step": 177190 }, { "epoch": 0.156, "grad_norm": 0.03551039472222328, "learning_rate": 1.7557400266804302e-06, "loss": 0.0345, "step": 177200 }, { "epoch": 0.15605, "grad_norm": 0.040075451135635376, "learning_rate": 1.7542185737903643e-06, "loss": 0.0359, "step": 177210 }, { "epoch": 0.1561, "grad_norm": 0.03435356914997101, "learning_rate": 1.7526977564312263e-06, "loss": 0.0351, "step": 177220 }, { "epoch": 0.15615, "grad_norm": 0.03493310511112213, "learning_rate": 1.7511775746445997e-06, "loss": 0.0342, "step": 177230 }, { "epoch": 0.1562, "grad_norm": 0.029415188357234, "learning_rate": 1.7496580284720455e-06, "loss": 0.0331, "step": 177240 }, { "epoch": 0.15625, "grad_norm": 0.035070475190877914, "learning_rate": 1.7481391179551082e-06, "loss": 0.0328, "step": 177250 }, { "epoch": 0.1563, "grad_norm": 0.03617245703935623, "learning_rate": 1.7466208431353104e-06, "loss": 0.033, "step": 177260 }, { "epoch": 0.15635, "grad_norm": 0.033768750727176666, "learning_rate": 1.7451032040541576e-06, "loss": 0.0346, "step": 177270 }, { "epoch": 0.1564, "grad_norm": 0.03762581944465637, "learning_rate": 1.7435862007531527e-06, "loss": 0.0343, "step": 177280 }, { "epoch": 0.15645, "grad_norm": 0.031112609431147575, "learning_rate": 1.742069833273763e-06, "loss": 0.0328, "step": 177290 }, { "epoch": 0.1565, "grad_norm": 0.037205617874860764, "learning_rate": 1.7405541016574434e-06, "loss": 0.0349, "step": 177300 }, { "epoch": 0.15655, "grad_norm": 0.030738942325115204, "learning_rate": 1.739039005945642e-06, "loss": 0.033, "step": 177310 }, { "epoch": 0.1566, "grad_norm": 0.03419323265552521, "learning_rate": 1.7375245461797696e-06, "loss": 0.0336, "step": 177320 }, { "epoch": 0.15665, "grad_norm": 0.038969021290540695, "learning_rate": 1.7360107224012434e-06, "loss": 0.0347, "step": 177330 }, { "epoch": 0.1567, "grad_norm": 0.03191017359495163, "learning_rate": 1.7344975346514387e-06, "loss": 0.0325, "step": 177340 }, { "epoch": 0.15675, "grad_norm": 0.03612587973475456, "learning_rate": 1.7329849829717303e-06, "loss": 0.0333, "step": 177350 }, { "epoch": 0.1568, "grad_norm": 0.0304668340831995, "learning_rate": 1.7314730674034745e-06, "loss": 0.0355, "step": 177360 }, { "epoch": 0.15685, "grad_norm": 0.03494877740740776, "learning_rate": 1.7299617879880048e-06, "loss": 0.0318, "step": 177370 }, { "epoch": 0.1569, "grad_norm": 0.03373005986213684, "learning_rate": 1.7284511447666352e-06, "loss": 0.0333, "step": 177380 }, { "epoch": 0.15695, "grad_norm": 0.03489000350236893, "learning_rate": 1.726941137780666e-06, "loss": 0.0343, "step": 177390 }, { "epoch": 0.157, "grad_norm": 0.03359324857592583, "learning_rate": 1.7254317670713894e-06, "loss": 0.0339, "step": 177400 }, { "epoch": 0.15705, "grad_norm": 0.036254171282052994, "learning_rate": 1.723923032680061e-06, "loss": 0.0337, "step": 177410 }, { "epoch": 0.1571, "grad_norm": 0.031140070408582687, "learning_rate": 1.7224149346479285e-06, "loss": 0.0312, "step": 177420 }, { "epoch": 0.15715, "grad_norm": 0.03355412557721138, "learning_rate": 1.7209074730162338e-06, "loss": 0.0328, "step": 177430 }, { "epoch": 0.1572, "grad_norm": 0.03463577479124069, "learning_rate": 1.7194006478261776e-06, "loss": 0.0318, "step": 177440 }, { "epoch": 0.15725, "grad_norm": 0.03454165533185005, "learning_rate": 1.7178944591189656e-06, "loss": 0.0331, "step": 177450 }, { "epoch": 0.1573, "grad_norm": 0.03468501567840576, "learning_rate": 1.7163889069357702e-06, "loss": 0.0333, "step": 177460 }, { "epoch": 0.15735, "grad_norm": 0.03777414560317993, "learning_rate": 1.7148839913177533e-06, "loss": 0.0321, "step": 177470 }, { "epoch": 0.1574, "grad_norm": 0.038221944123506546, "learning_rate": 1.7133797123060625e-06, "loss": 0.0347, "step": 177480 }, { "epoch": 0.15745, "grad_norm": 0.031114550307393074, "learning_rate": 1.711876069941823e-06, "loss": 0.0339, "step": 177490 }, { "epoch": 0.1575, "grad_norm": 0.037087131291627884, "learning_rate": 1.7103730642661436e-06, "loss": 0.0331, "step": 177500 }, { "epoch": 0.15755, "grad_norm": 0.036503974348306656, "learning_rate": 1.708870695320111e-06, "loss": 0.0334, "step": 177510 }, { "epoch": 0.1576, "grad_norm": 0.0365588515996933, "learning_rate": 1.7073689631448063e-06, "loss": 0.0333, "step": 177520 }, { "epoch": 0.15765, "grad_norm": 0.03332650661468506, "learning_rate": 1.70586786778128e-06, "loss": 0.0337, "step": 177530 }, { "epoch": 0.1577, "grad_norm": 0.044379010796546936, "learning_rate": 1.7043674092705798e-06, "loss": 0.0328, "step": 177540 }, { "epoch": 0.15775, "grad_norm": 0.03675961121916771, "learning_rate": 1.7028675876537225e-06, "loss": 0.0329, "step": 177550 }, { "epoch": 0.1578, "grad_norm": 0.03883613646030426, "learning_rate": 1.7013684029717093e-06, "loss": 0.0329, "step": 177560 }, { "epoch": 0.15785, "grad_norm": 0.03839869052171707, "learning_rate": 1.6998698552655345e-06, "loss": 0.0329, "step": 177570 }, { "epoch": 0.1579, "grad_norm": 0.03880983963608742, "learning_rate": 1.698371944576163e-06, "loss": 0.036, "step": 177580 }, { "epoch": 0.15795, "grad_norm": 0.037065938115119934, "learning_rate": 1.696874670944551e-06, "loss": 0.0322, "step": 177590 }, { "epoch": 0.158, "grad_norm": 0.0350380539894104, "learning_rate": 1.6953780344116265e-06, "loss": 0.0335, "step": 177600 }, { "epoch": 0.15805, "grad_norm": 0.0355491079390049, "learning_rate": 1.6938820350183098e-06, "loss": 0.0332, "step": 177610 }, { "epoch": 0.1581, "grad_norm": 0.033673424273729324, "learning_rate": 1.6923866728055127e-06, "loss": 0.0349, "step": 177620 }, { "epoch": 0.15815, "grad_norm": 0.033299632370471954, "learning_rate": 1.6908919478140966e-06, "loss": 0.0327, "step": 177630 }, { "epoch": 0.1582, "grad_norm": 0.031211506575345993, "learning_rate": 1.689397860084946e-06, "loss": 0.0366, "step": 177640 }, { "epoch": 0.15825, "grad_norm": 0.03407136723399162, "learning_rate": 1.6879044096588942e-06, "loss": 0.0331, "step": 177650 }, { "epoch": 0.1583, "grad_norm": 0.038370538502931595, "learning_rate": 1.6864115965767814e-06, "loss": 0.0355, "step": 177660 }, { "epoch": 0.15835, "grad_norm": 0.03854278847575188, "learning_rate": 1.6849194208794162e-06, "loss": 0.0327, "step": 177670 }, { "epoch": 0.1584, "grad_norm": 0.04024885594844818, "learning_rate": 1.6834278826075939e-06, "loss": 0.0336, "step": 177680 }, { "epoch": 0.15845, "grad_norm": 0.03746865317225456, "learning_rate": 1.6819369818020959e-06, "loss": 0.0327, "step": 177690 }, { "epoch": 0.1585, "grad_norm": 0.041079357266426086, "learning_rate": 1.680446718503681e-06, "loss": 0.0353, "step": 177700 }, { "epoch": 0.15855, "grad_norm": 0.039807822555303574, "learning_rate": 1.6789570927530918e-06, "loss": 0.0331, "step": 177710 }, { "epoch": 0.1586, "grad_norm": 0.03857456520199776, "learning_rate": 1.6774681045910513e-06, "loss": 0.0343, "step": 177720 }, { "epoch": 0.15865, "grad_norm": 0.03497397527098656, "learning_rate": 1.6759797540582684e-06, "loss": 0.0327, "step": 177730 }, { "epoch": 0.1587, "grad_norm": 0.034522783011198044, "learning_rate": 1.674492041195444e-06, "loss": 0.0324, "step": 177740 }, { "epoch": 0.15875, "grad_norm": 0.04140979051589966, "learning_rate": 1.6730049660432429e-06, "loss": 0.033, "step": 177750 }, { "epoch": 0.1588, "grad_norm": 0.04125241935253143, "learning_rate": 1.6715185286423213e-06, "loss": 0.0333, "step": 177760 }, { "epoch": 0.15885, "grad_norm": 0.031144708395004272, "learning_rate": 1.6700327290333163e-06, "loss": 0.0326, "step": 177770 }, { "epoch": 0.1589, "grad_norm": 0.03373735770583153, "learning_rate": 1.6685475672568562e-06, "loss": 0.0341, "step": 177780 }, { "epoch": 0.15895, "grad_norm": 0.043694883584976196, "learning_rate": 1.6670630433535395e-06, "loss": 0.032, "step": 177790 }, { "epoch": 0.159, "grad_norm": 0.03402690961956978, "learning_rate": 1.6655791573639473e-06, "loss": 0.0318, "step": 177800 }, { "epoch": 0.15905, "grad_norm": 0.03880279138684273, "learning_rate": 1.6640959093286612e-06, "loss": 0.0317, "step": 177810 }, { "epoch": 0.1591, "grad_norm": 0.032201413065195084, "learning_rate": 1.6626132992882238e-06, "loss": 0.0318, "step": 177820 }, { "epoch": 0.15915, "grad_norm": 0.03442845121026039, "learning_rate": 1.6611313272831747e-06, "loss": 0.0318, "step": 177830 }, { "epoch": 0.1592, "grad_norm": 0.029142117127776146, "learning_rate": 1.6596499933540233e-06, "loss": 0.034, "step": 177840 }, { "epoch": 0.15925, "grad_norm": 0.030788760632276535, "learning_rate": 1.6581692975412705e-06, "loss": 0.0317, "step": 177850 }, { "epoch": 0.1593, "grad_norm": 0.028324414044618607, "learning_rate": 1.6566892398854033e-06, "loss": 0.0327, "step": 177860 }, { "epoch": 0.15935, "grad_norm": 0.03501060605049133, "learning_rate": 1.655209820426884e-06, "loss": 0.0336, "step": 177870 }, { "epoch": 0.1594, "grad_norm": 0.03039061650633812, "learning_rate": 1.6537310392061578e-06, "loss": 0.0339, "step": 177880 }, { "epoch": 0.15945, "grad_norm": 0.03187539055943489, "learning_rate": 1.6522528962636507e-06, "loss": 0.0353, "step": 177890 }, { "epoch": 0.1595, "grad_norm": 0.03798288479447365, "learning_rate": 1.6507753916397806e-06, "loss": 0.0337, "step": 177900 }, { "epoch": 0.15955, "grad_norm": 0.04895629361271858, "learning_rate": 1.6492985253749399e-06, "loss": 0.0355, "step": 177910 }, { "epoch": 0.1596, "grad_norm": 0.04290872439742088, "learning_rate": 1.647822297509502e-06, "loss": 0.0342, "step": 177920 }, { "epoch": 0.15965, "grad_norm": 0.0384073369204998, "learning_rate": 1.6463467080838347e-06, "loss": 0.0336, "step": 177930 }, { "epoch": 0.1597, "grad_norm": 0.032967161387205124, "learning_rate": 1.6448717571382665e-06, "loss": 0.0328, "step": 177940 }, { "epoch": 0.15975, "grad_norm": 0.030712133273482323, "learning_rate": 1.6433974447131378e-06, "loss": 0.0351, "step": 177950 }, { "epoch": 0.1598, "grad_norm": 0.03501797094941139, "learning_rate": 1.6419237708487466e-06, "loss": 0.0344, "step": 177960 }, { "epoch": 0.15985, "grad_norm": 0.0321439765393734, "learning_rate": 1.640450735585386e-06, "loss": 0.0323, "step": 177970 }, { "epoch": 0.1599, "grad_norm": 0.03721248731017113, "learning_rate": 1.6389783389633207e-06, "loss": 0.0327, "step": 177980 }, { "epoch": 0.15995, "grad_norm": 0.030178798362612724, "learning_rate": 1.6375065810228157e-06, "loss": 0.0325, "step": 177990 }, { "epoch": 0.16, "grad_norm": 0.03577028214931488, "learning_rate": 1.6360354618041058e-06, "loss": 0.0333, "step": 178000 }, { "epoch": 0.16005, "grad_norm": 0.02578200027346611, "learning_rate": 1.6345649813474028e-06, "loss": 0.0327, "step": 178010 }, { "epoch": 0.1601, "grad_norm": 0.03544124960899353, "learning_rate": 1.6330951396929195e-06, "loss": 0.0344, "step": 178020 }, { "epoch": 0.16015, "grad_norm": 0.03315258026123047, "learning_rate": 1.6316259368808345e-06, "loss": 0.0341, "step": 178030 }, { "epoch": 0.1602, "grad_norm": 0.03840891271829605, "learning_rate": 1.6301573729513241e-06, "loss": 0.0328, "step": 178040 }, { "epoch": 0.16025, "grad_norm": 0.03329712152481079, "learning_rate": 1.628689447944523e-06, "loss": 0.0316, "step": 178050 }, { "epoch": 0.1603, "grad_norm": 0.03092890791594982, "learning_rate": 1.6272221619005712e-06, "loss": 0.0312, "step": 178060 }, { "epoch": 0.16035, "grad_norm": 0.030587226152420044, "learning_rate": 1.6257555148595893e-06, "loss": 0.0322, "step": 178070 }, { "epoch": 0.1604, "grad_norm": 0.03487320616841316, "learning_rate": 1.6242895068616704e-06, "loss": 0.032, "step": 178080 }, { "epoch": 0.16045, "grad_norm": 0.032729100435972214, "learning_rate": 1.6228241379468962e-06, "loss": 0.0337, "step": 178090 }, { "epoch": 0.1605, "grad_norm": 0.03575211390852928, "learning_rate": 1.6213594081553236e-06, "loss": 0.0324, "step": 178100 }, { "epoch": 0.16055, "grad_norm": 0.031156299635767937, "learning_rate": 1.619895317527001e-06, "loss": 0.0324, "step": 178110 }, { "epoch": 0.1606, "grad_norm": 0.033754050731658936, "learning_rate": 1.618431866101963e-06, "loss": 0.0333, "step": 178120 }, { "epoch": 0.16065, "grad_norm": 0.03337110951542854, "learning_rate": 1.6169690539202088e-06, "loss": 0.0323, "step": 178130 }, { "epoch": 0.1607, "grad_norm": 0.03375527262687683, "learning_rate": 1.615506881021736e-06, "loss": 0.0319, "step": 178140 }, { "epoch": 0.16075, "grad_norm": 0.03295344114303589, "learning_rate": 1.614045347446519e-06, "loss": 0.0321, "step": 178150 }, { "epoch": 0.1608, "grad_norm": 0.032287366688251495, "learning_rate": 1.6125844532345225e-06, "loss": 0.0317, "step": 178160 }, { "epoch": 0.16085, "grad_norm": 0.0348057895898819, "learning_rate": 1.6111241984256758e-06, "loss": 0.034, "step": 178170 }, { "epoch": 0.1609, "grad_norm": 0.030505768954753876, "learning_rate": 1.6096645830599055e-06, "loss": 0.0323, "step": 178180 }, { "epoch": 0.16095, "grad_norm": 0.03195841610431671, "learning_rate": 1.608205607177124e-06, "loss": 0.032, "step": 178190 }, { "epoch": 0.161, "grad_norm": 0.038009725511074066, "learning_rate": 1.6067472708172104e-06, "loss": 0.0319, "step": 178200 }, { "epoch": 0.16105, "grad_norm": 0.04068221524357796, "learning_rate": 1.6052895740200385e-06, "loss": 0.0328, "step": 178210 }, { "epoch": 0.1611, "grad_norm": 0.03707307204604149, "learning_rate": 1.603832516825457e-06, "loss": 0.0332, "step": 178220 }, { "epoch": 0.16115, "grad_norm": 0.03900527209043503, "learning_rate": 1.6023760992733089e-06, "loss": 0.0329, "step": 178230 }, { "epoch": 0.1612, "grad_norm": 0.03804875165224075, "learning_rate": 1.600920321403404e-06, "loss": 0.0336, "step": 178240 }, { "epoch": 0.16125, "grad_norm": 0.028278328478336334, "learning_rate": 1.5994651832555523e-06, "loss": 0.0332, "step": 178250 }, { "epoch": 0.1613, "grad_norm": 0.03762136399745941, "learning_rate": 1.5980106848695303e-06, "loss": 0.032, "step": 178260 }, { "epoch": 0.16135, "grad_norm": 0.033716052770614624, "learning_rate": 1.5965568262851005e-06, "loss": 0.0332, "step": 178270 }, { "epoch": 0.1614, "grad_norm": 0.03807881101965904, "learning_rate": 1.5951036075420173e-06, "loss": 0.0332, "step": 178280 }, { "epoch": 0.16145, "grad_norm": 0.03636297583580017, "learning_rate": 1.59365102868001e-06, "loss": 0.0312, "step": 178290 }, { "epoch": 0.1615, "grad_norm": 0.03612672910094261, "learning_rate": 1.5921990897387857e-06, "loss": 0.0324, "step": 178300 }, { "epoch": 0.16155, "grad_norm": 0.036949120461940765, "learning_rate": 1.5907477907580488e-06, "loss": 0.0314, "step": 178310 }, { "epoch": 0.1616, "grad_norm": 0.03454224765300751, "learning_rate": 1.5892971317774702e-06, "loss": 0.0327, "step": 178320 }, { "epoch": 0.16165, "grad_norm": 0.03373921290040016, "learning_rate": 1.5878471128367183e-06, "loss": 0.0319, "step": 178330 }, { "epoch": 0.1617, "grad_norm": 0.028942229226231575, "learning_rate": 1.5863977339754255e-06, "loss": 0.0324, "step": 178340 }, { "epoch": 0.16175, "grad_norm": 0.03300546854734421, "learning_rate": 1.5849489952332263e-06, "loss": 0.0328, "step": 178350 }, { "epoch": 0.1618, "grad_norm": 0.03184133768081665, "learning_rate": 1.58350089664972e-06, "loss": 0.0334, "step": 178360 }, { "epoch": 0.16185, "grad_norm": 0.033677779138088226, "learning_rate": 1.5820534382645052e-06, "loss": 0.0333, "step": 178370 }, { "epoch": 0.1619, "grad_norm": 0.030481450259685516, "learning_rate": 1.5806066201171532e-06, "loss": 0.0336, "step": 178380 }, { "epoch": 0.16195, "grad_norm": 0.03218537196516991, "learning_rate": 1.5791604422472128e-06, "loss": 0.0331, "step": 178390 }, { "epoch": 0.162, "grad_norm": 0.028280923143029213, "learning_rate": 1.5777149046942301e-06, "loss": 0.0327, "step": 178400 }, { "epoch": 0.16205, "grad_norm": 0.033612944185733795, "learning_rate": 1.5762700074977238e-06, "loss": 0.0339, "step": 178410 }, { "epoch": 0.1621, "grad_norm": 0.03201251104474068, "learning_rate": 1.5748257506971953e-06, "loss": 0.0327, "step": 178420 }, { "epoch": 0.16215, "grad_norm": 0.037322528660297394, "learning_rate": 1.573382134332127e-06, "loss": 0.033, "step": 178430 }, { "epoch": 0.1622, "grad_norm": 0.036024387925863266, "learning_rate": 1.5719391584419902e-06, "loss": 0.0339, "step": 178440 }, { "epoch": 0.16225, "grad_norm": 0.029813559725880623, "learning_rate": 1.5704968230662393e-06, "loss": 0.0326, "step": 178450 }, { "epoch": 0.1623, "grad_norm": 0.033420126885175705, "learning_rate": 1.569055128244304e-06, "loss": 0.0347, "step": 178460 }, { "epoch": 0.16235, "grad_norm": 0.04035967215895653, "learning_rate": 1.5676140740155971e-06, "loss": 0.0339, "step": 178470 }, { "epoch": 0.1624, "grad_norm": 0.03336472064256668, "learning_rate": 1.5661736604195148e-06, "loss": 0.0318, "step": 178480 }, { "epoch": 0.16245, "grad_norm": 0.031709253787994385, "learning_rate": 1.5647338874954453e-06, "loss": 0.0324, "step": 178490 }, { "epoch": 0.1625, "grad_norm": 0.028425998985767365, "learning_rate": 1.5632947552827488e-06, "loss": 0.0356, "step": 178500 }, { "epoch": 0.16255, "grad_norm": 0.031041646376252174, "learning_rate": 1.561856263820763e-06, "loss": 0.0318, "step": 178510 }, { "epoch": 0.1626, "grad_norm": 0.03370223194360733, "learning_rate": 1.560418413148826e-06, "loss": 0.0317, "step": 178520 }, { "epoch": 0.16265, "grad_norm": 0.03401322662830353, "learning_rate": 1.5589812033062396e-06, "loss": 0.031, "step": 178530 }, { "epoch": 0.1627, "grad_norm": 0.03218592703342438, "learning_rate": 1.5575446343323086e-06, "loss": 0.0325, "step": 178540 }, { "epoch": 0.16275, "grad_norm": 0.03417234867811203, "learning_rate": 1.5561087062662905e-06, "loss": 0.0305, "step": 178550 }, { "epoch": 0.1628, "grad_norm": 0.029766710475087166, "learning_rate": 1.5546734191474565e-06, "loss": 0.0343, "step": 178560 }, { "epoch": 0.16285, "grad_norm": 0.03576730191707611, "learning_rate": 1.553238773015045e-06, "loss": 0.0331, "step": 178570 }, { "epoch": 0.1629, "grad_norm": 0.03527417033910751, "learning_rate": 1.551804767908277e-06, "loss": 0.0332, "step": 178580 }, { "epoch": 0.16295, "grad_norm": 0.030761733651161194, "learning_rate": 1.5503714038663575e-06, "loss": 0.0333, "step": 178590 }, { "epoch": 0.163, "grad_norm": 0.03658977523446083, "learning_rate": 1.5489386809284718e-06, "loss": 0.0325, "step": 178600 }, { "epoch": 0.16305, "grad_norm": 0.03047502413392067, "learning_rate": 1.5475065991337966e-06, "loss": 0.0321, "step": 178610 }, { "epoch": 0.1631, "grad_norm": 0.03343275189399719, "learning_rate": 1.5460751585214788e-06, "loss": 0.0326, "step": 178620 }, { "epoch": 0.16315, "grad_norm": 0.03201655298471451, "learning_rate": 1.5446443591306509e-06, "loss": 0.0339, "step": 178630 }, { "epoch": 0.1632, "grad_norm": 0.03290139138698578, "learning_rate": 1.543214201000437e-06, "loss": 0.0325, "step": 178640 }, { "epoch": 0.16325, "grad_norm": 0.03751504793763161, "learning_rate": 1.5417846841699336e-06, "loss": 0.0325, "step": 178650 }, { "epoch": 0.1633, "grad_norm": 0.035507798194885254, "learning_rate": 1.540355808678226e-06, "loss": 0.032, "step": 178660 }, { "epoch": 0.16335, "grad_norm": 0.032653480768203735, "learning_rate": 1.5389275745643777e-06, "loss": 0.0338, "step": 178670 }, { "epoch": 0.1634, "grad_norm": 0.03228415176272392, "learning_rate": 1.5374999818674324e-06, "loss": 0.0338, "step": 178680 }, { "epoch": 0.16345, "grad_norm": 0.033933766186237335, "learning_rate": 1.536073030626428e-06, "loss": 0.0321, "step": 178690 }, { "epoch": 0.1635, "grad_norm": 0.03191066533327103, "learning_rate": 1.53464672088037e-06, "loss": 0.0328, "step": 178700 }, { "epoch": 0.16355, "grad_norm": 0.038408685475587845, "learning_rate": 1.5332210526682545e-06, "loss": 0.0331, "step": 178710 }, { "epoch": 0.1636, "grad_norm": 0.03488050028681755, "learning_rate": 1.5317960260290559e-06, "loss": 0.0334, "step": 178720 }, { "epoch": 0.16365, "grad_norm": 0.02847726084291935, "learning_rate": 1.5303716410017433e-06, "loss": 0.0316, "step": 178730 }, { "epoch": 0.1637, "grad_norm": 0.03325439989566803, "learning_rate": 1.5289478976252491e-06, "loss": 0.033, "step": 178740 }, { "epoch": 0.16375, "grad_norm": 0.03413159027695656, "learning_rate": 1.5275247959385037e-06, "loss": 0.0327, "step": 178750 }, { "epoch": 0.1638, "grad_norm": 0.03945057466626167, "learning_rate": 1.5261023359804116e-06, "loss": 0.037, "step": 178760 }, { "epoch": 0.16385, "grad_norm": 0.030923176556825638, "learning_rate": 1.5246805177898615e-06, "loss": 0.033, "step": 178770 }, { "epoch": 0.1639, "grad_norm": 0.03191499784588814, "learning_rate": 1.5232593414057278e-06, "loss": 0.0345, "step": 178780 }, { "epoch": 0.16395, "grad_norm": 0.03918515890836716, "learning_rate": 1.5218388068668655e-06, "loss": 0.0322, "step": 178790 }, { "epoch": 0.164, "grad_norm": 0.03057541884481907, "learning_rate": 1.52041891421211e-06, "loss": 0.0326, "step": 178800 }, { "epoch": 0.16405, "grad_norm": 0.03401718661189079, "learning_rate": 1.518999663480275e-06, "loss": 0.0337, "step": 178810 }, { "epoch": 0.1641, "grad_norm": 0.03624304383993149, "learning_rate": 1.517581054710171e-06, "loss": 0.0336, "step": 178820 }, { "epoch": 0.16415, "grad_norm": 0.03786566108465195, "learning_rate": 1.5161630879405835e-06, "loss": 0.0331, "step": 178830 }, { "epoch": 0.1642, "grad_norm": 0.03679925948381424, "learning_rate": 1.5147457632102708e-06, "loss": 0.0334, "step": 178840 }, { "epoch": 0.16425, "grad_norm": 0.036222558468580246, "learning_rate": 1.5133290805579847e-06, "loss": 0.033, "step": 178850 }, { "epoch": 0.1643, "grad_norm": 0.0338745042681694, "learning_rate": 1.5119130400224584e-06, "loss": 0.0347, "step": 178860 }, { "epoch": 0.16435, "grad_norm": 0.03509068489074707, "learning_rate": 1.5104976416424082e-06, "loss": 0.0332, "step": 178870 }, { "epoch": 0.1644, "grad_norm": 0.03290260210633278, "learning_rate": 1.509082885456528e-06, "loss": 0.0324, "step": 178880 }, { "epoch": 0.16445, "grad_norm": 0.02996455878019333, "learning_rate": 1.5076687715034926e-06, "loss": 0.0334, "step": 178890 }, { "epoch": 0.1645, "grad_norm": 0.03558652848005295, "learning_rate": 1.506255299821968e-06, "loss": 0.0356, "step": 178900 }, { "epoch": 0.16455, "grad_norm": 0.0318191722035408, "learning_rate": 1.5048424704506015e-06, "loss": 0.0326, "step": 178910 }, { "epoch": 0.1646, "grad_norm": 0.02919379062950611, "learning_rate": 1.5034302834280146e-06, "loss": 0.0322, "step": 178920 }, { "epoch": 0.16465, "grad_norm": 0.03439747542142868, "learning_rate": 1.5020187387928124e-06, "loss": 0.0336, "step": 178930 }, { "epoch": 0.1647, "grad_norm": 0.035345617681741714, "learning_rate": 1.5006078365835895e-06, "loss": 0.0328, "step": 178940 }, { "epoch": 0.16475, "grad_norm": 0.033006761223077774, "learning_rate": 1.4991975768389233e-06, "loss": 0.0327, "step": 178950 }, { "epoch": 0.1648, "grad_norm": 0.03655276447534561, "learning_rate": 1.4977879595973659e-06, "loss": 0.0335, "step": 178960 }, { "epoch": 0.16485, "grad_norm": 0.030718335881829262, "learning_rate": 1.4963789848974591e-06, "loss": 0.0322, "step": 178970 }, { "epoch": 0.1649, "grad_norm": 0.0313151478767395, "learning_rate": 1.4949706527777136e-06, "loss": 0.0331, "step": 178980 }, { "epoch": 0.16495, "grad_norm": 0.029872264713048935, "learning_rate": 1.4935629632766456e-06, "loss": 0.0321, "step": 178990 }, { "epoch": 0.165, "grad_norm": 0.03434133902192116, "learning_rate": 1.4921559164327358e-06, "loss": 0.034, "step": 179000 }, { "epoch": 0.16505, "grad_norm": 0.02828471176326275, "learning_rate": 1.4907495122844479e-06, "loss": 0.0326, "step": 179010 }, { "epoch": 0.1651, "grad_norm": 0.035396866500377655, "learning_rate": 1.4893437508702395e-06, "loss": 0.0362, "step": 179020 }, { "epoch": 0.16515, "grad_norm": 0.030432626605033875, "learning_rate": 1.4879386322285387e-06, "loss": 0.0329, "step": 179030 }, { "epoch": 0.1652, "grad_norm": 0.03829565644264221, "learning_rate": 1.4865341563977674e-06, "loss": 0.0331, "step": 179040 }, { "epoch": 0.16525, "grad_norm": 0.032867953181266785, "learning_rate": 1.4851303234163116e-06, "loss": 0.0338, "step": 179050 }, { "epoch": 0.1653, "grad_norm": 0.03387406840920448, "learning_rate": 1.48372713332256e-06, "loss": 0.035, "step": 179060 }, { "epoch": 0.16535, "grad_norm": 0.029376495629549026, "learning_rate": 1.4823245861548762e-06, "loss": 0.0319, "step": 179070 }, { "epoch": 0.1654, "grad_norm": 0.03476952761411667, "learning_rate": 1.4809226819516048e-06, "loss": 0.0322, "step": 179080 }, { "epoch": 0.16545, "grad_norm": 0.035101015120744705, "learning_rate": 1.4795214207510704e-06, "loss": 0.0329, "step": 179090 }, { "epoch": 0.1655, "grad_norm": 0.03613230586051941, "learning_rate": 1.4781208025915788e-06, "loss": 0.0318, "step": 179100 }, { "epoch": 0.16555, "grad_norm": 0.0309486985206604, "learning_rate": 1.4767208275114354e-06, "loss": 0.033, "step": 179110 }, { "epoch": 0.1656, "grad_norm": 0.029845857992768288, "learning_rate": 1.4753214955489036e-06, "loss": 0.0314, "step": 179120 }, { "epoch": 0.16565, "grad_norm": 0.03276577591896057, "learning_rate": 1.4739228067422422e-06, "loss": 0.0318, "step": 179130 }, { "epoch": 0.1657, "grad_norm": 0.031203771010041237, "learning_rate": 1.4725247611296956e-06, "loss": 0.0321, "step": 179140 }, { "epoch": 0.16575, "grad_norm": 0.03170618787407875, "learning_rate": 1.47112735874948e-06, "loss": 0.0353, "step": 179150 }, { "epoch": 0.1658, "grad_norm": 0.035838086158037186, "learning_rate": 1.469730599639807e-06, "loss": 0.0316, "step": 179160 }, { "epoch": 0.16585, "grad_norm": 0.029705338180065155, "learning_rate": 1.4683344838388595e-06, "loss": 0.0322, "step": 179170 }, { "epoch": 0.1659, "grad_norm": 0.03481472656130791, "learning_rate": 1.4669390113848075e-06, "loss": 0.0318, "step": 179180 }, { "epoch": 0.16595, "grad_norm": 0.03514959290623665, "learning_rate": 1.4655441823157978e-06, "loss": 0.0327, "step": 179190 }, { "epoch": 0.166, "grad_norm": 0.03326687961816788, "learning_rate": 1.4641499966699723e-06, "loss": 0.0329, "step": 179200 }, { "epoch": 0.16605, "grad_norm": 0.03179854527115822, "learning_rate": 1.4627564544854422e-06, "loss": 0.031, "step": 179210 }, { "epoch": 0.1661, "grad_norm": 0.029857762157917023, "learning_rate": 1.4613635558003076e-06, "loss": 0.0324, "step": 179220 }, { "epoch": 0.16615, "grad_norm": 0.030423279851675034, "learning_rate": 1.4599713006526517e-06, "loss": 0.0338, "step": 179230 }, { "epoch": 0.1662, "grad_norm": 0.03567027300596237, "learning_rate": 1.4585796890805332e-06, "loss": 0.0323, "step": 179240 }, { "epoch": 0.16625, "grad_norm": 0.03131220489740372, "learning_rate": 1.4571887211220075e-06, "loss": 0.0356, "step": 179250 }, { "epoch": 0.1663, "grad_norm": 0.02795344404876232, "learning_rate": 1.4557983968150945e-06, "loss": 0.0341, "step": 179260 }, { "epoch": 0.16635, "grad_norm": 0.03564042970538139, "learning_rate": 1.454408716197808e-06, "loss": 0.0326, "step": 179270 }, { "epoch": 0.1664, "grad_norm": 0.03069239854812622, "learning_rate": 1.453019679308143e-06, "loss": 0.033, "step": 179280 }, { "epoch": 0.16645, "grad_norm": 0.03263924643397331, "learning_rate": 1.4516312861840742e-06, "loss": 0.0328, "step": 179290 }, { "epoch": 0.1665, "grad_norm": 0.03687722980976105, "learning_rate": 1.4502435368635603e-06, "loss": 0.0341, "step": 179300 }, { "epoch": 0.16655, "grad_norm": 0.03473667800426483, "learning_rate": 1.4488564313845348e-06, "loss": 0.033, "step": 179310 }, { "epoch": 0.1666, "grad_norm": 0.03266061097383499, "learning_rate": 1.4474699697849286e-06, "loss": 0.0342, "step": 179320 }, { "epoch": 0.16665, "grad_norm": 0.033977825194597244, "learning_rate": 1.4460841521026504e-06, "loss": 0.0329, "step": 179330 }, { "epoch": 0.1667, "grad_norm": 0.03571394085884094, "learning_rate": 1.4446989783755776e-06, "loss": 0.0331, "step": 179340 }, { "epoch": 0.16675, "grad_norm": 0.034471821039915085, "learning_rate": 1.443314448641589e-06, "loss": 0.032, "step": 179350 }, { "epoch": 0.1668, "grad_norm": 0.03545355424284935, "learning_rate": 1.4419305629385288e-06, "loss": 0.032, "step": 179360 }, { "epoch": 0.16685, "grad_norm": 0.033533014357089996, "learning_rate": 1.440547321304242e-06, "loss": 0.0331, "step": 179370 }, { "epoch": 0.1669, "grad_norm": 0.03619558736681938, "learning_rate": 1.4391647237765399e-06, "loss": 0.0334, "step": 179380 }, { "epoch": 0.16695, "grad_norm": 0.03412100672721863, "learning_rate": 1.4377827703932172e-06, "loss": 0.0325, "step": 179390 }, { "epoch": 0.167, "grad_norm": 0.03337492421269417, "learning_rate": 1.4364014611920662e-06, "loss": 0.032, "step": 179400 }, { "epoch": 0.16705, "grad_norm": 0.03705546259880066, "learning_rate": 1.435020796210848e-06, "loss": 0.0316, "step": 179410 }, { "epoch": 0.1671, "grad_norm": 0.03405716270208359, "learning_rate": 1.4336407754873077e-06, "loss": 0.0324, "step": 179420 }, { "epoch": 0.16715, "grad_norm": 0.03192787617444992, "learning_rate": 1.4322613990591704e-06, "loss": 0.0317, "step": 179430 }, { "epoch": 0.1672, "grad_norm": 0.035241540521383286, "learning_rate": 1.430882666964159e-06, "loss": 0.0314, "step": 179440 }, { "epoch": 0.16725, "grad_norm": 0.028237996622920036, "learning_rate": 1.429504579239954e-06, "loss": 0.0329, "step": 179450 }, { "epoch": 0.1673, "grad_norm": 0.030574286356568336, "learning_rate": 1.4281271359242455e-06, "loss": 0.0319, "step": 179460 }, { "epoch": 0.16735, "grad_norm": 0.030079707503318787, "learning_rate": 1.4267503370546832e-06, "loss": 0.0325, "step": 179470 }, { "epoch": 0.1674, "grad_norm": 0.03491288796067238, "learning_rate": 1.4253741826689094e-06, "loss": 0.033, "step": 179480 }, { "epoch": 0.16745, "grad_norm": 0.03250223770737648, "learning_rate": 1.4239986728045495e-06, "loss": 0.0331, "step": 179490 }, { "epoch": 0.1675, "grad_norm": 0.031394198536872864, "learning_rate": 1.4226238074992099e-06, "loss": 0.0322, "step": 179500 }, { "epoch": 0.16755, "grad_norm": 0.03774748370051384, "learning_rate": 1.421249586790474e-06, "loss": 0.0335, "step": 179510 }, { "epoch": 0.1676, "grad_norm": 0.04410416632890701, "learning_rate": 1.4198760107159203e-06, "loss": 0.0331, "step": 179520 }, { "epoch": 0.16765, "grad_norm": 0.034346163272857666, "learning_rate": 1.4185030793130965e-06, "loss": 0.0324, "step": 179530 }, { "epoch": 0.1677, "grad_norm": 0.03225886449217796, "learning_rate": 1.417130792619542e-06, "loss": 0.0333, "step": 179540 }, { "epoch": 0.16775, "grad_norm": 0.0325591005384922, "learning_rate": 1.4157591506727685e-06, "loss": 0.0319, "step": 179550 }, { "epoch": 0.1678, "grad_norm": 0.03200984373688698, "learning_rate": 1.4143881535102822e-06, "loss": 0.0336, "step": 179560 }, { "epoch": 0.16785, "grad_norm": 0.03078301064670086, "learning_rate": 1.4130178011695584e-06, "loss": 0.0329, "step": 179570 }, { "epoch": 0.1679, "grad_norm": 0.03340752795338631, "learning_rate": 1.4116480936880699e-06, "loss": 0.0328, "step": 179580 }, { "epoch": 0.16795, "grad_norm": 0.03492152690887451, "learning_rate": 1.4102790311032621e-06, "loss": 0.0343, "step": 179590 }, { "epoch": 0.168, "grad_norm": 0.040707699954509735, "learning_rate": 1.4089106134525604e-06, "loss": 0.0335, "step": 179600 }, { "epoch": 0.16805, "grad_norm": 0.03249933570623398, "learning_rate": 1.407542840773382e-06, "loss": 0.0328, "step": 179610 }, { "epoch": 0.1681, "grad_norm": 0.03230295330286026, "learning_rate": 1.4061757131031196e-06, "loss": 0.0344, "step": 179620 }, { "epoch": 0.16815, "grad_norm": 0.035333674401044846, "learning_rate": 1.404809230479151e-06, "loss": 0.0326, "step": 179630 }, { "epoch": 0.1682, "grad_norm": 0.03570147976279259, "learning_rate": 1.4034433929388274e-06, "loss": 0.0339, "step": 179640 }, { "epoch": 0.16825, "grad_norm": 0.03937431797385216, "learning_rate": 1.4020782005194965e-06, "loss": 0.0339, "step": 179650 }, { "epoch": 0.1683, "grad_norm": 0.037766024470329285, "learning_rate": 1.4007136532584897e-06, "loss": 0.0325, "step": 179660 }, { "epoch": 0.16835, "grad_norm": 0.034978706389665604, "learning_rate": 1.399349751193102e-06, "loss": 0.033, "step": 179670 }, { "epoch": 0.1684, "grad_norm": 0.03416333720088005, "learning_rate": 1.3979864943606259e-06, "loss": 0.0329, "step": 179680 }, { "epoch": 0.16845, "grad_norm": 0.032717134803533554, "learning_rate": 1.3966238827983314e-06, "loss": 0.0339, "step": 179690 }, { "epoch": 0.1685, "grad_norm": 0.035884711891412735, "learning_rate": 1.3952619165434721e-06, "loss": 0.0322, "step": 179700 }, { "epoch": 0.16855, "grad_norm": 0.031024346128106117, "learning_rate": 1.3939005956332878e-06, "loss": 0.0326, "step": 179710 }, { "epoch": 0.1686, "grad_norm": 0.034197259694337845, "learning_rate": 1.3925399201049876e-06, "loss": 0.0331, "step": 179720 }, { "epoch": 0.16865, "grad_norm": 0.035327374935150146, "learning_rate": 1.3911798899957807e-06, "loss": 0.0329, "step": 179730 }, { "epoch": 0.1687, "grad_norm": 0.03364065662026405, "learning_rate": 1.3898205053428425e-06, "loss": 0.0329, "step": 179740 }, { "epoch": 0.16875, "grad_norm": 0.03280908986926079, "learning_rate": 1.3884617661833493e-06, "loss": 0.0322, "step": 179750 }, { "epoch": 0.1688, "grad_norm": 0.03385601565241814, "learning_rate": 1.3871036725544352e-06, "loss": 0.033, "step": 179760 }, { "epoch": 0.16885, "grad_norm": 0.031862739473581314, "learning_rate": 1.385746224493234e-06, "loss": 0.0332, "step": 179770 }, { "epoch": 0.1689, "grad_norm": 0.028963228687644005, "learning_rate": 1.3843894220368637e-06, "loss": 0.0325, "step": 179780 }, { "epoch": 0.16895, "grad_norm": 0.035909876227378845, "learning_rate": 1.3830332652224137e-06, "loss": 0.0327, "step": 179790 }, { "epoch": 0.169, "grad_norm": 0.03678746894001961, "learning_rate": 1.381677754086963e-06, "loss": 0.0362, "step": 179800 }, { "epoch": 0.16905, "grad_norm": 0.03247828036546707, "learning_rate": 1.380322888667565e-06, "loss": 0.0327, "step": 179810 }, { "epoch": 0.1691, "grad_norm": 0.03818798065185547, "learning_rate": 1.3789686690012682e-06, "loss": 0.033, "step": 179820 }, { "epoch": 0.16915, "grad_norm": 0.03432374820113182, "learning_rate": 1.3776150951250955e-06, "loss": 0.0325, "step": 179830 }, { "epoch": 0.1692, "grad_norm": 0.03608046472072601, "learning_rate": 1.3762621670760478e-06, "loss": 0.0323, "step": 179840 }, { "epoch": 0.16925, "grad_norm": 0.03437166288495064, "learning_rate": 1.3749098848911206e-06, "loss": 0.0337, "step": 179850 }, { "epoch": 0.1693, "grad_norm": 0.0373493991792202, "learning_rate": 1.373558248607279e-06, "loss": 0.0325, "step": 179860 }, { "epoch": 0.16935, "grad_norm": 0.03960481658577919, "learning_rate": 1.372207258261482e-06, "loss": 0.0331, "step": 179870 }, { "epoch": 0.1694, "grad_norm": 0.03167131170630455, "learning_rate": 1.3708569138906612e-06, "loss": 0.0332, "step": 179880 }, { "epoch": 0.16945, "grad_norm": 0.03516445681452751, "learning_rate": 1.3695072155317345e-06, "loss": 0.0333, "step": 179890 }, { "epoch": 0.1695, "grad_norm": 0.034307535737752914, "learning_rate": 1.368158163221603e-06, "loss": 0.0344, "step": 179900 }, { "epoch": 0.16955, "grad_norm": 0.03313758969306946, "learning_rate": 1.3668097569971505e-06, "loss": 0.0325, "step": 179910 }, { "epoch": 0.1696, "grad_norm": 0.035713810473680496, "learning_rate": 1.3654619968952426e-06, "loss": 0.0344, "step": 179920 }, { "epoch": 0.16965, "grad_norm": 0.03464226424694061, "learning_rate": 1.364114882952719e-06, "loss": 0.0326, "step": 179930 }, { "epoch": 0.1697, "grad_norm": 0.032023943960666656, "learning_rate": 1.3627684152064196e-06, "loss": 0.0332, "step": 179940 }, { "epoch": 0.16975, "grad_norm": 0.030161544680595398, "learning_rate": 1.3614225936931458e-06, "loss": 0.0336, "step": 179950 }, { "epoch": 0.1698, "grad_norm": 0.03181074932217598, "learning_rate": 1.360077418449704e-06, "loss": 0.032, "step": 179960 }, { "epoch": 0.16985, "grad_norm": 0.03367370739579201, "learning_rate": 1.3587328895128621e-06, "loss": 0.0331, "step": 179970 }, { "epoch": 0.1699, "grad_norm": 0.026204094290733337, "learning_rate": 1.3573890069193796e-06, "loss": 0.0319, "step": 179980 }, { "epoch": 0.16995, "grad_norm": 0.029326176270842552, "learning_rate": 1.356045770706002e-06, "loss": 0.033, "step": 179990 }, { "epoch": 0.17, "grad_norm": 0.03619501367211342, "learning_rate": 1.3547031809094502e-06, "loss": 0.0327, "step": 180000 }, { "epoch": 0.17005, "grad_norm": 0.0314396396279335, "learning_rate": 1.3533612375664333e-06, "loss": 0.0333, "step": 180010 }, { "epoch": 0.1701, "grad_norm": 0.03336181491613388, "learning_rate": 1.3520199407136308e-06, "loss": 0.0312, "step": 180020 }, { "epoch": 0.17015, "grad_norm": 0.03133557736873627, "learning_rate": 1.3506792903877186e-06, "loss": 0.0326, "step": 180030 }, { "epoch": 0.1702, "grad_norm": 0.03235197439789772, "learning_rate": 1.3493392866253563e-06, "loss": 0.0333, "step": 180040 }, { "epoch": 0.17025, "grad_norm": 0.03352617099881172, "learning_rate": 1.3479999294631673e-06, "loss": 0.0332, "step": 180050 }, { "epoch": 0.1703, "grad_norm": 0.03173385187983513, "learning_rate": 1.346661218937778e-06, "loss": 0.0311, "step": 180060 }, { "epoch": 0.17035, "grad_norm": 0.03343489393591881, "learning_rate": 1.3453231550857787e-06, "loss": 0.0329, "step": 180070 }, { "epoch": 0.1704, "grad_norm": 0.0289906058460474, "learning_rate": 1.3439857379437647e-06, "loss": 0.033, "step": 180080 }, { "epoch": 0.17045, "grad_norm": 0.03115617111325264, "learning_rate": 1.3426489675482907e-06, "loss": 0.033, "step": 180090 }, { "epoch": 0.1705, "grad_norm": 0.03450518101453781, "learning_rate": 1.3413128439359046e-06, "loss": 0.0331, "step": 180100 }, { "epoch": 0.17055, "grad_norm": 0.035788483917713165, "learning_rate": 1.3399773671431414e-06, "loss": 0.032, "step": 180110 }, { "epoch": 0.1706, "grad_norm": 0.035771444439888, "learning_rate": 1.3386425372065081e-06, "loss": 0.0336, "step": 180120 }, { "epoch": 0.17065, "grad_norm": 0.03090623952448368, "learning_rate": 1.3373083541624975e-06, "loss": 0.0331, "step": 180130 }, { "epoch": 0.1707, "grad_norm": 0.03492768108844757, "learning_rate": 1.3359748180475835e-06, "loss": 0.0337, "step": 180140 }, { "epoch": 0.17075, "grad_norm": 0.03133808448910713, "learning_rate": 1.3346419288982282e-06, "loss": 0.0327, "step": 180150 }, { "epoch": 0.1708, "grad_norm": 0.03177190199494362, "learning_rate": 1.3333096867508748e-06, "loss": 0.0333, "step": 180160 }, { "epoch": 0.17085, "grad_norm": 0.034788765013217926, "learning_rate": 1.3319780916419417e-06, "loss": 0.0321, "step": 180170 }, { "epoch": 0.1709, "grad_norm": 0.03276115655899048, "learning_rate": 1.3306471436078383e-06, "loss": 0.0339, "step": 180180 }, { "epoch": 0.17095, "grad_norm": 0.0364110991358757, "learning_rate": 1.3293168426849467e-06, "loss": 0.0326, "step": 180190 }, { "epoch": 0.171, "grad_norm": 0.030001336708664894, "learning_rate": 1.3279871889096434e-06, "loss": 0.0325, "step": 180200 }, { "epoch": 0.17105, "grad_norm": 0.03211287036538124, "learning_rate": 1.3266581823182771e-06, "loss": 0.033, "step": 180210 }, { "epoch": 0.1711, "grad_norm": 0.0314616933465004, "learning_rate": 1.3253298229471772e-06, "loss": 0.0323, "step": 180220 }, { "epoch": 0.17115, "grad_norm": 0.03480513393878937, "learning_rate": 1.32400211083267e-06, "loss": 0.0336, "step": 180230 }, { "epoch": 0.1712, "grad_norm": 0.03739199787378311, "learning_rate": 1.3226750460110487e-06, "loss": 0.0335, "step": 180240 }, { "epoch": 0.17125, "grad_norm": 0.036488406360149384, "learning_rate": 1.3213486285186012e-06, "loss": 0.0342, "step": 180250 }, { "epoch": 0.1713, "grad_norm": 0.03254292532801628, "learning_rate": 1.3200228583915814e-06, "loss": 0.0343, "step": 180260 }, { "epoch": 0.17135, "grad_norm": 0.03373872861266136, "learning_rate": 1.3186977356662383e-06, "loss": 0.0333, "step": 180270 }, { "epoch": 0.1714, "grad_norm": 0.028090765699744225, "learning_rate": 1.317373260378807e-06, "loss": 0.0319, "step": 180280 }, { "epoch": 0.17145, "grad_norm": 0.032359834760427475, "learning_rate": 1.3160494325654944e-06, "loss": 0.0328, "step": 180290 }, { "epoch": 0.1715, "grad_norm": 0.03507302701473236, "learning_rate": 1.3147262522624936e-06, "loss": 0.0317, "step": 180300 }, { "epoch": 0.17155, "grad_norm": 0.03586224839091301, "learning_rate": 1.3134037195059735e-06, "loss": 0.0325, "step": 180310 }, { "epoch": 0.1716, "grad_norm": 0.03436509892344475, "learning_rate": 1.3120818343321018e-06, "loss": 0.0334, "step": 180320 }, { "epoch": 0.17165, "grad_norm": 0.030631018802523613, "learning_rate": 1.3107605967770109e-06, "loss": 0.0326, "step": 180330 }, { "epoch": 0.1717, "grad_norm": 0.035087957978248596, "learning_rate": 1.3094400068768248e-06, "loss": 0.0322, "step": 180340 }, { "epoch": 0.17175, "grad_norm": 0.03104361705482006, "learning_rate": 1.3081200646676506e-06, "loss": 0.0332, "step": 180350 }, { "epoch": 0.1718, "grad_norm": 0.02846512943506241, "learning_rate": 1.3068007701855705e-06, "loss": 0.0324, "step": 180360 }, { "epoch": 0.17185, "grad_norm": 0.033627621829509735, "learning_rate": 1.3054821234666615e-06, "loss": 0.0341, "step": 180370 }, { "epoch": 0.1719, "grad_norm": 0.043194908648729324, "learning_rate": 1.3041641245469665e-06, "loss": 0.0323, "step": 180380 }, { "epoch": 0.17195, "grad_norm": 0.04160348325967789, "learning_rate": 1.3028467734625238e-06, "loss": 0.0322, "step": 180390 }, { "epoch": 0.172, "grad_norm": 0.03457237780094147, "learning_rate": 1.301530070249346e-06, "loss": 0.0338, "step": 180400 }, { "epoch": 0.17205, "grad_norm": 0.0446646511554718, "learning_rate": 1.3002140149434321e-06, "loss": 0.0346, "step": 180410 }, { "epoch": 0.1721, "grad_norm": 0.0351555310189724, "learning_rate": 1.2988986075807674e-06, "loss": 0.0346, "step": 180420 }, { "epoch": 0.17215, "grad_norm": 0.03978744521737099, "learning_rate": 1.2975838481973063e-06, "loss": 0.0325, "step": 180430 }, { "epoch": 0.1722, "grad_norm": 0.02988533116877079, "learning_rate": 1.2962697368290006e-06, "loss": 0.0347, "step": 180440 }, { "epoch": 0.17225, "grad_norm": 0.031685106456279755, "learning_rate": 1.2949562735117716e-06, "loss": 0.0328, "step": 180450 }, { "epoch": 0.1723, "grad_norm": 0.028254924342036247, "learning_rate": 1.2936434582815377e-06, "loss": 0.0322, "step": 180460 }, { "epoch": 0.17235, "grad_norm": 0.02911999821662903, "learning_rate": 1.292331291174184e-06, "loss": 0.033, "step": 180470 }, { "epoch": 0.1724, "grad_norm": 0.02961915358901024, "learning_rate": 1.2910197722255824e-06, "loss": 0.0326, "step": 180480 }, { "epoch": 0.17245, "grad_norm": 0.03134278580546379, "learning_rate": 1.289708901471598e-06, "loss": 0.0327, "step": 180490 }, { "epoch": 0.1725, "grad_norm": 0.033643536269664764, "learning_rate": 1.2883986789480663e-06, "loss": 0.0326, "step": 180500 }, { "epoch": 0.17255, "grad_norm": 0.03249495476484299, "learning_rate": 1.2870891046908028e-06, "loss": 0.0327, "step": 180510 }, { "epoch": 0.1726, "grad_norm": 0.03298050910234451, "learning_rate": 1.2857801787356127e-06, "loss": 0.0333, "step": 180520 }, { "epoch": 0.17265, "grad_norm": 0.036723580211400986, "learning_rate": 1.2844719011182837e-06, "loss": 0.0337, "step": 180530 }, { "epoch": 0.1727, "grad_norm": 0.03416711091995239, "learning_rate": 1.28316427187459e-06, "loss": 0.0316, "step": 180540 }, { "epoch": 0.17275, "grad_norm": 0.03185368701815605, "learning_rate": 1.2818572910402698e-06, "loss": 0.0315, "step": 180550 }, { "epoch": 0.1728, "grad_norm": 0.028836287558078766, "learning_rate": 1.2805509586510639e-06, "loss": 0.0324, "step": 180560 }, { "epoch": 0.17285, "grad_norm": 0.0385720320045948, "learning_rate": 1.2792452747426798e-06, "loss": 0.0336, "step": 180570 }, { "epoch": 0.1729, "grad_norm": 0.033134475350379944, "learning_rate": 1.2779402393508195e-06, "loss": 0.0344, "step": 180580 }, { "epoch": 0.17295, "grad_norm": 0.03713807463645935, "learning_rate": 1.2766358525111656e-06, "loss": 0.0329, "step": 180590 }, { "epoch": 0.173, "grad_norm": 0.032080747187137604, "learning_rate": 1.2753321142593671e-06, "loss": 0.0324, "step": 180600 }, { "epoch": 0.17305, "grad_norm": 0.031095851212739944, "learning_rate": 1.2740290246310821e-06, "loss": 0.033, "step": 180610 }, { "epoch": 0.1731, "grad_norm": 0.030296696349978447, "learning_rate": 1.272726583661929e-06, "loss": 0.0321, "step": 180620 }, { "epoch": 0.17315, "grad_norm": 0.03122873604297638, "learning_rate": 1.2714247913875183e-06, "loss": 0.0332, "step": 180630 }, { "epoch": 0.1732, "grad_norm": 0.03570917621254921, "learning_rate": 1.2701236478434352e-06, "loss": 0.0337, "step": 180640 }, { "epoch": 0.17325, "grad_norm": 0.028863271698355675, "learning_rate": 1.26882315306526e-06, "loss": 0.0328, "step": 180650 }, { "epoch": 0.1733, "grad_norm": 0.03672472760081291, "learning_rate": 1.267523307088539e-06, "loss": 0.0332, "step": 180660 }, { "epoch": 0.17335, "grad_norm": 0.02857218310236931, "learning_rate": 1.2662241099488215e-06, "loss": 0.033, "step": 180670 }, { "epoch": 0.1734, "grad_norm": 0.03267050161957741, "learning_rate": 1.264925561681618e-06, "loss": 0.0327, "step": 180680 }, { "epoch": 0.17345, "grad_norm": 0.036747854202985764, "learning_rate": 1.2636276623224308e-06, "loss": 0.0346, "step": 180690 }, { "epoch": 0.1735, "grad_norm": 0.03358514979481697, "learning_rate": 1.2623304119067507e-06, "loss": 0.0328, "step": 180700 }, { "epoch": 0.17355, "grad_norm": 0.035015739500522614, "learning_rate": 1.2610338104700359e-06, "loss": 0.0342, "step": 180710 }, { "epoch": 0.1736, "grad_norm": 0.03652586415410042, "learning_rate": 1.2597378580477382e-06, "loss": 0.0353, "step": 180720 }, { "epoch": 0.17365, "grad_norm": 0.03134174644947052, "learning_rate": 1.2584425546752903e-06, "loss": 0.033, "step": 180730 }, { "epoch": 0.1737, "grad_norm": 0.029100047424435616, "learning_rate": 1.2571479003881004e-06, "loss": 0.0342, "step": 180740 }, { "epoch": 0.17375, "grad_norm": 0.029299162328243256, "learning_rate": 1.2558538952215758e-06, "loss": 0.0345, "step": 180750 }, { "epoch": 0.1738, "grad_norm": 0.02790946140885353, "learning_rate": 1.2545605392110776e-06, "loss": 0.0348, "step": 180760 }, { "epoch": 0.17385, "grad_norm": 0.035933081060647964, "learning_rate": 1.2532678323919744e-06, "loss": 0.0327, "step": 180770 }, { "epoch": 0.1739, "grad_norm": 0.030573425814509392, "learning_rate": 1.2519757747996074e-06, "loss": 0.0323, "step": 180780 }, { "epoch": 0.17395, "grad_norm": 0.031238127499818802, "learning_rate": 1.2506843664693013e-06, "loss": 0.0343, "step": 180790 }, { "epoch": 0.174, "grad_norm": 0.03557777777314186, "learning_rate": 1.2493936074363667e-06, "loss": 0.0351, "step": 180800 }, { "epoch": 0.17405, "grad_norm": 0.037748608738183975, "learning_rate": 1.2481034977360806e-06, "loss": 0.0338, "step": 180810 }, { "epoch": 0.1741, "grad_norm": 0.03454018756747246, "learning_rate": 1.2468140374037262e-06, "loss": 0.0331, "step": 180820 }, { "epoch": 0.17415, "grad_norm": 0.03299016132950783, "learning_rate": 1.2455252264745532e-06, "loss": 0.0331, "step": 180830 }, { "epoch": 0.1742, "grad_norm": 0.031141648069024086, "learning_rate": 1.244237064983797e-06, "loss": 0.0338, "step": 180840 }, { "epoch": 0.17425, "grad_norm": 0.03504917770624161, "learning_rate": 1.2429495529666712e-06, "loss": 0.0349, "step": 180850 }, { "epoch": 0.1743, "grad_norm": 0.03259625658392906, "learning_rate": 1.2416626904583783e-06, "loss": 0.0326, "step": 180860 }, { "epoch": 0.17435, "grad_norm": 0.0345325767993927, "learning_rate": 1.240376477494104e-06, "loss": 0.0317, "step": 180870 }, { "epoch": 0.1744, "grad_norm": 0.029554620385169983, "learning_rate": 1.2390909141090146e-06, "loss": 0.0324, "step": 180880 }, { "epoch": 0.17445, "grad_norm": 0.029579907655715942, "learning_rate": 1.2378060003382486e-06, "loss": 0.0328, "step": 180890 }, { "epoch": 0.1745, "grad_norm": 0.0344732403755188, "learning_rate": 1.236521736216939e-06, "loss": 0.0332, "step": 180900 }, { "epoch": 0.17455, "grad_norm": 0.029749970883131027, "learning_rate": 1.2352381217802022e-06, "loss": 0.0329, "step": 180910 }, { "epoch": 0.1746, "grad_norm": 0.030175866559147835, "learning_rate": 1.233955157063124e-06, "loss": 0.0325, "step": 180920 }, { "epoch": 0.17465, "grad_norm": 0.028932299464941025, "learning_rate": 1.2326728421007821e-06, "loss": 0.0323, "step": 180930 }, { "epoch": 0.1747, "grad_norm": 0.032971736043691635, "learning_rate": 1.231391176928237e-06, "loss": 0.0321, "step": 180940 }, { "epoch": 0.17475, "grad_norm": 0.03181711211800575, "learning_rate": 1.2301101615805278e-06, "loss": 0.0326, "step": 180950 }, { "epoch": 0.1748, "grad_norm": 0.0331878699362278, "learning_rate": 1.2288297960926814e-06, "loss": 0.0316, "step": 180960 }, { "epoch": 0.17485, "grad_norm": 0.03228962421417236, "learning_rate": 1.2275500804996898e-06, "loss": 0.0343, "step": 180970 }, { "epoch": 0.1749, "grad_norm": 0.03149693086743355, "learning_rate": 1.2262710148365498e-06, "loss": 0.0328, "step": 180980 }, { "epoch": 0.17495, "grad_norm": 0.030987145379185677, "learning_rate": 1.2249925991382306e-06, "loss": 0.0327, "step": 180990 }, { "epoch": 0.175, "grad_norm": 0.0314842164516449, "learning_rate": 1.2237148334396848e-06, "loss": 0.0326, "step": 181000 }, { "epoch": 0.17505, "grad_norm": 0.028250273317098618, "learning_rate": 1.22243771777584e-06, "loss": 0.0321, "step": 181010 }, { "epoch": 0.1751, "grad_norm": 0.03365689516067505, "learning_rate": 1.2211612521816156e-06, "loss": 0.0326, "step": 181020 }, { "epoch": 0.17515, "grad_norm": 0.03639967739582062, "learning_rate": 1.2198854366919089e-06, "loss": 0.033, "step": 181030 }, { "epoch": 0.1752, "grad_norm": 0.03515635058283806, "learning_rate": 1.2186102713416026e-06, "loss": 0.0338, "step": 181040 }, { "epoch": 0.17525, "grad_norm": 0.03461417555809021, "learning_rate": 1.2173357561655525e-06, "loss": 0.0314, "step": 181050 }, { "epoch": 0.1753, "grad_norm": 0.03033815324306488, "learning_rate": 1.2160618911986138e-06, "loss": 0.0339, "step": 181060 }, { "epoch": 0.17535, "grad_norm": 0.028576888144016266, "learning_rate": 1.2147886764756033e-06, "loss": 0.0317, "step": 181070 }, { "epoch": 0.1754, "grad_norm": 0.035641852766275406, "learning_rate": 1.2135161120313376e-06, "loss": 0.0331, "step": 181080 }, { "epoch": 0.17545, "grad_norm": 0.0301712267100811, "learning_rate": 1.2122441979006056e-06, "loss": 0.032, "step": 181090 }, { "epoch": 0.1755, "grad_norm": 0.03008922189474106, "learning_rate": 1.2109729341181763e-06, "loss": 0.0328, "step": 181100 }, { "epoch": 0.17555, "grad_norm": 0.03754622861742973, "learning_rate": 1.2097023207188142e-06, "loss": 0.0339, "step": 181110 }, { "epoch": 0.1756, "grad_norm": 0.03148207813501358, "learning_rate": 1.2084323577372519e-06, "loss": 0.033, "step": 181120 }, { "epoch": 0.17565, "grad_norm": 0.03381652757525444, "learning_rate": 1.2071630452082123e-06, "loss": 0.0318, "step": 181130 }, { "epoch": 0.1757, "grad_norm": 0.03082115948200226, "learning_rate": 1.2058943831663922e-06, "loss": 0.033, "step": 181140 }, { "epoch": 0.17575, "grad_norm": 0.029486792162060738, "learning_rate": 1.2046263716464834e-06, "loss": 0.0329, "step": 181150 }, { "epoch": 0.1758, "grad_norm": 0.03202378749847412, "learning_rate": 1.20335901068315e-06, "loss": 0.0327, "step": 181160 }, { "epoch": 0.17585, "grad_norm": 0.03240067511796951, "learning_rate": 1.2020923003110418e-06, "loss": 0.0326, "step": 181170 }, { "epoch": 0.1759, "grad_norm": 0.03265637159347534, "learning_rate": 1.2008262405647896e-06, "loss": 0.0346, "step": 181180 }, { "epoch": 0.17595, "grad_norm": 0.039439257234334946, "learning_rate": 1.1995608314790046e-06, "loss": 0.0329, "step": 181190 }, { "epoch": 0.176, "grad_norm": 0.03045968897640705, "learning_rate": 1.198296073088287e-06, "loss": 0.0341, "step": 181200 }, { "epoch": 0.17605, "grad_norm": 0.033822670578956604, "learning_rate": 1.1970319654272144e-06, "loss": 0.0343, "step": 181210 }, { "epoch": 0.1761, "grad_norm": 0.03462981432676315, "learning_rate": 1.1957685085303455e-06, "loss": 0.0337, "step": 181220 }, { "epoch": 0.17615, "grad_norm": 0.032089609652757645, "learning_rate": 1.1945057024322192e-06, "loss": 0.0352, "step": 181230 }, { "epoch": 0.1762, "grad_norm": 0.03550237789750099, "learning_rate": 1.1932435471673637e-06, "loss": 0.0315, "step": 181240 }, { "epoch": 0.17625, "grad_norm": 0.02741037867963314, "learning_rate": 1.1919820427702927e-06, "loss": 0.0338, "step": 181250 }, { "epoch": 0.1763, "grad_norm": 0.03825649991631508, "learning_rate": 1.1907211892754788e-06, "loss": 0.0325, "step": 181260 }, { "epoch": 0.17635, "grad_norm": 0.027359571307897568, "learning_rate": 1.1894609867174112e-06, "loss": 0.0331, "step": 181270 }, { "epoch": 0.1764, "grad_norm": 0.030887959524989128, "learning_rate": 1.1882014351305288e-06, "loss": 0.033, "step": 181280 }, { "epoch": 0.17645, "grad_norm": 0.03310007229447365, "learning_rate": 1.1869425345492762e-06, "loss": 0.0329, "step": 181290 }, { "epoch": 0.1765, "grad_norm": 0.030865758657455444, "learning_rate": 1.1856842850080707e-06, "loss": 0.0344, "step": 181300 }, { "epoch": 0.17655, "grad_norm": 0.033761125057935715, "learning_rate": 1.1844266865413039e-06, "loss": 0.0339, "step": 181310 }, { "epoch": 0.1766, "grad_norm": 0.03483714908361435, "learning_rate": 1.1831697391833708e-06, "loss": 0.0328, "step": 181320 }, { "epoch": 0.17665, "grad_norm": 0.033422935754060745, "learning_rate": 1.1819134429686268e-06, "loss": 0.0334, "step": 181330 }, { "epoch": 0.1767, "grad_norm": 0.03466103971004486, "learning_rate": 1.1806577979314225e-06, "loss": 0.0331, "step": 181340 }, { "epoch": 0.17675, "grad_norm": 0.03309163823723793, "learning_rate": 1.1794028041060834e-06, "loss": 0.0314, "step": 181350 }, { "epoch": 0.1768, "grad_norm": 0.035105571150779724, "learning_rate": 1.1781484615269207e-06, "loss": 0.0325, "step": 181360 }, { "epoch": 0.17685, "grad_norm": 0.02798105590045452, "learning_rate": 1.1768947702282345e-06, "loss": 0.0345, "step": 181370 }, { "epoch": 0.1769, "grad_norm": 0.04057719185948372, "learning_rate": 1.175641730244295e-06, "loss": 0.0347, "step": 181380 }, { "epoch": 0.17695, "grad_norm": 0.03164273500442505, "learning_rate": 1.1743893416093582e-06, "loss": 0.0311, "step": 181390 }, { "epoch": 0.177, "grad_norm": 0.03630835935473442, "learning_rate": 1.1731376043576659e-06, "loss": 0.033, "step": 181400 }, { "epoch": 0.17705, "grad_norm": 0.03291347995400429, "learning_rate": 1.1718865185234407e-06, "loss": 0.0326, "step": 181410 }, { "epoch": 0.1771, "grad_norm": 0.037380918860435486, "learning_rate": 1.1706360841408886e-06, "loss": 0.0347, "step": 181420 }, { "epoch": 0.17715, "grad_norm": 0.03159075975418091, "learning_rate": 1.169386301244188e-06, "loss": 0.033, "step": 181430 }, { "epoch": 0.1772, "grad_norm": 0.032524630427360535, "learning_rate": 1.1681371698675169e-06, "loss": 0.0321, "step": 181440 }, { "epoch": 0.17725, "grad_norm": 0.0323689766228199, "learning_rate": 1.1668886900450205e-06, "loss": 0.0347, "step": 181450 }, { "epoch": 0.1773, "grad_norm": 0.034034889191389084, "learning_rate": 1.1656408618108405e-06, "loss": 0.033, "step": 181460 }, { "epoch": 0.17735, "grad_norm": 0.03076021373271942, "learning_rate": 1.1643936851990778e-06, "loss": 0.0324, "step": 181470 }, { "epoch": 0.1774, "grad_norm": 0.03190213441848755, "learning_rate": 1.1631471602438355e-06, "loss": 0.033, "step": 181480 }, { "epoch": 0.17745, "grad_norm": 0.034344565123319626, "learning_rate": 1.1619012869792e-06, "loss": 0.0326, "step": 181490 }, { "epoch": 0.1775, "grad_norm": 0.03356689587235451, "learning_rate": 1.1606560654392278e-06, "loss": 0.0349, "step": 181500 }, { "epoch": 0.17755, "grad_norm": 0.035489995032548904, "learning_rate": 1.159411495657961e-06, "loss": 0.0355, "step": 181510 }, { "epoch": 0.1776, "grad_norm": 0.03316465765237808, "learning_rate": 1.158167577669428e-06, "loss": 0.0344, "step": 181520 }, { "epoch": 0.17765, "grad_norm": 0.03325631096959114, "learning_rate": 1.1569243115076346e-06, "loss": 0.0327, "step": 181530 }, { "epoch": 0.1777, "grad_norm": 0.031159063801169395, "learning_rate": 1.1556816972065759e-06, "loss": 0.0332, "step": 181540 }, { "epoch": 0.17775, "grad_norm": 0.03643181920051575, "learning_rate": 1.1544397348002196e-06, "loss": 0.034, "step": 181550 }, { "epoch": 0.1778, "grad_norm": 0.0365658663213253, "learning_rate": 1.1531984243225241e-06, "loss": 0.0333, "step": 181560 }, { "epoch": 0.17785, "grad_norm": 0.036051541566848755, "learning_rate": 1.151957765807421e-06, "loss": 0.0336, "step": 181570 }, { "epoch": 0.1779, "grad_norm": 0.02893468365073204, "learning_rate": 1.1507177592888384e-06, "loss": 0.0331, "step": 181580 }, { "epoch": 0.17795, "grad_norm": 0.03419188782572746, "learning_rate": 1.1494784048006718e-06, "loss": 0.0326, "step": 181590 }, { "epoch": 0.178, "grad_norm": 0.03470698371529579, "learning_rate": 1.1482397023768048e-06, "loss": 0.0333, "step": 181600 }, { "epoch": 0.17805, "grad_norm": 0.03663039579987526, "learning_rate": 1.1470016520510996e-06, "loss": 0.0316, "step": 181610 }, { "epoch": 0.1781, "grad_norm": 0.031225159764289856, "learning_rate": 1.145764253857412e-06, "loss": 0.0334, "step": 181620 }, { "epoch": 0.17815, "grad_norm": 0.03354804217815399, "learning_rate": 1.1445275078295709e-06, "loss": 0.0323, "step": 181630 }, { "epoch": 0.1782, "grad_norm": 0.026633795350790024, "learning_rate": 1.1432914140013795e-06, "loss": 0.0319, "step": 181640 }, { "epoch": 0.17825, "grad_norm": 0.030485892668366432, "learning_rate": 1.1420559724066415e-06, "loss": 0.0322, "step": 181650 }, { "epoch": 0.1783, "grad_norm": 0.028709067031741142, "learning_rate": 1.140821183079127e-06, "loss": 0.0333, "step": 181660 }, { "epoch": 0.17835, "grad_norm": 0.03147530183196068, "learning_rate": 1.1395870460526008e-06, "loss": 0.0329, "step": 181670 }, { "epoch": 0.1784, "grad_norm": 0.028239740058779716, "learning_rate": 1.1383535613608026e-06, "loss": 0.0361, "step": 181680 }, { "epoch": 0.17845, "grad_norm": 0.03355034440755844, "learning_rate": 1.1371207290374497e-06, "loss": 0.0314, "step": 181690 }, { "epoch": 0.1785, "grad_norm": 0.03325127437710762, "learning_rate": 1.135888549116254e-06, "loss": 0.033, "step": 181700 }, { "epoch": 0.17855, "grad_norm": 0.0414295494556427, "learning_rate": 1.1346570216309e-06, "loss": 0.0338, "step": 181710 }, { "epoch": 0.1786, "grad_norm": 0.03711015358567238, "learning_rate": 1.1334261466150575e-06, "loss": 0.0325, "step": 181720 }, { "epoch": 0.17865, "grad_norm": 0.036904025822877884, "learning_rate": 1.132195924102375e-06, "loss": 0.0327, "step": 181730 }, { "epoch": 0.1787, "grad_norm": 0.02809218503534794, "learning_rate": 1.1309663541264893e-06, "loss": 0.0314, "step": 181740 }, { "epoch": 0.17875, "grad_norm": 0.030258994549512863, "learning_rate": 1.1297374367210205e-06, "loss": 0.0324, "step": 181750 }, { "epoch": 0.1788, "grad_norm": 0.033731479197740555, "learning_rate": 1.1285091719195589e-06, "loss": 0.0326, "step": 181760 }, { "epoch": 0.17885, "grad_norm": 0.029932241886854172, "learning_rate": 1.1272815597556914e-06, "loss": 0.0323, "step": 181770 }, { "epoch": 0.1789, "grad_norm": 0.030889419838786125, "learning_rate": 1.126054600262974e-06, "loss": 0.0332, "step": 181780 }, { "epoch": 0.17895, "grad_norm": 0.03132836893200874, "learning_rate": 1.1248282934749554e-06, "loss": 0.0322, "step": 181790 }, { "epoch": 0.179, "grad_norm": 0.03017721325159073, "learning_rate": 1.1236026394251642e-06, "loss": 0.0333, "step": 181800 }, { "epoch": 0.17905, "grad_norm": 0.03249027952551842, "learning_rate": 1.122377638147104e-06, "loss": 0.0332, "step": 181810 }, { "epoch": 0.1791, "grad_norm": 0.03272115811705589, "learning_rate": 1.1211532896742704e-06, "loss": 0.034, "step": 181820 }, { "epoch": 0.17915, "grad_norm": 0.03992825374007225, "learning_rate": 1.1199295940401367e-06, "loss": 0.034, "step": 181830 }, { "epoch": 0.1792, "grad_norm": 0.033313509076833725, "learning_rate": 1.1187065512781564e-06, "loss": 0.0338, "step": 181840 }, { "epoch": 0.17925, "grad_norm": 0.03310203179717064, "learning_rate": 1.117484161421764e-06, "loss": 0.0328, "step": 181850 }, { "epoch": 0.1793, "grad_norm": 0.03238173946738243, "learning_rate": 1.1162624245043857e-06, "loss": 0.0346, "step": 181860 }, { "epoch": 0.17935, "grad_norm": 0.032731927931308746, "learning_rate": 1.115041340559414e-06, "loss": 0.033, "step": 181870 }, { "epoch": 0.1794, "grad_norm": 0.03442100062966347, "learning_rate": 1.1138209096202445e-06, "loss": 0.0333, "step": 181880 }, { "epoch": 0.17945, "grad_norm": 0.033178988844156265, "learning_rate": 1.1126011317202367e-06, "loss": 0.0345, "step": 181890 }, { "epoch": 0.1795, "grad_norm": 0.03198530524969101, "learning_rate": 1.1113820068927389e-06, "loss": 0.0328, "step": 181900 }, { "epoch": 0.17955, "grad_norm": 0.03042939305305481, "learning_rate": 1.1101635351710826e-06, "loss": 0.0327, "step": 181910 }, { "epoch": 0.1796, "grad_norm": 0.03228777274489403, "learning_rate": 1.108945716588583e-06, "loss": 0.0342, "step": 181920 }, { "epoch": 0.17965, "grad_norm": 0.029075900092720985, "learning_rate": 1.1077285511785274e-06, "loss": 0.0347, "step": 181930 }, { "epoch": 0.1797, "grad_norm": 0.031394172459840775, "learning_rate": 1.1065120389742e-06, "loss": 0.0339, "step": 181940 }, { "epoch": 0.17975, "grad_norm": 0.035124484449625015, "learning_rate": 1.1052961800088552e-06, "loss": 0.0315, "step": 181950 }, { "epoch": 0.1798, "grad_norm": 0.033435892313718796, "learning_rate": 1.1040809743157438e-06, "loss": 0.0335, "step": 181960 }, { "epoch": 0.17985, "grad_norm": 0.034077659249305725, "learning_rate": 1.1028664219280727e-06, "loss": 0.0317, "step": 181970 }, { "epoch": 0.1799, "grad_norm": 0.034195058047771454, "learning_rate": 1.1016525228790598e-06, "loss": 0.0327, "step": 181980 }, { "epoch": 0.17995, "grad_norm": 0.02897937037050724, "learning_rate": 1.1004392772018841e-06, "loss": 0.0315, "step": 181990 }, { "epoch": 0.18, "grad_norm": 0.03581666946411133, "learning_rate": 1.0992266849297246e-06, "loss": 0.0315, "step": 182000 }, { "epoch": 0.18005, "grad_norm": 0.02805798500776291, "learning_rate": 1.0980147460957268e-06, "loss": 0.0321, "step": 182010 }, { "epoch": 0.1801, "grad_norm": 0.03290720656514168, "learning_rate": 1.0968034607330258e-06, "loss": 0.0322, "step": 182020 }, { "epoch": 0.18015, "grad_norm": 0.03115527331829071, "learning_rate": 1.0955928288747392e-06, "loss": 0.0337, "step": 182030 }, { "epoch": 0.1802, "grad_norm": 0.03652266785502434, "learning_rate": 1.0943828505539656e-06, "loss": 0.0327, "step": 182040 }, { "epoch": 0.18025, "grad_norm": 0.03430403023958206, "learning_rate": 1.093173525803781e-06, "loss": 0.0329, "step": 182050 }, { "epoch": 0.1803, "grad_norm": 0.032786864787340164, "learning_rate": 1.0919648546572515e-06, "loss": 0.0321, "step": 182060 }, { "epoch": 0.18035, "grad_norm": 0.02922545000910759, "learning_rate": 1.0907568371474191e-06, "loss": 0.0318, "step": 182070 }, { "epoch": 0.1804, "grad_norm": 0.029249820858240128, "learning_rate": 1.0895494733073164e-06, "loss": 0.0353, "step": 182080 }, { "epoch": 0.18045, "grad_norm": 0.029231199994683266, "learning_rate": 1.0883427631699472e-06, "loss": 0.0321, "step": 182090 }, { "epoch": 0.1805, "grad_norm": 0.029502786695957184, "learning_rate": 1.0871367067683047e-06, "loss": 0.0312, "step": 182100 }, { "epoch": 0.18055, "grad_norm": 0.028941819444298744, "learning_rate": 1.085931304135357e-06, "loss": 0.033, "step": 182110 }, { "epoch": 0.1806, "grad_norm": 0.03209478035569191, "learning_rate": 1.0847265553040665e-06, "loss": 0.0321, "step": 182120 }, { "epoch": 0.18065, "grad_norm": 0.030377700924873352, "learning_rate": 1.083522460307368e-06, "loss": 0.0319, "step": 182130 }, { "epoch": 0.1807, "grad_norm": 0.029901035130023956, "learning_rate": 1.0823190191781768e-06, "loss": 0.0318, "step": 182140 }, { "epoch": 0.18075, "grad_norm": 0.02963991090655327, "learning_rate": 1.0811162319494028e-06, "loss": 0.0327, "step": 182150 }, { "epoch": 0.1808, "grad_norm": 0.033136945217847824, "learning_rate": 1.0799140986539197e-06, "loss": 0.0326, "step": 182160 }, { "epoch": 0.18085, "grad_norm": 0.030957898125052452, "learning_rate": 1.0787126193246066e-06, "loss": 0.0309, "step": 182170 }, { "epoch": 0.1809, "grad_norm": 0.03250615671277046, "learning_rate": 1.0775117939942957e-06, "loss": 0.0337, "step": 182180 }, { "epoch": 0.18095, "grad_norm": 0.032300982624292374, "learning_rate": 1.0763116226958276e-06, "loss": 0.0335, "step": 182190 }, { "epoch": 0.181, "grad_norm": 0.03462289273738861, "learning_rate": 1.0751121054620144e-06, "loss": 0.0323, "step": 182200 }, { "epoch": 0.18105, "grad_norm": 0.03301731124520302, "learning_rate": 1.073913242325647e-06, "loss": 0.0333, "step": 182210 }, { "epoch": 0.1811, "grad_norm": 0.03348396345973015, "learning_rate": 1.0727150333195046e-06, "loss": 0.0326, "step": 182220 }, { "epoch": 0.18115, "grad_norm": 0.028879957273602486, "learning_rate": 1.0715174784763388e-06, "loss": 0.031, "step": 182230 }, { "epoch": 0.1812, "grad_norm": 0.03134356439113617, "learning_rate": 1.070320577828901e-06, "loss": 0.0326, "step": 182240 }, { "epoch": 0.18125, "grad_norm": 0.03427322953939438, "learning_rate": 1.069124331409907e-06, "loss": 0.0324, "step": 182250 }, { "epoch": 0.1813, "grad_norm": 0.0308916624635458, "learning_rate": 1.0679287392520608e-06, "loss": 0.0319, "step": 182260 }, { "epoch": 0.18135, "grad_norm": 0.030325835570693016, "learning_rate": 1.0667338013880563e-06, "loss": 0.0324, "step": 182270 }, { "epoch": 0.1814, "grad_norm": 0.03367728367447853, "learning_rate": 1.0655395178505529e-06, "loss": 0.0335, "step": 182280 }, { "epoch": 0.18145, "grad_norm": 0.03004777617752552, "learning_rate": 1.0643458886722108e-06, "loss": 0.0318, "step": 182290 }, { "epoch": 0.1815, "grad_norm": 0.031628742814064026, "learning_rate": 1.0631529138856621e-06, "loss": 0.0326, "step": 182300 }, { "epoch": 0.18155, "grad_norm": 0.03036637231707573, "learning_rate": 1.0619605935235145e-06, "loss": 0.0319, "step": 182310 }, { "epoch": 0.1816, "grad_norm": 0.034068137407302856, "learning_rate": 1.0607689276183746e-06, "loss": 0.0324, "step": 182320 }, { "epoch": 0.18165, "grad_norm": 0.031157853081822395, "learning_rate": 1.0595779162028196e-06, "loss": 0.0321, "step": 182330 }, { "epoch": 0.1817, "grad_norm": 0.03150942549109459, "learning_rate": 1.0583875593094122e-06, "loss": 0.0327, "step": 182340 }, { "epoch": 0.18175, "grad_norm": 0.029292112216353416, "learning_rate": 1.0571978569706876e-06, "loss": 0.0319, "step": 182350 }, { "epoch": 0.1818, "grad_norm": 0.03450557589530945, "learning_rate": 1.0560088092191833e-06, "loss": 0.0341, "step": 182360 }, { "epoch": 0.18185, "grad_norm": 0.03337952122092247, "learning_rate": 1.0548204160874015e-06, "loss": 0.0329, "step": 182370 }, { "epoch": 0.1819, "grad_norm": 0.028782257810235023, "learning_rate": 1.0536326776078353e-06, "loss": 0.0349, "step": 182380 }, { "epoch": 0.18195, "grad_norm": 0.03170391544699669, "learning_rate": 1.0524455938129534e-06, "loss": 0.0332, "step": 182390 }, { "epoch": 0.182, "grad_norm": 0.029747601598501205, "learning_rate": 1.0512591647352133e-06, "loss": 0.0338, "step": 182400 }, { "epoch": 0.18205, "grad_norm": 0.0323491096496582, "learning_rate": 1.0500733904070497e-06, "loss": 0.0343, "step": 182410 }, { "epoch": 0.1821, "grad_norm": 0.03273355960845947, "learning_rate": 1.0488882708608843e-06, "loss": 0.0337, "step": 182420 }, { "epoch": 0.18215, "grad_norm": 0.0317191444337368, "learning_rate": 1.0477038061291162e-06, "loss": 0.0312, "step": 182430 }, { "epoch": 0.1822, "grad_norm": 0.03376864641904831, "learning_rate": 1.0465199962441246e-06, "loss": 0.0352, "step": 182440 }, { "epoch": 0.18225, "grad_norm": 0.03369827941060066, "learning_rate": 1.0453368412382758e-06, "loss": 0.0317, "step": 182450 }, { "epoch": 0.1823, "grad_norm": 0.032915305346250534, "learning_rate": 1.0441543411439242e-06, "loss": 0.0323, "step": 182460 }, { "epoch": 0.18235, "grad_norm": 0.031523942947387695, "learning_rate": 1.0429724959933885e-06, "loss": 0.0327, "step": 182470 }, { "epoch": 0.1824, "grad_norm": 0.03458832949399948, "learning_rate": 1.041791305818987e-06, "loss": 0.0324, "step": 182480 }, { "epoch": 0.18245, "grad_norm": 0.028763752430677414, "learning_rate": 1.0406107706530056e-06, "loss": 0.0343, "step": 182490 }, { "epoch": 0.1825, "grad_norm": 0.03280990570783615, "learning_rate": 1.0394308905277316e-06, "loss": 0.0328, "step": 182500 }, { "epoch": 0.18255, "grad_norm": 0.027918484061956406, "learning_rate": 1.038251665475412e-06, "loss": 0.0331, "step": 182510 }, { "epoch": 0.1826, "grad_norm": 0.034678444266319275, "learning_rate": 1.0370730955282876e-06, "loss": 0.0345, "step": 182520 }, { "epoch": 0.18265, "grad_norm": 0.02536662481725216, "learning_rate": 1.035895180718588e-06, "loss": 0.0335, "step": 182530 }, { "epoch": 0.1827, "grad_norm": 0.031071780249476433, "learning_rate": 1.034717921078507e-06, "loss": 0.0329, "step": 182540 }, { "epoch": 0.18275, "grad_norm": 0.03006831556558609, "learning_rate": 1.033541316640238e-06, "loss": 0.0349, "step": 182550 }, { "epoch": 0.1828, "grad_norm": 0.03078848123550415, "learning_rate": 1.0323653674359417e-06, "loss": 0.0327, "step": 182560 }, { "epoch": 0.18285, "grad_norm": 0.025866178795695305, "learning_rate": 1.0311900734977702e-06, "loss": 0.033, "step": 182570 }, { "epoch": 0.1829, "grad_norm": 0.03372535854578018, "learning_rate": 1.0300154348578616e-06, "loss": 0.0345, "step": 182580 }, { "epoch": 0.18295, "grad_norm": 0.03365950658917427, "learning_rate": 1.0288414515483263e-06, "loss": 0.0317, "step": 182590 }, { "epoch": 0.183, "grad_norm": 0.03343362361192703, "learning_rate": 1.0276681236012608e-06, "loss": 0.0318, "step": 182600 }, { "epoch": 0.18305, "grad_norm": 0.03235025331377983, "learning_rate": 1.0264954510487395e-06, "loss": 0.0321, "step": 182610 }, { "epoch": 0.1831, "grad_norm": 0.0361965037882328, "learning_rate": 1.0253234339228286e-06, "loss": 0.0328, "step": 182620 }, { "epoch": 0.18315, "grad_norm": 0.029026083648204803, "learning_rate": 1.0241520722555685e-06, "loss": 0.0323, "step": 182630 }, { "epoch": 0.1832, "grad_norm": 0.02814497798681259, "learning_rate": 1.0229813660789817e-06, "loss": 0.032, "step": 182640 }, { "epoch": 0.18325, "grad_norm": 0.031243901699781418, "learning_rate": 1.021811315425078e-06, "loss": 0.0329, "step": 182650 }, { "epoch": 0.1833, "grad_norm": 0.032474394887685776, "learning_rate": 1.0206419203258405e-06, "loss": 0.0316, "step": 182660 }, { "epoch": 0.18335, "grad_norm": 0.035039883106946945, "learning_rate": 1.0194731808132518e-06, "loss": 0.0353, "step": 182670 }, { "epoch": 0.1834, "grad_norm": 0.028590889647603035, "learning_rate": 1.0183050969192532e-06, "loss": 0.0318, "step": 182680 }, { "epoch": 0.18345, "grad_norm": 0.03609303757548332, "learning_rate": 1.01713766867578e-06, "loss": 0.0328, "step": 182690 }, { "epoch": 0.1835, "grad_norm": 0.03724497929215431, "learning_rate": 1.0159708961147596e-06, "loss": 0.0317, "step": 182700 }, { "epoch": 0.18355, "grad_norm": 0.03723419830203056, "learning_rate": 1.0148047792680803e-06, "loss": 0.0321, "step": 182710 }, { "epoch": 0.1836, "grad_norm": 0.0317065566778183, "learning_rate": 1.0136393181676306e-06, "loss": 0.032, "step": 182720 }, { "epoch": 0.18365, "grad_norm": 0.03472043573856354, "learning_rate": 1.0124745128452685e-06, "loss": 0.0315, "step": 182730 }, { "epoch": 0.1837, "grad_norm": 0.031878240406513214, "learning_rate": 1.011310363332843e-06, "loss": 0.0325, "step": 182740 }, { "epoch": 0.18375, "grad_norm": 0.033075589686632156, "learning_rate": 1.0101468696621792e-06, "loss": 0.0327, "step": 182750 }, { "epoch": 0.1838, "grad_norm": 0.03156924620270729, "learning_rate": 1.0089840318650845e-06, "loss": 0.0338, "step": 182760 }, { "epoch": 0.18385, "grad_norm": 0.03170720487833023, "learning_rate": 1.0078218499733589e-06, "loss": 0.0311, "step": 182770 }, { "epoch": 0.1839, "grad_norm": 0.031086206436157227, "learning_rate": 1.0066603240187655e-06, "loss": 0.0324, "step": 182780 }, { "epoch": 0.18395, "grad_norm": 0.030343493446707726, "learning_rate": 1.005499454033068e-06, "loss": 0.0332, "step": 182790 }, { "epoch": 0.184, "grad_norm": 0.0303870290517807, "learning_rate": 1.0043392400479996e-06, "loss": 0.0336, "step": 182800 }, { "epoch": 0.18405, "grad_norm": 0.029981570318341255, "learning_rate": 1.0031796820952844e-06, "loss": 0.0334, "step": 182810 }, { "epoch": 0.1841, "grad_norm": 0.035858385264873505, "learning_rate": 1.0020207802066166e-06, "loss": 0.0319, "step": 182820 }, { "epoch": 0.18415, "grad_norm": 0.030471090227365494, "learning_rate": 1.0008625344136907e-06, "loss": 0.0319, "step": 182830 }, { "epoch": 0.1842, "grad_norm": 0.030062668025493622, "learning_rate": 9.997049447481644e-07, "loss": 0.0325, "step": 182840 }, { "epoch": 0.18425, "grad_norm": 0.026811838150024414, "learning_rate": 9.985480112416846e-07, "loss": 0.0328, "step": 182850 }, { "epoch": 0.1843, "grad_norm": 0.03334164619445801, "learning_rate": 9.973917339258898e-07, "loss": 0.032, "step": 182860 }, { "epoch": 0.18435, "grad_norm": 0.03380081430077553, "learning_rate": 9.962361128323854e-07, "loss": 0.0326, "step": 182870 }, { "epoch": 0.1844, "grad_norm": 0.030175121501088142, "learning_rate": 9.950811479927712e-07, "loss": 0.033, "step": 182880 }, { "epoch": 0.18445, "grad_norm": 0.030168289318680763, "learning_rate": 9.939268394386193e-07, "loss": 0.0321, "step": 182890 }, { "epoch": 0.1845, "grad_norm": 0.029061466455459595, "learning_rate": 9.927731872014845e-07, "loss": 0.031, "step": 182900 }, { "epoch": 0.18455, "grad_norm": 0.030922871083021164, "learning_rate": 9.916201913129169e-07, "loss": 0.031, "step": 182910 }, { "epoch": 0.1846, "grad_norm": 0.03239313140511513, "learning_rate": 9.90467851804433e-07, "loss": 0.0308, "step": 182920 }, { "epoch": 0.18465, "grad_norm": 0.0330660305917263, "learning_rate": 9.89316168707538e-07, "loss": 0.0324, "step": 182930 }, { "epoch": 0.1847, "grad_norm": 0.032215192914009094, "learning_rate": 9.881651420537153e-07, "loss": 0.0328, "step": 182940 }, { "epoch": 0.18475, "grad_norm": 0.03356137499213219, "learning_rate": 9.870147718744365e-07, "loss": 0.0322, "step": 182950 }, { "epoch": 0.1848, "grad_norm": 0.029361480847001076, "learning_rate": 9.858650582011602e-07, "loss": 0.0318, "step": 182960 }, { "epoch": 0.18485, "grad_norm": 0.031547416001558304, "learning_rate": 9.847160010653028e-07, "loss": 0.0319, "step": 182970 }, { "epoch": 0.1849, "grad_norm": 0.02946125902235508, "learning_rate": 9.83567600498292e-07, "loss": 0.0337, "step": 182980 }, { "epoch": 0.18495, "grad_norm": 0.03201250731945038, "learning_rate": 9.82419856531519e-07, "loss": 0.0317, "step": 182990 }, { "epoch": 0.185, "grad_norm": 0.03201431408524513, "learning_rate": 9.812727691963647e-07, "loss": 0.0327, "step": 183000 }, { "epoch": 0.18505, "grad_norm": 0.03448358178138733, "learning_rate": 9.801263385241927e-07, "loss": 0.0318, "step": 183010 }, { "epoch": 0.1851, "grad_norm": 0.03170866519212723, "learning_rate": 9.789805645463363e-07, "loss": 0.0331, "step": 183020 }, { "epoch": 0.18515, "grad_norm": 0.03106853738427162, "learning_rate": 9.778354472941315e-07, "loss": 0.0329, "step": 183030 }, { "epoch": 0.1852, "grad_norm": 0.02936870977282524, "learning_rate": 9.76690986798881e-07, "loss": 0.0324, "step": 183040 }, { "epoch": 0.18525, "grad_norm": 0.02852589264512062, "learning_rate": 9.755471830918738e-07, "loss": 0.0324, "step": 183050 }, { "epoch": 0.1853, "grad_norm": 0.032404638826847076, "learning_rate": 9.744040362043765e-07, "loss": 0.0334, "step": 183060 }, { "epoch": 0.18535, "grad_norm": 0.026101185008883476, "learning_rate": 9.732615461676531e-07, "loss": 0.0332, "step": 183070 }, { "epoch": 0.1854, "grad_norm": 0.03185758367180824, "learning_rate": 9.721197130129255e-07, "loss": 0.0315, "step": 183080 }, { "epoch": 0.18545, "grad_norm": 0.031807247549295425, "learning_rate": 9.709785367714246e-07, "loss": 0.0325, "step": 183090 }, { "epoch": 0.1855, "grad_norm": 0.03422261402010918, "learning_rate": 9.69838017474342e-07, "loss": 0.0329, "step": 183100 }, { "epoch": 0.18555, "grad_norm": 0.03144890442490578, "learning_rate": 9.686981551528584e-07, "loss": 0.0326, "step": 183110 }, { "epoch": 0.1856, "grad_norm": 0.03239867463707924, "learning_rate": 9.675589498381405e-07, "loss": 0.0338, "step": 183120 }, { "epoch": 0.18565, "grad_norm": 0.03229037672281265, "learning_rate": 9.664204015613327e-07, "loss": 0.0327, "step": 183130 }, { "epoch": 0.1857, "grad_norm": 0.033264774829149246, "learning_rate": 9.652825103535572e-07, "loss": 0.0327, "step": 183140 }, { "epoch": 0.18575, "grad_norm": 0.03592655062675476, "learning_rate": 9.64145276245934e-07, "loss": 0.031, "step": 183150 }, { "epoch": 0.1858, "grad_norm": 0.033310163766145706, "learning_rate": 9.630086992695465e-07, "loss": 0.0368, "step": 183160 }, { "epoch": 0.18585, "grad_norm": 0.02804112248122692, "learning_rate": 9.61872779455475e-07, "loss": 0.0325, "step": 183170 }, { "epoch": 0.1859, "grad_norm": 0.02908741869032383, "learning_rate": 9.607375168347672e-07, "loss": 0.0326, "step": 183180 }, { "epoch": 0.18595, "grad_norm": 0.03357316181063652, "learning_rate": 9.596029114384647e-07, "loss": 0.0332, "step": 183190 }, { "epoch": 0.186, "grad_norm": 0.03149103373289108, "learning_rate": 9.584689632975874e-07, "loss": 0.0332, "step": 183200 }, { "epoch": 0.18605, "grad_norm": 0.033084675669670105, "learning_rate": 9.573356724431381e-07, "loss": 0.0318, "step": 183210 }, { "epoch": 0.1861, "grad_norm": 0.03316180780529976, "learning_rate": 9.562030389060977e-07, "loss": 0.0325, "step": 183220 }, { "epoch": 0.18615, "grad_norm": 0.02788355201482773, "learning_rate": 9.550710627174304e-07, "loss": 0.0328, "step": 183230 }, { "epoch": 0.1862, "grad_norm": 0.028513340279459953, "learning_rate": 9.539397439080917e-07, "loss": 0.0332, "step": 183240 }, { "epoch": 0.18625, "grad_norm": 0.029110228642821312, "learning_rate": 9.528090825090069e-07, "loss": 0.033, "step": 183250 }, { "epoch": 0.1863, "grad_norm": 0.030120152980089188, "learning_rate": 9.516790785510876e-07, "loss": 0.033, "step": 183260 }, { "epoch": 0.18635, "grad_norm": 0.027336476370692253, "learning_rate": 9.505497320652229e-07, "loss": 0.0317, "step": 183270 }, { "epoch": 0.1864, "grad_norm": 0.02947922609746456, "learning_rate": 9.494210430822937e-07, "loss": 0.0319, "step": 183280 }, { "epoch": 0.18645, "grad_norm": 0.02891182340681553, "learning_rate": 9.48293011633164e-07, "loss": 0.0328, "step": 183290 }, { "epoch": 0.1865, "grad_norm": 0.03284899890422821, "learning_rate": 9.471656377486649e-07, "loss": 0.033, "step": 183300 }, { "epoch": 0.18655, "grad_norm": 0.027189364656805992, "learning_rate": 9.460389214596215e-07, "loss": 0.032, "step": 183310 }, { "epoch": 0.1866, "grad_norm": 0.03087041899561882, "learning_rate": 9.449128627968345e-07, "loss": 0.0323, "step": 183320 }, { "epoch": 0.18665, "grad_norm": 0.03247639164328575, "learning_rate": 9.43787461791093e-07, "loss": 0.0322, "step": 183330 }, { "epoch": 0.1867, "grad_norm": 0.03126617893576622, "learning_rate": 9.426627184731696e-07, "loss": 0.032, "step": 183340 }, { "epoch": 0.18675, "grad_norm": 0.03199268504977226, "learning_rate": 9.415386328738035e-07, "loss": 0.0332, "step": 183350 }, { "epoch": 0.1868, "grad_norm": 0.0347495973110199, "learning_rate": 9.40415205023737e-07, "loss": 0.0339, "step": 183360 }, { "epoch": 0.18685, "grad_norm": 0.03300970792770386, "learning_rate": 9.392924349536758e-07, "loss": 0.033, "step": 183370 }, { "epoch": 0.1869, "grad_norm": 0.03369023650884628, "learning_rate": 9.381703226943289e-07, "loss": 0.0328, "step": 183380 }, { "epoch": 0.18695, "grad_norm": 0.031055910512804985, "learning_rate": 9.370488682763579e-07, "loss": 0.0321, "step": 183390 }, { "epoch": 0.187, "grad_norm": 0.03379061818122864, "learning_rate": 9.359280717304297e-07, "loss": 0.035, "step": 183400 }, { "epoch": 0.18705, "grad_norm": 0.030245963484048843, "learning_rate": 9.348079330871923e-07, "loss": 0.0325, "step": 183410 }, { "epoch": 0.1871, "grad_norm": 0.03331426531076431, "learning_rate": 9.336884523772654e-07, "loss": 0.033, "step": 183420 }, { "epoch": 0.18715, "grad_norm": 0.03207800164818764, "learning_rate": 9.325696296312552e-07, "loss": 0.034, "step": 183430 }, { "epoch": 0.1872, "grad_norm": 0.03171722963452339, "learning_rate": 9.314514648797457e-07, "loss": 0.0328, "step": 183440 }, { "epoch": 0.18725, "grad_norm": 0.030682936310768127, "learning_rate": 9.303339581533122e-07, "loss": 0.0324, "step": 183450 }, { "epoch": 0.1873, "grad_norm": 0.033495258539915085, "learning_rate": 9.292171094825053e-07, "loss": 0.0335, "step": 183460 }, { "epoch": 0.18735, "grad_norm": 0.03323593735694885, "learning_rate": 9.281009188978618e-07, "loss": 0.0345, "step": 183470 }, { "epoch": 0.1874, "grad_norm": 0.03410422056913376, "learning_rate": 9.269853864298961e-07, "loss": 0.0336, "step": 183480 }, { "epoch": 0.18745, "grad_norm": 0.031951263546943665, "learning_rate": 9.258705121091032e-07, "loss": 0.0335, "step": 183490 }, { "epoch": 0.1875, "grad_norm": 0.0321340374648571, "learning_rate": 9.247562959659673e-07, "loss": 0.033, "step": 183500 }, { "epoch": 0.18755, "grad_norm": 0.02933504246175289, "learning_rate": 9.236427380309526e-07, "loss": 0.0344, "step": 183510 }, { "epoch": 0.1876, "grad_norm": 0.02984052337706089, "learning_rate": 9.22529838334496e-07, "loss": 0.0325, "step": 183520 }, { "epoch": 0.18765, "grad_norm": 0.031184837222099304, "learning_rate": 9.214175969070288e-07, "loss": 0.0344, "step": 183530 }, { "epoch": 0.1877, "grad_norm": 0.03433763235807419, "learning_rate": 9.203060137789599e-07, "loss": 0.0328, "step": 183540 }, { "epoch": 0.18775, "grad_norm": 0.029375756159424782, "learning_rate": 9.191950889806816e-07, "loss": 0.0324, "step": 183550 }, { "epoch": 0.1878, "grad_norm": 0.03221710026264191, "learning_rate": 9.180848225425586e-07, "loss": 0.0334, "step": 183560 }, { "epoch": 0.18785, "grad_norm": 0.027692032977938652, "learning_rate": 9.169752144949501e-07, "loss": 0.0323, "step": 183570 }, { "epoch": 0.1879, "grad_norm": 0.02899179421365261, "learning_rate": 9.158662648681898e-07, "loss": 0.0327, "step": 183580 }, { "epoch": 0.18795, "grad_norm": 0.032324861735105515, "learning_rate": 9.14757973692601e-07, "loss": 0.0335, "step": 183590 }, { "epoch": 0.188, "grad_norm": 0.029175706207752228, "learning_rate": 9.136503409984815e-07, "loss": 0.0329, "step": 183600 }, { "epoch": 0.18805, "grad_norm": 0.03108326904475689, "learning_rate": 9.125433668161071e-07, "loss": 0.0317, "step": 183610 }, { "epoch": 0.1881, "grad_norm": 0.029612571001052856, "learning_rate": 9.114370511757536e-07, "loss": 0.032, "step": 183620 }, { "epoch": 0.18815, "grad_norm": 0.030926868319511414, "learning_rate": 9.103313941076608e-07, "loss": 0.0341, "step": 183630 }, { "epoch": 0.1882, "grad_norm": 0.03476545587182045, "learning_rate": 9.092263956420572e-07, "loss": 0.0339, "step": 183640 }, { "epoch": 0.18825, "grad_norm": 0.03209243714809418, "learning_rate": 9.081220558091518e-07, "loss": 0.0318, "step": 183650 }, { "epoch": 0.1883, "grad_norm": 0.02719273418188095, "learning_rate": 9.070183746391375e-07, "loss": 0.0315, "step": 183660 }, { "epoch": 0.18835, "grad_norm": 0.03333199396729469, "learning_rate": 9.0591535216219e-07, "loss": 0.0339, "step": 183670 }, { "epoch": 0.1884, "grad_norm": 0.02819048799574375, "learning_rate": 9.048129884084683e-07, "loss": 0.033, "step": 183680 }, { "epoch": 0.18845, "grad_norm": 0.03179158270359039, "learning_rate": 9.037112834081068e-07, "loss": 0.0335, "step": 183690 }, { "epoch": 0.1885, "grad_norm": 0.029788820073008537, "learning_rate": 9.026102371912232e-07, "loss": 0.0322, "step": 183700 }, { "epoch": 0.18855, "grad_norm": 0.02990236133337021, "learning_rate": 9.015098497879265e-07, "loss": 0.0331, "step": 183710 }, { "epoch": 0.1886, "grad_norm": 0.031020531430840492, "learning_rate": 9.004101212282956e-07, "loss": 0.0349, "step": 183720 }, { "epoch": 0.18865, "grad_norm": 0.030472010374069214, "learning_rate": 8.993110515423953e-07, "loss": 0.0346, "step": 183730 }, { "epoch": 0.1887, "grad_norm": 0.04369658604264259, "learning_rate": 8.982126407602792e-07, "loss": 0.0384, "step": 183740 }, { "epoch": 0.18875, "grad_norm": 0.038215216249227524, "learning_rate": 8.971148889119734e-07, "loss": 0.0313, "step": 183750 }, { "epoch": 0.1888, "grad_norm": 0.029607990756630898, "learning_rate": 8.960177960274957e-07, "loss": 0.0333, "step": 183760 }, { "epoch": 0.18885, "grad_norm": 0.03382629528641701, "learning_rate": 8.94921362136833e-07, "loss": 0.0323, "step": 183770 }, { "epoch": 0.1889, "grad_norm": 0.03388132527470589, "learning_rate": 8.938255872699613e-07, "loss": 0.0315, "step": 183780 }, { "epoch": 0.18895, "grad_norm": 0.03242596983909607, "learning_rate": 8.927304714568458e-07, "loss": 0.0331, "step": 183790 }, { "epoch": 0.189, "grad_norm": 0.029478929936885834, "learning_rate": 8.916360147274233e-07, "loss": 0.0319, "step": 183800 }, { "epoch": 0.18905, "grad_norm": 0.029674693942070007, "learning_rate": 8.905422171116145e-07, "loss": 0.031, "step": 183810 }, { "epoch": 0.1891, "grad_norm": 0.029971517622470856, "learning_rate": 8.894490786393206e-07, "loss": 0.0313, "step": 183820 }, { "epoch": 0.18915, "grad_norm": 0.03268228843808174, "learning_rate": 8.883565993404341e-07, "loss": 0.0324, "step": 183830 }, { "epoch": 0.1892, "grad_norm": 0.03221019729971886, "learning_rate": 8.872647792448203e-07, "loss": 0.032, "step": 183840 }, { "epoch": 0.18925, "grad_norm": 0.0357881523668766, "learning_rate": 8.861736183823272e-07, "loss": 0.0327, "step": 183850 }, { "epoch": 0.1893, "grad_norm": 0.03263729438185692, "learning_rate": 8.850831167827895e-07, "loss": 0.0332, "step": 183860 }, { "epoch": 0.18935, "grad_norm": 0.03233606740832329, "learning_rate": 8.839932744760165e-07, "loss": 0.0328, "step": 183870 }, { "epoch": 0.1894, "grad_norm": 0.03519522398710251, "learning_rate": 8.829040914918096e-07, "loss": 0.0317, "step": 183880 }, { "epoch": 0.18945, "grad_norm": 0.02882656268775463, "learning_rate": 8.818155678599477e-07, "loss": 0.0338, "step": 183890 }, { "epoch": 0.1895, "grad_norm": 0.03132500872015953, "learning_rate": 8.807277036101819e-07, "loss": 0.0323, "step": 183900 }, { "epoch": 0.18955, "grad_norm": 0.033128950744867325, "learning_rate": 8.796404987722634e-07, "loss": 0.0348, "step": 183910 }, { "epoch": 0.1896, "grad_norm": 0.030885368585586548, "learning_rate": 8.785539533759101e-07, "loss": 0.0344, "step": 183920 }, { "epoch": 0.18965, "grad_norm": 0.03386777639389038, "learning_rate": 8.774680674508318e-07, "loss": 0.0342, "step": 183930 }, { "epoch": 0.1897, "grad_norm": 0.03264821693301201, "learning_rate": 8.7638284102671e-07, "loss": 0.0339, "step": 183940 }, { "epoch": 0.18975, "grad_norm": 0.03148859739303589, "learning_rate": 8.752982741332239e-07, "loss": 0.0337, "step": 183950 }, { "epoch": 0.1898, "grad_norm": 0.03271210193634033, "learning_rate": 8.742143668000136e-07, "loss": 0.0342, "step": 183960 }, { "epoch": 0.18985, "grad_norm": 0.03562767803668976, "learning_rate": 8.731311190567248e-07, "loss": 0.0338, "step": 183970 }, { "epoch": 0.1899, "grad_norm": 0.030357353389263153, "learning_rate": 8.720485309329646e-07, "loss": 0.0332, "step": 183980 }, { "epoch": 0.18995, "grad_norm": 0.032088980078697205, "learning_rate": 8.709666024583313e-07, "loss": 0.0373, "step": 183990 }, { "epoch": 0.19, "grad_norm": 0.03076958656311035, "learning_rate": 8.698853336624097e-07, "loss": 0.0333, "step": 184000 }, { "epoch": 0.19005, "grad_norm": 0.03482131287455559, "learning_rate": 8.688047245747566e-07, "loss": 0.0355, "step": 184010 }, { "epoch": 0.1901, "grad_norm": 0.03692816570401192, "learning_rate": 8.677247752249151e-07, "loss": 0.0338, "step": 184020 }, { "epoch": 0.19015, "grad_norm": 0.029213665053248405, "learning_rate": 8.666454856424116e-07, "loss": 0.0331, "step": 184030 }, { "epoch": 0.1902, "grad_norm": 0.035306889563798904, "learning_rate": 8.655668558567559e-07, "loss": 0.0332, "step": 184040 }, { "epoch": 0.19025, "grad_norm": 0.029386315494775772, "learning_rate": 8.644888858974381e-07, "loss": 0.0332, "step": 184050 }, { "epoch": 0.1903, "grad_norm": 0.031330134719610214, "learning_rate": 8.634115757939209e-07, "loss": 0.0344, "step": 184060 }, { "epoch": 0.19035, "grad_norm": 0.029435602948069572, "learning_rate": 8.623349255756697e-07, "loss": 0.0333, "step": 184070 }, { "epoch": 0.1904, "grad_norm": 0.02990766242146492, "learning_rate": 8.612589352721079e-07, "loss": 0.0318, "step": 184080 }, { "epoch": 0.19045, "grad_norm": 0.03036910854279995, "learning_rate": 8.601836049126622e-07, "loss": 0.0319, "step": 184090 }, { "epoch": 0.1905, "grad_norm": 0.02997252345085144, "learning_rate": 8.591089345267284e-07, "loss": 0.0343, "step": 184100 }, { "epoch": 0.19055, "grad_norm": 0.03387441858649254, "learning_rate": 8.58034924143683e-07, "loss": 0.0327, "step": 184110 }, { "epoch": 0.1906, "grad_norm": 0.03145996108651161, "learning_rate": 8.569615737928944e-07, "loss": 0.0321, "step": 184120 }, { "epoch": 0.19065, "grad_norm": 0.03035629726946354, "learning_rate": 8.558888835037082e-07, "loss": 0.0314, "step": 184130 }, { "epoch": 0.1907, "grad_norm": 0.0315399244427681, "learning_rate": 8.548168533054513e-07, "loss": 0.0318, "step": 184140 }, { "epoch": 0.19075, "grad_norm": 0.03548549860715866, "learning_rate": 8.53745483227425e-07, "loss": 0.0314, "step": 184150 }, { "epoch": 0.1908, "grad_norm": 0.02948744036257267, "learning_rate": 8.526747732989254e-07, "loss": 0.0328, "step": 184160 }, { "epoch": 0.19085, "grad_norm": 0.030535975471138954, "learning_rate": 8.516047235492292e-07, "loss": 0.0318, "step": 184170 }, { "epoch": 0.1909, "grad_norm": 0.03062056377530098, "learning_rate": 8.505353340075906e-07, "loss": 0.0312, "step": 184180 }, { "epoch": 0.19095, "grad_norm": 0.028339235112071037, "learning_rate": 8.49466604703239e-07, "loss": 0.0332, "step": 184190 }, { "epoch": 0.191, "grad_norm": 0.026632152497768402, "learning_rate": 8.483985356653984e-07, "loss": 0.0318, "step": 184200 }, { "epoch": 0.19105, "grad_norm": 0.027862658724188805, "learning_rate": 8.473311269232703e-07, "loss": 0.0326, "step": 184210 }, { "epoch": 0.1911, "grad_norm": 0.03462366759777069, "learning_rate": 8.462643785060342e-07, "loss": 0.0334, "step": 184220 }, { "epoch": 0.19115, "grad_norm": 0.03214826062321663, "learning_rate": 8.451982904428529e-07, "loss": 0.035, "step": 184230 }, { "epoch": 0.1912, "grad_norm": 0.030515849590301514, "learning_rate": 8.441328627628808e-07, "loss": 0.0318, "step": 184240 }, { "epoch": 0.19125, "grad_norm": 0.033372119069099426, "learning_rate": 8.430680954952364e-07, "loss": 0.0323, "step": 184250 }, { "epoch": 0.1913, "grad_norm": 0.03293319046497345, "learning_rate": 8.420039886690434e-07, "loss": 0.0334, "step": 184260 }, { "epoch": 0.19135, "grad_norm": 0.03229885548353195, "learning_rate": 8.409405423133759e-07, "loss": 0.0319, "step": 184270 }, { "epoch": 0.1914, "grad_norm": 0.029228324070572853, "learning_rate": 8.398777564573246e-07, "loss": 0.0319, "step": 184280 }, { "epoch": 0.19145, "grad_norm": 0.03142399340867996, "learning_rate": 8.388156311299328e-07, "loss": 0.0332, "step": 184290 }, { "epoch": 0.1915, "grad_norm": 0.0325978584587574, "learning_rate": 8.377541663602495e-07, "loss": 0.0334, "step": 184300 }, { "epoch": 0.19155, "grad_norm": 0.03429727628827095, "learning_rate": 8.366933621772905e-07, "loss": 0.0318, "step": 184310 }, { "epoch": 0.1916, "grad_norm": 0.03160303458571434, "learning_rate": 8.356332186100519e-07, "loss": 0.0326, "step": 184320 }, { "epoch": 0.19165, "grad_norm": 0.028069067746400833, "learning_rate": 8.345737356875272e-07, "loss": 0.0338, "step": 184330 }, { "epoch": 0.1917, "grad_norm": 0.03231353685259819, "learning_rate": 8.335149134386794e-07, "loss": 0.0335, "step": 184340 }, { "epoch": 0.19175, "grad_norm": 0.029980802908539772, "learning_rate": 8.324567518924492e-07, "loss": 0.032, "step": 184350 }, { "epoch": 0.1918, "grad_norm": 0.02924294024705887, "learning_rate": 8.313992510777773e-07, "loss": 0.0336, "step": 184360 }, { "epoch": 0.19185, "grad_norm": 0.0314185693860054, "learning_rate": 8.303424110235659e-07, "loss": 0.0323, "step": 184370 }, { "epoch": 0.1919, "grad_norm": 0.03167622163891792, "learning_rate": 8.292862317587163e-07, "loss": 0.0335, "step": 184380 }, { "epoch": 0.19195, "grad_norm": 0.02776946686208248, "learning_rate": 8.282307133121003e-07, "loss": 0.0323, "step": 184390 }, { "epoch": 0.192, "grad_norm": 0.028174638748168945, "learning_rate": 8.271758557125752e-07, "loss": 0.0328, "step": 184400 }, { "epoch": 0.19205, "grad_norm": 0.02484823204576969, "learning_rate": 8.261216589889792e-07, "loss": 0.0319, "step": 184410 }, { "epoch": 0.1921, "grad_norm": 0.0313238799571991, "learning_rate": 8.25068123170139e-07, "loss": 0.0316, "step": 184420 }, { "epoch": 0.19215, "grad_norm": 0.0305357463657856, "learning_rate": 8.240152482848513e-07, "loss": 0.0302, "step": 184430 }, { "epoch": 0.1922, "grad_norm": 0.031983766704797745, "learning_rate": 8.229630343619038e-07, "loss": 0.0329, "step": 184440 }, { "epoch": 0.19225, "grad_norm": 0.028229843825101852, "learning_rate": 8.219114814300655e-07, "loss": 0.0309, "step": 184450 }, { "epoch": 0.1923, "grad_norm": 0.029529288411140442, "learning_rate": 8.208605895180826e-07, "loss": 0.0304, "step": 184460 }, { "epoch": 0.19235, "grad_norm": 0.03323786333203316, "learning_rate": 8.198103586546934e-07, "loss": 0.031, "step": 184470 }, { "epoch": 0.1924, "grad_norm": 0.028421130031347275, "learning_rate": 8.187607888685972e-07, "loss": 0.0309, "step": 184480 }, { "epoch": 0.19245, "grad_norm": 0.03185752406716347, "learning_rate": 8.177118801884986e-07, "loss": 0.0311, "step": 184490 }, { "epoch": 0.1925, "grad_norm": 0.030270256102085114, "learning_rate": 8.166636326430749e-07, "loss": 0.0307, "step": 184500 }, { "epoch": 0.19255, "grad_norm": 0.028725991025567055, "learning_rate": 8.156160462609807e-07, "loss": 0.0312, "step": 184510 }, { "epoch": 0.1926, "grad_norm": 0.027295365929603577, "learning_rate": 8.1456912107086e-07, "loss": 0.0321, "step": 184520 }, { "epoch": 0.19265, "grad_norm": 0.030689042061567307, "learning_rate": 8.135228571013287e-07, "loss": 0.0326, "step": 184530 }, { "epoch": 0.1927, "grad_norm": 0.03691000118851662, "learning_rate": 8.124772543809972e-07, "loss": 0.0327, "step": 184540 }, { "epoch": 0.19275, "grad_norm": 0.030952926725149155, "learning_rate": 8.114323129384566e-07, "loss": 0.0327, "step": 184550 }, { "epoch": 0.1928, "grad_norm": 0.029755594208836555, "learning_rate": 8.103880328022618e-07, "loss": 0.0328, "step": 184560 }, { "epoch": 0.19285, "grad_norm": 0.03075193241238594, "learning_rate": 8.09344414000976e-07, "loss": 0.0332, "step": 184570 }, { "epoch": 0.1929, "grad_norm": 0.03310278430581093, "learning_rate": 8.083014565631209e-07, "loss": 0.0324, "step": 184580 }, { "epoch": 0.19295, "grad_norm": 0.030689707025885582, "learning_rate": 8.072591605172208e-07, "loss": 0.033, "step": 184590 }, { "epoch": 0.193, "grad_norm": 0.028939155861735344, "learning_rate": 8.062175258917643e-07, "loss": 0.0335, "step": 184600 }, { "epoch": 0.19305, "grad_norm": 0.030596747994422913, "learning_rate": 8.051765527152283e-07, "loss": 0.032, "step": 184610 }, { "epoch": 0.1931, "grad_norm": 0.027125678956508636, "learning_rate": 8.041362410160819e-07, "loss": 0.0336, "step": 184620 }, { "epoch": 0.19315, "grad_norm": 0.029070226475596428, "learning_rate": 8.030965908227578e-07, "loss": 0.0329, "step": 184630 }, { "epoch": 0.1932, "grad_norm": 0.032745059579610825, "learning_rate": 8.020576021636834e-07, "loss": 0.032, "step": 184640 }, { "epoch": 0.19325, "grad_norm": 0.029334556311368942, "learning_rate": 8.010192750672607e-07, "loss": 0.0326, "step": 184650 }, { "epoch": 0.1933, "grad_norm": 0.033059775829315186, "learning_rate": 7.999816095618812e-07, "loss": 0.0327, "step": 184660 }, { "epoch": 0.19335, "grad_norm": 0.032330144196748734, "learning_rate": 7.989446056759137e-07, "loss": 0.0312, "step": 184670 }, { "epoch": 0.1934, "grad_norm": 0.030641792342066765, "learning_rate": 7.979082634377078e-07, "loss": 0.0319, "step": 184680 }, { "epoch": 0.19345, "grad_norm": 0.028173161670565605, "learning_rate": 7.96872582875599e-07, "loss": 0.0313, "step": 184690 }, { "epoch": 0.1935, "grad_norm": 0.03235101327300072, "learning_rate": 7.958375640178983e-07, "loss": 0.0333, "step": 184700 }, { "epoch": 0.19355, "grad_norm": 0.030581427738070488, "learning_rate": 7.948032068929079e-07, "loss": 0.0317, "step": 184710 }, { "epoch": 0.1936, "grad_norm": 0.0303508210927248, "learning_rate": 7.937695115289051e-07, "loss": 0.032, "step": 184720 }, { "epoch": 0.19365, "grad_norm": 0.032854851335287094, "learning_rate": 7.927364779541479e-07, "loss": 0.0314, "step": 184730 }, { "epoch": 0.1937, "grad_norm": 0.029865741729736328, "learning_rate": 7.917041061968833e-07, "loss": 0.031, "step": 184740 }, { "epoch": 0.19375, "grad_norm": 0.030726918950676918, "learning_rate": 7.906723962853302e-07, "loss": 0.032, "step": 184750 }, { "epoch": 0.1938, "grad_norm": 0.02736191637814045, "learning_rate": 7.896413482477049e-07, "loss": 0.0326, "step": 184760 }, { "epoch": 0.19385, "grad_norm": 0.029535965994000435, "learning_rate": 7.886109621121851e-07, "loss": 0.0316, "step": 184770 }, { "epoch": 0.1939, "grad_norm": 0.028387896716594696, "learning_rate": 7.87581237906948e-07, "loss": 0.0318, "step": 184780 }, { "epoch": 0.19395, "grad_norm": 0.03226856514811516, "learning_rate": 7.865521756601407e-07, "loss": 0.0323, "step": 184790 }, { "epoch": 0.194, "grad_norm": 0.029670629650354385, "learning_rate": 7.855237753999017e-07, "loss": 0.0316, "step": 184800 }, { "epoch": 0.19405, "grad_norm": 0.03363959863781929, "learning_rate": 7.844960371543475e-07, "loss": 0.0333, "step": 184810 }, { "epoch": 0.1941, "grad_norm": 0.028326643630862236, "learning_rate": 7.834689609515722e-07, "loss": 0.0325, "step": 184820 }, { "epoch": 0.19415, "grad_norm": 0.030526431277394295, "learning_rate": 7.824425468196589e-07, "loss": 0.0334, "step": 184830 }, { "epoch": 0.1942, "grad_norm": 0.029290050268173218, "learning_rate": 7.814167947866685e-07, "loss": 0.0326, "step": 184840 }, { "epoch": 0.19425, "grad_norm": 0.03072735294699669, "learning_rate": 7.803917048806453e-07, "loss": 0.0337, "step": 184850 }, { "epoch": 0.1943, "grad_norm": 0.02674674801528454, "learning_rate": 7.793672771296112e-07, "loss": 0.0319, "step": 184860 }, { "epoch": 0.19435, "grad_norm": 0.030022749677300453, "learning_rate": 7.783435115615745e-07, "loss": 0.0335, "step": 184870 }, { "epoch": 0.1944, "grad_norm": 0.030002307146787643, "learning_rate": 7.773204082045321e-07, "loss": 0.032, "step": 184880 }, { "epoch": 0.19445, "grad_norm": 0.02685355767607689, "learning_rate": 7.762979670864479e-07, "loss": 0.0323, "step": 184890 }, { "epoch": 0.1945, "grad_norm": 0.03253389894962311, "learning_rate": 7.752761882352771e-07, "loss": 0.032, "step": 184900 }, { "epoch": 0.19455, "grad_norm": 0.03180722892284393, "learning_rate": 7.742550716789531e-07, "loss": 0.0334, "step": 184910 }, { "epoch": 0.1946, "grad_norm": 0.029725681990385056, "learning_rate": 7.732346174453953e-07, "loss": 0.0314, "step": 184920 }, { "epoch": 0.19465, "grad_norm": 0.027232058346271515, "learning_rate": 7.722148255625006e-07, "loss": 0.0321, "step": 184930 }, { "epoch": 0.1947, "grad_norm": 0.03249523788690567, "learning_rate": 7.711956960581495e-07, "loss": 0.035, "step": 184940 }, { "epoch": 0.19475, "grad_norm": 0.03233838453888893, "learning_rate": 7.701772289602089e-07, "loss": 0.0326, "step": 184950 }, { "epoch": 0.1948, "grad_norm": 0.028998851776123047, "learning_rate": 7.691594242965172e-07, "loss": 0.0326, "step": 184960 }, { "epoch": 0.19485, "grad_norm": 0.031819239258766174, "learning_rate": 7.68142282094908e-07, "loss": 0.033, "step": 184970 }, { "epoch": 0.1949, "grad_norm": 0.028010355308651924, "learning_rate": 7.671258023831812e-07, "loss": 0.0323, "step": 184980 }, { "epoch": 0.19495, "grad_norm": 0.032065752893686295, "learning_rate": 7.661099851891312e-07, "loss": 0.0314, "step": 184990 }, { "epoch": 0.195, "grad_norm": 0.0302036851644516, "learning_rate": 7.650948305405303e-07, "loss": 0.0332, "step": 185000 }, { "epoch": 0.19505, "grad_norm": 0.02926947921514511, "learning_rate": 7.64080338465134e-07, "loss": 0.0325, "step": 185010 }, { "epoch": 0.1951, "grad_norm": 0.03630632534623146, "learning_rate": 7.630665089906758e-07, "loss": 0.0326, "step": 185020 }, { "epoch": 0.19515, "grad_norm": 0.03500113636255264, "learning_rate": 7.620533421448722e-07, "loss": 0.0316, "step": 185030 }, { "epoch": 0.1952, "grad_norm": 0.027939170598983765, "learning_rate": 7.610408379554263e-07, "loss": 0.0326, "step": 185040 }, { "epoch": 0.19525, "grad_norm": 0.027237216010689735, "learning_rate": 7.600289964500184e-07, "loss": 0.0329, "step": 185050 }, { "epoch": 0.1953, "grad_norm": 0.031244980171322823, "learning_rate": 7.590178176563073e-07, "loss": 0.0318, "step": 185060 }, { "epoch": 0.19535, "grad_norm": 0.031116962432861328, "learning_rate": 7.580073016019457e-07, "loss": 0.034, "step": 185070 }, { "epoch": 0.1954, "grad_norm": 0.030840104445815086, "learning_rate": 7.569974483145531e-07, "loss": 0.0329, "step": 185080 }, { "epoch": 0.19545, "grad_norm": 0.029891157522797585, "learning_rate": 7.559882578217464e-07, "loss": 0.0336, "step": 185090 }, { "epoch": 0.1955, "grad_norm": 0.0267768744379282, "learning_rate": 7.549797301511146e-07, "loss": 0.0319, "step": 185100 }, { "epoch": 0.19555, "grad_norm": 0.02854592353105545, "learning_rate": 7.539718653302247e-07, "loss": 0.0325, "step": 185110 }, { "epoch": 0.1956, "grad_norm": 0.030489597469568253, "learning_rate": 7.529646633866349e-07, "loss": 0.033, "step": 185120 }, { "epoch": 0.19565, "grad_norm": 0.028737762942910194, "learning_rate": 7.519581243478846e-07, "loss": 0.0336, "step": 185130 }, { "epoch": 0.1957, "grad_norm": 0.030285654589533806, "learning_rate": 7.509522482414905e-07, "loss": 0.0321, "step": 185140 }, { "epoch": 0.19575, "grad_norm": 0.029154222458600998, "learning_rate": 7.499470350949473e-07, "loss": 0.0334, "step": 185150 }, { "epoch": 0.1958, "grad_norm": 0.032220274209976196, "learning_rate": 7.489424849357441e-07, "loss": 0.031, "step": 185160 }, { "epoch": 0.19585, "grad_norm": 0.027242610231041908, "learning_rate": 7.479385977913422e-07, "loss": 0.0327, "step": 185170 }, { "epoch": 0.1959, "grad_norm": 0.027455078437924385, "learning_rate": 7.469353736891893e-07, "loss": 0.0316, "step": 185180 }, { "epoch": 0.19595, "grad_norm": 0.030220970511436462, "learning_rate": 7.459328126567134e-07, "loss": 0.0336, "step": 185190 }, { "epoch": 0.196, "grad_norm": 0.030868202447891235, "learning_rate": 7.449309147213173e-07, "loss": 0.031, "step": 185200 }, { "epoch": 0.19605, "grad_norm": 0.034092117100954056, "learning_rate": 7.439296799104018e-07, "loss": 0.0329, "step": 185210 }, { "epoch": 0.1961, "grad_norm": 0.030522791668772697, "learning_rate": 7.429291082513362e-07, "loss": 0.0327, "step": 185220 }, { "epoch": 0.19615, "grad_norm": 0.02995004877448082, "learning_rate": 7.419291997714766e-07, "loss": 0.0324, "step": 185230 }, { "epoch": 0.1962, "grad_norm": 0.03135078400373459, "learning_rate": 7.409299544981568e-07, "loss": 0.034, "step": 185240 }, { "epoch": 0.19625, "grad_norm": 0.03217240795493126, "learning_rate": 7.399313724586965e-07, "loss": 0.0343, "step": 185250 }, { "epoch": 0.1963, "grad_norm": 0.03078630194067955, "learning_rate": 7.389334536804044e-07, "loss": 0.0332, "step": 185260 }, { "epoch": 0.19635, "grad_norm": 0.02970891259610653, "learning_rate": 7.379361981905531e-07, "loss": 0.0335, "step": 185270 }, { "epoch": 0.1964, "grad_norm": 0.029535263776779175, "learning_rate": 7.369396060164124e-07, "loss": 0.0353, "step": 185280 }, { "epoch": 0.19645, "grad_norm": 0.03212609142065048, "learning_rate": 7.359436771852274e-07, "loss": 0.0327, "step": 185290 }, { "epoch": 0.1965, "grad_norm": 0.024767275899648666, "learning_rate": 7.349484117242261e-07, "loss": 0.0338, "step": 185300 }, { "epoch": 0.19655, "grad_norm": 0.03457412123680115, "learning_rate": 7.339538096606202e-07, "loss": 0.0321, "step": 185310 }, { "epoch": 0.1966, "grad_norm": 0.024502325803041458, "learning_rate": 7.32959871021599e-07, "loss": 0.0324, "step": 185320 }, { "epoch": 0.19665, "grad_norm": 0.03082755208015442, "learning_rate": 7.319665958343408e-07, "loss": 0.033, "step": 185330 }, { "epoch": 0.1967, "grad_norm": 0.03430657833814621, "learning_rate": 7.309739841259988e-07, "loss": 0.0324, "step": 185340 }, { "epoch": 0.19675, "grad_norm": 0.03185485675930977, "learning_rate": 7.299820359237097e-07, "loss": 0.0316, "step": 185350 }, { "epoch": 0.1968, "grad_norm": 0.028983809053897858, "learning_rate": 7.289907512545935e-07, "loss": 0.0329, "step": 185360 }, { "epoch": 0.19685, "grad_norm": 0.027429502457380295, "learning_rate": 7.280001301457507e-07, "loss": 0.0324, "step": 185370 }, { "epoch": 0.1969, "grad_norm": 0.033588334918022156, "learning_rate": 7.270101726242679e-07, "loss": 0.0308, "step": 185380 }, { "epoch": 0.19695, "grad_norm": 0.028332434594631195, "learning_rate": 7.260208787172068e-07, "loss": 0.0325, "step": 185390 }, { "epoch": 0.197, "grad_norm": 0.02579881250858307, "learning_rate": 7.250322484516181e-07, "loss": 0.0325, "step": 185400 }, { "epoch": 0.19705, "grad_norm": 0.027061356231570244, "learning_rate": 7.240442818545245e-07, "loss": 0.0327, "step": 185410 }, { "epoch": 0.1971, "grad_norm": 0.027370747178792953, "learning_rate": 7.230569789529434e-07, "loss": 0.0323, "step": 185420 }, { "epoch": 0.19715, "grad_norm": 0.02994256466627121, "learning_rate": 7.220703397738615e-07, "loss": 0.032, "step": 185430 }, { "epoch": 0.1972, "grad_norm": 0.02895674854516983, "learning_rate": 7.210843643442572e-07, "loss": 0.0318, "step": 185440 }, { "epoch": 0.19725, "grad_norm": 0.028868872672319412, "learning_rate": 7.200990526910839e-07, "loss": 0.0319, "step": 185450 }, { "epoch": 0.1973, "grad_norm": 0.02675914578139782, "learning_rate": 7.191144048412812e-07, "loss": 0.0324, "step": 185460 }, { "epoch": 0.19735, "grad_norm": 0.030706174671649933, "learning_rate": 7.181304208217721e-07, "loss": 0.0323, "step": 185470 }, { "epoch": 0.1974, "grad_norm": 0.027158884331583977, "learning_rate": 7.171471006594515e-07, "loss": 0.032, "step": 185480 }, { "epoch": 0.19745, "grad_norm": 0.03242984041571617, "learning_rate": 7.161644443812065e-07, "loss": 0.0323, "step": 185490 }, { "epoch": 0.1975, "grad_norm": 0.02678975835442543, "learning_rate": 7.151824520139044e-07, "loss": 0.0299, "step": 185500 }, { "epoch": 0.19755, "grad_norm": 0.031223347410559654, "learning_rate": 7.142011235843904e-07, "loss": 0.0314, "step": 185510 }, { "epoch": 0.1976, "grad_norm": 0.03180424124002457, "learning_rate": 7.13220459119493e-07, "loss": 0.0319, "step": 185520 }, { "epoch": 0.19765, "grad_norm": 0.03262518346309662, "learning_rate": 7.122404586460213e-07, "loss": 0.0316, "step": 185530 }, { "epoch": 0.1977, "grad_norm": 0.03049931675195694, "learning_rate": 7.112611221907761e-07, "loss": 0.0333, "step": 185540 }, { "epoch": 0.19775, "grad_norm": 0.030612872913479805, "learning_rate": 7.10282449780525e-07, "loss": 0.0319, "step": 185550 }, { "epoch": 0.1978, "grad_norm": 0.029911888763308525, "learning_rate": 7.093044414420241e-07, "loss": 0.0315, "step": 185560 }, { "epoch": 0.19785, "grad_norm": 0.03132949769496918, "learning_rate": 7.083270972020189e-07, "loss": 0.0329, "step": 185570 }, { "epoch": 0.1979, "grad_norm": 0.027829304337501526, "learning_rate": 7.073504170872213e-07, "loss": 0.0317, "step": 185580 }, { "epoch": 0.19795, "grad_norm": 0.027748405933380127, "learning_rate": 7.063744011243378e-07, "loss": 0.0318, "step": 185590 }, { "epoch": 0.198, "grad_norm": 0.028777683153748512, "learning_rate": 7.053990493400525e-07, "loss": 0.033, "step": 185600 }, { "epoch": 0.19805, "grad_norm": 0.03167557343840599, "learning_rate": 7.044243617610302e-07, "loss": 0.0332, "step": 185610 }, { "epoch": 0.1981, "grad_norm": 0.03244497627019882, "learning_rate": 7.034503384139163e-07, "loss": 0.0349, "step": 185620 }, { "epoch": 0.19815, "grad_norm": 0.02962498925626278, "learning_rate": 7.024769793253449e-07, "loss": 0.0321, "step": 185630 }, { "epoch": 0.1982, "grad_norm": 0.03409123048186302, "learning_rate": 7.015042845219256e-07, "loss": 0.0331, "step": 185640 }, { "epoch": 0.19825, "grad_norm": 0.032920096069574356, "learning_rate": 7.00532254030245e-07, "loss": 0.0325, "step": 185650 }, { "epoch": 0.1983, "grad_norm": 0.030276518315076828, "learning_rate": 6.995608878768906e-07, "loss": 0.032, "step": 185660 }, { "epoch": 0.19835, "grad_norm": 0.035315632820129395, "learning_rate": 6.985901860884048e-07, "loss": 0.0337, "step": 185670 }, { "epoch": 0.1984, "grad_norm": 0.031692810356616974, "learning_rate": 6.976201486913414e-07, "loss": 0.0335, "step": 185680 }, { "epoch": 0.19845, "grad_norm": 0.030826926231384277, "learning_rate": 6.966507757122099e-07, "loss": 0.0323, "step": 185690 }, { "epoch": 0.1985, "grad_norm": 0.03321309760212898, "learning_rate": 6.956820671775138e-07, "loss": 0.0335, "step": 185700 }, { "epoch": 0.19855, "grad_norm": 0.02920692041516304, "learning_rate": 6.947140231137406e-07, "loss": 0.0335, "step": 185710 }, { "epoch": 0.1986, "grad_norm": 0.029470006003975868, "learning_rate": 6.937466435473577e-07, "loss": 0.0325, "step": 185720 }, { "epoch": 0.19865, "grad_norm": 0.029567958787083626, "learning_rate": 6.927799285048081e-07, "loss": 0.0328, "step": 185730 }, { "epoch": 0.1987, "grad_norm": 0.031060943379998207, "learning_rate": 6.918138780125206e-07, "loss": 0.0337, "step": 185740 }, { "epoch": 0.19875, "grad_norm": 0.03221989423036575, "learning_rate": 6.908484920969099e-07, "loss": 0.0332, "step": 185750 }, { "epoch": 0.1988, "grad_norm": 0.030383775010704994, "learning_rate": 6.898837707843747e-07, "loss": 0.0317, "step": 185760 }, { "epoch": 0.19885, "grad_norm": 0.030970165506005287, "learning_rate": 6.889197141012799e-07, "loss": 0.0331, "step": 185770 }, { "epoch": 0.1989, "grad_norm": 0.0318511463701725, "learning_rate": 6.879563220739877e-07, "loss": 0.0332, "step": 185780 }, { "epoch": 0.19895, "grad_norm": 0.029180170968174934, "learning_rate": 6.869935947288353e-07, "loss": 0.0323, "step": 185790 }, { "epoch": 0.199, "grad_norm": 0.03175966814160347, "learning_rate": 6.860315320921462e-07, "loss": 0.0333, "step": 185800 }, { "epoch": 0.19905, "grad_norm": 0.027447475120425224, "learning_rate": 6.850701341902188e-07, "loss": 0.0332, "step": 185810 }, { "epoch": 0.1991, "grad_norm": 0.026209700852632523, "learning_rate": 6.841094010493376e-07, "loss": 0.0323, "step": 185820 }, { "epoch": 0.19915, "grad_norm": 0.03160052374005318, "learning_rate": 6.831493326957733e-07, "loss": 0.0327, "step": 185830 }, { "epoch": 0.1992, "grad_norm": 0.03303055837750435, "learning_rate": 6.821899291557715e-07, "loss": 0.0335, "step": 185840 }, { "epoch": 0.19925, "grad_norm": 0.03005937859416008, "learning_rate": 6.812311904555613e-07, "loss": 0.0329, "step": 185850 }, { "epoch": 0.1993, "grad_norm": 0.02981853485107422, "learning_rate": 6.802731166213495e-07, "loss": 0.0345, "step": 185860 }, { "epoch": 0.19935, "grad_norm": 0.027913715690374374, "learning_rate": 6.793157076793399e-07, "loss": 0.0331, "step": 185870 }, { "epoch": 0.1994, "grad_norm": 0.03380822017788887, "learning_rate": 6.783589636556981e-07, "loss": 0.0325, "step": 185880 }, { "epoch": 0.19945, "grad_norm": 0.032246727496385574, "learning_rate": 6.774028845765862e-07, "loss": 0.0335, "step": 185890 }, { "epoch": 0.1995, "grad_norm": 0.029604503884911537, "learning_rate": 6.764474704681417e-07, "loss": 0.0323, "step": 185900 }, { "epoch": 0.19955, "grad_norm": 0.032075826078653336, "learning_rate": 6.754927213564855e-07, "loss": 0.0326, "step": 185910 }, { "epoch": 0.1996, "grad_norm": 0.024991866201162338, "learning_rate": 6.745386372677215e-07, "loss": 0.0314, "step": 185920 }, { "epoch": 0.19965, "grad_norm": 0.02830740064382553, "learning_rate": 6.735852182279318e-07, "loss": 0.0318, "step": 185930 }, { "epoch": 0.1997, "grad_norm": 0.027261871844530106, "learning_rate": 6.726324642631814e-07, "loss": 0.0331, "step": 185940 }, { "epoch": 0.19975, "grad_norm": 0.03271022439002991, "learning_rate": 6.716803753995221e-07, "loss": 0.035, "step": 185950 }, { "epoch": 0.1998, "grad_norm": 0.033689193427562714, "learning_rate": 6.707289516629772e-07, "loss": 0.0323, "step": 185960 }, { "epoch": 0.19985, "grad_norm": 0.02958158403635025, "learning_rate": 6.697781930795705e-07, "loss": 0.0315, "step": 185970 }, { "epoch": 0.1999, "grad_norm": 0.02819625288248062, "learning_rate": 6.688280996752811e-07, "loss": 0.0318, "step": 185980 }, { "epoch": 0.19995, "grad_norm": 0.027729539200663567, "learning_rate": 6.678786714760937e-07, "loss": 0.0311, "step": 185990 }, { "epoch": 0.2, "grad_norm": 0.02650776319205761, "learning_rate": 6.6692990850796e-07, "loss": 0.0319, "step": 186000 }, { "epoch": 0.20005, "grad_norm": 0.028526129201054573, "learning_rate": 6.65981810796823e-07, "loss": 0.0311, "step": 186010 }, { "epoch": 0.2001, "grad_norm": 0.033663127571344376, "learning_rate": 6.650343783686036e-07, "loss": 0.032, "step": 186020 }, { "epoch": 0.20015, "grad_norm": 0.030074715614318848, "learning_rate": 6.640876112491978e-07, "loss": 0.0327, "step": 186030 }, { "epoch": 0.2002, "grad_norm": 0.02809826284646988, "learning_rate": 6.63141509464496e-07, "loss": 0.0329, "step": 186040 }, { "epoch": 0.20025, "grad_norm": 0.03013697639107704, "learning_rate": 6.621960730403637e-07, "loss": 0.0321, "step": 186050 }, { "epoch": 0.2003, "grad_norm": 0.029662052169442177, "learning_rate": 6.612513020026467e-07, "loss": 0.0333, "step": 186060 }, { "epoch": 0.20035, "grad_norm": 0.034348465502262115, "learning_rate": 6.603071963771717e-07, "loss": 0.0325, "step": 186070 }, { "epoch": 0.2004, "grad_norm": 0.025963271036744118, "learning_rate": 6.59363756189757e-07, "loss": 0.0323, "step": 186080 }, { "epoch": 0.20045, "grad_norm": 0.03223303705453873, "learning_rate": 6.58420981466193e-07, "loss": 0.0324, "step": 186090 }, { "epoch": 0.2005, "grad_norm": 0.02908337488770485, "learning_rate": 6.574788722322561e-07, "loss": 0.032, "step": 186100 }, { "epoch": 0.20055, "grad_norm": 0.029895760118961334, "learning_rate": 6.56537428513701e-07, "loss": 0.0347, "step": 186110 }, { "epoch": 0.2006, "grad_norm": 0.02662333846092224, "learning_rate": 6.555966503362626e-07, "loss": 0.0319, "step": 186120 }, { "epoch": 0.20065, "grad_norm": 0.033412255346775055, "learning_rate": 6.546565377256731e-07, "loss": 0.033, "step": 186130 }, { "epoch": 0.2007, "grad_norm": 0.028197573497891426, "learning_rate": 6.537170907076229e-07, "loss": 0.033, "step": 186140 }, { "epoch": 0.20075, "grad_norm": 0.03171047568321228, "learning_rate": 6.527783093078027e-07, "loss": 0.0333, "step": 186150 }, { "epoch": 0.2008, "grad_norm": 0.028691383078694344, "learning_rate": 6.518401935518753e-07, "loss": 0.0353, "step": 186160 }, { "epoch": 0.20085, "grad_norm": 0.03228580579161644, "learning_rate": 6.509027434654896e-07, "loss": 0.0325, "step": 186170 }, { "epoch": 0.2009, "grad_norm": 0.02812507189810276, "learning_rate": 6.499659590742807e-07, "loss": 0.0331, "step": 186180 }, { "epoch": 0.20095, "grad_norm": 0.034292567521333694, "learning_rate": 6.490298404038503e-07, "loss": 0.0319, "step": 186190 }, { "epoch": 0.201, "grad_norm": 0.03362613543868065, "learning_rate": 6.480943874797946e-07, "loss": 0.0322, "step": 186200 }, { "epoch": 0.20105, "grad_norm": 0.029850492253899574, "learning_rate": 6.471596003276903e-07, "loss": 0.0324, "step": 186210 }, { "epoch": 0.2011, "grad_norm": 0.032299816608428955, "learning_rate": 6.462254789730976e-07, "loss": 0.0331, "step": 186220 }, { "epoch": 0.20115, "grad_norm": 0.031973760575056076, "learning_rate": 6.452920234415488e-07, "loss": 0.0325, "step": 186230 }, { "epoch": 0.2012, "grad_norm": 0.029282039031386375, "learning_rate": 6.443592337585624e-07, "loss": 0.0327, "step": 186240 }, { "epoch": 0.20125, "grad_norm": 0.025493284687399864, "learning_rate": 6.434271099496486e-07, "loss": 0.0327, "step": 186250 }, { "epoch": 0.2013, "grad_norm": 0.03137873485684395, "learning_rate": 6.424956520402869e-07, "loss": 0.0334, "step": 186260 }, { "epoch": 0.20135, "grad_norm": 0.030143195763230324, "learning_rate": 6.415648600559432e-07, "loss": 0.034, "step": 186270 }, { "epoch": 0.2014, "grad_norm": 0.02951633930206299, "learning_rate": 6.406347340220664e-07, "loss": 0.0336, "step": 186280 }, { "epoch": 0.20145, "grad_norm": 0.030829209834337234, "learning_rate": 6.397052739640808e-07, "loss": 0.0321, "step": 186290 }, { "epoch": 0.2015, "grad_norm": 0.03237324580550194, "learning_rate": 6.387764799074047e-07, "loss": 0.0318, "step": 186300 }, { "epoch": 0.20155, "grad_norm": 0.028324754908680916, "learning_rate": 6.378483518774264e-07, "loss": 0.0327, "step": 186310 }, { "epoch": 0.2016, "grad_norm": 0.03085837885737419, "learning_rate": 6.369208898995199e-07, "loss": 0.0319, "step": 186320 }, { "epoch": 0.20165, "grad_norm": 0.028847992420196533, "learning_rate": 6.359940939990484e-07, "loss": 0.0338, "step": 186330 }, { "epoch": 0.2017, "grad_norm": 0.030875183641910553, "learning_rate": 6.350679642013413e-07, "loss": 0.0319, "step": 186340 }, { "epoch": 0.20175, "grad_norm": 0.03190063312649727, "learning_rate": 6.341425005317259e-07, "loss": 0.0327, "step": 186350 }, { "epoch": 0.2018, "grad_norm": 0.02719941921532154, "learning_rate": 6.332177030154957e-07, "loss": 0.0323, "step": 186360 }, { "epoch": 0.20185, "grad_norm": 0.031786005944013596, "learning_rate": 6.322935716779416e-07, "loss": 0.0322, "step": 186370 }, { "epoch": 0.2019, "grad_norm": 0.027638213708996773, "learning_rate": 6.313701065443268e-07, "loss": 0.032, "step": 186380 }, { "epoch": 0.20195, "grad_norm": 0.032468460500240326, "learning_rate": 6.304473076399004e-07, "loss": 0.0325, "step": 186390 }, { "epoch": 0.202, "grad_norm": 0.028046119958162308, "learning_rate": 6.295251749898868e-07, "loss": 0.0319, "step": 186400 }, { "epoch": 0.20205, "grad_norm": 0.032322533428668976, "learning_rate": 6.28603708619499e-07, "loss": 0.0313, "step": 186410 }, { "epoch": 0.2021, "grad_norm": 0.028440384194254875, "learning_rate": 6.276829085539337e-07, "loss": 0.033, "step": 186420 }, { "epoch": 0.20215, "grad_norm": 0.031794071197509766, "learning_rate": 6.267627748183597e-07, "loss": 0.0314, "step": 186430 }, { "epoch": 0.2022, "grad_norm": 0.039318595081567764, "learning_rate": 6.258433074379344e-07, "loss": 0.0333, "step": 186440 }, { "epoch": 0.20225, "grad_norm": 0.028141120448708534, "learning_rate": 6.249245064377934e-07, "loss": 0.0326, "step": 186450 }, { "epoch": 0.2023, "grad_norm": 0.03346704691648483, "learning_rate": 6.240063718430611e-07, "loss": 0.0329, "step": 186460 }, { "epoch": 0.20235, "grad_norm": 0.02704657055437565, "learning_rate": 6.230889036788395e-07, "loss": 0.0317, "step": 186470 }, { "epoch": 0.2024, "grad_norm": 0.025984736159443855, "learning_rate": 6.221721019702059e-07, "loss": 0.0333, "step": 186480 }, { "epoch": 0.20245, "grad_norm": 0.03137907013297081, "learning_rate": 6.212559667422291e-07, "loss": 0.0308, "step": 186490 }, { "epoch": 0.2025, "grad_norm": 0.029576167464256287, "learning_rate": 6.203404980199556e-07, "loss": 0.0321, "step": 186500 }, { "epoch": 0.20255, "grad_norm": 0.029019024223089218, "learning_rate": 6.194256958284156e-07, "loss": 0.0322, "step": 186510 }, { "epoch": 0.2026, "grad_norm": 0.030817793682217598, "learning_rate": 6.185115601926167e-07, "loss": 0.0323, "step": 186520 }, { "epoch": 0.20265, "grad_norm": 0.031037267297506332, "learning_rate": 6.175980911375528e-07, "loss": 0.0321, "step": 186530 }, { "epoch": 0.2027, "grad_norm": 0.02664116770029068, "learning_rate": 6.166852886881958e-07, "loss": 0.0315, "step": 186540 }, { "epoch": 0.20275, "grad_norm": 0.029405994340777397, "learning_rate": 6.157731528695033e-07, "loss": 0.0324, "step": 186550 }, { "epoch": 0.2028, "grad_norm": 0.029494572430849075, "learning_rate": 6.148616837064136e-07, "loss": 0.0328, "step": 186560 }, { "epoch": 0.20285, "grad_norm": 0.031170979142189026, "learning_rate": 6.139508812238404e-07, "loss": 0.0327, "step": 186570 }, { "epoch": 0.2029, "grad_norm": 0.0288227628916502, "learning_rate": 6.130407454466913e-07, "loss": 0.0342, "step": 186580 }, { "epoch": 0.20295, "grad_norm": 0.02931247465312481, "learning_rate": 6.121312763998465e-07, "loss": 0.0325, "step": 186590 }, { "epoch": 0.203, "grad_norm": 0.03437415510416031, "learning_rate": 6.112224741081696e-07, "loss": 0.0336, "step": 186600 }, { "epoch": 0.20305, "grad_norm": 0.02927682362496853, "learning_rate": 6.103143385965099e-07, "loss": 0.0343, "step": 186610 }, { "epoch": 0.2031, "grad_norm": 0.03057851642370224, "learning_rate": 6.094068698896893e-07, "loss": 0.0335, "step": 186620 }, { "epoch": 0.20315, "grad_norm": 0.03229416161775589, "learning_rate": 6.085000680125269e-07, "loss": 0.0343, "step": 186630 }, { "epoch": 0.2032, "grad_norm": 0.031911615282297134, "learning_rate": 6.075939329898056e-07, "loss": 0.0333, "step": 186640 }, { "epoch": 0.20325, "grad_norm": 0.033049266785383224, "learning_rate": 6.066884648463028e-07, "loss": 0.0333, "step": 186650 }, { "epoch": 0.2033, "grad_norm": 0.030126668512821198, "learning_rate": 6.057836636067738e-07, "loss": 0.0348, "step": 186660 }, { "epoch": 0.20335, "grad_norm": 0.027441836893558502, "learning_rate": 6.048795292959541e-07, "loss": 0.0332, "step": 186670 }, { "epoch": 0.2034, "grad_norm": 0.03227389603853226, "learning_rate": 6.039760619385687e-07, "loss": 0.0348, "step": 186680 }, { "epoch": 0.20345, "grad_norm": 0.0290833692997694, "learning_rate": 6.03073261559306e-07, "loss": 0.0332, "step": 186690 }, { "epoch": 0.2035, "grad_norm": 0.03083917498588562, "learning_rate": 6.021711281828546e-07, "loss": 0.0341, "step": 186700 }, { "epoch": 0.20355, "grad_norm": 0.031132884323596954, "learning_rate": 6.012696618338809e-07, "loss": 0.033, "step": 186710 }, { "epoch": 0.2036, "grad_norm": 0.03095533512532711, "learning_rate": 6.003688625370291e-07, "loss": 0.0332, "step": 186720 }, { "epoch": 0.20365, "grad_norm": 0.03213070333003998, "learning_rate": 5.994687303169266e-07, "loss": 0.0332, "step": 186730 }, { "epoch": 0.2037, "grad_norm": 0.027768230065703392, "learning_rate": 5.985692651981816e-07, "loss": 0.0348, "step": 186740 }, { "epoch": 0.20375, "grad_norm": 0.033160459250211716, "learning_rate": 5.976704672053856e-07, "loss": 0.0345, "step": 186750 }, { "epoch": 0.2038, "grad_norm": 0.031500443816185, "learning_rate": 5.967723363631106e-07, "loss": 0.0332, "step": 186760 }, { "epoch": 0.20385, "grad_norm": 0.0280007254332304, "learning_rate": 5.958748726959118e-07, "loss": 0.034, "step": 186770 }, { "epoch": 0.2039, "grad_norm": 0.028425922617316246, "learning_rate": 5.94978076228328e-07, "loss": 0.0328, "step": 186780 }, { "epoch": 0.20395, "grad_norm": 0.030183615162968636, "learning_rate": 5.940819469848702e-07, "loss": 0.0328, "step": 186790 }, { "epoch": 0.204, "grad_norm": 0.033598169684410095, "learning_rate": 5.931864849900493e-07, "loss": 0.0337, "step": 186800 }, { "epoch": 0.20405, "grad_norm": 0.030246658250689507, "learning_rate": 5.922916902683373e-07, "loss": 0.0322, "step": 186810 }, { "epoch": 0.2041, "grad_norm": 0.030375244095921516, "learning_rate": 5.913975628442037e-07, "loss": 0.0339, "step": 186820 }, { "epoch": 0.20415, "grad_norm": 0.031668275594711304, "learning_rate": 5.905041027420871e-07, "loss": 0.0326, "step": 186830 }, { "epoch": 0.2042, "grad_norm": 0.03186219185590744, "learning_rate": 5.896113099864209e-07, "loss": 0.0327, "step": 186840 }, { "epoch": 0.20425, "grad_norm": 0.03156822919845581, "learning_rate": 5.887191846016104e-07, "loss": 0.0332, "step": 186850 }, { "epoch": 0.2043, "grad_norm": 0.02741374634206295, "learning_rate": 5.878277266120419e-07, "loss": 0.0331, "step": 186860 }, { "epoch": 0.20435, "grad_norm": 0.030092798173427582, "learning_rate": 5.869369360420985e-07, "loss": 0.0333, "step": 186870 }, { "epoch": 0.2044, "grad_norm": 0.033063847571611404, "learning_rate": 5.860468129161218e-07, "loss": 0.0344, "step": 186880 }, { "epoch": 0.20445, "grad_norm": 0.03125067055225372, "learning_rate": 5.851573572584618e-07, "loss": 0.0342, "step": 186890 }, { "epoch": 0.2045, "grad_norm": 0.034990180283784866, "learning_rate": 5.842685690934214e-07, "loss": 0.0338, "step": 186900 }, { "epoch": 0.20455, "grad_norm": 0.03362009674310684, "learning_rate": 5.833804484453031e-07, "loss": 0.0321, "step": 186910 }, { "epoch": 0.2046, "grad_norm": 0.028897780925035477, "learning_rate": 5.824929953383962e-07, "loss": 0.0345, "step": 186920 }, { "epoch": 0.20465, "grad_norm": 0.02927793189883232, "learning_rate": 5.81606209796956e-07, "loss": 0.0344, "step": 186930 }, { "epoch": 0.2047, "grad_norm": 0.031953468918800354, "learning_rate": 5.807200918452299e-07, "loss": 0.0347, "step": 186940 }, { "epoch": 0.20475, "grad_norm": 0.034804243594408035, "learning_rate": 5.798346415074373e-07, "loss": 0.0316, "step": 186950 }, { "epoch": 0.2048, "grad_norm": 0.027775434777140617, "learning_rate": 5.789498588077924e-07, "loss": 0.032, "step": 186960 }, { "epoch": 0.20485, "grad_norm": 0.03166566044092178, "learning_rate": 5.780657437704895e-07, "loss": 0.0316, "step": 186970 }, { "epoch": 0.2049, "grad_norm": 0.029515348374843597, "learning_rate": 5.771822964196899e-07, "loss": 0.0319, "step": 186980 }, { "epoch": 0.20495, "grad_norm": 0.03157178685069084, "learning_rate": 5.762995167795521e-07, "loss": 0.0332, "step": 186990 }, { "epoch": 0.205, "grad_norm": 0.033914707601070404, "learning_rate": 5.754174048742094e-07, "loss": 0.0321, "step": 187000 }, { "epoch": 0.20505, "grad_norm": 0.025544434785842896, "learning_rate": 5.745359607277789e-07, "loss": 0.0329, "step": 187010 }, { "epoch": 0.2051, "grad_norm": 0.028234345838427544, "learning_rate": 5.736551843643606e-07, "loss": 0.0334, "step": 187020 }, { "epoch": 0.20515, "grad_norm": 0.02860359102487564, "learning_rate": 5.727750758080324e-07, "loss": 0.0319, "step": 187030 }, { "epoch": 0.2052, "grad_norm": 0.03399662300944328, "learning_rate": 5.718956350828558e-07, "loss": 0.0335, "step": 187040 }, { "epoch": 0.20525, "grad_norm": 0.02993389591574669, "learning_rate": 5.710168622128781e-07, "loss": 0.0333, "step": 187050 }, { "epoch": 0.2053, "grad_norm": 0.028217710554599762, "learning_rate": 5.70138757222119e-07, "loss": 0.0323, "step": 187060 }, { "epoch": 0.20535, "grad_norm": 0.026970872655510902, "learning_rate": 5.692613201345869e-07, "loss": 0.0331, "step": 187070 }, { "epoch": 0.2054, "grad_norm": 0.02764004096388817, "learning_rate": 5.683845509742769e-07, "loss": 0.0325, "step": 187080 }, { "epoch": 0.20545, "grad_norm": 0.03004232421517372, "learning_rate": 5.675084497651501e-07, "loss": 0.0325, "step": 187090 }, { "epoch": 0.2055, "grad_norm": 0.027504999190568924, "learning_rate": 5.666330165311651e-07, "loss": 0.0329, "step": 187100 }, { "epoch": 0.20555, "grad_norm": 0.02881404012441635, "learning_rate": 5.657582512962556e-07, "loss": 0.032, "step": 187110 }, { "epoch": 0.2056, "grad_norm": 0.030582886189222336, "learning_rate": 5.64884154084333e-07, "loss": 0.0329, "step": 187120 }, { "epoch": 0.20565, "grad_norm": 0.02659016102552414, "learning_rate": 5.640107249193005e-07, "loss": 0.0341, "step": 187130 }, { "epoch": 0.2057, "grad_norm": 0.027498317882418633, "learning_rate": 5.631379638250362e-07, "loss": 0.0336, "step": 187140 }, { "epoch": 0.20575, "grad_norm": 0.026438282802700996, "learning_rate": 5.622658708253959e-07, "loss": 0.0344, "step": 187150 }, { "epoch": 0.2058, "grad_norm": 0.030266601592302322, "learning_rate": 5.613944459442272e-07, "loss": 0.0339, "step": 187160 }, { "epoch": 0.20585, "grad_norm": 0.032785773277282715, "learning_rate": 5.6052368920535e-07, "loss": 0.0344, "step": 187170 }, { "epoch": 0.2059, "grad_norm": 0.027709029614925385, "learning_rate": 5.596536006325814e-07, "loss": 0.0322, "step": 187180 }, { "epoch": 0.20595, "grad_norm": 0.03300001472234726, "learning_rate": 5.58784180249694e-07, "loss": 0.0352, "step": 187190 }, { "epoch": 0.206, "grad_norm": 0.03350808471441269, "learning_rate": 5.579154280804688e-07, "loss": 0.0352, "step": 187200 }, { "epoch": 0.20605, "grad_norm": 0.03382578864693642, "learning_rate": 5.570473441486507e-07, "loss": 0.0345, "step": 187210 }, { "epoch": 0.2061, "grad_norm": 0.033224329352378845, "learning_rate": 5.56179928477979e-07, "loss": 0.034, "step": 187220 }, { "epoch": 0.20615, "grad_norm": 0.03182445093989372, "learning_rate": 5.553131810921624e-07, "loss": 0.0338, "step": 187230 }, { "epoch": 0.2062, "grad_norm": 0.0331844724714756, "learning_rate": 5.544471020148989e-07, "loss": 0.0379, "step": 187240 }, { "epoch": 0.20625, "grad_norm": 0.029393313452601433, "learning_rate": 5.535816912698722e-07, "loss": 0.0342, "step": 187250 }, { "epoch": 0.2063, "grad_norm": 0.026721693575382233, "learning_rate": 5.527169488807354e-07, "loss": 0.037, "step": 187260 }, { "epoch": 0.20635, "grad_norm": 0.0304440725594759, "learning_rate": 5.518528748711338e-07, "loss": 0.034, "step": 187270 }, { "epoch": 0.2064, "grad_norm": 0.02855251356959343, "learning_rate": 5.509894692646872e-07, "loss": 0.0355, "step": 187280 }, { "epoch": 0.20645, "grad_norm": 0.03019746206700802, "learning_rate": 5.501267320850018e-07, "loss": 0.0334, "step": 187290 }, { "epoch": 0.2065, "grad_norm": 0.031135357916355133, "learning_rate": 5.492646633556698e-07, "loss": 0.0351, "step": 187300 }, { "epoch": 0.20655, "grad_norm": 0.02638038992881775, "learning_rate": 5.484032631002583e-07, "loss": 0.0331, "step": 187310 }, { "epoch": 0.2066, "grad_norm": 0.029533961787819862, "learning_rate": 5.475425313423127e-07, "loss": 0.0341, "step": 187320 }, { "epoch": 0.20665, "grad_norm": 0.03587843477725983, "learning_rate": 5.466824681053667e-07, "loss": 0.0336, "step": 187330 }, { "epoch": 0.2067, "grad_norm": 0.02860553003847599, "learning_rate": 5.458230734129378e-07, "loss": 0.0344, "step": 187340 }, { "epoch": 0.20675, "grad_norm": 0.03146568313241005, "learning_rate": 5.44964347288518e-07, "loss": 0.0326, "step": 187350 }, { "epoch": 0.2068, "grad_norm": 0.031543321907520294, "learning_rate": 5.44106289755586e-07, "loss": 0.033, "step": 187360 }, { "epoch": 0.20685, "grad_norm": 0.03184448182582855, "learning_rate": 5.432489008376007e-07, "loss": 0.0328, "step": 187370 }, { "epoch": 0.2069, "grad_norm": 0.03035517781972885, "learning_rate": 5.42392180557999e-07, "loss": 0.0325, "step": 187380 }, { "epoch": 0.20695, "grad_norm": 0.027728291228413582, "learning_rate": 5.415361289402148e-07, "loss": 0.0345, "step": 187390 }, { "epoch": 0.207, "grad_norm": 0.02883809804916382, "learning_rate": 5.406807460076379e-07, "loss": 0.0323, "step": 187400 }, { "epoch": 0.20705, "grad_norm": 0.031016673892736435, "learning_rate": 5.398260317836578e-07, "loss": 0.0328, "step": 187410 }, { "epoch": 0.2071, "grad_norm": 0.030551131814718246, "learning_rate": 5.389719862916504e-07, "loss": 0.0339, "step": 187420 }, { "epoch": 0.20715, "grad_norm": 0.03497905284166336, "learning_rate": 5.381186095549578e-07, "loss": 0.0346, "step": 187430 }, { "epoch": 0.2072, "grad_norm": 0.028034161776304245, "learning_rate": 5.372659015969145e-07, "loss": 0.0337, "step": 187440 }, { "epoch": 0.20725, "grad_norm": 0.027271872386336327, "learning_rate": 5.364138624408266e-07, "loss": 0.0333, "step": 187450 }, { "epoch": 0.2073, "grad_norm": 0.027147216722369194, "learning_rate": 5.35562492109995e-07, "loss": 0.0345, "step": 187460 }, { "epoch": 0.20735, "grad_norm": 0.030328378081321716, "learning_rate": 5.347117906276955e-07, "loss": 0.0342, "step": 187470 }, { "epoch": 0.2074, "grad_norm": 0.03240053728222847, "learning_rate": 5.338617580171817e-07, "loss": 0.0329, "step": 187480 }, { "epoch": 0.20745, "grad_norm": 0.0268976092338562, "learning_rate": 5.33012394301699e-07, "loss": 0.0357, "step": 187490 }, { "epoch": 0.2075, "grad_norm": 0.032217223197221756, "learning_rate": 5.321636995044649e-07, "loss": 0.0351, "step": 187500 }, { "epoch": 0.20755, "grad_norm": 0.03143179416656494, "learning_rate": 5.313156736486829e-07, "loss": 0.034, "step": 187510 }, { "epoch": 0.2076, "grad_norm": 0.03546081483364105, "learning_rate": 5.304683167575374e-07, "loss": 0.0321, "step": 187520 }, { "epoch": 0.20765, "grad_norm": 0.029902616515755653, "learning_rate": 5.296216288541933e-07, "loss": 0.0338, "step": 187530 }, { "epoch": 0.2077, "grad_norm": 0.03386787325143814, "learning_rate": 5.287756099618041e-07, "loss": 0.0337, "step": 187540 }, { "epoch": 0.20775, "grad_norm": 0.031185338273644447, "learning_rate": 5.279302601034958e-07, "loss": 0.035, "step": 187550 }, { "epoch": 0.2078, "grad_norm": 0.03265991061925888, "learning_rate": 5.270855793023805e-07, "loss": 0.0344, "step": 187560 }, { "epoch": 0.20785, "grad_norm": 0.031603556126356125, "learning_rate": 5.262415675815507e-07, "loss": 0.0356, "step": 187570 }, { "epoch": 0.2079, "grad_norm": 0.02581261657178402, "learning_rate": 5.253982249640826e-07, "loss": 0.0332, "step": 187580 }, { "epoch": 0.20795, "grad_norm": 0.028013408184051514, "learning_rate": 5.245555514730299e-07, "loss": 0.0324, "step": 187590 }, { "epoch": 0.208, "grad_norm": 0.03117271140217781, "learning_rate": 5.237135471314352e-07, "loss": 0.0327, "step": 187600 }, { "epoch": 0.20805, "grad_norm": 0.02825392223894596, "learning_rate": 5.228722119623192e-07, "loss": 0.0329, "step": 187610 }, { "epoch": 0.2081, "grad_norm": 0.02971552312374115, "learning_rate": 5.220315459886771e-07, "loss": 0.0322, "step": 187620 }, { "epoch": 0.20815, "grad_norm": 0.02579326182603836, "learning_rate": 5.21191549233499e-07, "loss": 0.032, "step": 187630 }, { "epoch": 0.2082, "grad_norm": 0.030249234288930893, "learning_rate": 5.203522217197499e-07, "loss": 0.0337, "step": 187640 }, { "epoch": 0.20825, "grad_norm": 0.030575744807720184, "learning_rate": 5.195135634703724e-07, "loss": 0.0335, "step": 187650 }, { "epoch": 0.2083, "grad_norm": 0.027355728670954704, "learning_rate": 5.186755745082955e-07, "loss": 0.0324, "step": 187660 }, { "epoch": 0.20835, "grad_norm": 0.027793534100055695, "learning_rate": 5.178382548564287e-07, "loss": 0.0341, "step": 187670 }, { "epoch": 0.2084, "grad_norm": 0.028333058580756187, "learning_rate": 5.17001604537673e-07, "loss": 0.0332, "step": 187680 }, { "epoch": 0.20845, "grad_norm": 0.03142617270350456, "learning_rate": 5.161656235748935e-07, "loss": 0.0321, "step": 187690 }, { "epoch": 0.2085, "grad_norm": 0.029200812801718712, "learning_rate": 5.153303119909469e-07, "loss": 0.0327, "step": 187700 }, { "epoch": 0.20855, "grad_norm": 0.02589821256697178, "learning_rate": 5.144956698086706e-07, "loss": 0.0313, "step": 187710 }, { "epoch": 0.2086, "grad_norm": 0.026966974139213562, "learning_rate": 5.136616970508851e-07, "loss": 0.0324, "step": 187720 }, { "epoch": 0.20865, "grad_norm": 0.02764982171356678, "learning_rate": 5.128283937403888e-07, "loss": 0.0314, "step": 187730 }, { "epoch": 0.2087, "grad_norm": 0.02868589200079441, "learning_rate": 5.119957598999636e-07, "loss": 0.0316, "step": 187740 }, { "epoch": 0.20875, "grad_norm": 0.02683471143245697, "learning_rate": 5.111637955523773e-07, "loss": 0.0316, "step": 187750 }, { "epoch": 0.2088, "grad_norm": 0.028452489525079727, "learning_rate": 5.10332500720373e-07, "loss": 0.0327, "step": 187760 }, { "epoch": 0.20885, "grad_norm": 0.02829771302640438, "learning_rate": 5.095018754266767e-07, "loss": 0.0317, "step": 187770 }, { "epoch": 0.2089, "grad_norm": 0.02697194740176201, "learning_rate": 5.086719196939954e-07, "loss": 0.0317, "step": 187780 }, { "epoch": 0.20895, "grad_norm": 0.02591918595135212, "learning_rate": 5.078426335450248e-07, "loss": 0.0313, "step": 187790 }, { "epoch": 0.209, "grad_norm": 0.026268985122442245, "learning_rate": 5.070140170024384e-07, "loss": 0.0326, "step": 187800 }, { "epoch": 0.20905, "grad_norm": 0.026967208832502365, "learning_rate": 5.061860700888849e-07, "loss": 0.0365, "step": 187810 }, { "epoch": 0.2091, "grad_norm": 0.02764206752181053, "learning_rate": 5.053587928270014e-07, "loss": 0.0323, "step": 187820 }, { "epoch": 0.20915, "grad_norm": 0.030433472245931625, "learning_rate": 5.045321852394064e-07, "loss": 0.0324, "step": 187830 }, { "epoch": 0.2092, "grad_norm": 0.031036920845508575, "learning_rate": 5.037062473487009e-07, "loss": 0.0336, "step": 187840 }, { "epoch": 0.20925, "grad_norm": 0.03053821064531803, "learning_rate": 5.028809791774641e-07, "loss": 0.0335, "step": 187850 }, { "epoch": 0.2093, "grad_norm": 0.036653995513916016, "learning_rate": 5.020563807482559e-07, "loss": 0.0335, "step": 187860 }, { "epoch": 0.20935, "grad_norm": 0.030879056081175804, "learning_rate": 5.012324520836248e-07, "loss": 0.0331, "step": 187870 }, { "epoch": 0.2094, "grad_norm": 0.03588445857167244, "learning_rate": 5.004091932060917e-07, "loss": 0.0333, "step": 187880 }, { "epoch": 0.20945, "grad_norm": 0.03237863630056381, "learning_rate": 4.995866041381719e-07, "loss": 0.0332, "step": 187890 }, { "epoch": 0.2095, "grad_norm": 0.032769039273262024, "learning_rate": 4.987646849023447e-07, "loss": 0.0343, "step": 187900 }, { "epoch": 0.20955, "grad_norm": 0.029265888035297394, "learning_rate": 4.979434355210866e-07, "loss": 0.035, "step": 187910 }, { "epoch": 0.2096, "grad_norm": 0.031995292752981186, "learning_rate": 4.971228560168545e-07, "loss": 0.034, "step": 187920 }, { "epoch": 0.20965, "grad_norm": 0.031782425940036774, "learning_rate": 4.96302946412075e-07, "loss": 0.0334, "step": 187930 }, { "epoch": 0.2097, "grad_norm": 0.02957136556506157, "learning_rate": 4.95483706729169e-07, "loss": 0.0343, "step": 187940 }, { "epoch": 0.20975, "grad_norm": 0.03448570892214775, "learning_rate": 4.946651369905297e-07, "loss": 0.0333, "step": 187950 }, { "epoch": 0.2098, "grad_norm": 0.03404470160603523, "learning_rate": 4.938472372185449e-07, "loss": 0.0347, "step": 187960 }, { "epoch": 0.20985, "grad_norm": 0.030334487557411194, "learning_rate": 4.930300074355659e-07, "loss": 0.0343, "step": 187970 }, { "epoch": 0.2099, "grad_norm": 0.0322144441306591, "learning_rate": 4.922134476639389e-07, "loss": 0.033, "step": 187980 }, { "epoch": 0.20995, "grad_norm": 0.031088093295693398, "learning_rate": 4.913975579259905e-07, "loss": 0.0322, "step": 187990 }, { "epoch": 0.21, "grad_norm": 0.03156421706080437, "learning_rate": 4.90582338244025e-07, "loss": 0.0323, "step": 188000 }, { "epoch": 0.21005, "grad_norm": 0.030572090297937393, "learning_rate": 4.897677886403301e-07, "loss": 0.0331, "step": 188010 }, { "epoch": 0.2101, "grad_norm": 0.03554949909448624, "learning_rate": 4.889539091371797e-07, "loss": 0.0341, "step": 188020 }, { "epoch": 0.21015, "grad_norm": 0.03122507408261299, "learning_rate": 4.881406997568172e-07, "loss": 0.0339, "step": 188030 }, { "epoch": 0.2102, "grad_norm": 0.030517447739839554, "learning_rate": 4.873281605214802e-07, "loss": 0.0317, "step": 188040 }, { "epoch": 0.21025, "grad_norm": 0.026050496846437454, "learning_rate": 4.865162914533816e-07, "loss": 0.0338, "step": 188050 }, { "epoch": 0.2103, "grad_norm": 0.03257044404745102, "learning_rate": 4.857050925747203e-07, "loss": 0.0337, "step": 188060 }, { "epoch": 0.21035, "grad_norm": 0.03048430196940899, "learning_rate": 4.848945639076702e-07, "loss": 0.034, "step": 188070 }, { "epoch": 0.2104, "grad_norm": 0.0283748097717762, "learning_rate": 4.840847054743941e-07, "loss": 0.0335, "step": 188080 }, { "epoch": 0.21045, "grad_norm": 0.030453065410256386, "learning_rate": 4.832755172970299e-07, "loss": 0.0332, "step": 188090 }, { "epoch": 0.2105, "grad_norm": 0.027233123779296875, "learning_rate": 4.824669993977071e-07, "loss": 0.0329, "step": 188100 }, { "epoch": 0.21055, "grad_norm": 0.0295072291046381, "learning_rate": 4.816591517985192e-07, "loss": 0.033, "step": 188110 }, { "epoch": 0.2106, "grad_norm": 0.027236513793468475, "learning_rate": 4.808519745215623e-07, "loss": 0.0332, "step": 188120 }, { "epoch": 0.21065, "grad_norm": 0.02930319309234619, "learning_rate": 4.800454675889021e-07, "loss": 0.0345, "step": 188130 }, { "epoch": 0.2107, "grad_norm": 0.03257892280817032, "learning_rate": 4.79239631022585e-07, "loss": 0.0335, "step": 188140 }, { "epoch": 0.21075, "grad_norm": 0.02791167050600052, "learning_rate": 4.784344648446487e-07, "loss": 0.0355, "step": 188150 }, { "epoch": 0.2108, "grad_norm": 0.0288220401853323, "learning_rate": 4.776299690770952e-07, "loss": 0.0331, "step": 188160 }, { "epoch": 0.21085, "grad_norm": 0.03018355555832386, "learning_rate": 4.7682614374192913e-07, "loss": 0.0347, "step": 188170 }, { "epoch": 0.2109, "grad_norm": 0.030637793242931366, "learning_rate": 4.760229888611245e-07, "loss": 0.0343, "step": 188180 }, { "epoch": 0.21095, "grad_norm": 0.02879856899380684, "learning_rate": 4.75220504456636e-07, "loss": 0.033, "step": 188190 }, { "epoch": 0.211, "grad_norm": 0.030380692332983017, "learning_rate": 4.744186905504072e-07, "loss": 0.0335, "step": 188200 }, { "epoch": 0.21105, "grad_norm": 0.028760310262441635, "learning_rate": 4.736175471643567e-07, "loss": 0.0323, "step": 188210 }, { "epoch": 0.2111, "grad_norm": 0.02885890007019043, "learning_rate": 4.7281707432038915e-07, "loss": 0.0349, "step": 188220 }, { "epoch": 0.21115, "grad_norm": 0.02876354567706585, "learning_rate": 4.72017272040387e-07, "loss": 0.0335, "step": 188230 }, { "epoch": 0.2112, "grad_norm": 0.02850225754082203, "learning_rate": 4.7121814034621623e-07, "loss": 0.0333, "step": 188240 }, { "epoch": 0.21125, "grad_norm": 0.029061682522296906, "learning_rate": 4.7041967925972873e-07, "loss": 0.0323, "step": 188250 }, { "epoch": 0.2113, "grad_norm": 0.028914617374539375, "learning_rate": 4.6962188880275426e-07, "loss": 0.0319, "step": 188260 }, { "epoch": 0.21135, "grad_norm": 0.0294718686491251, "learning_rate": 4.6882476899710037e-07, "loss": 0.0324, "step": 188270 }, { "epoch": 0.2114, "grad_norm": 0.026144810020923615, "learning_rate": 4.68028319864558e-07, "loss": 0.0324, "step": 188280 }, { "epoch": 0.21145, "grad_norm": 0.026472182944417, "learning_rate": 4.6723254142690687e-07, "loss": 0.0316, "step": 188290 }, { "epoch": 0.2115, "grad_norm": 0.029723865911364555, "learning_rate": 4.664374337059019e-07, "loss": 0.0329, "step": 188300 }, { "epoch": 0.21155, "grad_norm": 0.02758902497589588, "learning_rate": 4.6564299672328116e-07, "loss": 0.0326, "step": 188310 }, { "epoch": 0.2116, "grad_norm": 0.03302999958395958, "learning_rate": 4.6484923050076344e-07, "loss": 0.0339, "step": 188320 }, { "epoch": 0.21165, "grad_norm": 0.03390069305896759, "learning_rate": 4.640561350600509e-07, "loss": 0.0315, "step": 188330 }, { "epoch": 0.2117, "grad_norm": 0.029498016461730003, "learning_rate": 4.6326371042282603e-07, "loss": 0.0314, "step": 188340 }, { "epoch": 0.21175, "grad_norm": 0.029174910858273506, "learning_rate": 4.6247195661075214e-07, "loss": 0.0326, "step": 188350 }, { "epoch": 0.2118, "grad_norm": 0.028577126562595367, "learning_rate": 4.616808736454759e-07, "loss": 0.033, "step": 188360 }, { "epoch": 0.21185, "grad_norm": 0.029439343139529228, "learning_rate": 4.60890461548627e-07, "loss": 0.0313, "step": 188370 }, { "epoch": 0.2119, "grad_norm": 0.02873920649290085, "learning_rate": 4.601007203418134e-07, "loss": 0.0319, "step": 188380 }, { "epoch": 0.21195, "grad_norm": 0.02306370809674263, "learning_rate": 4.593116500466288e-07, "loss": 0.0324, "step": 188390 }, { "epoch": 0.212, "grad_norm": 0.027125053107738495, "learning_rate": 4.5852325068464206e-07, "loss": 0.0326, "step": 188400 }, { "epoch": 0.21205, "grad_norm": 0.030318230390548706, "learning_rate": 4.57735522277411e-07, "loss": 0.0342, "step": 188410 }, { "epoch": 0.2121, "grad_norm": 0.02977856434881687, "learning_rate": 4.569484648464711e-07, "loss": 0.0317, "step": 188420 }, { "epoch": 0.21215, "grad_norm": 0.02663765288889408, "learning_rate": 4.561620784133386e-07, "loss": 0.0324, "step": 188430 }, { "epoch": 0.2122, "grad_norm": 0.029476555064320564, "learning_rate": 4.553763629995156e-07, "loss": 0.0333, "step": 188440 }, { "epoch": 0.21225, "grad_norm": 0.031408656388521194, "learning_rate": 4.545913186264794e-07, "loss": 0.0315, "step": 188450 }, { "epoch": 0.2123, "grad_norm": 0.0317244715988636, "learning_rate": 4.538069453156962e-07, "loss": 0.0342, "step": 188460 }, { "epoch": 0.21235, "grad_norm": 0.030181189998984337, "learning_rate": 4.5302324308861275e-07, "loss": 0.0344, "step": 188470 }, { "epoch": 0.2124, "grad_norm": 0.029961835592985153, "learning_rate": 4.5224021196664803e-07, "loss": 0.0331, "step": 188480 }, { "epoch": 0.21245, "grad_norm": 0.027818555012345314, "learning_rate": 4.5145785197121537e-07, "loss": 0.0335, "step": 188490 }, { "epoch": 0.2125, "grad_norm": 0.03190042823553085, "learning_rate": 4.5067616312370055e-07, "loss": 0.0355, "step": 188500 }, { "epoch": 0.21255, "grad_norm": 0.035076189786195755, "learning_rate": 4.4989514544547807e-07, "loss": 0.0343, "step": 188510 }, { "epoch": 0.2126, "grad_norm": 0.03411129489541054, "learning_rate": 4.491147989579003e-07, "loss": 0.0335, "step": 188520 }, { "epoch": 0.21265, "grad_norm": 0.028793184086680412, "learning_rate": 4.483351236823002e-07, "loss": 0.0332, "step": 188530 }, { "epoch": 0.2127, "grad_norm": 0.02898499369621277, "learning_rate": 4.4755611963999414e-07, "loss": 0.0327, "step": 188540 }, { "epoch": 0.21275, "grad_norm": 0.027540000155568123, "learning_rate": 4.467777868522788e-07, "loss": 0.0332, "step": 188550 }, { "epoch": 0.2128, "grad_norm": 0.031743086874485016, "learning_rate": 4.4600012534043723e-07, "loss": 0.0379, "step": 188560 }, { "epoch": 0.21285, "grad_norm": 0.030596960335969925, "learning_rate": 4.452231351257247e-07, "loss": 0.0338, "step": 188570 }, { "epoch": 0.2129, "grad_norm": 0.030939722433686256, "learning_rate": 4.44446816229388e-07, "loss": 0.0333, "step": 188580 }, { "epoch": 0.21295, "grad_norm": 0.028294682502746582, "learning_rate": 4.4367116867264914e-07, "loss": 0.0331, "step": 188590 }, { "epoch": 0.213, "grad_norm": 0.03085498698055744, "learning_rate": 4.4289619247671886e-07, "loss": 0.033, "step": 188600 }, { "epoch": 0.21305, "grad_norm": 0.03062901645898819, "learning_rate": 4.421218876627775e-07, "loss": 0.0327, "step": 188610 }, { "epoch": 0.2131, "grad_norm": 0.03261803835630417, "learning_rate": 4.41348254251997e-07, "loss": 0.0325, "step": 188620 }, { "epoch": 0.21315, "grad_norm": 0.03610049933195114, "learning_rate": 4.4057529226552986e-07, "loss": 0.0346, "step": 188630 }, { "epoch": 0.2132, "grad_norm": 0.026045652106404305, "learning_rate": 4.3980300172450914e-07, "loss": 0.0335, "step": 188640 }, { "epoch": 0.21325, "grad_norm": 0.027834657579660416, "learning_rate": 4.390313826500486e-07, "loss": 0.0329, "step": 188650 }, { "epoch": 0.2133, "grad_norm": 0.03452204167842865, "learning_rate": 4.3826043506323964e-07, "loss": 0.0342, "step": 188660 }, { "epoch": 0.21335, "grad_norm": 0.028179027140140533, "learning_rate": 4.374901589851654e-07, "loss": 0.0342, "step": 188670 }, { "epoch": 0.2134, "grad_norm": 0.03150675445795059, "learning_rate": 4.3672055443688134e-07, "loss": 0.0325, "step": 188680 }, { "epoch": 0.21345, "grad_norm": 0.028220821171998978, "learning_rate": 4.359516214394288e-07, "loss": 0.0325, "step": 188690 }, { "epoch": 0.2135, "grad_norm": 0.029086345806717873, "learning_rate": 4.3518336001382995e-07, "loss": 0.0329, "step": 188700 }, { "epoch": 0.21355, "grad_norm": 0.029618870466947556, "learning_rate": 4.3441577018109025e-07, "loss": 0.0339, "step": 188710 }, { "epoch": 0.2136, "grad_norm": 0.02819829247891903, "learning_rate": 4.3364885196219564e-07, "loss": 0.033, "step": 188720 }, { "epoch": 0.21365, "grad_norm": 0.026339873671531677, "learning_rate": 4.3288260537811267e-07, "loss": 0.0328, "step": 188730 }, { "epoch": 0.2137, "grad_norm": 0.03084726259112358, "learning_rate": 4.321170304497885e-07, "loss": 0.0322, "step": 188740 }, { "epoch": 0.21375, "grad_norm": 0.028051769360899925, "learning_rate": 4.313521271981563e-07, "loss": 0.0325, "step": 188750 }, { "epoch": 0.2138, "grad_norm": 0.03308770805597305, "learning_rate": 4.3058789564412724e-07, "loss": 0.0319, "step": 188760 }, { "epoch": 0.21385, "grad_norm": 0.028255965560674667, "learning_rate": 4.298243358085929e-07, "loss": 0.0322, "step": 188770 }, { "epoch": 0.2139, "grad_norm": 0.030764950439333916, "learning_rate": 4.2906144771243106e-07, "loss": 0.0326, "step": 188780 }, { "epoch": 0.21395, "grad_norm": 0.027962734922766685, "learning_rate": 4.2829923137649996e-07, "loss": 0.0319, "step": 188790 }, { "epoch": 0.214, "grad_norm": 0.03067231923341751, "learning_rate": 4.2753768682163297e-07, "loss": 0.0328, "step": 188800 }, { "epoch": 0.21405, "grad_norm": 0.031174374744296074, "learning_rate": 4.267768140686579e-07, "loss": 0.0323, "step": 188810 }, { "epoch": 0.2141, "grad_norm": 0.02567133493721485, "learning_rate": 4.2601661313837193e-07, "loss": 0.0337, "step": 188820 }, { "epoch": 0.21415, "grad_norm": 0.02617892250418663, "learning_rate": 4.252570840515585e-07, "loss": 0.0328, "step": 188830 }, { "epoch": 0.2142, "grad_norm": 0.02718042954802513, "learning_rate": 4.244982268289843e-07, "loss": 0.0325, "step": 188840 }, { "epoch": 0.21425, "grad_norm": 0.027024347335100174, "learning_rate": 4.2374004149139944e-07, "loss": 0.0331, "step": 188850 }, { "epoch": 0.2143, "grad_norm": 0.023870961740612984, "learning_rate": 4.229825280595262e-07, "loss": 0.0316, "step": 188860 }, { "epoch": 0.21435, "grad_norm": 0.030253371223807335, "learning_rate": 4.222256865540758e-07, "loss": 0.0346, "step": 188870 }, { "epoch": 0.2144, "grad_norm": 0.029407694935798645, "learning_rate": 4.214695169957428e-07, "loss": 0.0311, "step": 188880 }, { "epoch": 0.21445, "grad_norm": 0.029314350336790085, "learning_rate": 4.207140194052023e-07, "loss": 0.0318, "step": 188890 }, { "epoch": 0.2145, "grad_norm": 0.02597719244658947, "learning_rate": 4.1995919380310444e-07, "loss": 0.033, "step": 188900 }, { "epoch": 0.21455, "grad_norm": 0.027614757418632507, "learning_rate": 4.192050402100883e-07, "loss": 0.0322, "step": 188910 }, { "epoch": 0.2146, "grad_norm": 0.027147000655531883, "learning_rate": 4.184515586467708e-07, "loss": 0.0316, "step": 188920 }, { "epoch": 0.21465, "grad_norm": 0.02751532383263111, "learning_rate": 4.1769874913375196e-07, "loss": 0.0319, "step": 188930 }, { "epoch": 0.2147, "grad_norm": 0.024400847032666206, "learning_rate": 4.169466116916182e-07, "loss": 0.0338, "step": 188940 }, { "epoch": 0.21475, "grad_norm": 0.029047008603811264, "learning_rate": 4.1619514634092526e-07, "loss": 0.0316, "step": 188950 }, { "epoch": 0.2148, "grad_norm": 0.02734926901757717, "learning_rate": 4.154443531022206e-07, "loss": 0.0376, "step": 188960 }, { "epoch": 0.21485, "grad_norm": 0.025272736325860023, "learning_rate": 4.1469423199603505e-07, "loss": 0.0319, "step": 188970 }, { "epoch": 0.2149, "grad_norm": 0.02764223702251911, "learning_rate": 4.1394478304287167e-07, "loss": 0.0322, "step": 188980 }, { "epoch": 0.21495, "grad_norm": 0.029302928596735, "learning_rate": 4.1319600626321955e-07, "loss": 0.0319, "step": 188990 }, { "epoch": 0.215, "grad_norm": 0.025961730629205704, "learning_rate": 4.124479016775512e-07, "loss": 0.032, "step": 189000 }, { "epoch": 0.21505, "grad_norm": 0.03143499791622162, "learning_rate": 4.1170046930632255e-07, "loss": 0.0312, "step": 189010 }, { "epoch": 0.2151, "grad_norm": 0.030058082193136215, "learning_rate": 4.1095370916996443e-07, "loss": 0.0323, "step": 189020 }, { "epoch": 0.21515, "grad_norm": 0.026301292702555656, "learning_rate": 4.102076212888939e-07, "loss": 0.0321, "step": 189030 }, { "epoch": 0.2152, "grad_norm": 0.027706298977136612, "learning_rate": 4.0946220568350844e-07, "loss": 0.0327, "step": 189040 }, { "epoch": 0.21525, "grad_norm": 0.02647777646780014, "learning_rate": 4.087174623741918e-07, "loss": 0.032, "step": 189050 }, { "epoch": 0.2153, "grad_norm": 0.03239917755126953, "learning_rate": 4.079733913812972e-07, "loss": 0.0317, "step": 189060 }, { "epoch": 0.21535, "grad_norm": 0.026101669296622276, "learning_rate": 4.0722999272517217e-07, "loss": 0.0314, "step": 189070 }, { "epoch": 0.2154, "grad_norm": 0.0336785614490509, "learning_rate": 4.064872664261421e-07, "loss": 0.0316, "step": 189080 }, { "epoch": 0.21545, "grad_norm": 0.02944769896566868, "learning_rate": 4.057452125045075e-07, "loss": 0.032, "step": 189090 }, { "epoch": 0.2155, "grad_norm": 0.028852440416812897, "learning_rate": 4.0500383098056315e-07, "loss": 0.0309, "step": 189100 }, { "epoch": 0.21555, "grad_norm": 0.026862692087888718, "learning_rate": 4.042631218745707e-07, "loss": 0.0325, "step": 189110 }, { "epoch": 0.2156, "grad_norm": 0.02777714654803276, "learning_rate": 4.035230852067862e-07, "loss": 0.0326, "step": 189120 }, { "epoch": 0.21565, "grad_norm": 0.026612672954797745, "learning_rate": 4.0278372099744054e-07, "loss": 0.0312, "step": 189130 }, { "epoch": 0.2157, "grad_norm": 0.032957401126623154, "learning_rate": 4.020450292667455e-07, "loss": 0.0319, "step": 189140 }, { "epoch": 0.21575, "grad_norm": 0.028477706015110016, "learning_rate": 4.013070100348987e-07, "loss": 0.033, "step": 189150 }, { "epoch": 0.2158, "grad_norm": 0.027616405859589577, "learning_rate": 4.0056966332207844e-07, "loss": 0.0309, "step": 189160 }, { "epoch": 0.21585, "grad_norm": 0.028580589219927788, "learning_rate": 3.9983298914844093e-07, "loss": 0.0317, "step": 189170 }, { "epoch": 0.2159, "grad_norm": 0.03125343471765518, "learning_rate": 3.990969875341283e-07, "loss": 0.0318, "step": 189180 }, { "epoch": 0.21595, "grad_norm": 0.028869450092315674, "learning_rate": 3.983616584992578e-07, "loss": 0.0316, "step": 189190 }, { "epoch": 0.216, "grad_norm": 0.028730357065796852, "learning_rate": 3.9762700206394387e-07, "loss": 0.0341, "step": 189200 }, { "epoch": 0.21605, "grad_norm": 0.03006094880402088, "learning_rate": 3.968930182482594e-07, "loss": 0.0327, "step": 189210 }, { "epoch": 0.2161, "grad_norm": 0.03059045970439911, "learning_rate": 3.9615970707228e-07, "loss": 0.0309, "step": 189220 }, { "epoch": 0.21615, "grad_norm": 0.028501491993665695, "learning_rate": 3.954270685560507e-07, "loss": 0.0327, "step": 189230 }, { "epoch": 0.2162, "grad_norm": 0.027690019458532333, "learning_rate": 3.9469510271960274e-07, "loss": 0.0315, "step": 189240 }, { "epoch": 0.21625, "grad_norm": 0.026815390214323997, "learning_rate": 3.9396380958294233e-07, "loss": 0.0341, "step": 189250 }, { "epoch": 0.2163, "grad_norm": 0.028432907536625862, "learning_rate": 3.932331891660701e-07, "loss": 0.0314, "step": 189260 }, { "epoch": 0.21635, "grad_norm": 0.02925742045044899, "learning_rate": 3.925032414889618e-07, "loss": 0.0336, "step": 189270 }, { "epoch": 0.2164, "grad_norm": 0.02542842924594879, "learning_rate": 3.917739665715653e-07, "loss": 0.036, "step": 189280 }, { "epoch": 0.21645, "grad_norm": 0.02953684702515602, "learning_rate": 3.910453644338258e-07, "loss": 0.0329, "step": 189290 }, { "epoch": 0.2165, "grad_norm": 0.02706819586455822, "learning_rate": 3.903174350956579e-07, "loss": 0.0339, "step": 189300 }, { "epoch": 0.21655, "grad_norm": 0.02872304804623127, "learning_rate": 3.895901785769707e-07, "loss": 0.0324, "step": 189310 }, { "epoch": 0.2166, "grad_norm": 0.030689561739563942, "learning_rate": 3.888635948976399e-07, "loss": 0.0344, "step": 189320 }, { "epoch": 0.21665, "grad_norm": 0.028889385983347893, "learning_rate": 3.8813768407753025e-07, "loss": 0.033, "step": 189330 }, { "epoch": 0.2167, "grad_norm": 0.03080740012228489, "learning_rate": 3.874124461364925e-07, "loss": 0.0323, "step": 189340 }, { "epoch": 0.21675, "grad_norm": 0.028255589306354523, "learning_rate": 3.866878810943525e-07, "loss": 0.0322, "step": 189350 }, { "epoch": 0.2168, "grad_norm": 0.030432872474193573, "learning_rate": 3.8596398897091936e-07, "loss": 0.0334, "step": 189360 }, { "epoch": 0.21685, "grad_norm": 0.027476327493786812, "learning_rate": 3.852407697859828e-07, "loss": 0.0324, "step": 189370 }, { "epoch": 0.2169, "grad_norm": 0.028619499877095222, "learning_rate": 3.8451822355931313e-07, "loss": 0.0333, "step": 189380 }, { "epoch": 0.21695, "grad_norm": 0.02922292985022068, "learning_rate": 3.837963503106723e-07, "loss": 0.0315, "step": 189390 }, { "epoch": 0.217, "grad_norm": 0.03195172920823097, "learning_rate": 3.83075150059789e-07, "loss": 0.0314, "step": 189400 }, { "epoch": 0.21705, "grad_norm": 0.028518592938780785, "learning_rate": 3.8235462282638357e-07, "loss": 0.032, "step": 189410 }, { "epoch": 0.2171, "grad_norm": 0.02795417048037052, "learning_rate": 3.816347686301541e-07, "loss": 0.0319, "step": 189420 }, { "epoch": 0.21715, "grad_norm": 0.02762647718191147, "learning_rate": 3.8091558749078214e-07, "loss": 0.032, "step": 189430 }, { "epoch": 0.2172, "grad_norm": 0.02550506219267845, "learning_rate": 3.801970794279297e-07, "loss": 0.0325, "step": 189440 }, { "epoch": 0.21725, "grad_norm": 0.028216617181897163, "learning_rate": 3.794792444612366e-07, "loss": 0.032, "step": 189450 }, { "epoch": 0.2173, "grad_norm": 0.028356458991765976, "learning_rate": 3.7876208261033443e-07, "loss": 0.0321, "step": 189460 }, { "epoch": 0.21735, "grad_norm": 0.029298197478055954, "learning_rate": 3.7804559389482686e-07, "loss": 0.0334, "step": 189470 }, { "epoch": 0.2174, "grad_norm": 0.02883121185004711, "learning_rate": 3.7732977833430383e-07, "loss": 0.0319, "step": 189480 }, { "epoch": 0.21745, "grad_norm": 0.03034260682761669, "learning_rate": 3.7661463594833027e-07, "loss": 0.0336, "step": 189490 }, { "epoch": 0.2175, "grad_norm": 0.03522493317723274, "learning_rate": 3.759001667564654e-07, "loss": 0.0354, "step": 189500 }, { "epoch": 0.21755, "grad_norm": 0.029949499294161797, "learning_rate": 3.751863707782383e-07, "loss": 0.0342, "step": 189510 }, { "epoch": 0.2176, "grad_norm": 0.031180361285805702, "learning_rate": 3.7447324803316364e-07, "loss": 0.0329, "step": 189520 }, { "epoch": 0.21765, "grad_norm": 0.03215509653091431, "learning_rate": 3.737607985407426e-07, "loss": 0.0336, "step": 189530 }, { "epoch": 0.2177, "grad_norm": 0.027371907606720924, "learning_rate": 3.730490223204458e-07, "loss": 0.0333, "step": 189540 }, { "epoch": 0.21775, "grad_norm": 0.030672768130898476, "learning_rate": 3.723379193917381e-07, "loss": 0.0346, "step": 189550 }, { "epoch": 0.2178, "grad_norm": 0.027337929233908653, "learning_rate": 3.7162748977405957e-07, "loss": 0.0336, "step": 189560 }, { "epoch": 0.21785, "grad_norm": 0.03581344336271286, "learning_rate": 3.709177334868308e-07, "loss": 0.0332, "step": 189570 }, { "epoch": 0.2179, "grad_norm": 0.030147524550557137, "learning_rate": 3.7020865054946117e-07, "loss": 0.0333, "step": 189580 }, { "epoch": 0.21795, "grad_norm": 0.030416414141654968, "learning_rate": 3.695002409813325e-07, "loss": 0.0343, "step": 189590 }, { "epoch": 0.218, "grad_norm": 0.030246717855334282, "learning_rate": 3.6879250480181816e-07, "loss": 0.0336, "step": 189600 }, { "epoch": 0.21805, "grad_norm": 0.028833702206611633, "learning_rate": 3.680854420302582e-07, "loss": 0.0339, "step": 189610 }, { "epoch": 0.2181, "grad_norm": 0.02759072370827198, "learning_rate": 3.673790526859899e-07, "loss": 0.0327, "step": 189620 }, { "epoch": 0.21815, "grad_norm": 0.027316806837916374, "learning_rate": 3.6667333678832294e-07, "loss": 0.0345, "step": 189630 }, { "epoch": 0.2182, "grad_norm": 0.03333732485771179, "learning_rate": 3.6596829435655565e-07, "loss": 0.0339, "step": 189640 }, { "epoch": 0.21825, "grad_norm": 0.029696255922317505, "learning_rate": 3.652639254099616e-07, "loss": 0.0323, "step": 189650 }, { "epoch": 0.2183, "grad_norm": 0.031996987760066986, "learning_rate": 3.645602299677919e-07, "loss": 0.0331, "step": 189660 }, { "epoch": 0.21835, "grad_norm": 0.030974699184298515, "learning_rate": 3.638572080492952e-07, "loss": 0.0323, "step": 189670 }, { "epoch": 0.2184, "grad_norm": 0.02718987688422203, "learning_rate": 3.631548596736839e-07, "loss": 0.0327, "step": 189680 }, { "epoch": 0.21845, "grad_norm": 0.03376190364360809, "learning_rate": 3.624531848601648e-07, "loss": 0.0334, "step": 189690 }, { "epoch": 0.2185, "grad_norm": 0.029311642050743103, "learning_rate": 3.6175218362791976e-07, "loss": 0.0319, "step": 189700 }, { "epoch": 0.21855, "grad_norm": 0.02694571390748024, "learning_rate": 3.6105185599611125e-07, "loss": 0.0305, "step": 189710 }, { "epoch": 0.2186, "grad_norm": 0.03201557695865631, "learning_rate": 3.603522019838906e-07, "loss": 0.0322, "step": 189720 }, { "epoch": 0.21865, "grad_norm": 0.028358617797493935, "learning_rate": 3.596532216103843e-07, "loss": 0.0321, "step": 189730 }, { "epoch": 0.2187, "grad_norm": 0.029142500832676888, "learning_rate": 3.589549148947019e-07, "loss": 0.0307, "step": 189740 }, { "epoch": 0.21875, "grad_norm": 0.02805599942803383, "learning_rate": 3.5825728185593374e-07, "loss": 0.0313, "step": 189750 }, { "epoch": 0.2188, "grad_norm": 0.02762032300233841, "learning_rate": 3.575603225131563e-07, "loss": 0.0319, "step": 189760 }, { "epoch": 0.21885, "grad_norm": 0.02900952659547329, "learning_rate": 3.568640368854209e-07, "loss": 0.0328, "step": 189770 }, { "epoch": 0.2189, "grad_norm": 0.02901539020240307, "learning_rate": 3.561684249917652e-07, "loss": 0.032, "step": 189780 }, { "epoch": 0.21895, "grad_norm": 0.03188694640994072, "learning_rate": 3.554734868512044e-07, "loss": 0.0331, "step": 189790 }, { "epoch": 0.219, "grad_norm": 0.028981653973460197, "learning_rate": 3.547792224827401e-07, "loss": 0.0328, "step": 189800 }, { "epoch": 0.21905, "grad_norm": 0.02720548026263714, "learning_rate": 3.5408563190535704e-07, "loss": 0.0338, "step": 189810 }, { "epoch": 0.2191, "grad_norm": 0.028782818466424942, "learning_rate": 3.5339271513800953e-07, "loss": 0.0324, "step": 189820 }, { "epoch": 0.21915, "grad_norm": 0.034867823123931885, "learning_rate": 3.527004721996463e-07, "loss": 0.0333, "step": 189830 }, { "epoch": 0.2192, "grad_norm": 0.027969233691692352, "learning_rate": 3.5200890310919385e-07, "loss": 0.0328, "step": 189840 }, { "epoch": 0.21925, "grad_norm": 0.026636585593223572, "learning_rate": 3.5131800788555936e-07, "loss": 0.0338, "step": 189850 }, { "epoch": 0.2193, "grad_norm": 0.027630211785435677, "learning_rate": 3.506277865476304e-07, "loss": 0.0325, "step": 189860 }, { "epoch": 0.21935, "grad_norm": 0.026662291958928108, "learning_rate": 3.4993823911427527e-07, "loss": 0.0322, "step": 189870 }, { "epoch": 0.2194, "grad_norm": 0.02801119163632393, "learning_rate": 3.492493656043483e-07, "loss": 0.0348, "step": 189880 }, { "epoch": 0.21945, "grad_norm": 0.029345333576202393, "learning_rate": 3.485611660366844e-07, "loss": 0.0323, "step": 189890 }, { "epoch": 0.2195, "grad_norm": 0.028748812153935432, "learning_rate": 3.478736404300964e-07, "loss": 0.0336, "step": 189900 }, { "epoch": 0.21955, "grad_norm": 0.028934547677636147, "learning_rate": 3.471867888033803e-07, "loss": 0.0308, "step": 189910 }, { "epoch": 0.2196, "grad_norm": 0.026555748656392097, "learning_rate": 3.465006111753155e-07, "loss": 0.0317, "step": 189920 }, { "epoch": 0.21965, "grad_norm": 0.027853377163410187, "learning_rate": 3.458151075646648e-07, "loss": 0.0322, "step": 189930 }, { "epoch": 0.2197, "grad_norm": 0.028755880892276764, "learning_rate": 3.451302779901661e-07, "loss": 0.0345, "step": 189940 }, { "epoch": 0.21975, "grad_norm": 0.025807704776525497, "learning_rate": 3.444461224705431e-07, "loss": 0.0323, "step": 189950 }, { "epoch": 0.2198, "grad_norm": 0.029136665165424347, "learning_rate": 3.437626410245004e-07, "loss": 0.0344, "step": 189960 }, { "epoch": 0.21985, "grad_norm": 0.02834586426615715, "learning_rate": 3.4307983367072304e-07, "loss": 0.0321, "step": 189970 }, { "epoch": 0.2199, "grad_norm": 0.028064792975783348, "learning_rate": 3.423977004278822e-07, "loss": 0.0321, "step": 189980 }, { "epoch": 0.21995, "grad_norm": 0.025715850293636322, "learning_rate": 3.417162413146213e-07, "loss": 0.0322, "step": 189990 }, { "epoch": 0.22, "grad_norm": 0.026166275143623352, "learning_rate": 3.4103545634957825e-07, "loss": 0.0315, "step": 190000 }, { "epoch": 0.22005, "grad_norm": 0.02789056859910488, "learning_rate": 3.4035534555135753e-07, "loss": 0.0321, "step": 190010 }, { "epoch": 0.2201, "grad_norm": 0.02760968543589115, "learning_rate": 3.3967590893856096e-07, "loss": 0.0325, "step": 190020 }, { "epoch": 0.22015, "grad_norm": 0.030100341886281967, "learning_rate": 3.389971465297598e-07, "loss": 0.0321, "step": 190030 }, { "epoch": 0.2202, "grad_norm": 0.027394311502575874, "learning_rate": 3.383190583435114e-07, "loss": 0.0317, "step": 190040 }, { "epoch": 0.22025, "grad_norm": 0.031111113727092743, "learning_rate": 3.3764164439835656e-07, "loss": 0.0336, "step": 190050 }, { "epoch": 0.2203, "grad_norm": 0.032798249274492264, "learning_rate": 3.369649047128137e-07, "loss": 0.0323, "step": 190060 }, { "epoch": 0.22035, "grad_norm": 0.025414522737264633, "learning_rate": 3.362888393053848e-07, "loss": 0.0336, "step": 190070 }, { "epoch": 0.2204, "grad_norm": 0.03104369156062603, "learning_rate": 3.3561344819455223e-07, "loss": 0.0341, "step": 190080 }, { "epoch": 0.22045, "grad_norm": 0.027488548308610916, "learning_rate": 3.3493873139878174e-07, "loss": 0.032, "step": 190090 }, { "epoch": 0.2205, "grad_norm": 0.026049386709928513, "learning_rate": 3.342646889365225e-07, "loss": 0.0333, "step": 190100 }, { "epoch": 0.22055, "grad_norm": 0.02731347270309925, "learning_rate": 3.335913208261959e-07, "loss": 0.0328, "step": 190110 }, { "epoch": 0.2206, "grad_norm": 0.027487874031066895, "learning_rate": 3.329186270862206e-07, "loss": 0.0324, "step": 190120 }, { "epoch": 0.22065, "grad_norm": 0.030462585389614105, "learning_rate": 3.3224660773497896e-07, "loss": 0.0331, "step": 190130 }, { "epoch": 0.2207, "grad_norm": 0.031835246831178665, "learning_rate": 3.315752627908508e-07, "loss": 0.0335, "step": 190140 }, { "epoch": 0.22075, "grad_norm": 0.029755624011158943, "learning_rate": 3.309045922721854e-07, "loss": 0.032, "step": 190150 }, { "epoch": 0.2208, "grad_norm": 0.031246056780219078, "learning_rate": 3.302345961973208e-07, "loss": 0.0351, "step": 190160 }, { "epoch": 0.22085, "grad_norm": 0.030124295502901077, "learning_rate": 3.295652745845756e-07, "loss": 0.0352, "step": 190170 }, { "epoch": 0.2209, "grad_norm": 0.030465351417660713, "learning_rate": 3.2889662745224924e-07, "loss": 0.0336, "step": 190180 }, { "epoch": 0.22095, "grad_norm": 0.029054658487439156, "learning_rate": 3.2822865481861865e-07, "loss": 0.0333, "step": 190190 }, { "epoch": 0.221, "grad_norm": 0.023691993206739426, "learning_rate": 3.27561356701947e-07, "loss": 0.033, "step": 190200 }, { "epoch": 0.22105, "grad_norm": 0.03325042873620987, "learning_rate": 3.2689473312047805e-07, "loss": 0.0331, "step": 190210 }, { "epoch": 0.2211, "grad_norm": 0.03363754227757454, "learning_rate": 3.2622878409243884e-07, "loss": 0.0332, "step": 190220 }, { "epoch": 0.22115, "grad_norm": 0.028941981494426727, "learning_rate": 3.255635096360371e-07, "loss": 0.0322, "step": 190230 }, { "epoch": 0.2212, "grad_norm": 0.030986396595835686, "learning_rate": 3.2489890976945825e-07, "loss": 0.0335, "step": 190240 }, { "epoch": 0.22125, "grad_norm": 0.028466373682022095, "learning_rate": 3.242349845108711e-07, "loss": 0.034, "step": 190250 }, { "epoch": 0.2213, "grad_norm": 0.02595651149749756, "learning_rate": 3.235717338784333e-07, "loss": 0.0323, "step": 190260 }, { "epoch": 0.22135, "grad_norm": 0.026936156675219536, "learning_rate": 3.229091578902693e-07, "loss": 0.0311, "step": 190270 }, { "epoch": 0.2214, "grad_norm": 0.028447262942790985, "learning_rate": 3.2224725656450073e-07, "loss": 0.0333, "step": 190280 }, { "epoch": 0.22145, "grad_norm": 0.033287130296230316, "learning_rate": 3.215860299192214e-07, "loss": 0.0317, "step": 190290 }, { "epoch": 0.2215, "grad_norm": 0.03119870461523533, "learning_rate": 3.209254779725057e-07, "loss": 0.0323, "step": 190300 }, { "epoch": 0.22155, "grad_norm": 0.027340726926922798, "learning_rate": 3.202656007424226e-07, "loss": 0.0321, "step": 190310 }, { "epoch": 0.2216, "grad_norm": 0.028995640575885773, "learning_rate": 3.1960639824699936e-07, "loss": 0.0353, "step": 190320 }, { "epoch": 0.22165, "grad_norm": 0.028964687138795853, "learning_rate": 3.189478705042659e-07, "loss": 0.0307, "step": 190330 }, { "epoch": 0.2217, "grad_norm": 0.03010116145014763, "learning_rate": 3.1829001753223006e-07, "loss": 0.032, "step": 190340 }, { "epoch": 0.22175, "grad_norm": 0.026755282655358315, "learning_rate": 3.176328393488692e-07, "loss": 0.033, "step": 190350 }, { "epoch": 0.2218, "grad_norm": 0.02934892289340496, "learning_rate": 3.1697633597215503e-07, "loss": 0.0332, "step": 190360 }, { "epoch": 0.22185, "grad_norm": 0.027839114889502525, "learning_rate": 3.1632050742003427e-07, "loss": 0.0311, "step": 190370 }, { "epoch": 0.2219, "grad_norm": 0.024980947375297546, "learning_rate": 3.15665353710437e-07, "loss": 0.0327, "step": 190380 }, { "epoch": 0.22195, "grad_norm": 0.026112347841262817, "learning_rate": 3.1501087486127677e-07, "loss": 0.0317, "step": 190390 }, { "epoch": 0.222, "grad_norm": 0.031251177191734314, "learning_rate": 3.1435707089044474e-07, "loss": 0.0329, "step": 190400 }, { "epoch": 0.22205, "grad_norm": 0.02733309008181095, "learning_rate": 3.137039418158155e-07, "loss": 0.0328, "step": 190410 }, { "epoch": 0.2221, "grad_norm": 0.030722726136446, "learning_rate": 3.1305148765524707e-07, "loss": 0.032, "step": 190420 }, { "epoch": 0.22215, "grad_norm": 0.029925871640443802, "learning_rate": 3.1239970842657783e-07, "loss": 0.033, "step": 190430 }, { "epoch": 0.2222, "grad_norm": 0.02639448270201683, "learning_rate": 3.1174860414762417e-07, "loss": 0.0329, "step": 190440 }, { "epoch": 0.22225, "grad_norm": 0.03083515726029873, "learning_rate": 3.110981748361913e-07, "loss": 0.034, "step": 190450 }, { "epoch": 0.2223, "grad_norm": 0.027510441839694977, "learning_rate": 3.104484205100539e-07, "loss": 0.0347, "step": 190460 }, { "epoch": 0.22235, "grad_norm": 0.033879805356264114, "learning_rate": 3.097993411869865e-07, "loss": 0.032, "step": 190470 }, { "epoch": 0.2224, "grad_norm": 0.03048882633447647, "learning_rate": 3.0915093688472787e-07, "loss": 0.0324, "step": 190480 }, { "epoch": 0.22245, "grad_norm": 0.026504499837756157, "learning_rate": 3.0850320762100536e-07, "loss": 0.034, "step": 190490 }, { "epoch": 0.2225, "grad_norm": 0.02800612710416317, "learning_rate": 3.0785615341352993e-07, "loss": 0.0346, "step": 190500 }, { "epoch": 0.22255, "grad_norm": 0.02819383330643177, "learning_rate": 3.072097742799901e-07, "loss": 0.033, "step": 190510 }, { "epoch": 0.2226, "grad_norm": 0.029371775686740875, "learning_rate": 3.065640702380607e-07, "loss": 0.0334, "step": 190520 }, { "epoch": 0.22265, "grad_norm": 0.03021889552474022, "learning_rate": 3.059190413053914e-07, "loss": 0.0327, "step": 190530 }, { "epoch": 0.2227, "grad_norm": 0.03006213903427124, "learning_rate": 3.052746874996154e-07, "loss": 0.0325, "step": 190540 }, { "epoch": 0.22275, "grad_norm": 0.026731068268418312, "learning_rate": 3.046310088383575e-07, "loss": 0.0317, "step": 190550 }, { "epoch": 0.2228, "grad_norm": 0.028391912579536438, "learning_rate": 3.0398800533920633e-07, "loss": 0.033, "step": 190560 }, { "epoch": 0.22285, "grad_norm": 0.030019372701644897, "learning_rate": 3.033456770197479e-07, "loss": 0.0315, "step": 190570 }, { "epoch": 0.2229, "grad_norm": 0.023982156068086624, "learning_rate": 3.027040238975376e-07, "loss": 0.0343, "step": 190580 }, { "epoch": 0.22295, "grad_norm": 0.03015189617872238, "learning_rate": 3.0206304599012246e-07, "loss": 0.0326, "step": 190590 }, { "epoch": 0.223, "grad_norm": 0.028634555637836456, "learning_rate": 3.014227433150274e-07, "loss": 0.0336, "step": 190600 }, { "epoch": 0.22305, "grad_norm": 0.024594226852059364, "learning_rate": 3.0078311588975225e-07, "loss": 0.0323, "step": 190610 }, { "epoch": 0.2231, "grad_norm": 0.027328481897711754, "learning_rate": 3.0014416373178866e-07, "loss": 0.0324, "step": 190620 }, { "epoch": 0.22315, "grad_norm": 0.030780017375946045, "learning_rate": 2.9950588685860317e-07, "loss": 0.0326, "step": 190630 }, { "epoch": 0.2232, "grad_norm": 0.028242947533726692, "learning_rate": 2.988682852876484e-07, "loss": 0.034, "step": 190640 }, { "epoch": 0.22325, "grad_norm": 0.03165299445390701, "learning_rate": 2.98231359036355e-07, "loss": 0.0324, "step": 190650 }, { "epoch": 0.2233, "grad_norm": 0.03017549030482769, "learning_rate": 2.9759510812213676e-07, "loss": 0.0347, "step": 190660 }, { "epoch": 0.22335, "grad_norm": 0.028321314603090286, "learning_rate": 2.969595325623881e-07, "loss": 0.0347, "step": 190670 }, { "epoch": 0.2234, "grad_norm": 0.03147047013044357, "learning_rate": 2.963246323744839e-07, "loss": 0.0335, "step": 190680 }, { "epoch": 0.22345, "grad_norm": 0.02470092847943306, "learning_rate": 2.956904075757855e-07, "loss": 0.0325, "step": 190690 }, { "epoch": 0.2235, "grad_norm": 0.0233890600502491, "learning_rate": 2.9505685818362884e-07, "loss": 0.0323, "step": 190700 }, { "epoch": 0.22355, "grad_norm": 0.026280872523784637, "learning_rate": 2.944239842153362e-07, "loss": 0.0365, "step": 190710 }, { "epoch": 0.2236, "grad_norm": 0.03087473474442959, "learning_rate": 2.937917856882105e-07, "loss": 0.0326, "step": 190720 }, { "epoch": 0.22365, "grad_norm": 0.024799011647701263, "learning_rate": 2.931602626195351e-07, "loss": 0.0318, "step": 190730 }, { "epoch": 0.2237, "grad_norm": 0.028520086780190468, "learning_rate": 2.925294150265795e-07, "loss": 0.0328, "step": 190740 }, { "epoch": 0.22375, "grad_norm": 0.031669143587350845, "learning_rate": 2.9189924292658265e-07, "loss": 0.0328, "step": 190750 }, { "epoch": 0.2238, "grad_norm": 0.03020283207297325, "learning_rate": 2.9126974633678085e-07, "loss": 0.0311, "step": 190760 }, { "epoch": 0.22385, "grad_norm": 0.02963424101471901, "learning_rate": 2.906409252743825e-07, "loss": 0.0311, "step": 190770 }, { "epoch": 0.2239, "grad_norm": 0.027471086010336876, "learning_rate": 2.900127797565766e-07, "loss": 0.0323, "step": 190780 }, { "epoch": 0.22395, "grad_norm": 0.02767067588865757, "learning_rate": 2.8938530980053836e-07, "loss": 0.0348, "step": 190790 }, { "epoch": 0.224, "grad_norm": 0.02803168073296547, "learning_rate": 2.8875851542342347e-07, "loss": 0.0313, "step": 190800 }, { "epoch": 0.22405, "grad_norm": 0.02731022797524929, "learning_rate": 2.881323966423682e-07, "loss": 0.0323, "step": 190810 }, { "epoch": 0.2241, "grad_norm": 0.024566948413848877, "learning_rate": 2.8750695347448676e-07, "loss": 0.032, "step": 190820 }, { "epoch": 0.22415, "grad_norm": 0.028864016756415367, "learning_rate": 2.86882185936882e-07, "loss": 0.0356, "step": 190830 }, { "epoch": 0.2242, "grad_norm": 0.02808845229446888, "learning_rate": 2.862580940466347e-07, "loss": 0.0334, "step": 190840 }, { "epoch": 0.22425, "grad_norm": 0.029763944447040558, "learning_rate": 2.8563467782080634e-07, "loss": 0.0338, "step": 190850 }, { "epoch": 0.2243, "grad_norm": 0.029420167207717896, "learning_rate": 2.850119372764415e-07, "loss": 0.033, "step": 190860 }, { "epoch": 0.22435, "grad_norm": 0.029386799782514572, "learning_rate": 2.843898724305627e-07, "loss": 0.0331, "step": 190870 }, { "epoch": 0.2244, "grad_norm": 0.02391725219786167, "learning_rate": 2.8376848330018134e-07, "loss": 0.0327, "step": 190880 }, { "epoch": 0.22445, "grad_norm": 0.02884662337601185, "learning_rate": 2.8314776990228665e-07, "loss": 0.033, "step": 190890 }, { "epoch": 0.2245, "grad_norm": 0.031461041420698166, "learning_rate": 2.8252773225384276e-07, "loss": 0.0338, "step": 190900 }, { "epoch": 0.22455, "grad_norm": 0.027679523453116417, "learning_rate": 2.819083703718056e-07, "loss": 0.0317, "step": 190910 }, { "epoch": 0.2246, "grad_norm": 0.027436112985014915, "learning_rate": 2.812896842731061e-07, "loss": 0.0322, "step": 190920 }, { "epoch": 0.22465, "grad_norm": 0.03533259406685829, "learning_rate": 2.80671673974664e-07, "loss": 0.0328, "step": 190930 }, { "epoch": 0.2247, "grad_norm": 0.031700585037469864, "learning_rate": 2.8005433949336857e-07, "loss": 0.0328, "step": 190940 }, { "epoch": 0.22475, "grad_norm": 0.026392122730612755, "learning_rate": 2.7943768084610356e-07, "loss": 0.0323, "step": 190950 }, { "epoch": 0.2248, "grad_norm": 0.027039388194680214, "learning_rate": 2.7882169804972213e-07, "loss": 0.0333, "step": 190960 }, { "epoch": 0.22485, "grad_norm": 0.03179239109158516, "learning_rate": 2.7820639112106916e-07, "loss": 0.0338, "step": 190970 }, { "epoch": 0.2249, "grad_norm": 0.028280850499868393, "learning_rate": 2.775917600769673e-07, "loss": 0.0325, "step": 190980 }, { "epoch": 0.22495, "grad_norm": 0.029927313327789307, "learning_rate": 2.76977804934217e-07, "loss": 0.0327, "step": 190990 }, { "epoch": 0.225, "grad_norm": 0.029478423297405243, "learning_rate": 2.763645257096076e-07, "loss": 0.0322, "step": 191000 }, { "epoch": 0.22505, "grad_norm": 0.025449639186263084, "learning_rate": 2.7575192241990065e-07, "loss": 0.0331, "step": 191010 }, { "epoch": 0.2251, "grad_norm": 0.02880786545574665, "learning_rate": 2.751399950818523e-07, "loss": 0.0335, "step": 191020 }, { "epoch": 0.22515, "grad_norm": 0.027858871966600418, "learning_rate": 2.745287437121824e-07, "loss": 0.033, "step": 191030 }, { "epoch": 0.2252, "grad_norm": 0.030391762033104897, "learning_rate": 2.73918168327611e-07, "loss": 0.0315, "step": 191040 }, { "epoch": 0.22525, "grad_norm": 0.028253132477402687, "learning_rate": 2.733082689448274e-07, "loss": 0.0311, "step": 191050 }, { "epoch": 0.2253, "grad_norm": 0.02949906326830387, "learning_rate": 2.726990455805073e-07, "loss": 0.0326, "step": 191060 }, { "epoch": 0.22535, "grad_norm": 0.02909819222986698, "learning_rate": 2.7209049825130393e-07, "loss": 0.033, "step": 191070 }, { "epoch": 0.2254, "grad_norm": 0.029378492385149002, "learning_rate": 2.7148262697385685e-07, "loss": 0.0325, "step": 191080 }, { "epoch": 0.22545, "grad_norm": 0.027026867493987083, "learning_rate": 2.7087543176478324e-07, "loss": 0.0324, "step": 191090 }, { "epoch": 0.2255, "grad_norm": 0.027683105319738388, "learning_rate": 2.7026891264068934e-07, "loss": 0.034, "step": 191100 }, { "epoch": 0.22555, "grad_norm": 0.027874600142240524, "learning_rate": 2.696630696181479e-07, "loss": 0.0334, "step": 191110 }, { "epoch": 0.2256, "grad_norm": 0.02619648166000843, "learning_rate": 2.690579027137319e-07, "loss": 0.0333, "step": 191120 }, { "epoch": 0.22565, "grad_norm": 0.028711529448628426, "learning_rate": 2.6845341194397797e-07, "loss": 0.0319, "step": 191130 }, { "epoch": 0.2257, "grad_norm": 0.027037620544433594, "learning_rate": 2.678495973254175e-07, "loss": 0.0327, "step": 191140 }, { "epoch": 0.22575, "grad_norm": 0.0235857293009758, "learning_rate": 2.672464588745593e-07, "loss": 0.0313, "step": 191150 }, { "epoch": 0.2258, "grad_norm": 0.027867760509252548, "learning_rate": 2.666439966078904e-07, "loss": 0.0325, "step": 191160 }, { "epoch": 0.22585, "grad_norm": 0.027430761605501175, "learning_rate": 2.6604221054188085e-07, "loss": 0.0347, "step": 191170 }, { "epoch": 0.2259, "grad_norm": 0.029700936749577522, "learning_rate": 2.654411006929897e-07, "loss": 0.0325, "step": 191180 }, { "epoch": 0.22595, "grad_norm": 0.028554104268550873, "learning_rate": 2.6484066707764266e-07, "loss": 0.0309, "step": 191190 }, { "epoch": 0.226, "grad_norm": 0.027590837329626083, "learning_rate": 2.6424090971226e-07, "loss": 0.0331, "step": 191200 }, { "epoch": 0.22605, "grad_norm": 0.026260821148753166, "learning_rate": 2.6364182861323694e-07, "loss": 0.0317, "step": 191210 }, { "epoch": 0.2261, "grad_norm": 0.02568810060620308, "learning_rate": 2.630434237969548e-07, "loss": 0.0311, "step": 191220 }, { "epoch": 0.22615, "grad_norm": 0.02667641080915928, "learning_rate": 2.624456952797727e-07, "loss": 0.0319, "step": 191230 }, { "epoch": 0.2262, "grad_norm": 0.026591245085000992, "learning_rate": 2.6184864307803035e-07, "loss": 0.032, "step": 191240 }, { "epoch": 0.22625, "grad_norm": 0.029083015397191048, "learning_rate": 2.6125226720805364e-07, "loss": 0.032, "step": 191250 }, { "epoch": 0.2263, "grad_norm": 0.029713058844208717, "learning_rate": 2.6065656768614613e-07, "loss": 0.0312, "step": 191260 }, { "epoch": 0.22635, "grad_norm": 0.02492072619497776, "learning_rate": 2.600615445285948e-07, "loss": 0.0325, "step": 191270 }, { "epoch": 0.2264, "grad_norm": 0.028060585260391235, "learning_rate": 2.5946719775166437e-07, "loss": 0.0323, "step": 191280 }, { "epoch": 0.22645, "grad_norm": 0.025646554306149483, "learning_rate": 2.5887352737160587e-07, "loss": 0.033, "step": 191290 }, { "epoch": 0.2265, "grad_norm": 0.028695549815893173, "learning_rate": 2.5828053340465065e-07, "loss": 0.0337, "step": 191300 }, { "epoch": 0.22655, "grad_norm": 0.027269084006547928, "learning_rate": 2.576882158670135e-07, "loss": 0.0326, "step": 191310 }, { "epoch": 0.2266, "grad_norm": 0.031050991266965866, "learning_rate": 2.570965747748816e-07, "loss": 0.0319, "step": 191320 }, { "epoch": 0.22665, "grad_norm": 0.028266821056604385, "learning_rate": 2.565056101444363e-07, "loss": 0.0332, "step": 191330 }, { "epoch": 0.2267, "grad_norm": 0.02769509144127369, "learning_rate": 2.559153219918287e-07, "loss": 0.0324, "step": 191340 }, { "epoch": 0.22675, "grad_norm": 0.02997642755508423, "learning_rate": 2.5532571033320407e-07, "loss": 0.0321, "step": 191350 }, { "epoch": 0.2268, "grad_norm": 0.032755471765995026, "learning_rate": 2.5473677518467745e-07, "loss": 0.0326, "step": 191360 }, { "epoch": 0.22685, "grad_norm": 0.027477024123072624, "learning_rate": 2.541485165623497e-07, "loss": 0.0339, "step": 191370 }, { "epoch": 0.2269, "grad_norm": 0.028355475515127182, "learning_rate": 2.5356093448230533e-07, "loss": 0.0335, "step": 191380 }, { "epoch": 0.22695, "grad_norm": 0.0293252132833004, "learning_rate": 2.529740289606092e-07, "loss": 0.0337, "step": 191390 }, { "epoch": 0.227, "grad_norm": 0.030119840055704117, "learning_rate": 2.5238780001330674e-07, "loss": 0.034, "step": 191400 }, { "epoch": 0.22705, "grad_norm": 0.02708440274000168, "learning_rate": 2.5180224765642133e-07, "loss": 0.0345, "step": 191410 }, { "epoch": 0.2271, "grad_norm": 0.03187808766961098, "learning_rate": 2.5121737190596515e-07, "loss": 0.0329, "step": 191420 }, { "epoch": 0.22715, "grad_norm": 0.026619307696819305, "learning_rate": 2.50633172777931e-07, "loss": 0.0333, "step": 191430 }, { "epoch": 0.2272, "grad_norm": 0.028194934129714966, "learning_rate": 2.500496502882893e-07, "loss": 0.0347, "step": 191440 }, { "epoch": 0.22725, "grad_norm": 0.03038596548140049, "learning_rate": 2.4946680445298853e-07, "loss": 0.0335, "step": 191450 }, { "epoch": 0.2273, "grad_norm": 0.02857760339975357, "learning_rate": 2.4888463528796867e-07, "loss": 0.0336, "step": 191460 }, { "epoch": 0.22735, "grad_norm": 0.026745213195681572, "learning_rate": 2.483031428091448e-07, "loss": 0.0332, "step": 191470 }, { "epoch": 0.2274, "grad_norm": 0.026955388486385345, "learning_rate": 2.477223270324125e-07, "loss": 0.033, "step": 191480 }, { "epoch": 0.22745, "grad_norm": 0.030583078041672707, "learning_rate": 2.4714218797365354e-07, "loss": 0.0327, "step": 191490 }, { "epoch": 0.2275, "grad_norm": 0.026133380830287933, "learning_rate": 2.465627256487274e-07, "loss": 0.0345, "step": 191500 }, { "epoch": 0.22755, "grad_norm": 0.02993203140795231, "learning_rate": 2.45983940073477e-07, "loss": 0.0331, "step": 191510 }, { "epoch": 0.2276, "grad_norm": 0.026168562471866608, "learning_rate": 2.454058312637286e-07, "loss": 0.0321, "step": 191520 }, { "epoch": 0.22765, "grad_norm": 0.02790575847029686, "learning_rate": 2.4482839923528343e-07, "loss": 0.0325, "step": 191530 }, { "epoch": 0.2277, "grad_norm": 0.028191763907670975, "learning_rate": 2.4425164400392607e-07, "loss": 0.0341, "step": 191540 }, { "epoch": 0.22775, "grad_norm": 0.027023592963814735, "learning_rate": 2.4367556558543283e-07, "loss": 0.0316, "step": 191550 }, { "epoch": 0.2278, "grad_norm": 0.023854469880461693, "learning_rate": 2.431001639955494e-07, "loss": 0.0317, "step": 191560 }, { "epoch": 0.22785, "grad_norm": 0.028772985562682152, "learning_rate": 2.425254392500048e-07, "loss": 0.0334, "step": 191570 }, { "epoch": 0.2279, "grad_norm": 0.027192121371626854, "learning_rate": 2.4195139136451436e-07, "loss": 0.0333, "step": 191580 }, { "epoch": 0.22795, "grad_norm": 0.030033499002456665, "learning_rate": 2.413780203547711e-07, "loss": 0.0333, "step": 191590 }, { "epoch": 0.228, "grad_norm": 0.02756202407181263, "learning_rate": 2.4080532623645124e-07, "loss": 0.0332, "step": 191600 }, { "epoch": 0.22805, "grad_norm": 0.028734860941767693, "learning_rate": 2.402333090252118e-07, "loss": 0.0329, "step": 191610 }, { "epoch": 0.2281, "grad_norm": 0.03284359350800514, "learning_rate": 2.39661968736693e-07, "loss": 0.0321, "step": 191620 }, { "epoch": 0.22815, "grad_norm": 0.028583502396941185, "learning_rate": 2.390913053865129e-07, "loss": 0.0318, "step": 191630 }, { "epoch": 0.2282, "grad_norm": 0.029747819527983665, "learning_rate": 2.3852131899027576e-07, "loss": 0.0339, "step": 191640 }, { "epoch": 0.22825, "grad_norm": 0.030449246987700462, "learning_rate": 2.3795200956356344e-07, "loss": 0.032, "step": 191650 }, { "epoch": 0.2283, "grad_norm": 0.028850451111793518, "learning_rate": 2.3738337712194137e-07, "loss": 0.0317, "step": 191660 }, { "epoch": 0.22835, "grad_norm": 0.02912822924554348, "learning_rate": 2.3681542168095262e-07, "loss": 0.0323, "step": 191670 }, { "epoch": 0.2284, "grad_norm": 0.02906673774123192, "learning_rate": 2.3624814325612643e-07, "loss": 0.0339, "step": 191680 }, { "epoch": 0.22845, "grad_norm": 0.02683148719370365, "learning_rate": 2.356815418629754e-07, "loss": 0.0336, "step": 191690 }, { "epoch": 0.2285, "grad_norm": 0.03201993927359581, "learning_rate": 2.351156175169844e-07, "loss": 0.0329, "step": 191700 }, { "epoch": 0.22855, "grad_norm": 0.028824683278799057, "learning_rate": 2.345503702336327e-07, "loss": 0.0342, "step": 191710 }, { "epoch": 0.2286, "grad_norm": 0.03363632410764694, "learning_rate": 2.3398580002836624e-07, "loss": 0.0342, "step": 191720 }, { "epoch": 0.22865, "grad_norm": 0.02888336591422558, "learning_rate": 2.3342190691662545e-07, "loss": 0.0323, "step": 191730 }, { "epoch": 0.2287, "grad_norm": 0.03185093775391579, "learning_rate": 2.3285869091382583e-07, "loss": 0.0327, "step": 191740 }, { "epoch": 0.22875, "grad_norm": 0.026476627215743065, "learning_rate": 2.3229615203536336e-07, "loss": 0.0363, "step": 191750 }, { "epoch": 0.2288, "grad_norm": 0.02874642238020897, "learning_rate": 2.317342902966202e-07, "loss": 0.0337, "step": 191760 }, { "epoch": 0.22885, "grad_norm": 0.02916344441473484, "learning_rate": 2.3117310571295625e-07, "loss": 0.033, "step": 191770 }, { "epoch": 0.2289, "grad_norm": 0.0316338911652565, "learning_rate": 2.306125982997176e-07, "loss": 0.0352, "step": 191780 }, { "epoch": 0.22895, "grad_norm": 0.030737783759832382, "learning_rate": 2.3005276807221976e-07, "loss": 0.0348, "step": 191790 }, { "epoch": 0.229, "grad_norm": 0.03237530589103699, "learning_rate": 2.294936150457755e-07, "loss": 0.0326, "step": 191800 }, { "epoch": 0.22905, "grad_norm": 0.02888074517250061, "learning_rate": 2.2893513923567255e-07, "loss": 0.0335, "step": 191810 }, { "epoch": 0.2291, "grad_norm": 0.02907225489616394, "learning_rate": 2.2837734065717376e-07, "loss": 0.0332, "step": 191820 }, { "epoch": 0.22915, "grad_norm": 0.024844203144311905, "learning_rate": 2.2782021932553354e-07, "loss": 0.0322, "step": 191830 }, { "epoch": 0.2292, "grad_norm": 0.02850308269262314, "learning_rate": 2.2726377525598142e-07, "loss": 0.0332, "step": 191840 }, { "epoch": 0.22925, "grad_norm": 0.027192220091819763, "learning_rate": 2.2670800846373018e-07, "loss": 0.0317, "step": 191850 }, { "epoch": 0.2293, "grad_norm": 0.02539575845003128, "learning_rate": 2.2615291896397884e-07, "loss": 0.0332, "step": 191860 }, { "epoch": 0.22935, "grad_norm": 0.027498632669448853, "learning_rate": 2.2559850677189577e-07, "loss": 0.0331, "step": 191870 }, { "epoch": 0.2294, "grad_norm": 0.028753872960805893, "learning_rate": 2.2504477190264384e-07, "loss": 0.0329, "step": 191880 }, { "epoch": 0.22945, "grad_norm": 0.028393583372235298, "learning_rate": 2.2449171437136097e-07, "loss": 0.0327, "step": 191890 }, { "epoch": 0.2295, "grad_norm": 0.028692688792943954, "learning_rate": 2.239393341931656e-07, "loss": 0.0325, "step": 191900 }, { "epoch": 0.22955, "grad_norm": 0.025882720947265625, "learning_rate": 2.2338763138315954e-07, "loss": 0.033, "step": 191910 }, { "epoch": 0.2296, "grad_norm": 0.030540117993950844, "learning_rate": 2.2283660595643074e-07, "loss": 0.0319, "step": 191920 }, { "epoch": 0.22965, "grad_norm": 0.025634469464421272, "learning_rate": 2.222862579280366e-07, "loss": 0.0326, "step": 191930 }, { "epoch": 0.2297, "grad_norm": 0.02588530071079731, "learning_rate": 2.217365873130317e-07, "loss": 0.0318, "step": 191940 }, { "epoch": 0.22975, "grad_norm": 0.028533879667520523, "learning_rate": 2.2118759412643742e-07, "loss": 0.0329, "step": 191950 }, { "epoch": 0.2298, "grad_norm": 0.027502888813614845, "learning_rate": 2.206392783832667e-07, "loss": 0.0332, "step": 191960 }, { "epoch": 0.22985, "grad_norm": 0.028430012986063957, "learning_rate": 2.2009164009850758e-07, "loss": 0.0353, "step": 191970 }, { "epoch": 0.2299, "grad_norm": 0.030735129490494728, "learning_rate": 2.1954467928713697e-07, "loss": 0.0331, "step": 191980 }, { "epoch": 0.22995, "grad_norm": 0.029734564945101738, "learning_rate": 2.189983959641012e-07, "loss": 0.0332, "step": 191990 }, { "epoch": 0.23, "grad_norm": 0.02846161276102066, "learning_rate": 2.1845279014434117e-07, "loss": 0.0321, "step": 192000 }, { "epoch": 0.23005, "grad_norm": 0.03232620656490326, "learning_rate": 2.179078618427699e-07, "loss": 0.0334, "step": 192010 }, { "epoch": 0.2301, "grad_norm": 0.028150519356131554, "learning_rate": 2.1736361107429215e-07, "loss": 0.0317, "step": 192020 }, { "epoch": 0.23015, "grad_norm": 0.024552926421165466, "learning_rate": 2.1682003785377936e-07, "loss": 0.0307, "step": 192030 }, { "epoch": 0.2302, "grad_norm": 0.02875414490699768, "learning_rate": 2.162771421960974e-07, "loss": 0.0328, "step": 192040 }, { "epoch": 0.23025, "grad_norm": 0.030259743332862854, "learning_rate": 2.157349241160872e-07, "loss": 0.0328, "step": 192050 }, { "epoch": 0.2303, "grad_norm": 0.02931458130478859, "learning_rate": 2.1519338362857299e-07, "loss": 0.0329, "step": 192060 }, { "epoch": 0.23035, "grad_norm": 0.027251889929175377, "learning_rate": 2.1465252074835963e-07, "loss": 0.032, "step": 192070 }, { "epoch": 0.2304, "grad_norm": 0.031506650149822235, "learning_rate": 2.1411233549023525e-07, "loss": 0.0337, "step": 192080 }, { "epoch": 0.23045, "grad_norm": 0.028512097895145416, "learning_rate": 2.135728278689686e-07, "loss": 0.033, "step": 192090 }, { "epoch": 0.2305, "grad_norm": 0.02668013609945774, "learning_rate": 2.13033997899309e-07, "loss": 0.0327, "step": 192100 }, { "epoch": 0.23055, "grad_norm": 0.027364933863282204, "learning_rate": 2.1249584559598913e-07, "loss": 0.0327, "step": 192110 }, { "epoch": 0.2306, "grad_norm": 0.02794736810028553, "learning_rate": 2.1195837097371661e-07, "loss": 0.0334, "step": 192120 }, { "epoch": 0.23065, "grad_norm": 0.02584332786500454, "learning_rate": 2.1142157404719365e-07, "loss": 0.0315, "step": 192130 }, { "epoch": 0.2307, "grad_norm": 0.029982447624206543, "learning_rate": 2.108854548310918e-07, "loss": 0.0325, "step": 192140 }, { "epoch": 0.23075, "grad_norm": 0.02743016555905342, "learning_rate": 2.10350013340066e-07, "loss": 0.0319, "step": 192150 }, { "epoch": 0.2308, "grad_norm": 0.026245078071951866, "learning_rate": 2.098152495887601e-07, "loss": 0.0321, "step": 192160 }, { "epoch": 0.23085, "grad_norm": 0.027262257412075996, "learning_rate": 2.0928116359179295e-07, "loss": 0.0314, "step": 192170 }, { "epoch": 0.2309, "grad_norm": 0.02742786332964897, "learning_rate": 2.08747755363764e-07, "loss": 0.0318, "step": 192180 }, { "epoch": 0.23095, "grad_norm": 0.027811231091618538, "learning_rate": 2.082150249192588e-07, "loss": 0.0317, "step": 192190 }, { "epoch": 0.231, "grad_norm": 0.027997516095638275, "learning_rate": 2.0768297227283794e-07, "loss": 0.0319, "step": 192200 }, { "epoch": 0.23105, "grad_norm": 0.026771126314997673, "learning_rate": 2.0715159743905365e-07, "loss": 0.0309, "step": 192210 }, { "epoch": 0.2311, "grad_norm": 0.03103078342974186, "learning_rate": 2.0662090043242765e-07, "loss": 0.034, "step": 192220 }, { "epoch": 0.23115, "grad_norm": 0.026181593537330627, "learning_rate": 2.060908812674761e-07, "loss": 0.0338, "step": 192230 }, { "epoch": 0.2312, "grad_norm": 0.03078121319413185, "learning_rate": 2.055615399586819e-07, "loss": 0.0343, "step": 192240 }, { "epoch": 0.23125, "grad_norm": 0.02652091160416603, "learning_rate": 2.0503287652051951e-07, "loss": 0.0323, "step": 192250 }, { "epoch": 0.2313, "grad_norm": 0.028631998226046562, "learning_rate": 2.0450489096744685e-07, "loss": 0.0353, "step": 192260 }, { "epoch": 0.23135, "grad_norm": 0.028035033494234085, "learning_rate": 2.0397758331389405e-07, "loss": 0.0323, "step": 192270 }, { "epoch": 0.2314, "grad_norm": 0.02855309657752514, "learning_rate": 2.0345095357427735e-07, "loss": 0.0316, "step": 192280 }, { "epoch": 0.23145, "grad_norm": 0.02751971408724785, "learning_rate": 2.0292500176299635e-07, "loss": 0.0326, "step": 192290 }, { "epoch": 0.2315, "grad_norm": 0.02798202820122242, "learning_rate": 2.023997278944284e-07, "loss": 0.0309, "step": 192300 }, { "epoch": 0.23155, "grad_norm": 0.03205485641956329, "learning_rate": 2.0187513198293983e-07, "loss": 0.0315, "step": 192310 }, { "epoch": 0.2316, "grad_norm": 0.02981637232005596, "learning_rate": 2.0135121404286637e-07, "loss": 0.0353, "step": 192320 }, { "epoch": 0.23165, "grad_norm": 0.02659517154097557, "learning_rate": 2.0082797408853272e-07, "loss": 0.0312, "step": 192330 }, { "epoch": 0.2317, "grad_norm": 0.026026614010334015, "learning_rate": 2.003054121342468e-07, "loss": 0.0314, "step": 192340 }, { "epoch": 0.23175, "grad_norm": 0.030751172453165054, "learning_rate": 1.9978352819429725e-07, "loss": 0.0324, "step": 192350 }, { "epoch": 0.2318, "grad_norm": 0.025679949671030045, "learning_rate": 1.992623222829476e-07, "loss": 0.0304, "step": 192360 }, { "epoch": 0.23185, "grad_norm": 0.02781066484749317, "learning_rate": 1.9874179441444484e-07, "loss": 0.0321, "step": 192370 }, { "epoch": 0.2319, "grad_norm": 0.03086954541504383, "learning_rate": 1.9822194460302755e-07, "loss": 0.0318, "step": 192380 }, { "epoch": 0.23195, "grad_norm": 0.02711847797036171, "learning_rate": 1.977027728629066e-07, "loss": 0.0343, "step": 192390 }, { "epoch": 0.232, "grad_norm": 0.027603790163993835, "learning_rate": 1.9718427920827064e-07, "loss": 0.0347, "step": 192400 }, { "epoch": 0.23205, "grad_norm": 0.028913889080286026, "learning_rate": 1.966664636533e-07, "loss": 0.033, "step": 192410 }, { "epoch": 0.2321, "grad_norm": 0.030028890818357468, "learning_rate": 1.9614932621215e-07, "loss": 0.0347, "step": 192420 }, { "epoch": 0.23215, "grad_norm": 0.02870725467801094, "learning_rate": 1.9563286689895655e-07, "loss": 0.0326, "step": 192430 }, { "epoch": 0.2322, "grad_norm": 0.026102574542164803, "learning_rate": 1.9511708572784448e-07, "loss": 0.0322, "step": 192440 }, { "epoch": 0.23225, "grad_norm": 0.027219127863645554, "learning_rate": 1.9460198271291364e-07, "loss": 0.0319, "step": 192450 }, { "epoch": 0.2323, "grad_norm": 0.026555761694908142, "learning_rate": 1.940875578682416e-07, "loss": 0.0322, "step": 192460 }, { "epoch": 0.23235, "grad_norm": 0.02638009563088417, "learning_rate": 1.9357381120790052e-07, "loss": 0.0324, "step": 192470 }, { "epoch": 0.2324, "grad_norm": 0.029163340106606483, "learning_rate": 1.930607427459319e-07, "loss": 0.0331, "step": 192480 }, { "epoch": 0.23245, "grad_norm": 0.02803208865225315, "learning_rate": 1.925483524963606e-07, "loss": 0.0329, "step": 192490 }, { "epoch": 0.2325, "grad_norm": 0.02882474660873413, "learning_rate": 1.920366404731977e-07, "loss": 0.0312, "step": 192500 }, { "epoch": 0.23255, "grad_norm": 0.028602832928299904, "learning_rate": 1.9152560669043473e-07, "loss": 0.0336, "step": 192510 }, { "epoch": 0.2326, "grad_norm": 0.027052734047174454, "learning_rate": 1.910152511620439e-07, "loss": 0.032, "step": 192520 }, { "epoch": 0.23265, "grad_norm": 0.029143305495381355, "learning_rate": 1.9050557390196956e-07, "loss": 0.0335, "step": 192530 }, { "epoch": 0.2327, "grad_norm": 0.026500994339585304, "learning_rate": 1.899965749241561e-07, "loss": 0.0319, "step": 192540 }, { "epoch": 0.23275, "grad_norm": 0.025927657261490822, "learning_rate": 1.894882542425147e-07, "loss": 0.0317, "step": 192550 }, { "epoch": 0.2328, "grad_norm": 0.025806518271565437, "learning_rate": 1.8898061187094528e-07, "loss": 0.0316, "step": 192560 }, { "epoch": 0.23285, "grad_norm": 0.026212098076939583, "learning_rate": 1.8847364782332567e-07, "loss": 0.0322, "step": 192570 }, { "epoch": 0.2329, "grad_norm": 0.02708258107304573, "learning_rate": 1.879673621135114e-07, "loss": 0.0329, "step": 192580 }, { "epoch": 0.23295, "grad_norm": 0.026623349636793137, "learning_rate": 1.8746175475534978e-07, "loss": 0.0316, "step": 192590 }, { "epoch": 0.233, "grad_norm": 0.02785472758114338, "learning_rate": 1.8695682576266304e-07, "loss": 0.0314, "step": 192600 }, { "epoch": 0.23305, "grad_norm": 0.026384657248854637, "learning_rate": 1.8645257514925406e-07, "loss": 0.0321, "step": 192610 }, { "epoch": 0.2331, "grad_norm": 0.03154732659459114, "learning_rate": 1.8594900292890903e-07, "loss": 0.0345, "step": 192620 }, { "epoch": 0.23315, "grad_norm": 0.026553558185696602, "learning_rate": 1.8544610911539472e-07, "loss": 0.0327, "step": 192630 }, { "epoch": 0.2332, "grad_norm": 0.02985524944961071, "learning_rate": 1.8494389372246402e-07, "loss": 0.0311, "step": 192640 }, { "epoch": 0.23325, "grad_norm": 0.025981910526752472, "learning_rate": 1.844423567638448e-07, "loss": 0.0333, "step": 192650 }, { "epoch": 0.2333, "grad_norm": 0.028925621882081032, "learning_rate": 1.8394149825324834e-07, "loss": 0.0319, "step": 192660 }, { "epoch": 0.23335, "grad_norm": 0.025977320969104767, "learning_rate": 1.8344131820436927e-07, "loss": 0.0327, "step": 192670 }, { "epoch": 0.2334, "grad_norm": 0.029717909172177315, "learning_rate": 1.8294181663087995e-07, "loss": 0.0343, "step": 192680 }, { "epoch": 0.23345, "grad_norm": 0.026923447847366333, "learning_rate": 1.824429935464389e-07, "loss": 0.0344, "step": 192690 }, { "epoch": 0.2335, "grad_norm": 0.03424394503235817, "learning_rate": 1.8194484896467967e-07, "loss": 0.0332, "step": 192700 }, { "epoch": 0.23355, "grad_norm": 0.027123430743813515, "learning_rate": 1.8144738289922747e-07, "loss": 0.0333, "step": 192710 }, { "epoch": 0.2336, "grad_norm": 0.0246063731610775, "learning_rate": 1.8095059536367974e-07, "loss": 0.0344, "step": 192720 }, { "epoch": 0.23365, "grad_norm": 0.029820656403899193, "learning_rate": 1.8045448637162011e-07, "loss": 0.0353, "step": 192730 }, { "epoch": 0.2337, "grad_norm": 0.025614451617002487, "learning_rate": 1.7995905593660712e-07, "loss": 0.0333, "step": 192740 }, { "epoch": 0.23375, "grad_norm": 0.0293387733399868, "learning_rate": 1.7946430407219105e-07, "loss": 0.0354, "step": 192750 }, { "epoch": 0.2338, "grad_norm": 0.0259340051561594, "learning_rate": 1.7897023079189722e-07, "loss": 0.034, "step": 192760 }, { "epoch": 0.23385, "grad_norm": 0.026477213948965073, "learning_rate": 1.784768361092315e-07, "loss": 0.0341, "step": 192770 }, { "epoch": 0.2339, "grad_norm": 0.029923735186457634, "learning_rate": 1.7798412003768307e-07, "loss": 0.0338, "step": 192780 }, { "epoch": 0.23395, "grad_norm": 0.023119883611798286, "learning_rate": 1.7749208259072448e-07, "loss": 0.0335, "step": 192790 }, { "epoch": 0.234, "grad_norm": 0.024888034909963608, "learning_rate": 1.770007237818061e-07, "loss": 0.0329, "step": 192800 }, { "epoch": 0.23405, "grad_norm": 0.029386069625616074, "learning_rate": 1.765100436243644e-07, "loss": 0.0329, "step": 192810 }, { "epoch": 0.2341, "grad_norm": 0.03265475854277611, "learning_rate": 1.760200421318109e-07, "loss": 0.0344, "step": 192820 }, { "epoch": 0.23415, "grad_norm": 0.027390794828534126, "learning_rate": 1.7553071931754317e-07, "loss": 0.033, "step": 192830 }, { "epoch": 0.2342, "grad_norm": 0.025554992258548737, "learning_rate": 1.750420751949394e-07, "loss": 0.0337, "step": 192840 }, { "epoch": 0.23425, "grad_norm": 0.02988513931632042, "learning_rate": 1.745541097773612e-07, "loss": 0.034, "step": 192850 }, { "epoch": 0.2343, "grad_norm": 0.028282584622502327, "learning_rate": 1.7406682307814505e-07, "loss": 0.0336, "step": 192860 }, { "epoch": 0.23435, "grad_norm": 0.028356118127703667, "learning_rate": 1.735802151106164e-07, "loss": 0.0335, "step": 192870 }, { "epoch": 0.2344, "grad_norm": 0.030579427257180214, "learning_rate": 1.7309428588807575e-07, "loss": 0.034, "step": 192880 }, { "epoch": 0.23445, "grad_norm": 0.029863102361559868, "learning_rate": 1.7260903542381246e-07, "loss": 0.0339, "step": 192890 }, { "epoch": 0.2345, "grad_norm": 0.028973223641514778, "learning_rate": 1.7212446373109092e-07, "loss": 0.0352, "step": 192900 }, { "epoch": 0.23455, "grad_norm": 0.024388255551457405, "learning_rate": 1.7164057082315887e-07, "loss": 0.0321, "step": 192910 }, { "epoch": 0.2346, "grad_norm": 0.028484197333455086, "learning_rate": 1.7115735671324462e-07, "loss": 0.0318, "step": 192920 }, { "epoch": 0.23465, "grad_norm": 0.02944735251367092, "learning_rate": 1.706748214145598e-07, "loss": 0.0338, "step": 192930 }, { "epoch": 0.2347, "grad_norm": 0.02990567497909069, "learning_rate": 1.7019296494029945e-07, "loss": 0.0324, "step": 192940 }, { "epoch": 0.23475, "grad_norm": 0.026609139516949654, "learning_rate": 1.6971178730363635e-07, "loss": 0.0322, "step": 192950 }, { "epoch": 0.2348, "grad_norm": 0.026992447674274445, "learning_rate": 1.6923128851772385e-07, "loss": 0.0319, "step": 192960 }, { "epoch": 0.23485, "grad_norm": 0.03282034024596214, "learning_rate": 1.687514685956987e-07, "loss": 0.0316, "step": 192970 }, { "epoch": 0.2349, "grad_norm": 0.02752852253615856, "learning_rate": 1.682723275506809e-07, "loss": 0.0305, "step": 192980 }, { "epoch": 0.23495, "grad_norm": 0.026460938155651093, "learning_rate": 1.6779386539576835e-07, "loss": 0.0309, "step": 192990 }, { "epoch": 0.235, "grad_norm": 0.02743816375732422, "learning_rate": 1.6731608214403948e-07, "loss": 0.0304, "step": 193000 }, { "epoch": 0.23505, "grad_norm": 0.030733531340956688, "learning_rate": 1.668389778085616e-07, "loss": 0.0311, "step": 193010 }, { "epoch": 0.2351, "grad_norm": 0.030693871900439262, "learning_rate": 1.6636255240237986e-07, "loss": 0.0306, "step": 193020 }, { "epoch": 0.23515, "grad_norm": 0.0279314573854208, "learning_rate": 1.6588680593851157e-07, "loss": 0.0328, "step": 193030 }, { "epoch": 0.2352, "grad_norm": 0.029680553823709488, "learning_rate": 1.6541173842997137e-07, "loss": 0.0303, "step": 193040 }, { "epoch": 0.23525, "grad_norm": 0.03066929802298546, "learning_rate": 1.649373498897433e-07, "loss": 0.0312, "step": 193050 }, { "epoch": 0.2353, "grad_norm": 0.028259824961423874, "learning_rate": 1.6446364033079752e-07, "loss": 0.0313, "step": 193060 }, { "epoch": 0.23535, "grad_norm": 0.025081414729356766, "learning_rate": 1.639906097660876e-07, "loss": 0.0306, "step": 193070 }, { "epoch": 0.2354, "grad_norm": 0.026953531429171562, "learning_rate": 1.6351825820854206e-07, "loss": 0.0311, "step": 193080 }, { "epoch": 0.23545, "grad_norm": 0.02603212371468544, "learning_rate": 1.6304658567107834e-07, "loss": 0.0314, "step": 193090 }, { "epoch": 0.2355, "grad_norm": 0.02506883069872856, "learning_rate": 1.625755921665889e-07, "loss": 0.031, "step": 193100 }, { "epoch": 0.23555, "grad_norm": 0.026660045608878136, "learning_rate": 1.6210527770795237e-07, "loss": 0.0328, "step": 193110 }, { "epoch": 0.2356, "grad_norm": 0.02374088764190674, "learning_rate": 1.6163564230802507e-07, "loss": 0.0313, "step": 193120 }, { "epoch": 0.23565, "grad_norm": 0.028937511146068573, "learning_rate": 1.6116668597964956e-07, "loss": 0.0317, "step": 193130 }, { "epoch": 0.2357, "grad_norm": 0.031917814165353775, "learning_rate": 1.6069840873564335e-07, "loss": 0.0311, "step": 193140 }, { "epoch": 0.23575, "grad_norm": 0.028658544644713402, "learning_rate": 1.6023081058881284e-07, "loss": 0.0315, "step": 193150 }, { "epoch": 0.2358, "grad_norm": 0.0289542805403471, "learning_rate": 1.5976389155193948e-07, "loss": 0.0323, "step": 193160 }, { "epoch": 0.23585, "grad_norm": 0.025311751291155815, "learning_rate": 1.592976516377881e-07, "loss": 0.032, "step": 193170 }, { "epoch": 0.2359, "grad_norm": 0.02736661396920681, "learning_rate": 1.5883209085910678e-07, "loss": 0.0315, "step": 193180 }, { "epoch": 0.23595, "grad_norm": 0.03126165643334389, "learning_rate": 1.5836720922862703e-07, "loss": 0.0333, "step": 193190 }, { "epoch": 0.236, "grad_norm": 0.030230486765503883, "learning_rate": 1.5790300675904979e-07, "loss": 0.0333, "step": 193200 }, { "epoch": 0.23605, "grad_norm": 0.031154369935393333, "learning_rate": 1.5743948346307603e-07, "loss": 0.0331, "step": 193210 }, { "epoch": 0.2361, "grad_norm": 0.027177628129720688, "learning_rate": 1.5697663935337336e-07, "loss": 0.0323, "step": 193220 }, { "epoch": 0.23615, "grad_norm": 0.026651490479707718, "learning_rate": 1.565144744425956e-07, "loss": 0.0323, "step": 193230 }, { "epoch": 0.2362, "grad_norm": 0.030147448182106018, "learning_rate": 1.5605298874337704e-07, "loss": 0.032, "step": 193240 }, { "epoch": 0.23625, "grad_norm": 0.028682773932814598, "learning_rate": 1.5559218226834094e-07, "loss": 0.0322, "step": 193250 }, { "epoch": 0.2363, "grad_norm": 0.02755606733262539, "learning_rate": 1.5513205503007722e-07, "loss": 0.033, "step": 193260 }, { "epoch": 0.23635, "grad_norm": 0.026752468198537827, "learning_rate": 1.5467260704117025e-07, "loss": 0.0334, "step": 193270 }, { "epoch": 0.2364, "grad_norm": 0.026598379015922546, "learning_rate": 1.5421383831417945e-07, "loss": 0.0349, "step": 193280 }, { "epoch": 0.23645, "grad_norm": 0.02493317425251007, "learning_rate": 1.5375574886164755e-07, "loss": 0.032, "step": 193290 }, { "epoch": 0.2365, "grad_norm": 0.029579149559140205, "learning_rate": 1.5329833869610066e-07, "loss": 0.0324, "step": 193300 }, { "epoch": 0.23655, "grad_norm": 0.02820001170039177, "learning_rate": 1.5284160783004543e-07, "loss": 0.0324, "step": 193310 }, { "epoch": 0.2366, "grad_norm": 0.02783927135169506, "learning_rate": 1.5238555627596352e-07, "loss": 0.0319, "step": 193320 }, { "epoch": 0.23665, "grad_norm": 0.028218500316143036, "learning_rate": 1.5193018404632277e-07, "loss": 0.032, "step": 193330 }, { "epoch": 0.2367, "grad_norm": 0.027087442576885223, "learning_rate": 1.5147549115357706e-07, "loss": 0.0328, "step": 193340 }, { "epoch": 0.23675, "grad_norm": 0.029415588825941086, "learning_rate": 1.5102147761015817e-07, "loss": 0.0328, "step": 193350 }, { "epoch": 0.2368, "grad_norm": 0.027064288035035133, "learning_rate": 1.5056814342847836e-07, "loss": 0.0319, "step": 193360 }, { "epoch": 0.23685, "grad_norm": 0.027783773839473724, "learning_rate": 1.5011548862092773e-07, "loss": 0.0324, "step": 193370 }, { "epoch": 0.2369, "grad_norm": 0.02960365265607834, "learning_rate": 1.4966351319988248e-07, "loss": 0.0347, "step": 193380 }, { "epoch": 0.23695, "grad_norm": 0.030076824128627777, "learning_rate": 1.4921221717770218e-07, "loss": 0.0341, "step": 193390 }, { "epoch": 0.237, "grad_norm": 0.026086056604981422, "learning_rate": 1.4876160056672417e-07, "loss": 0.0315, "step": 193400 }, { "epoch": 0.23705, "grad_norm": 0.025460442528128624, "learning_rate": 1.4831166337926917e-07, "loss": 0.033, "step": 193410 }, { "epoch": 0.2371, "grad_norm": 0.024396125227212906, "learning_rate": 1.4786240562763566e-07, "loss": 0.0331, "step": 193420 }, { "epoch": 0.23715, "grad_norm": 0.03096782974898815, "learning_rate": 1.4741382732410546e-07, "loss": 0.0331, "step": 193430 }, { "epoch": 0.2372, "grad_norm": 0.026441195979714394, "learning_rate": 1.4696592848094935e-07, "loss": 0.0321, "step": 193440 }, { "epoch": 0.23725, "grad_norm": 0.026662474498152733, "learning_rate": 1.4651870911040478e-07, "loss": 0.033, "step": 193450 }, { "epoch": 0.2373, "grad_norm": 0.028206605464220047, "learning_rate": 1.4607216922470078e-07, "loss": 0.0328, "step": 193460 }, { "epoch": 0.23735, "grad_norm": 0.028729049488902092, "learning_rate": 1.456263088360471e-07, "loss": 0.0326, "step": 193470 }, { "epoch": 0.2374, "grad_norm": 0.027290131896734238, "learning_rate": 1.4518112795663397e-07, "loss": 0.0327, "step": 193480 }, { "epoch": 0.23745, "grad_norm": 0.026049882173538208, "learning_rate": 1.447366265986322e-07, "loss": 0.0334, "step": 193490 }, { "epoch": 0.2375, "grad_norm": 0.025826916098594666, "learning_rate": 1.4429280477419037e-07, "loss": 0.0361, "step": 193500 }, { "epoch": 0.23755, "grad_norm": 0.030189473181962967, "learning_rate": 1.4384966249544606e-07, "loss": 0.0339, "step": 193510 }, { "epoch": 0.2376, "grad_norm": 0.02801290899515152, "learning_rate": 1.4340719977451455e-07, "loss": 0.0323, "step": 193520 }, { "epoch": 0.23765, "grad_norm": 0.0285183098167181, "learning_rate": 1.4296541662349172e-07, "loss": 0.032, "step": 193530 }, { "epoch": 0.2377, "grad_norm": 0.02660214714705944, "learning_rate": 1.425243130544568e-07, "loss": 0.0327, "step": 193540 }, { "epoch": 0.23775, "grad_norm": 0.027230629697442055, "learning_rate": 1.4208388907946957e-07, "loss": 0.033, "step": 193550 }, { "epoch": 0.2378, "grad_norm": 0.027467437088489532, "learning_rate": 1.4164414471056764e-07, "loss": 0.0332, "step": 193560 }, { "epoch": 0.23785, "grad_norm": 0.02356082573533058, "learning_rate": 1.412050799597775e-07, "loss": 0.0313, "step": 193570 }, { "epoch": 0.2379, "grad_norm": 0.026973361149430275, "learning_rate": 1.4076669483910065e-07, "loss": 0.0325, "step": 193580 }, { "epoch": 0.23795, "grad_norm": 0.030826479196548462, "learning_rate": 1.4032898936052475e-07, "loss": 0.032, "step": 193590 }, { "epoch": 0.238, "grad_norm": 0.02999156527221203, "learning_rate": 1.398919635360152e-07, "loss": 0.0328, "step": 193600 }, { "epoch": 0.23805, "grad_norm": 0.031644634902477264, "learning_rate": 1.39455617377518e-07, "loss": 0.036, "step": 193610 }, { "epoch": 0.2381, "grad_norm": 0.03171585127711296, "learning_rate": 1.390199508969653e-07, "loss": 0.0342, "step": 193620 }, { "epoch": 0.23815, "grad_norm": 0.029693949967622757, "learning_rate": 1.3858496410626698e-07, "loss": 0.0324, "step": 193630 }, { "epoch": 0.2382, "grad_norm": 0.028009943664073944, "learning_rate": 1.3815065701731632e-07, "loss": 0.0326, "step": 193640 }, { "epoch": 0.23825, "grad_norm": 0.026626676321029663, "learning_rate": 1.3771702964198442e-07, "loss": 0.0321, "step": 193650 }, { "epoch": 0.2383, "grad_norm": 0.030735468491911888, "learning_rate": 1.3728408199213118e-07, "loss": 0.0336, "step": 193660 }, { "epoch": 0.23835, "grad_norm": 0.030684100463986397, "learning_rate": 1.368518140795888e-07, "loss": 0.0333, "step": 193670 }, { "epoch": 0.2384, "grad_norm": 0.02619747631251812, "learning_rate": 1.364202259161784e-07, "loss": 0.0326, "step": 193680 }, { "epoch": 0.23845, "grad_norm": 0.028299542143940926, "learning_rate": 1.3598931751369893e-07, "loss": 0.0339, "step": 193690 }, { "epoch": 0.2385, "grad_norm": 0.027913063764572144, "learning_rate": 1.3555908888392976e-07, "loss": 0.0321, "step": 193700 }, { "epoch": 0.23855, "grad_norm": 0.0298161543905735, "learning_rate": 1.3512954003863377e-07, "loss": 0.0327, "step": 193710 }, { "epoch": 0.2386, "grad_norm": 0.027977202087640762, "learning_rate": 1.347006709895543e-07, "loss": 0.0323, "step": 193720 }, { "epoch": 0.23865, "grad_norm": 0.031968772411346436, "learning_rate": 1.342724817484181e-07, "loss": 0.0334, "step": 193730 }, { "epoch": 0.2387, "grad_norm": 0.031502120196819305, "learning_rate": 1.3384497232692973e-07, "loss": 0.0326, "step": 193740 }, { "epoch": 0.23875, "grad_norm": 0.02637472376227379, "learning_rate": 1.3341814273677977e-07, "loss": 0.0336, "step": 193750 }, { "epoch": 0.2388, "grad_norm": 0.024537673220038414, "learning_rate": 1.3299199298963116e-07, "loss": 0.0332, "step": 193760 }, { "epoch": 0.23885, "grad_norm": 0.030038703233003616, "learning_rate": 1.32566523097144e-07, "loss": 0.0334, "step": 193770 }, { "epoch": 0.2389, "grad_norm": 0.02504717744886875, "learning_rate": 1.3214173307094513e-07, "loss": 0.0317, "step": 193780 }, { "epoch": 0.23895, "grad_norm": 0.02821286953985691, "learning_rate": 1.3171762292264744e-07, "loss": 0.0334, "step": 193790 }, { "epoch": 0.239, "grad_norm": 0.026244711130857468, "learning_rate": 1.3129419266385002e-07, "loss": 0.0319, "step": 193800 }, { "epoch": 0.23905, "grad_norm": 0.02635263279080391, "learning_rate": 1.3087144230612414e-07, "loss": 0.0327, "step": 193810 }, { "epoch": 0.2391, "grad_norm": 0.02682708390057087, "learning_rate": 1.3044937186103e-07, "loss": 0.0315, "step": 193820 }, { "epoch": 0.23915, "grad_norm": 0.029811793938279152, "learning_rate": 1.3002798134010841e-07, "loss": 0.033, "step": 193830 }, { "epoch": 0.2392, "grad_norm": 0.02868504635989666, "learning_rate": 1.2960727075487788e-07, "loss": 0.0335, "step": 193840 }, { "epoch": 0.23925, "grad_norm": 0.027720022946596146, "learning_rate": 1.2918724011684036e-07, "loss": 0.0339, "step": 193850 }, { "epoch": 0.2393, "grad_norm": 0.027101268991827965, "learning_rate": 1.2876788943748108e-07, "loss": 0.0334, "step": 193860 }, { "epoch": 0.23935, "grad_norm": 0.03304234892129898, "learning_rate": 1.2834921872826588e-07, "loss": 0.0346, "step": 193870 }, { "epoch": 0.2394, "grad_norm": 0.03118724562227726, "learning_rate": 1.279312280006356e-07, "loss": 0.0349, "step": 193880 }, { "epoch": 0.23945, "grad_norm": 0.031831204891204834, "learning_rate": 1.275139172660228e-07, "loss": 0.0344, "step": 193890 }, { "epoch": 0.2395, "grad_norm": 0.03403082862496376, "learning_rate": 1.2709728653583775e-07, "loss": 0.0351, "step": 193900 }, { "epoch": 0.23955, "grad_norm": 0.03151148185133934, "learning_rate": 1.266813358214658e-07, "loss": 0.0352, "step": 193910 }, { "epoch": 0.2396, "grad_norm": 0.033800844103097916, "learning_rate": 1.262660651342812e-07, "loss": 0.0347, "step": 193920 }, { "epoch": 0.23965, "grad_norm": 0.0265358816832304, "learning_rate": 1.2585147448563872e-07, "loss": 0.0345, "step": 193930 }, { "epoch": 0.2397, "grad_norm": 0.02647106908261776, "learning_rate": 1.2543756388687377e-07, "loss": 0.0374, "step": 193940 }, { "epoch": 0.23975, "grad_norm": 0.029090620577335358, "learning_rate": 1.2502433334929954e-07, "loss": 0.0339, "step": 193950 }, { "epoch": 0.2398, "grad_norm": 0.031015174463391304, "learning_rate": 1.2461178288421527e-07, "loss": 0.0336, "step": 193960 }, { "epoch": 0.23985, "grad_norm": 0.030042123049497604, "learning_rate": 1.2419991250289808e-07, "loss": 0.0351, "step": 193970 }, { "epoch": 0.2399, "grad_norm": 0.03131449967622757, "learning_rate": 1.237887222166112e-07, "loss": 0.0349, "step": 193980 }, { "epoch": 0.23995, "grad_norm": 0.027338171377778053, "learning_rate": 1.2337821203659562e-07, "loss": 0.0345, "step": 193990 }, { "epoch": 0.24, "grad_norm": 0.029214883223176003, "learning_rate": 1.229683819740729e-07, "loss": 0.0345, "step": 194000 }, { "epoch": 0.24005, "grad_norm": 0.030200736597180367, "learning_rate": 1.2255923204025077e-07, "loss": 0.0313, "step": 194010 }, { "epoch": 0.2401, "grad_norm": 0.026402173563838005, "learning_rate": 1.2215076224631194e-07, "loss": 0.0318, "step": 194020 }, { "epoch": 0.24015, "grad_norm": 0.028501980006694794, "learning_rate": 1.217429726034225e-07, "loss": 0.0318, "step": 194030 }, { "epoch": 0.2402, "grad_norm": 0.03134189918637276, "learning_rate": 1.2133586312273738e-07, "loss": 0.0316, "step": 194040 }, { "epoch": 0.24025, "grad_norm": 0.027999697253108025, "learning_rate": 1.20929433815381e-07, "loss": 0.0326, "step": 194050 }, { "epoch": 0.2403, "grad_norm": 0.02933386340737343, "learning_rate": 1.2052368469246954e-07, "loss": 0.0319, "step": 194060 }, { "epoch": 0.24035, "grad_norm": 0.02369658090174198, "learning_rate": 1.201186157650941e-07, "loss": 0.0315, "step": 194070 }, { "epoch": 0.2404, "grad_norm": 0.028840554878115654, "learning_rate": 1.1971422704432633e-07, "loss": 0.0328, "step": 194080 }, { "epoch": 0.24045, "grad_norm": 0.027982434257864952, "learning_rate": 1.193105185412241e-07, "loss": 0.0324, "step": 194090 }, { "epoch": 0.2405, "grad_norm": 0.02944488450884819, "learning_rate": 1.1890749026682857e-07, "loss": 0.0311, "step": 194100 }, { "epoch": 0.24055, "grad_norm": 0.02974824607372284, "learning_rate": 1.1850514223215315e-07, "loss": 0.0323, "step": 194110 }, { "epoch": 0.2406, "grad_norm": 0.02925095707178116, "learning_rate": 1.1810347444819735e-07, "loss": 0.0326, "step": 194120 }, { "epoch": 0.24065, "grad_norm": 0.026488423347473145, "learning_rate": 1.1770248692594687e-07, "loss": 0.0329, "step": 194130 }, { "epoch": 0.2407, "grad_norm": 0.028202392160892487, "learning_rate": 1.1730217967636236e-07, "loss": 0.033, "step": 194140 }, { "epoch": 0.24075, "grad_norm": 0.027688847854733467, "learning_rate": 1.1690255271038786e-07, "loss": 0.0343, "step": 194150 }, { "epoch": 0.2408, "grad_norm": 0.028941884636878967, "learning_rate": 1.1650360603895071e-07, "loss": 0.0323, "step": 194160 }, { "epoch": 0.24085, "grad_norm": 0.02789250761270523, "learning_rate": 1.1610533967295334e-07, "loss": 0.0354, "step": 194170 }, { "epoch": 0.2409, "grad_norm": 0.02641705423593521, "learning_rate": 1.1570775362329255e-07, "loss": 0.0339, "step": 194180 }, { "epoch": 0.24095, "grad_norm": 0.026302112266421318, "learning_rate": 1.1531084790082913e-07, "loss": 0.0333, "step": 194190 }, { "epoch": 0.241, "grad_norm": 0.028520043939352036, "learning_rate": 1.1491462251642104e-07, "loss": 0.0353, "step": 194200 }, { "epoch": 0.24105, "grad_norm": 0.027348719537258148, "learning_rate": 1.1451907748089574e-07, "loss": 0.0339, "step": 194210 }, { "epoch": 0.2411, "grad_norm": 0.031844280660152435, "learning_rate": 1.1412421280506957e-07, "loss": 0.034, "step": 194220 }, { "epoch": 0.24115, "grad_norm": 0.028920553624629974, "learning_rate": 1.1373002849974223e-07, "loss": 0.0334, "step": 194230 }, { "epoch": 0.2412, "grad_norm": 0.02920878306031227, "learning_rate": 1.1333652457568288e-07, "loss": 0.0334, "step": 194240 }, { "epoch": 0.24125, "grad_norm": 0.02664770931005478, "learning_rate": 1.1294370104365515e-07, "loss": 0.0333, "step": 194250 }, { "epoch": 0.2413, "grad_norm": 0.026782682165503502, "learning_rate": 1.1255155791439764e-07, "loss": 0.0336, "step": 194260 }, { "epoch": 0.24135, "grad_norm": 0.025523407384753227, "learning_rate": 1.1216009519862958e-07, "loss": 0.0343, "step": 194270 }, { "epoch": 0.2414, "grad_norm": 0.027562156319618225, "learning_rate": 1.117693129070535e-07, "loss": 0.0337, "step": 194280 }, { "epoch": 0.24145, "grad_norm": 0.03049503266811371, "learning_rate": 1.1137921105035532e-07, "loss": 0.0322, "step": 194290 }, { "epoch": 0.2415, "grad_norm": 0.02665276825428009, "learning_rate": 1.1098978963920148e-07, "loss": 0.0336, "step": 194300 }, { "epoch": 0.24155, "grad_norm": 0.027565669268369675, "learning_rate": 1.106010486842335e-07, "loss": 0.0323, "step": 194310 }, { "epoch": 0.2416, "grad_norm": 0.02628151699900627, "learning_rate": 1.1021298819608449e-07, "loss": 0.0332, "step": 194320 }, { "epoch": 0.24165, "grad_norm": 0.026651812717318535, "learning_rate": 1.0982560818535991e-07, "loss": 0.032, "step": 194330 }, { "epoch": 0.2417, "grad_norm": 0.02496226131916046, "learning_rate": 1.0943890866265405e-07, "loss": 0.0323, "step": 194340 }, { "epoch": 0.24175, "grad_norm": 0.025923805311322212, "learning_rate": 1.0905288963853344e-07, "loss": 0.0361, "step": 194350 }, { "epoch": 0.2418, "grad_norm": 0.030155358836054802, "learning_rate": 1.0866755112355908e-07, "loss": 0.0334, "step": 194360 }, { "epoch": 0.24185, "grad_norm": 0.025724314153194427, "learning_rate": 1.0828289312826423e-07, "loss": 0.0334, "step": 194370 }, { "epoch": 0.2419, "grad_norm": 0.0289236418902874, "learning_rate": 1.0789891566315991e-07, "loss": 0.0321, "step": 194380 }, { "epoch": 0.24195, "grad_norm": 0.02829277515411377, "learning_rate": 1.075156187387516e-07, "loss": 0.0325, "step": 194390 }, { "epoch": 0.242, "grad_norm": 0.032805219292640686, "learning_rate": 1.0713300236551149e-07, "loss": 0.0337, "step": 194400 }, { "epoch": 0.24205, "grad_norm": 0.025196310132741928, "learning_rate": 1.0675106655390343e-07, "loss": 0.0334, "step": 194410 }, { "epoch": 0.2421, "grad_norm": 0.030526315793395042, "learning_rate": 1.0636981131437185e-07, "loss": 0.0344, "step": 194420 }, { "epoch": 0.24215, "grad_norm": 0.026543065905570984, "learning_rate": 1.0598923665733618e-07, "loss": 0.0339, "step": 194430 }, { "epoch": 0.2422, "grad_norm": 0.028449447825551033, "learning_rate": 1.0560934259320199e-07, "loss": 0.0335, "step": 194440 }, { "epoch": 0.24225, "grad_norm": 0.027246778830885887, "learning_rate": 1.0523012913235541e-07, "loss": 0.0322, "step": 194450 }, { "epoch": 0.2423, "grad_norm": 0.025745918974280357, "learning_rate": 1.0485159628516595e-07, "loss": 0.0336, "step": 194460 }, { "epoch": 0.24235, "grad_norm": 0.02711482159793377, "learning_rate": 1.0447374406198085e-07, "loss": 0.0331, "step": 194470 }, { "epoch": 0.2424, "grad_norm": 0.025958344340324402, "learning_rate": 1.0409657247313076e-07, "loss": 0.0328, "step": 194480 }, { "epoch": 0.24245, "grad_norm": 0.02521250769495964, "learning_rate": 1.0372008152892687e-07, "loss": 0.0324, "step": 194490 }, { "epoch": 0.2425, "grad_norm": 0.02667837217450142, "learning_rate": 1.0334427123966373e-07, "loss": 0.0334, "step": 194500 }, { "epoch": 0.24255, "grad_norm": 0.029127979651093483, "learning_rate": 1.0296914161561367e-07, "loss": 0.0321, "step": 194510 }, { "epoch": 0.2426, "grad_norm": 0.029505934566259384, "learning_rate": 1.0259469266703514e-07, "loss": 0.0324, "step": 194520 }, { "epoch": 0.24265, "grad_norm": 0.03228716552257538, "learning_rate": 1.0222092440416442e-07, "loss": 0.0327, "step": 194530 }, { "epoch": 0.2427, "grad_norm": 0.028279023244976997, "learning_rate": 1.0184783683721832e-07, "loss": 0.0327, "step": 194540 }, { "epoch": 0.24275, "grad_norm": 0.02680935710668564, "learning_rate": 1.0147542997639703e-07, "loss": 0.032, "step": 194550 }, { "epoch": 0.2428, "grad_norm": 0.025176655501127243, "learning_rate": 1.0110370383188683e-07, "loss": 0.0324, "step": 194560 }, { "epoch": 0.24285, "grad_norm": 0.030117202550172806, "learning_rate": 1.0073265841384627e-07, "loss": 0.0338, "step": 194570 }, { "epoch": 0.2429, "grad_norm": 0.028528856113553047, "learning_rate": 1.0036229373242279e-07, "loss": 0.0331, "step": 194580 }, { "epoch": 0.24295, "grad_norm": 0.02289486862719059, "learning_rate": 9.999260979773607e-08, "loss": 0.0327, "step": 194590 }, { "epoch": 0.243, "grad_norm": 0.028572697192430496, "learning_rate": 9.962360661990022e-08, "loss": 0.0331, "step": 194600 }, { "epoch": 0.24305, "grad_norm": 0.027456842362880707, "learning_rate": 9.925528420899888e-08, "loss": 0.0328, "step": 194610 }, { "epoch": 0.2431, "grad_norm": 0.02405891753733158, "learning_rate": 9.888764257510174e-08, "loss": 0.0327, "step": 194620 }, { "epoch": 0.24315, "grad_norm": 0.02784058079123497, "learning_rate": 9.852068172826468e-08, "loss": 0.0323, "step": 194630 }, { "epoch": 0.2432, "grad_norm": 0.027669742703437805, "learning_rate": 9.815440167851297e-08, "loss": 0.0325, "step": 194640 }, { "epoch": 0.24325, "grad_norm": 0.03056858293712139, "learning_rate": 9.778880243586919e-08, "loss": 0.0332, "step": 194650 }, { "epoch": 0.2433, "grad_norm": 0.02696123905479908, "learning_rate": 9.742388401032254e-08, "loss": 0.0318, "step": 194660 }, { "epoch": 0.24335, "grad_norm": 0.026333358138799667, "learning_rate": 9.705964641185117e-08, "loss": 0.0325, "step": 194670 }, { "epoch": 0.2434, "grad_norm": 0.0272892527282238, "learning_rate": 9.669608965041377e-08, "loss": 0.0333, "step": 194680 }, { "epoch": 0.24345, "grad_norm": 0.027968794107437134, "learning_rate": 9.633321373594962e-08, "loss": 0.0331, "step": 194690 }, { "epoch": 0.2435, "grad_norm": 0.027383040636777878, "learning_rate": 9.597101867837854e-08, "loss": 0.0329, "step": 194700 }, { "epoch": 0.24355, "grad_norm": 0.02573508210480213, "learning_rate": 9.560950448760375e-08, "loss": 0.0321, "step": 194710 }, { "epoch": 0.2436, "grad_norm": 0.023699520155787468, "learning_rate": 9.5248671173509e-08, "loss": 0.0315, "step": 194720 }, { "epoch": 0.24365, "grad_norm": 0.024864180013537407, "learning_rate": 9.488851874596138e-08, "loss": 0.0326, "step": 194730 }, { "epoch": 0.2437, "grad_norm": 0.027155090123414993, "learning_rate": 9.452904721480304e-08, "loss": 0.0323, "step": 194740 }, { "epoch": 0.24375, "grad_norm": 0.029680032283067703, "learning_rate": 9.4170256589865e-08, "loss": 0.0335, "step": 194750 }, { "epoch": 0.2438, "grad_norm": 0.031484510749578476, "learning_rate": 9.38121468809533e-08, "loss": 0.0328, "step": 194760 }, { "epoch": 0.24385, "grad_norm": 0.024282047525048256, "learning_rate": 9.345471809786289e-08, "loss": 0.0325, "step": 194770 }, { "epoch": 0.2439, "grad_norm": 0.0297898780554533, "learning_rate": 9.309797025036371e-08, "loss": 0.0342, "step": 194780 }, { "epoch": 0.24395, "grad_norm": 0.02838975004851818, "learning_rate": 9.27419033482091e-08, "loss": 0.0334, "step": 194790 }, { "epoch": 0.244, "grad_norm": 0.026697857305407524, "learning_rate": 9.238651740113291e-08, "loss": 0.0327, "step": 194800 }, { "epoch": 0.24405, "grad_norm": 0.032274551689624786, "learning_rate": 9.20318124188524e-08, "loss": 0.0352, "step": 194810 }, { "epoch": 0.2441, "grad_norm": 0.029393857344985008, "learning_rate": 9.167778841106533e-08, "loss": 0.0324, "step": 194820 }, { "epoch": 0.24415, "grad_norm": 0.029212241992354393, "learning_rate": 9.132444538744733e-08, "loss": 0.0324, "step": 194830 }, { "epoch": 0.2442, "grad_norm": 0.029543107375502586, "learning_rate": 9.097178335766287e-08, "loss": 0.0331, "step": 194840 }, { "epoch": 0.24425, "grad_norm": 0.026619311422109604, "learning_rate": 9.061980233135148e-08, "loss": 0.0332, "step": 194850 }, { "epoch": 0.2443, "grad_norm": 0.027571335434913635, "learning_rate": 9.0268502318136e-08, "loss": 0.033, "step": 194860 }, { "epoch": 0.24435, "grad_norm": 0.028135353699326515, "learning_rate": 8.991788332762263e-08, "loss": 0.0329, "step": 194870 }, { "epoch": 0.2444, "grad_norm": 0.028476586565375328, "learning_rate": 8.95679453693954e-08, "loss": 0.0318, "step": 194880 }, { "epoch": 0.24445, "grad_norm": 0.02955649234354496, "learning_rate": 8.92186884530244e-08, "loss": 0.0317, "step": 194890 }, { "epoch": 0.2445, "grad_norm": 0.027156932279467583, "learning_rate": 8.887011258805478e-08, "loss": 0.0346, "step": 194900 }, { "epoch": 0.24455, "grad_norm": 0.02584977261722088, "learning_rate": 8.852221778401781e-08, "loss": 0.0325, "step": 194910 }, { "epoch": 0.2446, "grad_norm": 0.025581881403923035, "learning_rate": 8.817500405042256e-08, "loss": 0.0329, "step": 194920 }, { "epoch": 0.24465, "grad_norm": 0.02725781686604023, "learning_rate": 8.78284713967642e-08, "loss": 0.0337, "step": 194930 }, { "epoch": 0.2447, "grad_norm": 0.02975981868803501, "learning_rate": 8.748261983251849e-08, "loss": 0.0332, "step": 194940 }, { "epoch": 0.24475, "grad_norm": 0.02868971973657608, "learning_rate": 8.71374493671362e-08, "loss": 0.0327, "step": 194950 }, { "epoch": 0.2448, "grad_norm": 0.028604021295905113, "learning_rate": 8.679296001005976e-08, "loss": 0.035, "step": 194960 }, { "epoch": 0.24485, "grad_norm": 0.029460575431585312, "learning_rate": 8.644915177070112e-08, "loss": 0.0326, "step": 194970 }, { "epoch": 0.2449, "grad_norm": 0.027132930234074593, "learning_rate": 8.610602465846385e-08, "loss": 0.0333, "step": 194980 }, { "epoch": 0.24495, "grad_norm": 0.02888166345655918, "learning_rate": 8.576357868272933e-08, "loss": 0.0339, "step": 194990 }, { "epoch": 0.245, "grad_norm": 0.0272995438426733, "learning_rate": 8.542181385285952e-08, "loss": 0.0339, "step": 195000 }, { "epoch": 0.24505, "grad_norm": 0.0266796313226223, "learning_rate": 8.508073017819418e-08, "loss": 0.0338, "step": 195010 }, { "epoch": 0.2451, "grad_norm": 0.02482810989022255, "learning_rate": 8.474032766806472e-08, "loss": 0.0354, "step": 195020 }, { "epoch": 0.24515, "grad_norm": 0.027011895552277565, "learning_rate": 8.440060633177483e-08, "loss": 0.0333, "step": 195030 }, { "epoch": 0.2452, "grad_norm": 0.026108799502253532, "learning_rate": 8.406156617860872e-08, "loss": 0.0339, "step": 195040 }, { "epoch": 0.24525, "grad_norm": 0.030398061498999596, "learning_rate": 8.372320721783955e-08, "loss": 0.0334, "step": 195050 }, { "epoch": 0.2453, "grad_norm": 0.02703634649515152, "learning_rate": 8.338552945871825e-08, "loss": 0.0328, "step": 195060 }, { "epoch": 0.24535, "grad_norm": 0.02893189899623394, "learning_rate": 8.304853291047631e-08, "loss": 0.0345, "step": 195070 }, { "epoch": 0.2454, "grad_norm": 0.027915995568037033, "learning_rate": 8.271221758232583e-08, "loss": 0.0323, "step": 195080 }, { "epoch": 0.24545, "grad_norm": 0.02771705575287342, "learning_rate": 8.237658348346222e-08, "loss": 0.0342, "step": 195090 }, { "epoch": 0.2455, "grad_norm": 0.02994770184159279, "learning_rate": 8.204163062306425e-08, "loss": 0.0335, "step": 195100 }, { "epoch": 0.24555, "grad_norm": 0.030637120828032494, "learning_rate": 8.17073590102857e-08, "loss": 0.0331, "step": 195110 }, { "epoch": 0.2456, "grad_norm": 0.025169167667627335, "learning_rate": 8.137376865426371e-08, "loss": 0.0351, "step": 195120 }, { "epoch": 0.24565, "grad_norm": 0.029002079740166664, "learning_rate": 8.104085956412432e-08, "loss": 0.0328, "step": 195130 }, { "epoch": 0.2457, "grad_norm": 0.02710791677236557, "learning_rate": 8.070863174896303e-08, "loss": 0.0328, "step": 195140 }, { "epoch": 0.24575, "grad_norm": 0.024972479790449142, "learning_rate": 8.03770852178698e-08, "loss": 0.0341, "step": 195150 }, { "epoch": 0.2458, "grad_norm": 0.02914566360414028, "learning_rate": 8.004621997990403e-08, "loss": 0.0349, "step": 195160 }, { "epoch": 0.24585, "grad_norm": 0.027187839150428772, "learning_rate": 7.971603604411126e-08, "loss": 0.0345, "step": 195170 }, { "epoch": 0.2459, "grad_norm": 0.02553461864590645, "learning_rate": 7.93865334195204e-08, "loss": 0.033, "step": 195180 }, { "epoch": 0.24595, "grad_norm": 0.02773602120578289, "learning_rate": 7.90577121151409e-08, "loss": 0.0321, "step": 195190 }, { "epoch": 0.246, "grad_norm": 0.02327432855963707, "learning_rate": 7.872957213996002e-08, "loss": 0.0337, "step": 195200 }, { "epoch": 0.24605, "grad_norm": 0.025786172598600388, "learning_rate": 7.840211350295112e-08, "loss": 0.0332, "step": 195210 }, { "epoch": 0.2461, "grad_norm": 0.029511207714676857, "learning_rate": 7.80753362130654e-08, "loss": 0.0329, "step": 195220 }, { "epoch": 0.24615, "grad_norm": 0.026267917826771736, "learning_rate": 7.774924027923736e-08, "loss": 0.0315, "step": 195230 }, { "epoch": 0.2462, "grad_norm": 0.027054287493228912, "learning_rate": 7.742382571037931e-08, "loss": 0.0327, "step": 195240 }, { "epoch": 0.24625, "grad_norm": 0.02936594747006893, "learning_rate": 7.709909251539526e-08, "loss": 0.0322, "step": 195250 }, { "epoch": 0.2463, "grad_norm": 0.025670327246189117, "learning_rate": 7.677504070315589e-08, "loss": 0.0334, "step": 195260 }, { "epoch": 0.24635, "grad_norm": 0.02422959916293621, "learning_rate": 7.645167028252631e-08, "loss": 0.0318, "step": 195270 }, { "epoch": 0.2464, "grad_norm": 0.030943427234888077, "learning_rate": 7.612898126234114e-08, "loss": 0.0347, "step": 195280 }, { "epoch": 0.24645, "grad_norm": 0.028920264914631844, "learning_rate": 7.580697365142941e-08, "loss": 0.0317, "step": 195290 }, { "epoch": 0.2465, "grad_norm": 0.02755330316722393, "learning_rate": 7.548564745858965e-08, "loss": 0.0318, "step": 195300 }, { "epoch": 0.24655, "grad_norm": 0.031554482877254486, "learning_rate": 7.516500269260929e-08, "loss": 0.0324, "step": 195310 }, { "epoch": 0.2466, "grad_norm": 0.02594132535159588, "learning_rate": 7.484503936225629e-08, "loss": 0.0322, "step": 195320 }, { "epoch": 0.24665, "grad_norm": 0.025025133043527603, "learning_rate": 7.452575747627088e-08, "loss": 0.0329, "step": 195330 }, { "epoch": 0.2467, "grad_norm": 0.029942601919174194, "learning_rate": 7.420715704339054e-08, "loss": 0.033, "step": 195340 }, { "epoch": 0.24675, "grad_norm": 0.027799487113952637, "learning_rate": 7.388923807232217e-08, "loss": 0.0309, "step": 195350 }, { "epoch": 0.2468, "grad_norm": 0.027677707374095917, "learning_rate": 7.357200057175606e-08, "loss": 0.0327, "step": 195360 }, { "epoch": 0.24685, "grad_norm": 0.0252380333840847, "learning_rate": 7.325544455036859e-08, "loss": 0.0333, "step": 195370 }, { "epoch": 0.2469, "grad_norm": 0.02922498993575573, "learning_rate": 7.293957001681395e-08, "loss": 0.0314, "step": 195380 }, { "epoch": 0.24695, "grad_norm": 0.025162069126963615, "learning_rate": 7.262437697972413e-08, "loss": 0.0307, "step": 195390 }, { "epoch": 0.247, "grad_norm": 0.02564503252506256, "learning_rate": 7.230986544772001e-08, "loss": 0.0316, "step": 195400 }, { "epoch": 0.24705, "grad_norm": 0.028158506378531456, "learning_rate": 7.199603542940026e-08, "loss": 0.0318, "step": 195410 }, { "epoch": 0.2471, "grad_norm": 0.0253047663718462, "learning_rate": 7.168288693334135e-08, "loss": 0.0349, "step": 195420 }, { "epoch": 0.24715, "grad_norm": 0.028382329270243645, "learning_rate": 7.137041996810866e-08, "loss": 0.0317, "step": 195430 }, { "epoch": 0.2472, "grad_norm": 0.02788013219833374, "learning_rate": 7.105863454224537e-08, "loss": 0.0325, "step": 195440 }, { "epoch": 0.24725, "grad_norm": 0.026838254183530807, "learning_rate": 7.074753066427242e-08, "loss": 0.0353, "step": 195450 }, { "epoch": 0.2473, "grad_norm": 0.029704350978136063, "learning_rate": 7.043710834269413e-08, "loss": 0.0321, "step": 195460 }, { "epoch": 0.24735, "grad_norm": 0.02257765829563141, "learning_rate": 7.012736758600091e-08, "loss": 0.0325, "step": 195470 }, { "epoch": 0.2474, "grad_norm": 0.024150114506483078, "learning_rate": 6.981830840266102e-08, "loss": 0.0319, "step": 195480 }, { "epoch": 0.24745, "grad_norm": 0.025648394599556923, "learning_rate": 6.950993080112322e-08, "loss": 0.0316, "step": 195490 }, { "epoch": 0.2475, "grad_norm": 0.027938872575759888, "learning_rate": 6.92022347898169e-08, "loss": 0.0314, "step": 195500 }, { "epoch": 0.24755, "grad_norm": 0.027293045073747635, "learning_rate": 6.889522037715479e-08, "loss": 0.0312, "step": 195510 }, { "epoch": 0.2476, "grad_norm": 0.028072131797671318, "learning_rate": 6.858888757153014e-08, "loss": 0.0316, "step": 195520 }, { "epoch": 0.24765, "grad_norm": 0.0319143608212471, "learning_rate": 6.828323638132239e-08, "loss": 0.0321, "step": 195530 }, { "epoch": 0.2477, "grad_norm": 0.026307567954063416, "learning_rate": 6.797826681488318e-08, "loss": 0.0319, "step": 195540 }, { "epoch": 0.24775, "grad_norm": 0.026995103806257248, "learning_rate": 6.767397888055027e-08, "loss": 0.0318, "step": 195550 }, { "epoch": 0.2478, "grad_norm": 0.028282200917601585, "learning_rate": 6.73703725866448e-08, "loss": 0.0338, "step": 195560 }, { "epoch": 0.24785, "grad_norm": 0.027422990649938583, "learning_rate": 6.706744794146846e-08, "loss": 0.0325, "step": 195570 }, { "epoch": 0.2479, "grad_norm": 0.025190260261297226, "learning_rate": 6.676520495329797e-08, "loss": 0.0328, "step": 195580 }, { "epoch": 0.24795, "grad_norm": 0.026048485189676285, "learning_rate": 6.64636436304017e-08, "loss": 0.0333, "step": 195590 }, { "epoch": 0.248, "grad_norm": 0.028442546725273132, "learning_rate": 6.616276398102306e-08, "loss": 0.0318, "step": 195600 }, { "epoch": 0.24805, "grad_norm": 0.028586098924279213, "learning_rate": 6.586256601338603e-08, "loss": 0.0322, "step": 195610 }, { "epoch": 0.2481, "grad_norm": 0.026411043480038643, "learning_rate": 6.556304973569794e-08, "loss": 0.0328, "step": 195620 }, { "epoch": 0.24815, "grad_norm": 0.02859685756266117, "learning_rate": 6.526421515615222e-08, "loss": 0.0327, "step": 195630 }, { "epoch": 0.2482, "grad_norm": 0.029196694493293762, "learning_rate": 6.49660622829118e-08, "loss": 0.0314, "step": 195640 }, { "epoch": 0.24825, "grad_norm": 0.029072273522615433, "learning_rate": 6.466859112413404e-08, "loss": 0.0323, "step": 195650 }, { "epoch": 0.2483, "grad_norm": 0.024802634492516518, "learning_rate": 6.437180168794853e-08, "loss": 0.0312, "step": 195660 }, { "epoch": 0.24835, "grad_norm": 0.029871709644794464, "learning_rate": 6.407569398246827e-08, "loss": 0.0322, "step": 195670 }, { "epoch": 0.2484, "grad_norm": 0.02558273635804653, "learning_rate": 6.378026801579229e-08, "loss": 0.0303, "step": 195680 }, { "epoch": 0.24845, "grad_norm": 0.02865140326321125, "learning_rate": 6.34855237959947e-08, "loss": 0.0336, "step": 195690 }, { "epoch": 0.2485, "grad_norm": 0.0272515956312418, "learning_rate": 6.319146133113574e-08, "loss": 0.0301, "step": 195700 }, { "epoch": 0.24855, "grad_norm": 0.02938549779355526, "learning_rate": 6.28980806292534e-08, "loss": 0.0312, "step": 195710 }, { "epoch": 0.2486, "grad_norm": 0.02831849828362465, "learning_rate": 6.260538169836905e-08, "loss": 0.0322, "step": 195720 }, { "epoch": 0.24865, "grad_norm": 0.028205547481775284, "learning_rate": 6.231336454648462e-08, "loss": 0.0306, "step": 195730 }, { "epoch": 0.2487, "grad_norm": 0.027268605306744576, "learning_rate": 6.20220291815854e-08, "loss": 0.032, "step": 195740 }, { "epoch": 0.24875, "grad_norm": 0.029969947412610054, "learning_rate": 6.173137561163444e-08, "loss": 0.032, "step": 195750 }, { "epoch": 0.2488, "grad_norm": 0.029885603114962578, "learning_rate": 6.14414038445782e-08, "loss": 0.0337, "step": 195760 }, { "epoch": 0.24885, "grad_norm": 0.026608698070049286, "learning_rate": 6.115211388834641e-08, "loss": 0.0327, "step": 195770 }, { "epoch": 0.2489, "grad_norm": 0.026822002604603767, "learning_rate": 6.086350575084665e-08, "loss": 0.0329, "step": 195780 }, { "epoch": 0.24895, "grad_norm": 0.02918635495007038, "learning_rate": 6.057557943996705e-08, "loss": 0.0368, "step": 195790 }, { "epoch": 0.249, "grad_norm": 0.026154810562729836, "learning_rate": 6.028833496358465e-08, "loss": 0.0343, "step": 195800 }, { "epoch": 0.24905, "grad_norm": 0.028903482481837273, "learning_rate": 6.000177232954873e-08, "loss": 0.0332, "step": 195810 }, { "epoch": 0.2491, "grad_norm": 0.02927960641682148, "learning_rate": 5.971589154569468e-08, "loss": 0.0354, "step": 195820 }, { "epoch": 0.24915, "grad_norm": 0.02714477851986885, "learning_rate": 5.943069261983847e-08, "loss": 0.0345, "step": 195830 }, { "epoch": 0.2492, "grad_norm": 0.02752465382218361, "learning_rate": 5.914617555977664e-08, "loss": 0.0335, "step": 195840 }, { "epoch": 0.24925, "grad_norm": 0.028988268226385117, "learning_rate": 5.8862340373289085e-08, "loss": 0.034, "step": 195850 }, { "epoch": 0.2493, "grad_norm": 0.02949836663901806, "learning_rate": 5.857918706813625e-08, "loss": 0.0347, "step": 195860 }, { "epoch": 0.24935, "grad_norm": 0.024647751823067665, "learning_rate": 5.829671565205641e-08, "loss": 0.0343, "step": 195870 }, { "epoch": 0.2494, "grad_norm": 0.02909855730831623, "learning_rate": 5.8014926132776706e-08, "loss": 0.0333, "step": 195880 }, { "epoch": 0.24945, "grad_norm": 0.023018838837742805, "learning_rate": 5.773381851799653e-08, "loss": 0.0336, "step": 195890 }, { "epoch": 0.2495, "grad_norm": 0.02698889747262001, "learning_rate": 5.7453392815404185e-08, "loss": 0.0339, "step": 195900 }, { "epoch": 0.24955, "grad_norm": 0.022461283951997757, "learning_rate": 5.7173649032665756e-08, "loss": 0.0332, "step": 195910 }, { "epoch": 0.2496, "grad_norm": 0.02490866556763649, "learning_rate": 5.6894587177427904e-08, "loss": 0.0336, "step": 195920 }, { "epoch": 0.24965, "grad_norm": 0.03168974071741104, "learning_rate": 5.661620725732342e-08, "loss": 0.0339, "step": 195930 }, { "epoch": 0.2497, "grad_norm": 0.0300124604254961, "learning_rate": 5.63385092799601e-08, "loss": 0.0342, "step": 195940 }, { "epoch": 0.24975, "grad_norm": 0.028176583349704742, "learning_rate": 5.606149325293186e-08, "loss": 0.0348, "step": 195950 }, { "epoch": 0.2498, "grad_norm": 0.032841626554727554, "learning_rate": 5.5785159183810445e-08, "loss": 0.033, "step": 195960 }, { "epoch": 0.24985, "grad_norm": 0.02682030200958252, "learning_rate": 5.55095070801509e-08, "loss": 0.0329, "step": 195970 }, { "epoch": 0.2499, "grad_norm": 0.031123394146561623, "learning_rate": 5.5234536949491656e-08, "loss": 0.0342, "step": 195980 }, { "epoch": 0.24995, "grad_norm": 0.024029873311519623, "learning_rate": 5.496024879934891e-08, "loss": 0.0327, "step": 195990 }, { "epoch": 0.25, "grad_norm": 0.025896171107888222, "learning_rate": 5.4686642637219455e-08, "loss": 0.0328, "step": 196000 } ], "logging_steps": 10, "max_steps": 200000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }