{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 99.6268656716418, "eval_steps": 500, "global_step": 66750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03731343283582089, "grad_norm": 2.25, "learning_rate": 4.8e-05, "loss": 0.6868, "step": 25 }, { "epoch": 0.07462686567164178, "grad_norm": 0.396484375, "learning_rate": 9.8e-05, "loss": 0.1119, "step": 50 }, { "epoch": 0.11194029850746269, "grad_norm": 0.365234375, "learning_rate": 9.996415235250188e-05, "loss": 0.1082, "step": 75 }, { "epoch": 0.14925373134328357, "grad_norm": 0.419921875, "learning_rate": 9.992681105302465e-05, "loss": 0.1056, "step": 100 }, { "epoch": 0.1865671641791045, "grad_norm": 0.365234375, "learning_rate": 9.988946975354743e-05, "loss": 0.1093, "step": 125 }, { "epoch": 0.22388059701492538, "grad_norm": 0.318359375, "learning_rate": 9.985212845407021e-05, "loss": 0.1057, "step": 150 }, { "epoch": 0.26119402985074625, "grad_norm": 0.38671875, "learning_rate": 9.981478715459298e-05, "loss": 0.1056, "step": 175 }, { "epoch": 0.29850746268656714, "grad_norm": 0.326171875, "learning_rate": 9.977744585511576e-05, "loss": 0.1026, "step": 200 }, { "epoch": 0.3358208955223881, "grad_norm": 0.345703125, "learning_rate": 9.974010455563854e-05, "loss": 0.1016, "step": 225 }, { "epoch": 0.373134328358209, "grad_norm": 0.361328125, "learning_rate": 9.970276325616132e-05, "loss": 0.0966, "step": 250 }, { "epoch": 0.41044776119402987, "grad_norm": 0.392578125, "learning_rate": 9.96654219566841e-05, "loss": 0.1044, "step": 275 }, { "epoch": 0.44776119402985076, "grad_norm": 0.34375, "learning_rate": 9.962808065720688e-05, "loss": 0.1038, "step": 300 }, { "epoch": 0.48507462686567165, "grad_norm": 0.384765625, "learning_rate": 9.959073935772966e-05, "loss": 0.1049, "step": 325 }, { "epoch": 0.5223880597014925, "grad_norm": 0.279296875, "learning_rate": 9.955339805825243e-05, "loss": 0.0999, "step": 350 }, { "epoch": 0.5597014925373134, "grad_norm": 0.3046875, "learning_rate": 9.951605675877521e-05, "loss": 0.1038, "step": 375 }, { "epoch": 0.5970149253731343, "grad_norm": 0.306640625, "learning_rate": 9.947871545929799e-05, "loss": 0.1054, "step": 400 }, { "epoch": 0.6343283582089553, "grad_norm": 0.341796875, "learning_rate": 9.944137415982076e-05, "loss": 0.1025, "step": 425 }, { "epoch": 0.6716417910447762, "grad_norm": 0.28515625, "learning_rate": 9.940403286034354e-05, "loss": 0.1033, "step": 450 }, { "epoch": 0.7089552238805971, "grad_norm": 0.349609375, "learning_rate": 9.936669156086632e-05, "loss": 0.1043, "step": 475 }, { "epoch": 0.746268656716418, "grad_norm": 0.310546875, "learning_rate": 9.93293502613891e-05, "loss": 0.102, "step": 500 }, { "epoch": 0.7835820895522388, "grad_norm": 0.345703125, "learning_rate": 9.929200896191187e-05, "loss": 0.0956, "step": 525 }, { "epoch": 0.8208955223880597, "grad_norm": 0.3125, "learning_rate": 9.925466766243465e-05, "loss": 0.1005, "step": 550 }, { "epoch": 0.8582089552238806, "grad_norm": 0.265625, "learning_rate": 9.921732636295745e-05, "loss": 0.0951, "step": 575 }, { "epoch": 0.8955223880597015, "grad_norm": 0.28515625, "learning_rate": 9.917998506348021e-05, "loss": 0.1006, "step": 600 }, { "epoch": 0.9328358208955224, "grad_norm": 0.291015625, "learning_rate": 9.9142643764003e-05, "loss": 0.1002, "step": 625 }, { "epoch": 0.9701492537313433, "grad_norm": 0.279296875, "learning_rate": 9.910530246452578e-05, "loss": 0.0983, "step": 650 }, { "epoch": 1.007462686567164, "grad_norm": 0.251953125, "learning_rate": 9.906796116504854e-05, "loss": 0.0953, "step": 675 }, { "epoch": 1.044776119402985, "grad_norm": 0.2890625, "learning_rate": 9.903061986557133e-05, "loss": 0.0815, "step": 700 }, { "epoch": 1.0820895522388059, "grad_norm": 0.298828125, "learning_rate": 9.899327856609411e-05, "loss": 0.076, "step": 725 }, { "epoch": 1.1194029850746268, "grad_norm": 0.267578125, "learning_rate": 9.895593726661689e-05, "loss": 0.0765, "step": 750 }, { "epoch": 1.1567164179104479, "grad_norm": 0.28515625, "learning_rate": 9.891859596713966e-05, "loss": 0.0774, "step": 775 }, { "epoch": 1.1940298507462686, "grad_norm": 0.291015625, "learning_rate": 9.888125466766244e-05, "loss": 0.0792, "step": 800 }, { "epoch": 1.2313432835820897, "grad_norm": 0.27734375, "learning_rate": 9.884391336818522e-05, "loss": 0.0792, "step": 825 }, { "epoch": 1.2686567164179103, "grad_norm": 0.322265625, "learning_rate": 9.880657206870799e-05, "loss": 0.0762, "step": 850 }, { "epoch": 1.3059701492537314, "grad_norm": 0.244140625, "learning_rate": 9.876923076923077e-05, "loss": 0.0769, "step": 875 }, { "epoch": 1.3432835820895521, "grad_norm": 0.28125, "learning_rate": 9.873188946975355e-05, "loss": 0.0784, "step": 900 }, { "epoch": 1.3805970149253732, "grad_norm": 0.28515625, "learning_rate": 9.869454817027633e-05, "loss": 0.0798, "step": 925 }, { "epoch": 1.417910447761194, "grad_norm": 0.263671875, "learning_rate": 9.865720687079911e-05, "loss": 0.0775, "step": 950 }, { "epoch": 1.455223880597015, "grad_norm": 0.3046875, "learning_rate": 9.861986557132189e-05, "loss": 0.0759, "step": 975 }, { "epoch": 1.4925373134328357, "grad_norm": 0.296875, "learning_rate": 9.858252427184467e-05, "loss": 0.0793, "step": 1000 }, { "epoch": 1.5298507462686568, "grad_norm": 0.287109375, "learning_rate": 9.854518297236744e-05, "loss": 0.0823, "step": 1025 }, { "epoch": 1.5671641791044775, "grad_norm": 0.3828125, "learning_rate": 9.850784167289022e-05, "loss": 0.0809, "step": 1050 }, { "epoch": 1.6044776119402986, "grad_norm": 0.3046875, "learning_rate": 9.8470500373413e-05, "loss": 0.0789, "step": 1075 }, { "epoch": 1.6417910447761193, "grad_norm": 0.283203125, "learning_rate": 9.843315907393577e-05, "loss": 0.08, "step": 1100 }, { "epoch": 1.6791044776119404, "grad_norm": 0.306640625, "learning_rate": 9.839581777445855e-05, "loss": 0.0788, "step": 1125 }, { "epoch": 1.716417910447761, "grad_norm": 0.37890625, "learning_rate": 9.835847647498133e-05, "loss": 0.0836, "step": 1150 }, { "epoch": 1.7537313432835822, "grad_norm": 0.3046875, "learning_rate": 9.832113517550411e-05, "loss": 0.0778, "step": 1175 }, { "epoch": 1.7910447761194028, "grad_norm": 0.2890625, "learning_rate": 9.828379387602688e-05, "loss": 0.0824, "step": 1200 }, { "epoch": 1.828358208955224, "grad_norm": 0.28125, "learning_rate": 9.824645257654966e-05, "loss": 0.0787, "step": 1225 }, { "epoch": 1.8656716417910446, "grad_norm": 0.26953125, "learning_rate": 9.820911127707246e-05, "loss": 0.0822, "step": 1250 }, { "epoch": 1.9029850746268657, "grad_norm": 0.25, "learning_rate": 9.817176997759523e-05, "loss": 0.0777, "step": 1275 }, { "epoch": 1.9402985074626866, "grad_norm": 0.318359375, "learning_rate": 9.8134428678118e-05, "loss": 0.0819, "step": 1300 }, { "epoch": 1.9776119402985075, "grad_norm": 0.34375, "learning_rate": 9.809708737864079e-05, "loss": 0.0879, "step": 1325 }, { "epoch": 2.014925373134328, "grad_norm": 8.5625, "learning_rate": 9.805974607916356e-05, "loss": 0.109, "step": 1350 }, { "epoch": 2.0522388059701493, "grad_norm": 0.28125, "learning_rate": 9.802240477968634e-05, "loss": 0.0701, "step": 1375 }, { "epoch": 2.08955223880597, "grad_norm": 0.234375, "learning_rate": 9.798506348020912e-05, "loss": 0.0559, "step": 1400 }, { "epoch": 2.126865671641791, "grad_norm": 5.0625, "learning_rate": 9.79477221807319e-05, "loss": 0.0574, "step": 1425 }, { "epoch": 2.1641791044776117, "grad_norm": 0.296875, "learning_rate": 9.791038088125467e-05, "loss": 0.0549, "step": 1450 }, { "epoch": 2.201492537313433, "grad_norm": 0.3125, "learning_rate": 9.787303958177745e-05, "loss": 0.0588, "step": 1475 }, { "epoch": 2.2388059701492535, "grad_norm": 0.25390625, "learning_rate": 9.783569828230023e-05, "loss": 0.0542, "step": 1500 }, { "epoch": 2.2761194029850746, "grad_norm": 0.26171875, "learning_rate": 9.7798356982823e-05, "loss": 0.0557, "step": 1525 }, { "epoch": 2.3134328358208958, "grad_norm": 0.28125, "learning_rate": 9.776101568334578e-05, "loss": 0.0559, "step": 1550 }, { "epoch": 2.3507462686567164, "grad_norm": 0.318359375, "learning_rate": 9.772367438386857e-05, "loss": 0.0562, "step": 1575 }, { "epoch": 2.388059701492537, "grad_norm": 0.291015625, "learning_rate": 9.768633308439134e-05, "loss": 0.057, "step": 1600 }, { "epoch": 2.425373134328358, "grad_norm": 0.265625, "learning_rate": 9.764899178491412e-05, "loss": 0.0556, "step": 1625 }, { "epoch": 2.4626865671641793, "grad_norm": 0.279296875, "learning_rate": 9.76116504854369e-05, "loss": 0.0566, "step": 1650 }, { "epoch": 2.5, "grad_norm": 0.271484375, "learning_rate": 9.757430918595968e-05, "loss": 0.0563, "step": 1675 }, { "epoch": 2.5373134328358207, "grad_norm": 0.291015625, "learning_rate": 9.753696788648245e-05, "loss": 0.0593, "step": 1700 }, { "epoch": 2.574626865671642, "grad_norm": 0.333984375, "learning_rate": 9.749962658700523e-05, "loss": 0.058, "step": 1725 }, { "epoch": 2.611940298507463, "grad_norm": 0.310546875, "learning_rate": 9.746228528752801e-05, "loss": 0.0599, "step": 1750 }, { "epoch": 2.6492537313432836, "grad_norm": 0.310546875, "learning_rate": 9.742494398805078e-05, "loss": 0.054, "step": 1775 }, { "epoch": 2.6865671641791042, "grad_norm": 0.279296875, "learning_rate": 9.738760268857356e-05, "loss": 0.058, "step": 1800 }, { "epoch": 2.7238805970149254, "grad_norm": 0.349609375, "learning_rate": 9.735026138909634e-05, "loss": 0.0563, "step": 1825 }, { "epoch": 2.7611940298507465, "grad_norm": 0.328125, "learning_rate": 9.731292008961912e-05, "loss": 0.0581, "step": 1850 }, { "epoch": 2.798507462686567, "grad_norm": 0.265625, "learning_rate": 9.727557879014189e-05, "loss": 0.0599, "step": 1875 }, { "epoch": 2.835820895522388, "grad_norm": 0.375, "learning_rate": 9.723823749066467e-05, "loss": 0.0598, "step": 1900 }, { "epoch": 2.873134328358209, "grad_norm": 0.27734375, "learning_rate": 9.720089619118747e-05, "loss": 0.0579, "step": 1925 }, { "epoch": 2.91044776119403, "grad_norm": 0.322265625, "learning_rate": 9.716355489171024e-05, "loss": 0.0602, "step": 1950 }, { "epoch": 2.9477611940298507, "grad_norm": 0.296875, "learning_rate": 9.712621359223302e-05, "loss": 0.0579, "step": 1975 }, { "epoch": 2.9850746268656714, "grad_norm": 0.294921875, "learning_rate": 9.70888722927558e-05, "loss": 0.0624, "step": 2000 }, { "epoch": 3.0223880597014925, "grad_norm": 0.5078125, "learning_rate": 9.705153099327857e-05, "loss": 0.0452, "step": 2025 }, { "epoch": 3.0597014925373136, "grad_norm": 0.31640625, "learning_rate": 9.701418969380135e-05, "loss": 0.0344, "step": 2050 }, { "epoch": 3.0970149253731343, "grad_norm": 0.267578125, "learning_rate": 9.697684839432413e-05, "loss": 0.0341, "step": 2075 }, { "epoch": 3.1343283582089554, "grad_norm": 0.2490234375, "learning_rate": 9.693950709484691e-05, "loss": 0.0356, "step": 2100 }, { "epoch": 3.171641791044776, "grad_norm": 0.240234375, "learning_rate": 9.690216579536968e-05, "loss": 0.0354, "step": 2125 }, { "epoch": 3.208955223880597, "grad_norm": 0.279296875, "learning_rate": 9.686482449589246e-05, "loss": 0.0331, "step": 2150 }, { "epoch": 3.246268656716418, "grad_norm": 0.28515625, "learning_rate": 9.682748319641524e-05, "loss": 0.0338, "step": 2175 }, { "epoch": 3.283582089552239, "grad_norm": 0.369140625, "learning_rate": 9.679014189693802e-05, "loss": 0.0352, "step": 2200 }, { "epoch": 3.3208955223880596, "grad_norm": 0.265625, "learning_rate": 9.675280059746079e-05, "loss": 0.0362, "step": 2225 }, { "epoch": 3.3582089552238807, "grad_norm": 0.306640625, "learning_rate": 9.671545929798358e-05, "loss": 0.0368, "step": 2250 }, { "epoch": 3.3955223880597014, "grad_norm": 0.259765625, "learning_rate": 9.667811799850635e-05, "loss": 0.0357, "step": 2275 }, { "epoch": 3.4328358208955225, "grad_norm": 0.287109375, "learning_rate": 9.664077669902913e-05, "loss": 0.0363, "step": 2300 }, { "epoch": 3.470149253731343, "grad_norm": 0.3125, "learning_rate": 9.660343539955191e-05, "loss": 0.0365, "step": 2325 }, { "epoch": 3.5074626865671643, "grad_norm": 0.2275390625, "learning_rate": 9.65660941000747e-05, "loss": 0.0369, "step": 2350 }, { "epoch": 3.544776119402985, "grad_norm": 0.251953125, "learning_rate": 9.652875280059746e-05, "loss": 0.0366, "step": 2375 }, { "epoch": 3.582089552238806, "grad_norm": 0.232421875, "learning_rate": 9.649141150112024e-05, "loss": 0.0371, "step": 2400 }, { "epoch": 3.6194029850746268, "grad_norm": 0.287109375, "learning_rate": 9.645407020164302e-05, "loss": 0.0369, "step": 2425 }, { "epoch": 3.656716417910448, "grad_norm": 0.267578125, "learning_rate": 9.64167289021658e-05, "loss": 0.0374, "step": 2450 }, { "epoch": 3.6940298507462686, "grad_norm": 0.3203125, "learning_rate": 9.637938760268857e-05, "loss": 0.0383, "step": 2475 }, { "epoch": 3.7313432835820897, "grad_norm": 0.546875, "learning_rate": 9.634204630321135e-05, "loss": 0.0391, "step": 2500 }, { "epoch": 3.7686567164179103, "grad_norm": 0.34765625, "learning_rate": 9.630470500373413e-05, "loss": 0.039, "step": 2525 }, { "epoch": 3.8059701492537314, "grad_norm": 0.302734375, "learning_rate": 9.62673637042569e-05, "loss": 0.0396, "step": 2550 }, { "epoch": 3.843283582089552, "grad_norm": 0.283203125, "learning_rate": 9.62300224047797e-05, "loss": 0.039, "step": 2575 }, { "epoch": 3.8805970149253732, "grad_norm": 0.31640625, "learning_rate": 9.619268110530248e-05, "loss": 0.039, "step": 2600 }, { "epoch": 3.917910447761194, "grad_norm": 0.255859375, "learning_rate": 9.615533980582525e-05, "loss": 0.0383, "step": 2625 }, { "epoch": 3.955223880597015, "grad_norm": 0.259765625, "learning_rate": 9.611799850634803e-05, "loss": 0.0403, "step": 2650 }, { "epoch": 3.9925373134328357, "grad_norm": 0.298828125, "learning_rate": 9.608065720687081e-05, "loss": 0.0393, "step": 2675 }, { "epoch": 4.029850746268656, "grad_norm": 0.2265625, "learning_rate": 9.604331590739359e-05, "loss": 0.0245, "step": 2700 }, { "epoch": 4.067164179104478, "grad_norm": 0.2373046875, "learning_rate": 9.600597460791636e-05, "loss": 0.018, "step": 2725 }, { "epoch": 4.104477611940299, "grad_norm": 0.2890625, "learning_rate": 9.596863330843914e-05, "loss": 0.0192, "step": 2750 }, { "epoch": 4.141791044776119, "grad_norm": 0.263671875, "learning_rate": 9.593129200896192e-05, "loss": 0.023, "step": 2775 }, { "epoch": 4.17910447761194, "grad_norm": 0.265625, "learning_rate": 9.589395070948469e-05, "loss": 0.0233, "step": 2800 }, { "epoch": 4.2164179104477615, "grad_norm": 0.279296875, "learning_rate": 9.585660941000747e-05, "loss": 0.0218, "step": 2825 }, { "epoch": 4.253731343283582, "grad_norm": 0.1845703125, "learning_rate": 9.581926811053025e-05, "loss": 0.0203, "step": 2850 }, { "epoch": 4.291044776119403, "grad_norm": 0.2060546875, "learning_rate": 9.578192681105303e-05, "loss": 0.0204, "step": 2875 }, { "epoch": 4.3283582089552235, "grad_norm": 0.259765625, "learning_rate": 9.574458551157581e-05, "loss": 0.0218, "step": 2900 }, { "epoch": 4.365671641791045, "grad_norm": 0.234375, "learning_rate": 9.570724421209859e-05, "loss": 0.0209, "step": 2925 }, { "epoch": 4.402985074626866, "grad_norm": 0.1943359375, "learning_rate": 9.566990291262137e-05, "loss": 0.02, "step": 2950 }, { "epoch": 4.440298507462686, "grad_norm": 0.271484375, "learning_rate": 9.563256161314414e-05, "loss": 0.0203, "step": 2975 }, { "epoch": 4.477611940298507, "grad_norm": 0.255859375, "learning_rate": 9.559522031366692e-05, "loss": 0.0225, "step": 3000 }, { "epoch": 4.514925373134329, "grad_norm": 0.2578125, "learning_rate": 9.55578790141897e-05, "loss": 0.0226, "step": 3025 }, { "epoch": 4.552238805970149, "grad_norm": 0.279296875, "learning_rate": 9.552053771471247e-05, "loss": 0.021, "step": 3050 }, { "epoch": 4.58955223880597, "grad_norm": 0.2216796875, "learning_rate": 9.548319641523525e-05, "loss": 0.0233, "step": 3075 }, { "epoch": 4.6268656716417915, "grad_norm": 0.314453125, "learning_rate": 9.544585511575803e-05, "loss": 0.0214, "step": 3100 }, { "epoch": 4.664179104477612, "grad_norm": 0.52734375, "learning_rate": 9.540851381628081e-05, "loss": 0.0233, "step": 3125 }, { "epoch": 4.701492537313433, "grad_norm": 0.2578125, "learning_rate": 9.537117251680358e-05, "loss": 0.0244, "step": 3150 }, { "epoch": 4.7388059701492535, "grad_norm": 0.30859375, "learning_rate": 9.533383121732636e-05, "loss": 0.0234, "step": 3175 }, { "epoch": 4.776119402985074, "grad_norm": 0.330078125, "learning_rate": 9.529648991784914e-05, "loss": 0.0238, "step": 3200 }, { "epoch": 4.813432835820896, "grad_norm": 0.279296875, "learning_rate": 9.525914861837191e-05, "loss": 0.0234, "step": 3225 }, { "epoch": 4.850746268656716, "grad_norm": 0.1689453125, "learning_rate": 9.522180731889471e-05, "loss": 0.0241, "step": 3250 }, { "epoch": 4.888059701492537, "grad_norm": 0.26953125, "learning_rate": 9.518446601941749e-05, "loss": 0.0232, "step": 3275 }, { "epoch": 4.925373134328359, "grad_norm": 0.25390625, "learning_rate": 9.514712471994026e-05, "loss": 0.0227, "step": 3300 }, { "epoch": 4.962686567164179, "grad_norm": 0.2216796875, "learning_rate": 9.510978342046304e-05, "loss": 0.0227, "step": 3325 }, { "epoch": 5.0, "grad_norm": 0.31640625, "learning_rate": 9.507244212098582e-05, "loss": 0.0224, "step": 3350 }, { "epoch": 5.037313432835821, "grad_norm": 0.193359375, "learning_rate": 9.50351008215086e-05, "loss": 0.0104, "step": 3375 }, { "epoch": 5.074626865671641, "grad_norm": 0.12255859375, "learning_rate": 9.499775952203137e-05, "loss": 0.0096, "step": 3400 }, { "epoch": 5.111940298507463, "grad_norm": 0.2734375, "learning_rate": 9.496041822255415e-05, "loss": 0.0122, "step": 3425 }, { "epoch": 5.149253731343284, "grad_norm": 0.2109375, "learning_rate": 9.492307692307693e-05, "loss": 0.011, "step": 3450 }, { "epoch": 5.186567164179104, "grad_norm": 0.2197265625, "learning_rate": 9.48857356235997e-05, "loss": 0.0117, "step": 3475 }, { "epoch": 5.223880597014926, "grad_norm": 0.1787109375, "learning_rate": 9.484839432412248e-05, "loss": 0.0102, "step": 3500 }, { "epoch": 5.2611940298507465, "grad_norm": 0.2119140625, "learning_rate": 9.481105302464526e-05, "loss": 0.0107, "step": 3525 }, { "epoch": 5.298507462686567, "grad_norm": 0.197265625, "learning_rate": 9.477371172516804e-05, "loss": 0.0114, "step": 3550 }, { "epoch": 5.335820895522388, "grad_norm": 0.15234375, "learning_rate": 9.473637042569082e-05, "loss": 0.0123, "step": 3575 }, { "epoch": 5.373134328358209, "grad_norm": 0.1552734375, "learning_rate": 9.46990291262136e-05, "loss": 0.0125, "step": 3600 }, { "epoch": 5.41044776119403, "grad_norm": 0.189453125, "learning_rate": 9.466168782673638e-05, "loss": 0.0122, "step": 3625 }, { "epoch": 5.447761194029851, "grad_norm": 0.18359375, "learning_rate": 9.462434652725915e-05, "loss": 0.0112, "step": 3650 }, { "epoch": 5.485074626865671, "grad_norm": 0.2080078125, "learning_rate": 9.458700522778193e-05, "loss": 0.0127, "step": 3675 }, { "epoch": 5.522388059701493, "grad_norm": 0.1650390625, "learning_rate": 9.454966392830471e-05, "loss": 0.0113, "step": 3700 }, { "epoch": 5.559701492537314, "grad_norm": 0.19921875, "learning_rate": 9.451232262882748e-05, "loss": 0.012, "step": 3725 }, { "epoch": 5.597014925373134, "grad_norm": 0.18359375, "learning_rate": 9.447498132935026e-05, "loss": 0.0132, "step": 3750 }, { "epoch": 5.634328358208955, "grad_norm": 0.2890625, "learning_rate": 9.443764002987304e-05, "loss": 0.0121, "step": 3775 }, { "epoch": 5.6716417910447765, "grad_norm": 0.216796875, "learning_rate": 9.440029873039583e-05, "loss": 0.0119, "step": 3800 }, { "epoch": 5.708955223880597, "grad_norm": 0.1650390625, "learning_rate": 9.436295743091859e-05, "loss": 0.013, "step": 3825 }, { "epoch": 5.746268656716418, "grad_norm": 0.2080078125, "learning_rate": 9.432561613144137e-05, "loss": 0.0144, "step": 3850 }, { "epoch": 5.7835820895522385, "grad_norm": 0.1982421875, "learning_rate": 9.428827483196416e-05, "loss": 0.0119, "step": 3875 }, { "epoch": 5.82089552238806, "grad_norm": 0.2177734375, "learning_rate": 9.425093353248694e-05, "loss": 0.0129, "step": 3900 }, { "epoch": 5.858208955223881, "grad_norm": 0.185546875, "learning_rate": 9.421359223300972e-05, "loss": 0.0128, "step": 3925 }, { "epoch": 5.895522388059701, "grad_norm": 0.177734375, "learning_rate": 9.41762509335325e-05, "loss": 0.0134, "step": 3950 }, { "epoch": 5.932835820895522, "grad_norm": 0.220703125, "learning_rate": 9.413890963405527e-05, "loss": 0.0125, "step": 3975 }, { "epoch": 5.970149253731344, "grad_norm": 0.2734375, "learning_rate": 9.410156833457805e-05, "loss": 0.0142, "step": 4000 }, { "epoch": 6.007462686567164, "grad_norm": 0.10791015625, "learning_rate": 9.406422703510083e-05, "loss": 0.0122, "step": 4025 }, { "epoch": 6.044776119402985, "grad_norm": 0.373046875, "learning_rate": 9.402688573562361e-05, "loss": 0.0063, "step": 4050 }, { "epoch": 6.082089552238806, "grad_norm": 0.11279296875, "learning_rate": 9.398954443614638e-05, "loss": 0.0064, "step": 4075 }, { "epoch": 6.119402985074627, "grad_norm": 0.0908203125, "learning_rate": 9.395220313666916e-05, "loss": 0.0077, "step": 4100 }, { "epoch": 6.156716417910448, "grad_norm": 0.166015625, "learning_rate": 9.391486183719194e-05, "loss": 0.0072, "step": 4125 }, { "epoch": 6.1940298507462686, "grad_norm": 0.1552734375, "learning_rate": 9.387752053771471e-05, "loss": 0.0067, "step": 4150 }, { "epoch": 6.231343283582089, "grad_norm": 0.1376953125, "learning_rate": 9.384017923823749e-05, "loss": 0.0071, "step": 4175 }, { "epoch": 6.268656716417911, "grad_norm": 0.115234375, "learning_rate": 9.380283793876027e-05, "loss": 0.0075, "step": 4200 }, { "epoch": 6.3059701492537314, "grad_norm": 0.185546875, "learning_rate": 9.376549663928305e-05, "loss": 0.0069, "step": 4225 }, { "epoch": 6.343283582089552, "grad_norm": 0.11376953125, "learning_rate": 9.372815533980583e-05, "loss": 0.0067, "step": 4250 }, { "epoch": 6.380597014925373, "grad_norm": 0.10595703125, "learning_rate": 9.369081404032861e-05, "loss": 0.0072, "step": 4275 }, { "epoch": 6.417910447761194, "grad_norm": 0.0869140625, "learning_rate": 9.36534727408514e-05, "loss": 0.0072, "step": 4300 }, { "epoch": 6.455223880597015, "grad_norm": 0.142578125, "learning_rate": 9.361613144137416e-05, "loss": 0.0076, "step": 4325 }, { "epoch": 6.492537313432836, "grad_norm": 0.259765625, "learning_rate": 9.357879014189694e-05, "loss": 0.008, "step": 4350 }, { "epoch": 6.529850746268656, "grad_norm": 0.271484375, "learning_rate": 9.354144884241972e-05, "loss": 0.0074, "step": 4375 }, { "epoch": 6.567164179104478, "grad_norm": 0.1875, "learning_rate": 9.350410754294249e-05, "loss": 0.0091, "step": 4400 }, { "epoch": 6.604477611940299, "grad_norm": 0.21875, "learning_rate": 9.346676624346527e-05, "loss": 0.0076, "step": 4425 }, { "epoch": 6.641791044776119, "grad_norm": 0.11328125, "learning_rate": 9.342942494398805e-05, "loss": 0.0081, "step": 4450 }, { "epoch": 6.67910447761194, "grad_norm": 0.07958984375, "learning_rate": 9.339208364451084e-05, "loss": 0.0085, "step": 4475 }, { "epoch": 6.7164179104477615, "grad_norm": 0.234375, "learning_rate": 9.33547423450336e-05, "loss": 0.0094, "step": 4500 }, { "epoch": 6.753731343283582, "grad_norm": 0.1982421875, "learning_rate": 9.331740104555638e-05, "loss": 0.009, "step": 4525 }, { "epoch": 6.791044776119403, "grad_norm": 0.13671875, "learning_rate": 9.328005974607917e-05, "loss": 0.0084, "step": 4550 }, { "epoch": 6.8283582089552235, "grad_norm": 0.16015625, "learning_rate": 9.324271844660195e-05, "loss": 0.0088, "step": 4575 }, { "epoch": 6.865671641791045, "grad_norm": 0.150390625, "learning_rate": 9.320537714712473e-05, "loss": 0.0099, "step": 4600 }, { "epoch": 6.902985074626866, "grad_norm": 0.1474609375, "learning_rate": 9.316803584764751e-05, "loss": 0.009, "step": 4625 }, { "epoch": 6.940298507462686, "grad_norm": 0.099609375, "learning_rate": 9.313069454817028e-05, "loss": 0.0091, "step": 4650 }, { "epoch": 6.977611940298507, "grad_norm": 0.2490234375, "learning_rate": 9.309335324869306e-05, "loss": 0.0103, "step": 4675 }, { "epoch": 7.014925373134329, "grad_norm": 0.150390625, "learning_rate": 9.305601194921584e-05, "loss": 0.0095, "step": 4700 }, { "epoch": 7.052238805970149, "grad_norm": 0.1103515625, "learning_rate": 9.301867064973862e-05, "loss": 0.0064, "step": 4725 }, { "epoch": 7.08955223880597, "grad_norm": 0.193359375, "learning_rate": 9.298132935026139e-05, "loss": 0.0062, "step": 4750 }, { "epoch": 7.126865671641791, "grad_norm": 0.11328125, "learning_rate": 9.294398805078417e-05, "loss": 0.0053, "step": 4775 }, { "epoch": 7.164179104477612, "grad_norm": 0.15625, "learning_rate": 9.290664675130695e-05, "loss": 0.0052, "step": 4800 }, { "epoch": 7.201492537313433, "grad_norm": 0.0947265625, "learning_rate": 9.286930545182972e-05, "loss": 0.005, "step": 4825 }, { "epoch": 7.2388059701492535, "grad_norm": 0.11376953125, "learning_rate": 9.28319641523525e-05, "loss": 0.0045, "step": 4850 }, { "epoch": 7.276119402985074, "grad_norm": 0.10205078125, "learning_rate": 9.279462285287528e-05, "loss": 0.0051, "step": 4875 }, { "epoch": 7.313432835820896, "grad_norm": 0.1240234375, "learning_rate": 9.275728155339806e-05, "loss": 0.0062, "step": 4900 }, { "epoch": 7.350746268656716, "grad_norm": 0.12255859375, "learning_rate": 9.271994025392084e-05, "loss": 0.0061, "step": 4925 }, { "epoch": 7.388059701492537, "grad_norm": 0.10546875, "learning_rate": 9.268259895444362e-05, "loss": 0.006, "step": 4950 }, { "epoch": 7.425373134328359, "grad_norm": 0.1357421875, "learning_rate": 9.26452576549664e-05, "loss": 0.0057, "step": 4975 }, { "epoch": 7.462686567164179, "grad_norm": 0.1396484375, "learning_rate": 9.260791635548917e-05, "loss": 0.0066, "step": 5000 }, { "epoch": 7.5, "grad_norm": 0.08740234375, "learning_rate": 9.257057505601195e-05, "loss": 0.0058, "step": 5025 }, { "epoch": 7.537313432835821, "grad_norm": 0.1513671875, "learning_rate": 9.253323375653473e-05, "loss": 0.0058, "step": 5050 }, { "epoch": 7.574626865671641, "grad_norm": 0.14453125, "learning_rate": 9.24958924570575e-05, "loss": 0.0068, "step": 5075 }, { "epoch": 7.611940298507463, "grad_norm": 0.12255859375, "learning_rate": 9.245855115758028e-05, "loss": 0.0066, "step": 5100 }, { "epoch": 7.649253731343284, "grad_norm": 0.1416015625, "learning_rate": 9.242120985810306e-05, "loss": 0.0069, "step": 5125 }, { "epoch": 7.686567164179104, "grad_norm": 0.171875, "learning_rate": 9.238386855862585e-05, "loss": 0.0084, "step": 5150 }, { "epoch": 7.723880597014926, "grad_norm": 0.109375, "learning_rate": 9.234652725914861e-05, "loss": 0.0075, "step": 5175 }, { "epoch": 7.7611940298507465, "grad_norm": 0.181640625, "learning_rate": 9.23091859596714e-05, "loss": 0.0072, "step": 5200 }, { "epoch": 7.798507462686567, "grad_norm": 0.2431640625, "learning_rate": 9.227184466019419e-05, "loss": 0.0074, "step": 5225 }, { "epoch": 7.835820895522388, "grad_norm": 0.1083984375, "learning_rate": 9.223450336071696e-05, "loss": 0.0073, "step": 5250 }, { "epoch": 7.8731343283582085, "grad_norm": 0.1455078125, "learning_rate": 9.219716206123974e-05, "loss": 0.0068, "step": 5275 }, { "epoch": 7.91044776119403, "grad_norm": 0.1513671875, "learning_rate": 9.215982076176252e-05, "loss": 0.007, "step": 5300 }, { "epoch": 7.947761194029851, "grad_norm": 0.10400390625, "learning_rate": 9.212247946228529e-05, "loss": 0.0069, "step": 5325 }, { "epoch": 7.985074626865671, "grad_norm": 0.11474609375, "learning_rate": 9.208513816280807e-05, "loss": 0.0077, "step": 5350 }, { "epoch": 8.022388059701493, "grad_norm": 0.181640625, "learning_rate": 9.204779686333085e-05, "loss": 0.0064, "step": 5375 }, { "epoch": 8.059701492537313, "grad_norm": 0.07958984375, "learning_rate": 9.201045556385363e-05, "loss": 0.0042, "step": 5400 }, { "epoch": 8.097014925373134, "grad_norm": 0.09912109375, "learning_rate": 9.19731142643764e-05, "loss": 0.0042, "step": 5425 }, { "epoch": 8.134328358208956, "grad_norm": 0.171875, "learning_rate": 9.193577296489918e-05, "loss": 0.0051, "step": 5450 }, { "epoch": 8.171641791044776, "grad_norm": 0.251953125, "learning_rate": 9.189843166542196e-05, "loss": 0.005, "step": 5475 }, { "epoch": 8.208955223880597, "grad_norm": 0.103515625, "learning_rate": 9.186109036594474e-05, "loss": 0.0045, "step": 5500 }, { "epoch": 8.246268656716419, "grad_norm": 0.09130859375, "learning_rate": 9.182374906646751e-05, "loss": 0.0056, "step": 5525 }, { "epoch": 8.283582089552239, "grad_norm": 0.1279296875, "learning_rate": 9.17864077669903e-05, "loss": 0.006, "step": 5550 }, { "epoch": 8.32089552238806, "grad_norm": 0.24609375, "learning_rate": 9.174906646751309e-05, "loss": 0.0051, "step": 5575 }, { "epoch": 8.35820895522388, "grad_norm": 0.1015625, "learning_rate": 9.171172516803585e-05, "loss": 0.0052, "step": 5600 }, { "epoch": 8.395522388059701, "grad_norm": 0.2333984375, "learning_rate": 9.167438386855863e-05, "loss": 0.0059, "step": 5625 }, { "epoch": 8.432835820895523, "grad_norm": 0.23828125, "learning_rate": 9.163704256908142e-05, "loss": 0.0068, "step": 5650 }, { "epoch": 8.470149253731343, "grad_norm": 0.1005859375, "learning_rate": 9.159970126960418e-05, "loss": 0.0063, "step": 5675 }, { "epoch": 8.507462686567164, "grad_norm": 0.091796875, "learning_rate": 9.156235997012696e-05, "loss": 0.0058, "step": 5700 }, { "epoch": 8.544776119402986, "grad_norm": 0.173828125, "learning_rate": 9.152501867064975e-05, "loss": 0.005, "step": 5725 }, { "epoch": 8.582089552238806, "grad_norm": 0.12255859375, "learning_rate": 9.148767737117253e-05, "loss": 0.0069, "step": 5750 }, { "epoch": 8.619402985074627, "grad_norm": 0.1787109375, "learning_rate": 9.14503360716953e-05, "loss": 0.0054, "step": 5775 }, { "epoch": 8.656716417910447, "grad_norm": 0.1826171875, "learning_rate": 9.141299477221808e-05, "loss": 0.0076, "step": 5800 }, { "epoch": 8.694029850746269, "grad_norm": 0.271484375, "learning_rate": 9.137565347274086e-05, "loss": 0.0078, "step": 5825 }, { "epoch": 8.73134328358209, "grad_norm": 0.1474609375, "learning_rate": 9.133831217326362e-05, "loss": 0.0082, "step": 5850 }, { "epoch": 8.76865671641791, "grad_norm": 0.2578125, "learning_rate": 9.13009708737864e-05, "loss": 0.008, "step": 5875 }, { "epoch": 8.805970149253731, "grad_norm": 0.24609375, "learning_rate": 9.12636295743092e-05, "loss": 0.0083, "step": 5900 }, { "epoch": 8.843283582089553, "grad_norm": 0.1240234375, "learning_rate": 9.122628827483197e-05, "loss": 0.0074, "step": 5925 }, { "epoch": 8.880597014925373, "grad_norm": 0.138671875, "learning_rate": 9.118894697535475e-05, "loss": 0.0088, "step": 5950 }, { "epoch": 8.917910447761194, "grad_norm": 0.13671875, "learning_rate": 9.115160567587753e-05, "loss": 0.0086, "step": 5975 }, { "epoch": 8.955223880597014, "grad_norm": 0.2001953125, "learning_rate": 9.111426437640031e-05, "loss": 0.0087, "step": 6000 }, { "epoch": 8.992537313432836, "grad_norm": 0.224609375, "learning_rate": 9.107692307692308e-05, "loss": 0.0086, "step": 6025 }, { "epoch": 9.029850746268657, "grad_norm": 0.326171875, "learning_rate": 9.103958177744586e-05, "loss": 0.0064, "step": 6050 }, { "epoch": 9.067164179104477, "grad_norm": 0.1083984375, "learning_rate": 9.100224047796864e-05, "loss": 0.0059, "step": 6075 }, { "epoch": 9.104477611940299, "grad_norm": 0.36328125, "learning_rate": 9.096489917849141e-05, "loss": 0.0062, "step": 6100 }, { "epoch": 9.14179104477612, "grad_norm": 0.3828125, "learning_rate": 9.092755787901419e-05, "loss": 0.0067, "step": 6125 }, { "epoch": 9.17910447761194, "grad_norm": 0.1845703125, "learning_rate": 9.089021657953697e-05, "loss": 0.006, "step": 6150 }, { "epoch": 9.216417910447761, "grad_norm": 0.1865234375, "learning_rate": 9.085287528005975e-05, "loss": 0.0065, "step": 6175 }, { "epoch": 9.253731343283581, "grad_norm": 0.1650390625, "learning_rate": 9.081553398058252e-05, "loss": 0.0074, "step": 6200 }, { "epoch": 9.291044776119403, "grad_norm": 0.0908203125, "learning_rate": 9.077819268110531e-05, "loss": 0.0079, "step": 6225 }, { "epoch": 9.328358208955224, "grad_norm": 0.1162109375, "learning_rate": 9.07408513816281e-05, "loss": 0.0071, "step": 6250 }, { "epoch": 9.365671641791044, "grad_norm": 0.14453125, "learning_rate": 9.070351008215086e-05, "loss": 0.0063, "step": 6275 }, { "epoch": 9.402985074626866, "grad_norm": 0.162109375, "learning_rate": 9.066616878267364e-05, "loss": 0.0063, "step": 6300 }, { "epoch": 9.440298507462687, "grad_norm": 0.09375, "learning_rate": 9.062882748319643e-05, "loss": 0.0062, "step": 6325 }, { "epoch": 9.477611940298507, "grad_norm": 0.130859375, "learning_rate": 9.059148618371919e-05, "loss": 0.0074, "step": 6350 }, { "epoch": 9.514925373134329, "grad_norm": 0.275390625, "learning_rate": 9.055414488424197e-05, "loss": 0.007, "step": 6375 }, { "epoch": 9.552238805970148, "grad_norm": 0.11572265625, "learning_rate": 9.051680358476476e-05, "loss": 0.0064, "step": 6400 }, { "epoch": 9.58955223880597, "grad_norm": 0.10546875, "learning_rate": 9.047946228528754e-05, "loss": 0.0065, "step": 6425 }, { "epoch": 9.626865671641792, "grad_norm": 0.138671875, "learning_rate": 9.04421209858103e-05, "loss": 0.007, "step": 6450 }, { "epoch": 9.664179104477611, "grad_norm": 0.07421875, "learning_rate": 9.040477968633309e-05, "loss": 0.0056, "step": 6475 }, { "epoch": 9.701492537313433, "grad_norm": 0.12353515625, "learning_rate": 9.036743838685587e-05, "loss": 0.0069, "step": 6500 }, { "epoch": 9.738805970149254, "grad_norm": 0.1396484375, "learning_rate": 9.033009708737863e-05, "loss": 0.0075, "step": 6525 }, { "epoch": 9.776119402985074, "grad_norm": 0.08740234375, "learning_rate": 9.029275578790143e-05, "loss": 0.0063, "step": 6550 }, { "epoch": 9.813432835820896, "grad_norm": 0.1591796875, "learning_rate": 9.025541448842421e-05, "loss": 0.008, "step": 6575 }, { "epoch": 9.850746268656717, "grad_norm": 0.326171875, "learning_rate": 9.021807318894698e-05, "loss": 0.0064, "step": 6600 }, { "epoch": 9.888059701492537, "grad_norm": 0.220703125, "learning_rate": 9.018073188946976e-05, "loss": 0.0105, "step": 6625 }, { "epoch": 9.925373134328359, "grad_norm": 0.1455078125, "learning_rate": 9.014339058999254e-05, "loss": 0.0085, "step": 6650 }, { "epoch": 9.962686567164178, "grad_norm": 0.23046875, "learning_rate": 9.010604929051532e-05, "loss": 0.0087, "step": 6675 }, { "epoch": 10.0, "grad_norm": 0.32421875, "learning_rate": 9.006870799103809e-05, "loss": 0.0077, "step": 6700 }, { "epoch": 10.037313432835822, "grad_norm": 0.26171875, "learning_rate": 9.003136669156087e-05, "loss": 0.0049, "step": 6725 }, { "epoch": 10.074626865671641, "grad_norm": 0.2119140625, "learning_rate": 8.999402539208365e-05, "loss": 0.0049, "step": 6750 }, { "epoch": 10.111940298507463, "grad_norm": 0.09619140625, "learning_rate": 8.995668409260642e-05, "loss": 0.0055, "step": 6775 }, { "epoch": 10.149253731343283, "grad_norm": 0.10107421875, "learning_rate": 8.99193427931292e-05, "loss": 0.0054, "step": 6800 }, { "epoch": 10.186567164179104, "grad_norm": 0.040771484375, "learning_rate": 8.988200149365198e-05, "loss": 0.0049, "step": 6825 }, { "epoch": 10.223880597014926, "grad_norm": 0.0791015625, "learning_rate": 8.984466019417476e-05, "loss": 0.0046, "step": 6850 }, { "epoch": 10.261194029850746, "grad_norm": 0.150390625, "learning_rate": 8.980731889469754e-05, "loss": 0.0057, "step": 6875 }, { "epoch": 10.298507462686567, "grad_norm": 0.2119140625, "learning_rate": 8.976997759522032e-05, "loss": 0.0058, "step": 6900 }, { "epoch": 10.335820895522389, "grad_norm": 0.10205078125, "learning_rate": 8.97326362957431e-05, "loss": 0.0049, "step": 6925 }, { "epoch": 10.373134328358208, "grad_norm": 0.130859375, "learning_rate": 8.969529499626587e-05, "loss": 0.0055, "step": 6950 }, { "epoch": 10.41044776119403, "grad_norm": 0.11572265625, "learning_rate": 8.965795369678865e-05, "loss": 0.0051, "step": 6975 }, { "epoch": 10.447761194029852, "grad_norm": 0.2431640625, "learning_rate": 8.962061239731144e-05, "loss": 0.0053, "step": 7000 }, { "epoch": 10.485074626865671, "grad_norm": 0.220703125, "learning_rate": 8.95832710978342e-05, "loss": 0.0051, "step": 7025 }, { "epoch": 10.522388059701493, "grad_norm": 0.0732421875, "learning_rate": 8.954592979835698e-05, "loss": 0.0053, "step": 7050 }, { "epoch": 10.559701492537313, "grad_norm": 0.0810546875, "learning_rate": 8.950858849887977e-05, "loss": 0.0045, "step": 7075 }, { "epoch": 10.597014925373134, "grad_norm": 0.2197265625, "learning_rate": 8.947124719940255e-05, "loss": 0.0053, "step": 7100 }, { "epoch": 10.634328358208956, "grad_norm": 0.10107421875, "learning_rate": 8.943390589992531e-05, "loss": 0.0055, "step": 7125 }, { "epoch": 10.671641791044776, "grad_norm": 0.26953125, "learning_rate": 8.93965646004481e-05, "loss": 0.0055, "step": 7150 }, { "epoch": 10.708955223880597, "grad_norm": 0.1630859375, "learning_rate": 8.935922330097088e-05, "loss": 0.0053, "step": 7175 }, { "epoch": 10.746268656716419, "grad_norm": 0.1396484375, "learning_rate": 8.932188200149364e-05, "loss": 0.0056, "step": 7200 }, { "epoch": 10.783582089552239, "grad_norm": 0.2109375, "learning_rate": 8.928454070201644e-05, "loss": 0.0053, "step": 7225 }, { "epoch": 10.82089552238806, "grad_norm": 0.0810546875, "learning_rate": 8.924719940253922e-05, "loss": 0.0052, "step": 7250 }, { "epoch": 10.85820895522388, "grad_norm": 0.0859375, "learning_rate": 8.920985810306199e-05, "loss": 0.0048, "step": 7275 }, { "epoch": 10.895522388059701, "grad_norm": 0.1494140625, "learning_rate": 8.917251680358477e-05, "loss": 0.0049, "step": 7300 }, { "epoch": 10.932835820895523, "grad_norm": 0.1875, "learning_rate": 8.913517550410755e-05, "loss": 0.0055, "step": 7325 }, { "epoch": 10.970149253731343, "grad_norm": 0.30859375, "learning_rate": 8.909783420463033e-05, "loss": 0.0058, "step": 7350 }, { "epoch": 11.007462686567164, "grad_norm": 0.2060546875, "learning_rate": 8.90604929051531e-05, "loss": 0.0049, "step": 7375 }, { "epoch": 11.044776119402986, "grad_norm": 0.09326171875, "learning_rate": 8.902315160567588e-05, "loss": 0.0031, "step": 7400 }, { "epoch": 11.082089552238806, "grad_norm": 0.08203125, "learning_rate": 8.898581030619866e-05, "loss": 0.0033, "step": 7425 }, { "epoch": 11.119402985074627, "grad_norm": 0.09619140625, "learning_rate": 8.894846900672143e-05, "loss": 0.0034, "step": 7450 }, { "epoch": 11.156716417910447, "grad_norm": 0.05908203125, "learning_rate": 8.891112770724421e-05, "loss": 0.0029, "step": 7475 }, { "epoch": 11.194029850746269, "grad_norm": 0.03271484375, "learning_rate": 8.887378640776699e-05, "loss": 0.0037, "step": 7500 }, { "epoch": 11.23134328358209, "grad_norm": 0.08154296875, "learning_rate": 8.883644510828977e-05, "loss": 0.0033, "step": 7525 }, { "epoch": 11.26865671641791, "grad_norm": 0.07666015625, "learning_rate": 8.879910380881255e-05, "loss": 0.0036, "step": 7550 }, { "epoch": 11.305970149253731, "grad_norm": 0.046630859375, "learning_rate": 8.876176250933533e-05, "loss": 0.0033, "step": 7575 }, { "epoch": 11.343283582089553, "grad_norm": 0.0262451171875, "learning_rate": 8.872442120985812e-05, "loss": 0.0033, "step": 7600 }, { "epoch": 11.380597014925373, "grad_norm": 0.049560546875, "learning_rate": 8.868707991038088e-05, "loss": 0.0035, "step": 7625 }, { "epoch": 11.417910447761194, "grad_norm": 0.0517578125, "learning_rate": 8.864973861090366e-05, "loss": 0.0035, "step": 7650 }, { "epoch": 11.455223880597014, "grad_norm": 0.01806640625, "learning_rate": 8.861239731142645e-05, "loss": 0.0037, "step": 7675 }, { "epoch": 11.492537313432836, "grad_norm": 0.0830078125, "learning_rate": 8.857505601194921e-05, "loss": 0.0036, "step": 7700 }, { "epoch": 11.529850746268657, "grad_norm": 0.16796875, "learning_rate": 8.8537714712472e-05, "loss": 0.0032, "step": 7725 }, { "epoch": 11.567164179104477, "grad_norm": 0.09375, "learning_rate": 8.850037341299478e-05, "loss": 0.0038, "step": 7750 }, { "epoch": 11.604477611940299, "grad_norm": 0.263671875, "learning_rate": 8.846303211351756e-05, "loss": 0.0039, "step": 7775 }, { "epoch": 11.64179104477612, "grad_norm": 0.06103515625, "learning_rate": 8.842569081404032e-05, "loss": 0.0038, "step": 7800 }, { "epoch": 11.67910447761194, "grad_norm": 0.0791015625, "learning_rate": 8.83883495145631e-05, "loss": 0.0037, "step": 7825 }, { "epoch": 11.716417910447761, "grad_norm": 0.11962890625, "learning_rate": 8.835100821508589e-05, "loss": 0.0042, "step": 7850 }, { "epoch": 11.753731343283581, "grad_norm": 0.1259765625, "learning_rate": 8.831366691560867e-05, "loss": 0.0036, "step": 7875 }, { "epoch": 11.791044776119403, "grad_norm": 0.049072265625, "learning_rate": 8.827632561613145e-05, "loss": 0.004, "step": 7900 }, { "epoch": 11.828358208955224, "grad_norm": 0.0791015625, "learning_rate": 8.823898431665423e-05, "loss": 0.0041, "step": 7925 }, { "epoch": 11.865671641791044, "grad_norm": 0.0625, "learning_rate": 8.8201643017177e-05, "loss": 0.0035, "step": 7950 }, { "epoch": 11.902985074626866, "grad_norm": 0.09912109375, "learning_rate": 8.816430171769978e-05, "loss": 0.0043, "step": 7975 }, { "epoch": 11.940298507462687, "grad_norm": 0.060546875, "learning_rate": 8.812696041822256e-05, "loss": 0.0048, "step": 8000 }, { "epoch": 11.977611940298507, "grad_norm": 0.04736328125, "learning_rate": 8.808961911874534e-05, "loss": 0.0043, "step": 8025 }, { "epoch": 12.014925373134329, "grad_norm": 0.083984375, "learning_rate": 8.805227781926811e-05, "loss": 0.0039, "step": 8050 }, { "epoch": 12.052238805970148, "grad_norm": 0.10498046875, "learning_rate": 8.801493651979089e-05, "loss": 0.0026, "step": 8075 }, { "epoch": 12.08955223880597, "grad_norm": 0.04150390625, "learning_rate": 8.797759522031367e-05, "loss": 0.0027, "step": 8100 }, { "epoch": 12.126865671641792, "grad_norm": 0.03564453125, "learning_rate": 8.794025392083644e-05, "loss": 0.0028, "step": 8125 }, { "epoch": 12.164179104477611, "grad_norm": 0.361328125, "learning_rate": 8.790291262135922e-05, "loss": 0.0035, "step": 8150 }, { "epoch": 12.201492537313433, "grad_norm": 0.036376953125, "learning_rate": 8.7865571321882e-05, "loss": 0.003, "step": 8175 }, { "epoch": 12.238805970149254, "grad_norm": 0.06494140625, "learning_rate": 8.782823002240478e-05, "loss": 0.0033, "step": 8200 }, { "epoch": 12.276119402985074, "grad_norm": 0.169921875, "learning_rate": 8.779088872292756e-05, "loss": 0.003, "step": 8225 }, { "epoch": 12.313432835820896, "grad_norm": 0.0625, "learning_rate": 8.775354742345035e-05, "loss": 0.0034, "step": 8250 }, { "epoch": 12.350746268656717, "grad_norm": 0.06982421875, "learning_rate": 8.771620612397313e-05, "loss": 0.0031, "step": 8275 }, { "epoch": 12.388059701492537, "grad_norm": 0.04443359375, "learning_rate": 8.76788648244959e-05, "loss": 0.0033, "step": 8300 }, { "epoch": 12.425373134328359, "grad_norm": 0.0927734375, "learning_rate": 8.764152352501868e-05, "loss": 0.0034, "step": 8325 }, { "epoch": 12.462686567164178, "grad_norm": 0.0927734375, "learning_rate": 8.760418222554146e-05, "loss": 0.0034, "step": 8350 }, { "epoch": 12.5, "grad_norm": 0.09716796875, "learning_rate": 8.756684092606424e-05, "loss": 0.0037, "step": 8375 }, { "epoch": 12.537313432835822, "grad_norm": 0.13671875, "learning_rate": 8.7529499626587e-05, "loss": 0.0032, "step": 8400 }, { "epoch": 12.574626865671641, "grad_norm": 0.10791015625, "learning_rate": 8.749215832710979e-05, "loss": 0.0036, "step": 8425 }, { "epoch": 12.611940298507463, "grad_norm": 0.06982421875, "learning_rate": 8.745481702763257e-05, "loss": 0.0037, "step": 8450 }, { "epoch": 12.649253731343283, "grad_norm": 0.056884765625, "learning_rate": 8.741747572815534e-05, "loss": 0.0036, "step": 8475 }, { "epoch": 12.686567164179104, "grad_norm": 0.054443359375, "learning_rate": 8.738013442867812e-05, "loss": 0.0038, "step": 8500 }, { "epoch": 12.723880597014926, "grad_norm": 0.1748046875, "learning_rate": 8.73427931292009e-05, "loss": 0.0039, "step": 8525 }, { "epoch": 12.761194029850746, "grad_norm": 0.07666015625, "learning_rate": 8.730545182972368e-05, "loss": 0.0041, "step": 8550 }, { "epoch": 12.798507462686567, "grad_norm": 0.095703125, "learning_rate": 8.726811053024646e-05, "loss": 0.0039, "step": 8575 }, { "epoch": 12.835820895522389, "grad_norm": 0.06591796875, "learning_rate": 8.723076923076924e-05, "loss": 0.0035, "step": 8600 }, { "epoch": 12.873134328358208, "grad_norm": 0.169921875, "learning_rate": 8.719342793129202e-05, "loss": 0.0044, "step": 8625 }, { "epoch": 12.91044776119403, "grad_norm": 0.49609375, "learning_rate": 8.715608663181479e-05, "loss": 0.0048, "step": 8650 }, { "epoch": 12.947761194029852, "grad_norm": 0.09130859375, "learning_rate": 8.711874533233757e-05, "loss": 0.0046, "step": 8675 }, { "epoch": 12.985074626865671, "grad_norm": 0.146484375, "learning_rate": 8.708140403286035e-05, "loss": 0.0039, "step": 8700 }, { "epoch": 13.022388059701493, "grad_norm": 0.15234375, "learning_rate": 8.704406273338312e-05, "loss": 0.004, "step": 8725 }, { "epoch": 13.059701492537313, "grad_norm": 0.060302734375, "learning_rate": 8.70067214339059e-05, "loss": 0.0043, "step": 8750 }, { "epoch": 13.097014925373134, "grad_norm": 0.042236328125, "learning_rate": 8.696938013442868e-05, "loss": 0.004, "step": 8775 }, { "epoch": 13.134328358208956, "grad_norm": 0.060546875, "learning_rate": 8.693203883495146e-05, "loss": 0.0042, "step": 8800 }, { "epoch": 13.171641791044776, "grad_norm": 0.0732421875, "learning_rate": 8.689469753547423e-05, "loss": 0.0033, "step": 8825 }, { "epoch": 13.208955223880597, "grad_norm": 0.06787109375, "learning_rate": 8.685735623599701e-05, "loss": 0.0035, "step": 8850 }, { "epoch": 13.246268656716419, "grad_norm": 0.11328125, "learning_rate": 8.682001493651981e-05, "loss": 0.0031, "step": 8875 }, { "epoch": 13.283582089552239, "grad_norm": 0.04833984375, "learning_rate": 8.678267363704257e-05, "loss": 0.0033, "step": 8900 }, { "epoch": 13.32089552238806, "grad_norm": 0.08837890625, "learning_rate": 8.674533233756536e-05, "loss": 0.0038, "step": 8925 }, { "epoch": 13.35820895522388, "grad_norm": 0.0849609375, "learning_rate": 8.670799103808814e-05, "loss": 0.0043, "step": 8950 }, { "epoch": 13.395522388059701, "grad_norm": 0.2216796875, "learning_rate": 8.66706497386109e-05, "loss": 0.0048, "step": 8975 }, { "epoch": 13.432835820895523, "grad_norm": 0.396484375, "learning_rate": 8.663330843913369e-05, "loss": 0.0053, "step": 9000 }, { "epoch": 13.470149253731343, "grad_norm": 0.2158203125, "learning_rate": 8.659596713965647e-05, "loss": 0.004, "step": 9025 }, { "epoch": 13.507462686567164, "grad_norm": 0.125, "learning_rate": 8.655862584017925e-05, "loss": 0.0051, "step": 9050 }, { "epoch": 13.544776119402986, "grad_norm": 0.3515625, "learning_rate": 8.652128454070202e-05, "loss": 0.0049, "step": 9075 }, { "epoch": 13.582089552238806, "grad_norm": 0.12060546875, "learning_rate": 8.64839432412248e-05, "loss": 0.0049, "step": 9100 }, { "epoch": 13.619402985074627, "grad_norm": 0.15625, "learning_rate": 8.644660194174758e-05, "loss": 0.0062, "step": 9125 }, { "epoch": 13.656716417910447, "grad_norm": 0.197265625, "learning_rate": 8.640926064227035e-05, "loss": 0.0091, "step": 9150 }, { "epoch": 13.694029850746269, "grad_norm": 0.1416015625, "learning_rate": 8.637191934279313e-05, "loss": 0.0068, "step": 9175 }, { "epoch": 13.73134328358209, "grad_norm": 0.30859375, "learning_rate": 8.633457804331592e-05, "loss": 0.0073, "step": 9200 }, { "epoch": 13.76865671641791, "grad_norm": 0.1142578125, "learning_rate": 8.629723674383869e-05, "loss": 0.0066, "step": 9225 }, { "epoch": 13.805970149253731, "grad_norm": 0.130859375, "learning_rate": 8.625989544436147e-05, "loss": 0.0057, "step": 9250 }, { "epoch": 13.843283582089553, "grad_norm": 0.1513671875, "learning_rate": 8.622255414488425e-05, "loss": 0.0066, "step": 9275 }, { "epoch": 13.880597014925373, "grad_norm": 0.193359375, "learning_rate": 8.618521284540703e-05, "loss": 0.0065, "step": 9300 }, { "epoch": 13.917910447761194, "grad_norm": 0.1474609375, "learning_rate": 8.61478715459298e-05, "loss": 0.0081, "step": 9325 }, { "epoch": 13.955223880597014, "grad_norm": 0.283203125, "learning_rate": 8.611053024645258e-05, "loss": 0.009, "step": 9350 }, { "epoch": 13.992537313432836, "grad_norm": 0.203125, "learning_rate": 8.607318894697536e-05, "loss": 0.0085, "step": 9375 }, { "epoch": 14.029850746268657, "grad_norm": 0.23828125, "learning_rate": 8.603584764749813e-05, "loss": 0.0062, "step": 9400 }, { "epoch": 14.067164179104477, "grad_norm": 0.244140625, "learning_rate": 8.599850634802091e-05, "loss": 0.0058, "step": 9425 }, { "epoch": 14.104477611940299, "grad_norm": 0.10498046875, "learning_rate": 8.596116504854369e-05, "loss": 0.0048, "step": 9450 }, { "epoch": 14.14179104477612, "grad_norm": 0.2734375, "learning_rate": 8.592382374906647e-05, "loss": 0.0053, "step": 9475 }, { "epoch": 14.17910447761194, "grad_norm": 0.43359375, "learning_rate": 8.588648244958924e-05, "loss": 0.0051, "step": 9500 }, { "epoch": 14.216417910447761, "grad_norm": 0.3984375, "learning_rate": 8.584914115011204e-05, "loss": 0.0065, "step": 9525 }, { "epoch": 14.253731343283581, "grad_norm": 0.10791015625, "learning_rate": 8.581179985063482e-05, "loss": 0.005, "step": 9550 }, { "epoch": 14.291044776119403, "grad_norm": 0.1328125, "learning_rate": 8.577445855115758e-05, "loss": 0.0056, "step": 9575 }, { "epoch": 14.328358208955224, "grad_norm": 0.2041015625, "learning_rate": 8.573711725168037e-05, "loss": 0.0058, "step": 9600 }, { "epoch": 14.365671641791044, "grad_norm": 0.328125, "learning_rate": 8.569977595220315e-05, "loss": 0.005, "step": 9625 }, { "epoch": 14.402985074626866, "grad_norm": 0.1787109375, "learning_rate": 8.566243465272591e-05, "loss": 0.0053, "step": 9650 }, { "epoch": 14.440298507462687, "grad_norm": 0.1337890625, "learning_rate": 8.56250933532487e-05, "loss": 0.0046, "step": 9675 }, { "epoch": 14.477611940298507, "grad_norm": 0.1044921875, "learning_rate": 8.558775205377148e-05, "loss": 0.0049, "step": 9700 }, { "epoch": 14.514925373134329, "grad_norm": 0.1767578125, "learning_rate": 8.555041075429426e-05, "loss": 0.0057, "step": 9725 }, { "epoch": 14.552238805970148, "grad_norm": 0.07373046875, "learning_rate": 8.551306945481703e-05, "loss": 0.0056, "step": 9750 }, { "epoch": 14.58955223880597, "grad_norm": 0.08056640625, "learning_rate": 8.547572815533981e-05, "loss": 0.0046, "step": 9775 }, { "epoch": 14.626865671641792, "grad_norm": 0.2470703125, "learning_rate": 8.543838685586259e-05, "loss": 0.0055, "step": 9800 }, { "epoch": 14.664179104477611, "grad_norm": 0.1279296875, "learning_rate": 8.540104555638536e-05, "loss": 0.0046, "step": 9825 }, { "epoch": 14.701492537313433, "grad_norm": 0.07373046875, "learning_rate": 8.536370425690814e-05, "loss": 0.0051, "step": 9850 }, { "epoch": 14.738805970149254, "grad_norm": 0.197265625, "learning_rate": 8.532636295743093e-05, "loss": 0.0056, "step": 9875 }, { "epoch": 14.776119402985074, "grad_norm": 0.12060546875, "learning_rate": 8.52890216579537e-05, "loss": 0.0049, "step": 9900 }, { "epoch": 14.813432835820896, "grad_norm": 0.07568359375, "learning_rate": 8.525168035847648e-05, "loss": 0.0054, "step": 9925 }, { "epoch": 14.850746268656717, "grad_norm": 0.10888671875, "learning_rate": 8.521433905899926e-05, "loss": 0.0045, "step": 9950 }, { "epoch": 14.888059701492537, "grad_norm": 0.2060546875, "learning_rate": 8.517699775952204e-05, "loss": 0.007, "step": 9975 }, { "epoch": 14.925373134328359, "grad_norm": 0.12890625, "learning_rate": 8.513965646004481e-05, "loss": 0.0052, "step": 10000 }, { "epoch": 14.962686567164178, "grad_norm": 0.11376953125, "learning_rate": 8.510231516056759e-05, "loss": 0.007, "step": 10025 }, { "epoch": 15.0, "grad_norm": 0.09716796875, "learning_rate": 8.506497386109037e-05, "loss": 0.0059, "step": 10050 }, { "epoch": 15.037313432835822, "grad_norm": 0.224609375, "learning_rate": 8.502763256161314e-05, "loss": 0.0037, "step": 10075 }, { "epoch": 15.074626865671641, "grad_norm": 0.049072265625, "learning_rate": 8.499029126213592e-05, "loss": 0.003, "step": 10100 }, { "epoch": 15.111940298507463, "grad_norm": 0.1552734375, "learning_rate": 8.49529499626587e-05, "loss": 0.0037, "step": 10125 }, { "epoch": 15.149253731343283, "grad_norm": 0.11962890625, "learning_rate": 8.491560866318148e-05, "loss": 0.0041, "step": 10150 }, { "epoch": 15.186567164179104, "grad_norm": 0.06396484375, "learning_rate": 8.487826736370425e-05, "loss": 0.0026, "step": 10175 }, { "epoch": 15.223880597014926, "grad_norm": 0.1142578125, "learning_rate": 8.484092606422705e-05, "loss": 0.0034, "step": 10200 }, { "epoch": 15.261194029850746, "grad_norm": 0.055419921875, "learning_rate": 8.480358476474983e-05, "loss": 0.0037, "step": 10225 }, { "epoch": 15.298507462686567, "grad_norm": 0.07568359375, "learning_rate": 8.47662434652726e-05, "loss": 0.0034, "step": 10250 }, { "epoch": 15.335820895522389, "grad_norm": 0.1552734375, "learning_rate": 8.472890216579538e-05, "loss": 0.0042, "step": 10275 }, { "epoch": 15.373134328358208, "grad_norm": 0.047607421875, "learning_rate": 8.469156086631816e-05, "loss": 0.0032, "step": 10300 }, { "epoch": 15.41044776119403, "grad_norm": 0.126953125, "learning_rate": 8.465421956684093e-05, "loss": 0.0034, "step": 10325 }, { "epoch": 15.447761194029852, "grad_norm": 0.0849609375, "learning_rate": 8.46168782673637e-05, "loss": 0.0033, "step": 10350 }, { "epoch": 15.485074626865671, "grad_norm": 0.1875, "learning_rate": 8.457953696788649e-05, "loss": 0.0037, "step": 10375 }, { "epoch": 15.522388059701493, "grad_norm": 0.185546875, "learning_rate": 8.454219566840927e-05, "loss": 0.0034, "step": 10400 }, { "epoch": 15.559701492537313, "grad_norm": 0.1474609375, "learning_rate": 8.450485436893204e-05, "loss": 0.0036, "step": 10425 }, { "epoch": 15.597014925373134, "grad_norm": 0.04931640625, "learning_rate": 8.446751306945482e-05, "loss": 0.0034, "step": 10450 }, { "epoch": 15.634328358208956, "grad_norm": 0.1923828125, "learning_rate": 8.44301717699776e-05, "loss": 0.0043, "step": 10475 }, { "epoch": 15.671641791044776, "grad_norm": 0.1904296875, "learning_rate": 8.439283047050037e-05, "loss": 0.0035, "step": 10500 }, { "epoch": 15.708955223880597, "grad_norm": 0.06005859375, "learning_rate": 8.435548917102316e-05, "loss": 0.0038, "step": 10525 }, { "epoch": 15.746268656716419, "grad_norm": 0.0498046875, "learning_rate": 8.431814787154594e-05, "loss": 0.0036, "step": 10550 }, { "epoch": 15.783582089552239, "grad_norm": 0.08837890625, "learning_rate": 8.428080657206871e-05, "loss": 0.0039, "step": 10575 }, { "epoch": 15.82089552238806, "grad_norm": 0.1201171875, "learning_rate": 8.424346527259149e-05, "loss": 0.0032, "step": 10600 }, { "epoch": 15.85820895522388, "grad_norm": 0.255859375, "learning_rate": 8.420612397311427e-05, "loss": 0.0038, "step": 10625 }, { "epoch": 15.895522388059701, "grad_norm": 0.05810546875, "learning_rate": 8.416878267363705e-05, "loss": 0.0048, "step": 10650 }, { "epoch": 15.932835820895523, "grad_norm": 0.08447265625, "learning_rate": 8.413144137415982e-05, "loss": 0.004, "step": 10675 }, { "epoch": 15.970149253731343, "grad_norm": 0.177734375, "learning_rate": 8.40941000746826e-05, "loss": 0.0041, "step": 10700 }, { "epoch": 16.007462686567163, "grad_norm": 0.045166015625, "learning_rate": 8.405675877520538e-05, "loss": 0.0041, "step": 10725 }, { "epoch": 16.044776119402986, "grad_norm": 0.18359375, "learning_rate": 8.401941747572815e-05, "loss": 0.0028, "step": 10750 }, { "epoch": 16.082089552238806, "grad_norm": 0.0546875, "learning_rate": 8.398207617625093e-05, "loss": 0.0025, "step": 10775 }, { "epoch": 16.119402985074625, "grad_norm": 0.061767578125, "learning_rate": 8.394473487677371e-05, "loss": 0.0027, "step": 10800 }, { "epoch": 16.15671641791045, "grad_norm": 0.040283203125, "learning_rate": 8.39073935772965e-05, "loss": 0.0022, "step": 10825 }, { "epoch": 16.19402985074627, "grad_norm": 0.06982421875, "learning_rate": 8.387005227781928e-05, "loss": 0.0024, "step": 10850 }, { "epoch": 16.23134328358209, "grad_norm": 0.06884765625, "learning_rate": 8.383271097834206e-05, "loss": 0.0027, "step": 10875 }, { "epoch": 16.26865671641791, "grad_norm": 0.115234375, "learning_rate": 8.379536967886484e-05, "loss": 0.0022, "step": 10900 }, { "epoch": 16.30597014925373, "grad_norm": 0.056640625, "learning_rate": 8.37580283793876e-05, "loss": 0.0026, "step": 10925 }, { "epoch": 16.34328358208955, "grad_norm": 0.047607421875, "learning_rate": 8.372068707991039e-05, "loss": 0.0029, "step": 10950 }, { "epoch": 16.380597014925375, "grad_norm": 0.1064453125, "learning_rate": 8.368334578043317e-05, "loss": 0.0033, "step": 10975 }, { "epoch": 16.417910447761194, "grad_norm": 0.0576171875, "learning_rate": 8.364600448095594e-05, "loss": 0.0028, "step": 11000 }, { "epoch": 16.455223880597014, "grad_norm": 0.1435546875, "learning_rate": 8.360866318147872e-05, "loss": 0.0029, "step": 11025 }, { "epoch": 16.492537313432837, "grad_norm": 0.39453125, "learning_rate": 8.35713218820015e-05, "loss": 0.0028, "step": 11050 }, { "epoch": 16.529850746268657, "grad_norm": 0.1474609375, "learning_rate": 8.353398058252428e-05, "loss": 0.0026, "step": 11075 }, { "epoch": 16.567164179104477, "grad_norm": 0.052978515625, "learning_rate": 8.349663928304705e-05, "loss": 0.0024, "step": 11100 }, { "epoch": 16.604477611940297, "grad_norm": 0.080078125, "learning_rate": 8.345929798356983e-05, "loss": 0.0029, "step": 11125 }, { "epoch": 16.64179104477612, "grad_norm": 0.03857421875, "learning_rate": 8.342195668409261e-05, "loss": 0.003, "step": 11150 }, { "epoch": 16.67910447761194, "grad_norm": 0.06494140625, "learning_rate": 8.338461538461538e-05, "loss": 0.0036, "step": 11175 }, { "epoch": 16.71641791044776, "grad_norm": 0.08544921875, "learning_rate": 8.334727408513817e-05, "loss": 0.0028, "step": 11200 }, { "epoch": 16.753731343283583, "grad_norm": 0.07080078125, "learning_rate": 8.330993278566095e-05, "loss": 0.0031, "step": 11225 }, { "epoch": 16.791044776119403, "grad_norm": 0.10498046875, "learning_rate": 8.327259148618372e-05, "loss": 0.0034, "step": 11250 }, { "epoch": 16.828358208955223, "grad_norm": 0.0673828125, "learning_rate": 8.32352501867065e-05, "loss": 0.0031, "step": 11275 }, { "epoch": 16.865671641791046, "grad_norm": 0.197265625, "learning_rate": 8.319790888722928e-05, "loss": 0.0037, "step": 11300 }, { "epoch": 16.902985074626866, "grad_norm": 0.041259765625, "learning_rate": 8.316056758775206e-05, "loss": 0.0034, "step": 11325 }, { "epoch": 16.940298507462686, "grad_norm": 0.1875, "learning_rate": 8.312322628827483e-05, "loss": 0.0034, "step": 11350 }, { "epoch": 16.97761194029851, "grad_norm": 0.1875, "learning_rate": 8.308588498879761e-05, "loss": 0.0031, "step": 11375 }, { "epoch": 17.01492537313433, "grad_norm": 0.05712890625, "learning_rate": 8.30485436893204e-05, "loss": 0.0034, "step": 11400 }, { "epoch": 17.05223880597015, "grad_norm": 0.044921875, "learning_rate": 8.301120238984317e-05, "loss": 0.002, "step": 11425 }, { "epoch": 17.08955223880597, "grad_norm": 0.041015625, "learning_rate": 8.297386109036594e-05, "loss": 0.0023, "step": 11450 }, { "epoch": 17.12686567164179, "grad_norm": 0.03564453125, "learning_rate": 8.293651979088872e-05, "loss": 0.002, "step": 11475 }, { "epoch": 17.16417910447761, "grad_norm": 0.041015625, "learning_rate": 8.28991784914115e-05, "loss": 0.0025, "step": 11500 }, { "epoch": 17.20149253731343, "grad_norm": 0.048583984375, "learning_rate": 8.286183719193429e-05, "loss": 0.0023, "step": 11525 }, { "epoch": 17.238805970149254, "grad_norm": 0.04443359375, "learning_rate": 8.282449589245707e-05, "loss": 0.0026, "step": 11550 }, { "epoch": 17.276119402985074, "grad_norm": 0.10791015625, "learning_rate": 8.278715459297985e-05, "loss": 0.0023, "step": 11575 }, { "epoch": 17.313432835820894, "grad_norm": 0.053466796875, "learning_rate": 8.274981329350262e-05, "loss": 0.0031, "step": 11600 }, { "epoch": 17.350746268656717, "grad_norm": 0.060791015625, "learning_rate": 8.27124719940254e-05, "loss": 0.0024, "step": 11625 }, { "epoch": 17.388059701492537, "grad_norm": 0.0625, "learning_rate": 8.267513069454818e-05, "loss": 0.0023, "step": 11650 }, { "epoch": 17.425373134328357, "grad_norm": 0.05810546875, "learning_rate": 8.263778939507096e-05, "loss": 0.0023, "step": 11675 }, { "epoch": 17.46268656716418, "grad_norm": 0.056396484375, "learning_rate": 8.260044809559373e-05, "loss": 0.0024, "step": 11700 }, { "epoch": 17.5, "grad_norm": 0.06640625, "learning_rate": 8.256310679611651e-05, "loss": 0.0023, "step": 11725 }, { "epoch": 17.53731343283582, "grad_norm": 0.038330078125, "learning_rate": 8.252576549663929e-05, "loss": 0.0021, "step": 11750 }, { "epoch": 17.574626865671643, "grad_norm": 0.2890625, "learning_rate": 8.248842419716206e-05, "loss": 0.0022, "step": 11775 }, { "epoch": 17.611940298507463, "grad_norm": 0.099609375, "learning_rate": 8.245108289768484e-05, "loss": 0.0025, "step": 11800 }, { "epoch": 17.649253731343283, "grad_norm": 0.059814453125, "learning_rate": 8.241374159820762e-05, "loss": 0.0026, "step": 11825 }, { "epoch": 17.686567164179106, "grad_norm": 0.04052734375, "learning_rate": 8.23764002987304e-05, "loss": 0.0025, "step": 11850 }, { "epoch": 17.723880597014926, "grad_norm": 0.060302734375, "learning_rate": 8.233905899925318e-05, "loss": 0.003, "step": 11875 }, { "epoch": 17.761194029850746, "grad_norm": 0.1865234375, "learning_rate": 8.230171769977596e-05, "loss": 0.0032, "step": 11900 }, { "epoch": 17.798507462686565, "grad_norm": 0.0277099609375, "learning_rate": 8.226437640029874e-05, "loss": 0.0029, "step": 11925 }, { "epoch": 17.83582089552239, "grad_norm": 0.248046875, "learning_rate": 8.222703510082151e-05, "loss": 0.0034, "step": 11950 }, { "epoch": 17.87313432835821, "grad_norm": 0.087890625, "learning_rate": 8.218969380134429e-05, "loss": 0.0031, "step": 11975 }, { "epoch": 17.91044776119403, "grad_norm": 0.1396484375, "learning_rate": 8.215235250186707e-05, "loss": 0.0035, "step": 12000 }, { "epoch": 17.94776119402985, "grad_norm": 0.041259765625, "learning_rate": 8.211501120238984e-05, "loss": 0.0039, "step": 12025 }, { "epoch": 17.98507462686567, "grad_norm": 0.08837890625, "learning_rate": 8.207766990291262e-05, "loss": 0.0036, "step": 12050 }, { "epoch": 18.02238805970149, "grad_norm": 0.050048828125, "learning_rate": 8.20403286034354e-05, "loss": 0.0025, "step": 12075 }, { "epoch": 18.059701492537314, "grad_norm": 0.130859375, "learning_rate": 8.200298730395818e-05, "loss": 0.003, "step": 12100 }, { "epoch": 18.097014925373134, "grad_norm": 0.21875, "learning_rate": 8.196564600448095e-05, "loss": 0.0035, "step": 12125 }, { "epoch": 18.134328358208954, "grad_norm": 0.0654296875, "learning_rate": 8.192830470500373e-05, "loss": 0.0037, "step": 12150 }, { "epoch": 18.171641791044777, "grad_norm": 0.055908203125, "learning_rate": 8.189096340552653e-05, "loss": 0.0036, "step": 12175 }, { "epoch": 18.208955223880597, "grad_norm": 0.06884765625, "learning_rate": 8.18536221060493e-05, "loss": 0.0033, "step": 12200 }, { "epoch": 18.246268656716417, "grad_norm": 0.240234375, "learning_rate": 8.181628080657208e-05, "loss": 0.0032, "step": 12225 }, { "epoch": 18.28358208955224, "grad_norm": 0.2001953125, "learning_rate": 8.177893950709486e-05, "loss": 0.0039, "step": 12250 }, { "epoch": 18.32089552238806, "grad_norm": 0.07275390625, "learning_rate": 8.174159820761763e-05, "loss": 0.0046, "step": 12275 }, { "epoch": 18.35820895522388, "grad_norm": 0.1875, "learning_rate": 8.170425690814041e-05, "loss": 0.0116, "step": 12300 }, { "epoch": 18.395522388059703, "grad_norm": 0.390625, "learning_rate": 8.166691560866319e-05, "loss": 0.011, "step": 12325 }, { "epoch": 18.432835820895523, "grad_norm": 0.1474609375, "learning_rate": 8.162957430918597e-05, "loss": 0.0078, "step": 12350 }, { "epoch": 18.470149253731343, "grad_norm": 0.1474609375, "learning_rate": 8.159223300970874e-05, "loss": 0.0062, "step": 12375 }, { "epoch": 18.507462686567163, "grad_norm": 0.2041015625, "learning_rate": 8.155489171023152e-05, "loss": 0.0057, "step": 12400 }, { "epoch": 18.544776119402986, "grad_norm": 0.1748046875, "learning_rate": 8.15175504107543e-05, "loss": 0.0055, "step": 12425 }, { "epoch": 18.582089552238806, "grad_norm": 0.119140625, "learning_rate": 8.148020911127707e-05, "loss": 0.0062, "step": 12450 }, { "epoch": 18.619402985074625, "grad_norm": 0.10498046875, "learning_rate": 8.144286781179985e-05, "loss": 0.0056, "step": 12475 }, { "epoch": 18.65671641791045, "grad_norm": 0.220703125, "learning_rate": 8.140552651232263e-05, "loss": 0.0062, "step": 12500 }, { "epoch": 18.69402985074627, "grad_norm": 0.11279296875, "learning_rate": 8.136818521284541e-05, "loss": 0.0053, "step": 12525 }, { "epoch": 18.73134328358209, "grad_norm": 0.12060546875, "learning_rate": 8.133084391336819e-05, "loss": 0.0053, "step": 12550 }, { "epoch": 18.76865671641791, "grad_norm": 0.1669921875, "learning_rate": 8.129350261389097e-05, "loss": 0.0053, "step": 12575 }, { "epoch": 18.80597014925373, "grad_norm": 0.267578125, "learning_rate": 8.125616131441375e-05, "loss": 0.0053, "step": 12600 }, { "epoch": 18.84328358208955, "grad_norm": 0.099609375, "learning_rate": 8.121882001493652e-05, "loss": 0.0051, "step": 12625 }, { "epoch": 18.880597014925375, "grad_norm": 0.060302734375, "learning_rate": 8.11814787154593e-05, "loss": 0.0061, "step": 12650 }, { "epoch": 18.917910447761194, "grad_norm": 0.12109375, "learning_rate": 8.114413741598208e-05, "loss": 0.0064, "step": 12675 }, { "epoch": 18.955223880597014, "grad_norm": 0.166015625, "learning_rate": 8.110679611650485e-05, "loss": 0.0058, "step": 12700 }, { "epoch": 18.992537313432837, "grad_norm": 0.2099609375, "learning_rate": 8.106945481702763e-05, "loss": 0.0057, "step": 12725 }, { "epoch": 19.029850746268657, "grad_norm": 0.1552734375, "learning_rate": 8.103211351755041e-05, "loss": 0.0042, "step": 12750 }, { "epoch": 19.067164179104477, "grad_norm": 0.10791015625, "learning_rate": 8.09947722180732e-05, "loss": 0.0038, "step": 12775 }, { "epoch": 19.104477611940297, "grad_norm": 0.09033203125, "learning_rate": 8.095743091859596e-05, "loss": 0.0035, "step": 12800 }, { "epoch": 19.14179104477612, "grad_norm": 0.1298828125, "learning_rate": 8.092008961911874e-05, "loss": 0.0042, "step": 12825 }, { "epoch": 19.17910447761194, "grad_norm": 0.046875, "learning_rate": 8.088274831964154e-05, "loss": 0.003, "step": 12850 }, { "epoch": 19.21641791044776, "grad_norm": 0.10302734375, "learning_rate": 8.08454070201643e-05, "loss": 0.0035, "step": 12875 }, { "epoch": 19.253731343283583, "grad_norm": 0.1982421875, "learning_rate": 8.080806572068709e-05, "loss": 0.004, "step": 12900 }, { "epoch": 19.291044776119403, "grad_norm": 0.150390625, "learning_rate": 8.077072442120987e-05, "loss": 0.0036, "step": 12925 }, { "epoch": 19.328358208955223, "grad_norm": 0.07177734375, "learning_rate": 8.073338312173264e-05, "loss": 0.0036, "step": 12950 }, { "epoch": 19.365671641791046, "grad_norm": 0.1259765625, "learning_rate": 8.069604182225542e-05, "loss": 0.0038, "step": 12975 }, { "epoch": 19.402985074626866, "grad_norm": 0.2001953125, "learning_rate": 8.06587005227782e-05, "loss": 0.0039, "step": 13000 }, { "epoch": 19.440298507462686, "grad_norm": 0.06005859375, "learning_rate": 8.062135922330098e-05, "loss": 0.0035, "step": 13025 }, { "epoch": 19.47761194029851, "grad_norm": 0.07568359375, "learning_rate": 8.058401792382375e-05, "loss": 0.0037, "step": 13050 }, { "epoch": 19.51492537313433, "grad_norm": 0.0615234375, "learning_rate": 8.054667662434653e-05, "loss": 0.0038, "step": 13075 }, { "epoch": 19.55223880597015, "grad_norm": 0.0791015625, "learning_rate": 8.050933532486931e-05, "loss": 0.0032, "step": 13100 }, { "epoch": 19.58955223880597, "grad_norm": 0.08642578125, "learning_rate": 8.047199402539208e-05, "loss": 0.0045, "step": 13125 }, { "epoch": 19.62686567164179, "grad_norm": 0.05712890625, "learning_rate": 8.043465272591486e-05, "loss": 0.0039, "step": 13150 }, { "epoch": 19.66417910447761, "grad_norm": 0.158203125, "learning_rate": 8.039731142643765e-05, "loss": 0.0037, "step": 13175 }, { "epoch": 19.701492537313435, "grad_norm": 0.10302734375, "learning_rate": 8.035997012696042e-05, "loss": 0.0034, "step": 13200 }, { "epoch": 19.738805970149254, "grad_norm": 0.1962890625, "learning_rate": 8.03226288274832e-05, "loss": 0.0036, "step": 13225 }, { "epoch": 19.776119402985074, "grad_norm": 0.05615234375, "learning_rate": 8.028528752800598e-05, "loss": 0.004, "step": 13250 }, { "epoch": 19.813432835820894, "grad_norm": 0.111328125, "learning_rate": 8.024794622852876e-05, "loss": 0.0038, "step": 13275 }, { "epoch": 19.850746268656717, "grad_norm": 0.1044921875, "learning_rate": 8.021060492905153e-05, "loss": 0.0039, "step": 13300 }, { "epoch": 19.888059701492537, "grad_norm": 0.119140625, "learning_rate": 8.017326362957431e-05, "loss": 0.0044, "step": 13325 }, { "epoch": 19.925373134328357, "grad_norm": 0.0546875, "learning_rate": 8.01359223300971e-05, "loss": 0.0047, "step": 13350 }, { "epoch": 19.96268656716418, "grad_norm": 0.259765625, "learning_rate": 8.009858103061986e-05, "loss": 0.0043, "step": 13375 }, { "epoch": 20.0, "grad_norm": 0.1455078125, "learning_rate": 8.006123973114264e-05, "loss": 0.0039, "step": 13400 }, { "epoch": 20.03731343283582, "grad_norm": 0.06396484375, "learning_rate": 8.002389843166542e-05, "loss": 0.0022, "step": 13425 }, { "epoch": 20.074626865671643, "grad_norm": 0.05078125, "learning_rate": 7.99865571321882e-05, "loss": 0.0019, "step": 13450 }, { "epoch": 20.111940298507463, "grad_norm": 0.07080078125, "learning_rate": 7.994921583271097e-05, "loss": 0.0028, "step": 13475 }, { "epoch": 20.149253731343283, "grad_norm": 0.0595703125, "learning_rate": 7.991187453323375e-05, "loss": 0.0027, "step": 13500 }, { "epoch": 20.186567164179106, "grad_norm": 0.03515625, "learning_rate": 7.987453323375655e-05, "loss": 0.0024, "step": 13525 }, { "epoch": 20.223880597014926, "grad_norm": 0.05517578125, "learning_rate": 7.983719193427932e-05, "loss": 0.0024, "step": 13550 }, { "epoch": 20.261194029850746, "grad_norm": 0.036865234375, "learning_rate": 7.97998506348021e-05, "loss": 0.0028, "step": 13575 }, { "epoch": 20.298507462686565, "grad_norm": 0.0390625, "learning_rate": 7.976250933532488e-05, "loss": 0.0027, "step": 13600 }, { "epoch": 20.33582089552239, "grad_norm": 0.033447265625, "learning_rate": 7.972516803584765e-05, "loss": 0.0027, "step": 13625 }, { "epoch": 20.37313432835821, "grad_norm": 0.1689453125, "learning_rate": 7.968782673637043e-05, "loss": 0.0023, "step": 13650 }, { "epoch": 20.41044776119403, "grad_norm": 0.0322265625, "learning_rate": 7.965048543689321e-05, "loss": 0.0023, "step": 13675 }, { "epoch": 20.44776119402985, "grad_norm": 0.034912109375, "learning_rate": 7.961314413741599e-05, "loss": 0.0022, "step": 13700 }, { "epoch": 20.48507462686567, "grad_norm": 0.064453125, "learning_rate": 7.957580283793876e-05, "loss": 0.0023, "step": 13725 }, { "epoch": 20.52238805970149, "grad_norm": 0.0712890625, "learning_rate": 7.953846153846154e-05, "loss": 0.0024, "step": 13750 }, { "epoch": 20.559701492537314, "grad_norm": 0.035400390625, "learning_rate": 7.950112023898432e-05, "loss": 0.0027, "step": 13775 }, { "epoch": 20.597014925373134, "grad_norm": 0.035888671875, "learning_rate": 7.946377893950709e-05, "loss": 0.0026, "step": 13800 }, { "epoch": 20.634328358208954, "grad_norm": 0.054931640625, "learning_rate": 7.942643764002987e-05, "loss": 0.0024, "step": 13825 }, { "epoch": 20.671641791044777, "grad_norm": 0.047607421875, "learning_rate": 7.938909634055266e-05, "loss": 0.0026, "step": 13850 }, { "epoch": 20.708955223880597, "grad_norm": 0.025390625, "learning_rate": 7.935175504107543e-05, "loss": 0.0024, "step": 13875 }, { "epoch": 20.746268656716417, "grad_norm": 0.1416015625, "learning_rate": 7.931441374159821e-05, "loss": 0.0038, "step": 13900 }, { "epoch": 20.78358208955224, "grad_norm": 0.099609375, "learning_rate": 7.9277072442121e-05, "loss": 0.0031, "step": 13925 }, { "epoch": 20.82089552238806, "grad_norm": 0.048828125, "learning_rate": 7.923973114264377e-05, "loss": 0.0027, "step": 13950 }, { "epoch": 20.85820895522388, "grad_norm": 0.0341796875, "learning_rate": 7.920238984316654e-05, "loss": 0.0028, "step": 13975 }, { "epoch": 20.895522388059703, "grad_norm": 0.0233154296875, "learning_rate": 7.916504854368932e-05, "loss": 0.0026, "step": 14000 }, { "epoch": 20.932835820895523, "grad_norm": 0.031982421875, "learning_rate": 7.91277072442121e-05, "loss": 0.0025, "step": 14025 }, { "epoch": 20.970149253731343, "grad_norm": 0.052734375, "learning_rate": 7.909036594473487e-05, "loss": 0.0028, "step": 14050 }, { "epoch": 21.007462686567163, "grad_norm": 0.025390625, "learning_rate": 7.905302464525765e-05, "loss": 0.0026, "step": 14075 }, { "epoch": 21.044776119402986, "grad_norm": 0.2265625, "learning_rate": 7.901568334578043e-05, "loss": 0.0022, "step": 14100 }, { "epoch": 21.082089552238806, "grad_norm": 0.039794921875, "learning_rate": 7.897834204630322e-05, "loss": 0.0019, "step": 14125 }, { "epoch": 21.119402985074625, "grad_norm": 0.03173828125, "learning_rate": 7.894100074682598e-05, "loss": 0.0021, "step": 14150 }, { "epoch": 21.15671641791045, "grad_norm": 0.05126953125, "learning_rate": 7.890365944734878e-05, "loss": 0.0022, "step": 14175 }, { "epoch": 21.19402985074627, "grad_norm": 0.04833984375, "learning_rate": 7.886631814787156e-05, "loss": 0.0018, "step": 14200 }, { "epoch": 21.23134328358209, "grad_norm": 0.035400390625, "learning_rate": 7.882897684839433e-05, "loss": 0.0023, "step": 14225 }, { "epoch": 21.26865671641791, "grad_norm": 0.038818359375, "learning_rate": 7.879163554891711e-05, "loss": 0.0019, "step": 14250 }, { "epoch": 21.30597014925373, "grad_norm": 0.06640625, "learning_rate": 7.875429424943989e-05, "loss": 0.002, "step": 14275 }, { "epoch": 21.34328358208955, "grad_norm": 0.0390625, "learning_rate": 7.871695294996266e-05, "loss": 0.0019, "step": 14300 }, { "epoch": 21.380597014925375, "grad_norm": 0.044189453125, "learning_rate": 7.867961165048544e-05, "loss": 0.0023, "step": 14325 }, { "epoch": 21.417910447761194, "grad_norm": 0.039306640625, "learning_rate": 7.864227035100822e-05, "loss": 0.0024, "step": 14350 }, { "epoch": 21.455223880597014, "grad_norm": 0.017578125, "learning_rate": 7.8604929051531e-05, "loss": 0.0021, "step": 14375 }, { "epoch": 21.492537313432837, "grad_norm": 0.060791015625, "learning_rate": 7.856758775205377e-05, "loss": 0.0025, "step": 14400 }, { "epoch": 21.529850746268657, "grad_norm": 0.048583984375, "learning_rate": 7.853024645257655e-05, "loss": 0.0023, "step": 14425 }, { "epoch": 21.567164179104477, "grad_norm": 0.0177001953125, "learning_rate": 7.849290515309933e-05, "loss": 0.0022, "step": 14450 }, { "epoch": 21.604477611940297, "grad_norm": 0.054931640625, "learning_rate": 7.845556385362211e-05, "loss": 0.0023, "step": 14475 }, { "epoch": 21.64179104477612, "grad_norm": 0.01177978515625, "learning_rate": 7.841822255414489e-05, "loss": 0.0022, "step": 14500 }, { "epoch": 21.67910447761194, "grad_norm": 0.047607421875, "learning_rate": 7.838088125466767e-05, "loss": 0.0021, "step": 14525 }, { "epoch": 21.71641791044776, "grad_norm": 0.046875, "learning_rate": 7.834353995519044e-05, "loss": 0.002, "step": 14550 }, { "epoch": 21.753731343283583, "grad_norm": 0.04150390625, "learning_rate": 7.830619865571322e-05, "loss": 0.0023, "step": 14575 }, { "epoch": 21.791044776119403, "grad_norm": 0.0311279296875, "learning_rate": 7.8268857356236e-05, "loss": 0.0025, "step": 14600 }, { "epoch": 21.828358208955223, "grad_norm": 0.1064453125, "learning_rate": 7.823151605675879e-05, "loss": 0.0025, "step": 14625 }, { "epoch": 21.865671641791046, "grad_norm": 0.035400390625, "learning_rate": 7.819417475728155e-05, "loss": 0.0024, "step": 14650 }, { "epoch": 21.902985074626866, "grad_norm": 0.042236328125, "learning_rate": 7.815683345780433e-05, "loss": 0.0021, "step": 14675 }, { "epoch": 21.940298507462686, "grad_norm": 0.032470703125, "learning_rate": 7.811949215832712e-05, "loss": 0.0021, "step": 14700 }, { "epoch": 21.97761194029851, "grad_norm": 0.043701171875, "learning_rate": 7.80821508588499e-05, "loss": 0.0024, "step": 14725 }, { "epoch": 22.01492537313433, "grad_norm": 0.1103515625, "learning_rate": 7.804480955937266e-05, "loss": 0.0022, "step": 14750 }, { "epoch": 22.05223880597015, "grad_norm": 0.059326171875, "learning_rate": 7.800746825989545e-05, "loss": 0.0017, "step": 14775 }, { "epoch": 22.08955223880597, "grad_norm": 0.035888671875, "learning_rate": 7.797012696041823e-05, "loss": 0.0017, "step": 14800 }, { "epoch": 22.12686567164179, "grad_norm": 0.048583984375, "learning_rate": 7.7932785660941e-05, "loss": 0.0019, "step": 14825 }, { "epoch": 22.16417910447761, "grad_norm": 0.052978515625, "learning_rate": 7.789544436146379e-05, "loss": 0.0016, "step": 14850 }, { "epoch": 22.20149253731343, "grad_norm": 0.04443359375, "learning_rate": 7.785810306198657e-05, "loss": 0.0015, "step": 14875 }, { "epoch": 22.238805970149254, "grad_norm": 0.01458740234375, "learning_rate": 7.782076176250934e-05, "loss": 0.0018, "step": 14900 }, { "epoch": 22.276119402985074, "grad_norm": 0.031982421875, "learning_rate": 7.778342046303212e-05, "loss": 0.0017, "step": 14925 }, { "epoch": 22.313432835820894, "grad_norm": 0.03564453125, "learning_rate": 7.77460791635549e-05, "loss": 0.0021, "step": 14950 }, { "epoch": 22.350746268656717, "grad_norm": 0.04443359375, "learning_rate": 7.770873786407768e-05, "loss": 0.0023, "step": 14975 }, { "epoch": 22.388059701492537, "grad_norm": 0.043701171875, "learning_rate": 7.767139656460045e-05, "loss": 0.0019, "step": 15000 }, { "epoch": 22.425373134328357, "grad_norm": 0.03125, "learning_rate": 7.763405526512323e-05, "loss": 0.0019, "step": 15025 }, { "epoch": 22.46268656716418, "grad_norm": 0.041259765625, "learning_rate": 7.759671396564601e-05, "loss": 0.0019, "step": 15050 }, { "epoch": 22.5, "grad_norm": 0.043701171875, "learning_rate": 7.755937266616878e-05, "loss": 0.0027, "step": 15075 }, { "epoch": 22.53731343283582, "grad_norm": 0.0439453125, "learning_rate": 7.752203136669156e-05, "loss": 0.0023, "step": 15100 }, { "epoch": 22.574626865671643, "grad_norm": 0.04443359375, "learning_rate": 7.748469006721434e-05, "loss": 0.002, "step": 15125 }, { "epoch": 22.611940298507463, "grad_norm": 0.03662109375, "learning_rate": 7.744734876773712e-05, "loss": 0.0025, "step": 15150 }, { "epoch": 22.649253731343283, "grad_norm": 0.042724609375, "learning_rate": 7.74100074682599e-05, "loss": 0.0027, "step": 15175 }, { "epoch": 22.686567164179106, "grad_norm": 0.0361328125, "learning_rate": 7.737266616878268e-05, "loss": 0.0024, "step": 15200 }, { "epoch": 22.723880597014926, "grad_norm": 0.049560546875, "learning_rate": 7.733532486930547e-05, "loss": 0.0025, "step": 15225 }, { "epoch": 22.761194029850746, "grad_norm": 0.051025390625, "learning_rate": 7.729798356982823e-05, "loss": 0.0027, "step": 15250 }, { "epoch": 22.798507462686565, "grad_norm": 0.052978515625, "learning_rate": 7.726064227035101e-05, "loss": 0.0028, "step": 15275 }, { "epoch": 22.83582089552239, "grad_norm": 0.0517578125, "learning_rate": 7.72233009708738e-05, "loss": 0.003, "step": 15300 }, { "epoch": 22.87313432835821, "grad_norm": 0.047119140625, "learning_rate": 7.718595967139656e-05, "loss": 0.0034, "step": 15325 }, { "epoch": 22.91044776119403, "grad_norm": 0.048828125, "learning_rate": 7.714861837191934e-05, "loss": 0.0035, "step": 15350 }, { "epoch": 22.94776119402985, "grad_norm": 0.10400390625, "learning_rate": 7.711127707244213e-05, "loss": 0.0032, "step": 15375 }, { "epoch": 22.98507462686567, "grad_norm": 0.056396484375, "learning_rate": 7.70739357729649e-05, "loss": 0.0033, "step": 15400 }, { "epoch": 23.02238805970149, "grad_norm": 0.064453125, "learning_rate": 7.703659447348767e-05, "loss": 0.0032, "step": 15425 }, { "epoch": 23.059701492537314, "grad_norm": 0.11474609375, "learning_rate": 7.699925317401046e-05, "loss": 0.0035, "step": 15450 }, { "epoch": 23.097014925373134, "grad_norm": 0.15625, "learning_rate": 7.696191187453324e-05, "loss": 0.0053, "step": 15475 }, { "epoch": 23.134328358208954, "grad_norm": 0.1669921875, "learning_rate": 7.692457057505602e-05, "loss": 0.0048, "step": 15500 }, { "epoch": 23.171641791044777, "grad_norm": 0.1025390625, "learning_rate": 7.68872292755788e-05, "loss": 0.0052, "step": 15525 }, { "epoch": 23.208955223880597, "grad_norm": 0.12451171875, "learning_rate": 7.684988797610158e-05, "loss": 0.0056, "step": 15550 }, { "epoch": 23.246268656716417, "grad_norm": 0.08447265625, "learning_rate": 7.681254667662435e-05, "loss": 0.0061, "step": 15575 }, { "epoch": 23.28358208955224, "grad_norm": 0.2451171875, "learning_rate": 7.677520537714713e-05, "loss": 0.009, "step": 15600 }, { "epoch": 23.32089552238806, "grad_norm": 0.259765625, "learning_rate": 7.673786407766991e-05, "loss": 0.009, "step": 15625 }, { "epoch": 23.35820895522388, "grad_norm": 0.1640625, "learning_rate": 7.670052277819269e-05, "loss": 0.0091, "step": 15650 }, { "epoch": 23.395522388059703, "grad_norm": 0.2216796875, "learning_rate": 7.666318147871546e-05, "loss": 0.0098, "step": 15675 }, { "epoch": 23.432835820895523, "grad_norm": 0.33203125, "learning_rate": 7.662584017923824e-05, "loss": 0.0087, "step": 15700 }, { "epoch": 23.470149253731343, "grad_norm": 0.279296875, "learning_rate": 7.658849887976102e-05, "loss": 0.0089, "step": 15725 }, { "epoch": 23.507462686567163, "grad_norm": 0.1826171875, "learning_rate": 7.655115758028379e-05, "loss": 0.0094, "step": 15750 }, { "epoch": 23.544776119402986, "grad_norm": 0.296875, "learning_rate": 7.651381628080657e-05, "loss": 0.0095, "step": 15775 }, { "epoch": 23.582089552238806, "grad_norm": 0.298828125, "learning_rate": 7.647647498132935e-05, "loss": 0.0097, "step": 15800 }, { "epoch": 23.619402985074625, "grad_norm": 0.314453125, "learning_rate": 7.643913368185213e-05, "loss": 0.01, "step": 15825 }, { "epoch": 23.65671641791045, "grad_norm": 0.2255859375, "learning_rate": 7.640179238237491e-05, "loss": 0.0092, "step": 15850 }, { "epoch": 23.69402985074627, "grad_norm": 0.2060546875, "learning_rate": 7.63644510828977e-05, "loss": 0.0088, "step": 15875 }, { "epoch": 23.73134328358209, "grad_norm": 0.1953125, "learning_rate": 7.632710978342048e-05, "loss": 0.0106, "step": 15900 }, { "epoch": 23.76865671641791, "grad_norm": 0.384765625, "learning_rate": 7.628976848394324e-05, "loss": 0.0081, "step": 15925 }, { "epoch": 23.80597014925373, "grad_norm": 0.275390625, "learning_rate": 7.625242718446602e-05, "loss": 0.0073, "step": 15950 }, { "epoch": 23.84328358208955, "grad_norm": 0.1640625, "learning_rate": 7.62150858849888e-05, "loss": 0.0073, "step": 15975 }, { "epoch": 23.880597014925375, "grad_norm": 0.287109375, "learning_rate": 7.617774458551157e-05, "loss": 0.0078, "step": 16000 }, { "epoch": 23.917910447761194, "grad_norm": 0.30078125, "learning_rate": 7.614040328603435e-05, "loss": 0.0067, "step": 16025 }, { "epoch": 23.955223880597014, "grad_norm": 0.271484375, "learning_rate": 7.610306198655714e-05, "loss": 0.0071, "step": 16050 }, { "epoch": 23.992537313432837, "grad_norm": 0.14453125, "learning_rate": 7.606572068707992e-05, "loss": 0.007, "step": 16075 }, { "epoch": 24.029850746268657, "grad_norm": 0.0751953125, "learning_rate": 7.602837938760268e-05, "loss": 0.0049, "step": 16100 }, { "epoch": 24.067164179104477, "grad_norm": 0.12353515625, "learning_rate": 7.599103808812547e-05, "loss": 0.004, "step": 16125 }, { "epoch": 24.104477611940297, "grad_norm": 0.1513671875, "learning_rate": 7.595369678864825e-05, "loss": 0.0044, "step": 16150 }, { "epoch": 24.14179104477612, "grad_norm": 0.0693359375, "learning_rate": 7.591635548917103e-05, "loss": 0.0044, "step": 16175 }, { "epoch": 24.17910447761194, "grad_norm": 0.1259765625, "learning_rate": 7.587901418969381e-05, "loss": 0.004, "step": 16200 }, { "epoch": 24.21641791044776, "grad_norm": 0.248046875, "learning_rate": 7.584167289021659e-05, "loss": 0.0047, "step": 16225 }, { "epoch": 24.253731343283583, "grad_norm": 0.1376953125, "learning_rate": 7.580433159073936e-05, "loss": 0.0041, "step": 16250 }, { "epoch": 24.291044776119403, "grad_norm": 0.056640625, "learning_rate": 7.576699029126214e-05, "loss": 0.0036, "step": 16275 }, { "epoch": 24.328358208955223, "grad_norm": 0.1767578125, "learning_rate": 7.572964899178492e-05, "loss": 0.0033, "step": 16300 }, { "epoch": 24.365671641791046, "grad_norm": 0.11962890625, "learning_rate": 7.56923076923077e-05, "loss": 0.0041, "step": 16325 }, { "epoch": 24.402985074626866, "grad_norm": 0.01513671875, "learning_rate": 7.565496639283047e-05, "loss": 0.0035, "step": 16350 }, { "epoch": 24.440298507462686, "grad_norm": 0.0830078125, "learning_rate": 7.561762509335325e-05, "loss": 0.0039, "step": 16375 }, { "epoch": 24.47761194029851, "grad_norm": 0.1259765625, "learning_rate": 7.558028379387603e-05, "loss": 0.0038, "step": 16400 }, { "epoch": 24.51492537313433, "grad_norm": 0.189453125, "learning_rate": 7.55429424943988e-05, "loss": 0.0036, "step": 16425 }, { "epoch": 24.55223880597015, "grad_norm": 0.09423828125, "learning_rate": 7.550560119492158e-05, "loss": 0.0047, "step": 16450 }, { "epoch": 24.58955223880597, "grad_norm": 0.07861328125, "learning_rate": 7.546825989544436e-05, "loss": 0.0041, "step": 16475 }, { "epoch": 24.62686567164179, "grad_norm": 0.055419921875, "learning_rate": 7.543091859596714e-05, "loss": 0.004, "step": 16500 }, { "epoch": 24.66417910447761, "grad_norm": 0.0556640625, "learning_rate": 7.539357729648992e-05, "loss": 0.0041, "step": 16525 }, { "epoch": 24.701492537313435, "grad_norm": 0.0869140625, "learning_rate": 7.53562359970127e-05, "loss": 0.0036, "step": 16550 }, { "epoch": 24.738805970149254, "grad_norm": 0.1650390625, "learning_rate": 7.531889469753549e-05, "loss": 0.004, "step": 16575 }, { "epoch": 24.776119402985074, "grad_norm": 0.07861328125, "learning_rate": 7.528155339805825e-05, "loss": 0.0038, "step": 16600 }, { "epoch": 24.813432835820894, "grad_norm": 0.06640625, "learning_rate": 7.524421209858103e-05, "loss": 0.0043, "step": 16625 }, { "epoch": 24.850746268656717, "grad_norm": 0.1396484375, "learning_rate": 7.520687079910382e-05, "loss": 0.0038, "step": 16650 }, { "epoch": 24.888059701492537, "grad_norm": 0.08154296875, "learning_rate": 7.516952949962658e-05, "loss": 0.0041, "step": 16675 }, { "epoch": 24.925373134328357, "grad_norm": 0.1494140625, "learning_rate": 7.513218820014936e-05, "loss": 0.0035, "step": 16700 }, { "epoch": 24.96268656716418, "grad_norm": 0.07275390625, "learning_rate": 7.509484690067215e-05, "loss": 0.0039, "step": 16725 }, { "epoch": 25.0, "grad_norm": 0.1455078125, "learning_rate": 7.505750560119493e-05, "loss": 0.0036, "step": 16750 }, { "epoch": 25.03731343283582, "grad_norm": 0.10791015625, "learning_rate": 7.50201643017177e-05, "loss": 0.0022, "step": 16775 }, { "epoch": 25.074626865671643, "grad_norm": 0.056640625, "learning_rate": 7.498282300224048e-05, "loss": 0.0018, "step": 16800 }, { "epoch": 25.111940298507463, "grad_norm": 0.0537109375, "learning_rate": 7.494548170276327e-05, "loss": 0.0024, "step": 16825 }, { "epoch": 25.149253731343283, "grad_norm": 0.0238037109375, "learning_rate": 7.490814040328604e-05, "loss": 0.0022, "step": 16850 }, { "epoch": 25.186567164179106, "grad_norm": 0.038818359375, "learning_rate": 7.487079910380882e-05, "loss": 0.002, "step": 16875 }, { "epoch": 25.223880597014926, "grad_norm": 0.041748046875, "learning_rate": 7.48334578043316e-05, "loss": 0.0022, "step": 16900 }, { "epoch": 25.261194029850746, "grad_norm": 0.05517578125, "learning_rate": 7.479611650485437e-05, "loss": 0.0022, "step": 16925 }, { "epoch": 25.298507462686565, "grad_norm": 0.07470703125, "learning_rate": 7.475877520537715e-05, "loss": 0.0021, "step": 16950 }, { "epoch": 25.33582089552239, "grad_norm": 0.03857421875, "learning_rate": 7.472143390589993e-05, "loss": 0.0025, "step": 16975 }, { "epoch": 25.37313432835821, "grad_norm": 0.04638671875, "learning_rate": 7.468409260642271e-05, "loss": 0.0024, "step": 17000 }, { "epoch": 25.41044776119403, "grad_norm": 0.04833984375, "learning_rate": 7.464675130694548e-05, "loss": 0.0021, "step": 17025 }, { "epoch": 25.44776119402985, "grad_norm": 0.051513671875, "learning_rate": 7.460941000746826e-05, "loss": 0.0024, "step": 17050 }, { "epoch": 25.48507462686567, "grad_norm": 0.04638671875, "learning_rate": 7.457206870799104e-05, "loss": 0.002, "step": 17075 }, { "epoch": 25.52238805970149, "grad_norm": 0.062255859375, "learning_rate": 7.453472740851381e-05, "loss": 0.0021, "step": 17100 }, { "epoch": 25.559701492537314, "grad_norm": 0.048583984375, "learning_rate": 7.449738610903659e-05, "loss": 0.002, "step": 17125 }, { "epoch": 25.597014925373134, "grad_norm": 0.0299072265625, "learning_rate": 7.446004480955939e-05, "loss": 0.0021, "step": 17150 }, { "epoch": 25.634328358208954, "grad_norm": 0.0712890625, "learning_rate": 7.442270351008215e-05, "loss": 0.0022, "step": 17175 }, { "epoch": 25.671641791044777, "grad_norm": 0.0771484375, "learning_rate": 7.438536221060493e-05, "loss": 0.0024, "step": 17200 }, { "epoch": 25.708955223880597, "grad_norm": 0.0390625, "learning_rate": 7.434802091112772e-05, "loss": 0.0023, "step": 17225 }, { "epoch": 25.746268656716417, "grad_norm": 0.023681640625, "learning_rate": 7.43106796116505e-05, "loss": 0.0027, "step": 17250 }, { "epoch": 25.78358208955224, "grad_norm": 0.0458984375, "learning_rate": 7.427333831217326e-05, "loss": 0.0027, "step": 17275 }, { "epoch": 25.82089552238806, "grad_norm": 0.03271484375, "learning_rate": 7.423599701269605e-05, "loss": 0.0024, "step": 17300 }, { "epoch": 25.85820895522388, "grad_norm": 0.03564453125, "learning_rate": 7.419865571321883e-05, "loss": 0.003, "step": 17325 }, { "epoch": 25.895522388059703, "grad_norm": 0.0498046875, "learning_rate": 7.41613144137416e-05, "loss": 0.0025, "step": 17350 }, { "epoch": 25.932835820895523, "grad_norm": 0.05810546875, "learning_rate": 7.412397311426438e-05, "loss": 0.0025, "step": 17375 }, { "epoch": 25.970149253731343, "grad_norm": 0.048095703125, "learning_rate": 7.408663181478716e-05, "loss": 0.0022, "step": 17400 }, { "epoch": 26.007462686567163, "grad_norm": 0.028564453125, "learning_rate": 7.404929051530994e-05, "loss": 0.0027, "step": 17425 }, { "epoch": 26.044776119402986, "grad_norm": 0.028076171875, "learning_rate": 7.40119492158327e-05, "loss": 0.0019, "step": 17450 }, { "epoch": 26.082089552238806, "grad_norm": 0.0267333984375, "learning_rate": 7.397460791635549e-05, "loss": 0.0019, "step": 17475 }, { "epoch": 26.119402985074625, "grad_norm": 0.03369140625, "learning_rate": 7.393726661687828e-05, "loss": 0.0018, "step": 17500 }, { "epoch": 26.15671641791045, "grad_norm": 0.041748046875, "learning_rate": 7.389992531740105e-05, "loss": 0.0022, "step": 17525 }, { "epoch": 26.19402985074627, "grad_norm": 0.040771484375, "learning_rate": 7.386258401792383e-05, "loss": 0.0019, "step": 17550 }, { "epoch": 26.23134328358209, "grad_norm": 0.029541015625, "learning_rate": 7.382524271844661e-05, "loss": 0.0015, "step": 17575 }, { "epoch": 26.26865671641791, "grad_norm": 0.03564453125, "learning_rate": 7.378790141896938e-05, "loss": 0.0021, "step": 17600 }, { "epoch": 26.30597014925373, "grad_norm": 0.078125, "learning_rate": 7.375056011949216e-05, "loss": 0.0019, "step": 17625 }, { "epoch": 26.34328358208955, "grad_norm": 0.043701171875, "learning_rate": 7.371321882001494e-05, "loss": 0.002, "step": 17650 }, { "epoch": 26.380597014925375, "grad_norm": 0.02197265625, "learning_rate": 7.367587752053772e-05, "loss": 0.002, "step": 17675 }, { "epoch": 26.417910447761194, "grad_norm": 0.033447265625, "learning_rate": 7.363853622106049e-05, "loss": 0.002, "step": 17700 }, { "epoch": 26.455223880597014, "grad_norm": 0.0380859375, "learning_rate": 7.360119492158327e-05, "loss": 0.0017, "step": 17725 }, { "epoch": 26.492537313432837, "grad_norm": 0.0299072265625, "learning_rate": 7.356385362210605e-05, "loss": 0.0023, "step": 17750 }, { "epoch": 26.529850746268657, "grad_norm": 0.0223388671875, "learning_rate": 7.352651232262883e-05, "loss": 0.0023, "step": 17775 }, { "epoch": 26.567164179104477, "grad_norm": 0.061767578125, "learning_rate": 7.34891710231516e-05, "loss": 0.0023, "step": 17800 }, { "epoch": 26.604477611940297, "grad_norm": 0.040283203125, "learning_rate": 7.34518297236744e-05, "loss": 0.003, "step": 17825 }, { "epoch": 26.64179104477612, "grad_norm": 0.1826171875, "learning_rate": 7.341448842419716e-05, "loss": 0.0041, "step": 17850 }, { "epoch": 26.67910447761194, "grad_norm": 0.0810546875, "learning_rate": 7.337714712471994e-05, "loss": 0.0026, "step": 17875 }, { "epoch": 26.71641791044776, "grad_norm": 0.044189453125, "learning_rate": 7.333980582524273e-05, "loss": 0.0025, "step": 17900 }, { "epoch": 26.753731343283583, "grad_norm": 0.017578125, "learning_rate": 7.33024645257655e-05, "loss": 0.0021, "step": 17925 }, { "epoch": 26.791044776119403, "grad_norm": 0.0255126953125, "learning_rate": 7.326512322628827e-05, "loss": 0.0025, "step": 17950 }, { "epoch": 26.828358208955223, "grad_norm": 0.0126953125, "learning_rate": 7.322778192681106e-05, "loss": 0.0023, "step": 17975 }, { "epoch": 26.865671641791046, "grad_norm": 0.07568359375, "learning_rate": 7.319044062733384e-05, "loss": 0.0021, "step": 18000 }, { "epoch": 26.902985074626866, "grad_norm": 0.04541015625, "learning_rate": 7.315309932785662e-05, "loss": 0.0025, "step": 18025 }, { "epoch": 26.940298507462686, "grad_norm": 0.24609375, "learning_rate": 7.311575802837939e-05, "loss": 0.0032, "step": 18050 }, { "epoch": 26.97761194029851, "grad_norm": 0.050048828125, "learning_rate": 7.307841672890217e-05, "loss": 0.0026, "step": 18075 }, { "epoch": 27.01492537313433, "grad_norm": 0.02392578125, "learning_rate": 7.304107542942495e-05, "loss": 0.0025, "step": 18100 }, { "epoch": 27.05223880597015, "grad_norm": 0.0263671875, "learning_rate": 7.300373412994772e-05, "loss": 0.0018, "step": 18125 }, { "epoch": 27.08955223880597, "grad_norm": 0.057861328125, "learning_rate": 7.296639283047051e-05, "loss": 0.0023, "step": 18150 }, { "epoch": 27.12686567164179, "grad_norm": 0.041259765625, "learning_rate": 7.292905153099329e-05, "loss": 0.0025, "step": 18175 }, { "epoch": 27.16417910447761, "grad_norm": 0.0517578125, "learning_rate": 7.289171023151606e-05, "loss": 0.0025, "step": 18200 }, { "epoch": 27.20149253731343, "grad_norm": 0.07275390625, "learning_rate": 7.285436893203884e-05, "loss": 0.0021, "step": 18225 }, { "epoch": 27.238805970149254, "grad_norm": 0.050048828125, "learning_rate": 7.281702763256162e-05, "loss": 0.0021, "step": 18250 }, { "epoch": 27.276119402985074, "grad_norm": 0.043701171875, "learning_rate": 7.27796863330844e-05, "loss": 0.0027, "step": 18275 }, { "epoch": 27.313432835820894, "grad_norm": 0.04296875, "learning_rate": 7.274234503360717e-05, "loss": 0.0023, "step": 18300 }, { "epoch": 27.350746268656717, "grad_norm": 0.068359375, "learning_rate": 7.270500373412995e-05, "loss": 0.0025, "step": 18325 }, { "epoch": 27.388059701492537, "grad_norm": 0.04345703125, "learning_rate": 7.266766243465273e-05, "loss": 0.0023, "step": 18350 }, { "epoch": 27.425373134328357, "grad_norm": 0.033447265625, "learning_rate": 7.26303211351755e-05, "loss": 0.0026, "step": 18375 }, { "epoch": 27.46268656716418, "grad_norm": 0.052001953125, "learning_rate": 7.259297983569828e-05, "loss": 0.0021, "step": 18400 }, { "epoch": 27.5, "grad_norm": 0.033935546875, "learning_rate": 7.255563853622106e-05, "loss": 0.003, "step": 18425 }, { "epoch": 27.53731343283582, "grad_norm": 0.035888671875, "learning_rate": 7.251829723674384e-05, "loss": 0.0026, "step": 18450 }, { "epoch": 27.574626865671643, "grad_norm": 0.02978515625, "learning_rate": 7.248095593726662e-05, "loss": 0.0022, "step": 18475 }, { "epoch": 27.611940298507463, "grad_norm": 0.06396484375, "learning_rate": 7.24436146377894e-05, "loss": 0.0022, "step": 18500 }, { "epoch": 27.649253731343283, "grad_norm": 0.0810546875, "learning_rate": 7.240627333831219e-05, "loss": 0.0026, "step": 18525 }, { "epoch": 27.686567164179106, "grad_norm": 0.064453125, "learning_rate": 7.236893203883495e-05, "loss": 0.0026, "step": 18550 }, { "epoch": 27.723880597014926, "grad_norm": 0.04541015625, "learning_rate": 7.233159073935774e-05, "loss": 0.0021, "step": 18575 }, { "epoch": 27.761194029850746, "grad_norm": 0.0228271484375, "learning_rate": 7.229424943988052e-05, "loss": 0.0024, "step": 18600 }, { "epoch": 27.798507462686565, "grad_norm": 0.0311279296875, "learning_rate": 7.225690814040328e-05, "loss": 0.0025, "step": 18625 }, { "epoch": 27.83582089552239, "grad_norm": 0.060302734375, "learning_rate": 7.221956684092607e-05, "loss": 0.0024, "step": 18650 }, { "epoch": 27.87313432835821, "grad_norm": 0.07958984375, "learning_rate": 7.218222554144885e-05, "loss": 0.0028, "step": 18675 }, { "epoch": 27.91044776119403, "grad_norm": 0.0419921875, "learning_rate": 7.214488424197163e-05, "loss": 0.0023, "step": 18700 }, { "epoch": 27.94776119402985, "grad_norm": 0.043701171875, "learning_rate": 7.21075429424944e-05, "loss": 0.0023, "step": 18725 }, { "epoch": 27.98507462686567, "grad_norm": 0.032958984375, "learning_rate": 7.207020164301718e-05, "loss": 0.0027, "step": 18750 }, { "epoch": 28.02238805970149, "grad_norm": 0.053955078125, "learning_rate": 7.203286034353996e-05, "loss": 0.0024, "step": 18775 }, { "epoch": 28.059701492537314, "grad_norm": 0.0260009765625, "learning_rate": 7.199551904406273e-05, "loss": 0.0019, "step": 18800 }, { "epoch": 28.097014925373134, "grad_norm": 0.022705078125, "learning_rate": 7.195817774458552e-05, "loss": 0.002, "step": 18825 }, { "epoch": 28.134328358208954, "grad_norm": 0.04345703125, "learning_rate": 7.19208364451083e-05, "loss": 0.0018, "step": 18850 }, { "epoch": 28.171641791044777, "grad_norm": 0.033203125, "learning_rate": 7.188349514563107e-05, "loss": 0.002, "step": 18875 }, { "epoch": 28.208955223880597, "grad_norm": 0.03955078125, "learning_rate": 7.184615384615385e-05, "loss": 0.002, "step": 18900 }, { "epoch": 28.246268656716417, "grad_norm": 0.033935546875, "learning_rate": 7.180881254667663e-05, "loss": 0.0022, "step": 18925 }, { "epoch": 28.28358208955224, "grad_norm": 0.0390625, "learning_rate": 7.177147124719941e-05, "loss": 0.0022, "step": 18950 }, { "epoch": 28.32089552238806, "grad_norm": 0.0201416015625, "learning_rate": 7.173412994772218e-05, "loss": 0.0022, "step": 18975 }, { "epoch": 28.35820895522388, "grad_norm": 0.045654296875, "learning_rate": 7.169678864824496e-05, "loss": 0.0026, "step": 19000 }, { "epoch": 28.395522388059703, "grad_norm": 0.04833984375, "learning_rate": 7.165944734876774e-05, "loss": 0.0026, "step": 19025 }, { "epoch": 28.432835820895523, "grad_norm": 0.06396484375, "learning_rate": 7.162210604929051e-05, "loss": 0.0023, "step": 19050 }, { "epoch": 28.470149253731343, "grad_norm": 0.04296875, "learning_rate": 7.158476474981329e-05, "loss": 0.0027, "step": 19075 }, { "epoch": 28.507462686567163, "grad_norm": 0.0284423828125, "learning_rate": 7.154742345033607e-05, "loss": 0.0019, "step": 19100 }, { "epoch": 28.544776119402986, "grad_norm": 0.039306640625, "learning_rate": 7.151008215085885e-05, "loss": 0.0022, "step": 19125 }, { "epoch": 28.582089552238806, "grad_norm": 0.042236328125, "learning_rate": 7.147274085138164e-05, "loss": 0.0021, "step": 19150 }, { "epoch": 28.619402985074625, "grad_norm": 0.0400390625, "learning_rate": 7.143539955190442e-05, "loss": 0.0024, "step": 19175 }, { "epoch": 28.65671641791045, "grad_norm": 0.036376953125, "learning_rate": 7.13980582524272e-05, "loss": 0.0029, "step": 19200 }, { "epoch": 28.69402985074627, "grad_norm": 0.0419921875, "learning_rate": 7.136071695294997e-05, "loss": 0.0031, "step": 19225 }, { "epoch": 28.73134328358209, "grad_norm": 0.08984375, "learning_rate": 7.132337565347275e-05, "loss": 0.0038, "step": 19250 }, { "epoch": 28.76865671641791, "grad_norm": 0.056640625, "learning_rate": 7.128603435399553e-05, "loss": 0.0026, "step": 19275 }, { "epoch": 28.80597014925373, "grad_norm": 0.0703125, "learning_rate": 7.12486930545183e-05, "loss": 0.0031, "step": 19300 }, { "epoch": 28.84328358208955, "grad_norm": 0.1142578125, "learning_rate": 7.121135175504108e-05, "loss": 0.003, "step": 19325 }, { "epoch": 28.880597014925375, "grad_norm": 0.03662109375, "learning_rate": 7.117401045556386e-05, "loss": 0.0033, "step": 19350 }, { "epoch": 28.917910447761194, "grad_norm": 0.09765625, "learning_rate": 7.113666915608664e-05, "loss": 0.0035, "step": 19375 }, { "epoch": 28.955223880597014, "grad_norm": 0.10595703125, "learning_rate": 7.10993278566094e-05, "loss": 0.0031, "step": 19400 }, { "epoch": 28.992537313432837, "grad_norm": 0.072265625, "learning_rate": 7.106198655713219e-05, "loss": 0.0043, "step": 19425 }, { "epoch": 29.029850746268657, "grad_norm": 0.07958984375, "learning_rate": 7.102464525765497e-05, "loss": 0.0037, "step": 19450 }, { "epoch": 29.067164179104477, "grad_norm": 0.054443359375, "learning_rate": 7.098730395817775e-05, "loss": 0.0036, "step": 19475 }, { "epoch": 29.104477611940297, "grad_norm": 0.0966796875, "learning_rate": 7.094996265870053e-05, "loss": 0.0031, "step": 19500 }, { "epoch": 29.14179104477612, "grad_norm": 0.078125, "learning_rate": 7.091262135922331e-05, "loss": 0.0035, "step": 19525 }, { "epoch": 29.17910447761194, "grad_norm": 0.11865234375, "learning_rate": 7.087528005974608e-05, "loss": 0.0034, "step": 19550 }, { "epoch": 29.21641791044776, "grad_norm": 0.0615234375, "learning_rate": 7.083793876026886e-05, "loss": 0.0031, "step": 19575 }, { "epoch": 29.253731343283583, "grad_norm": 0.072265625, "learning_rate": 7.080059746079164e-05, "loss": 0.003, "step": 19600 }, { "epoch": 29.291044776119403, "grad_norm": 0.052978515625, "learning_rate": 7.076325616131442e-05, "loss": 0.0031, "step": 19625 }, { "epoch": 29.328358208955223, "grad_norm": 0.14453125, "learning_rate": 7.072591486183719e-05, "loss": 0.004, "step": 19650 }, { "epoch": 29.365671641791046, "grad_norm": 0.259765625, "learning_rate": 7.068857356235997e-05, "loss": 0.0051, "step": 19675 }, { "epoch": 29.402985074626866, "grad_norm": 0.1455078125, "learning_rate": 7.065123226288275e-05, "loss": 0.0049, "step": 19700 }, { "epoch": 29.440298507462686, "grad_norm": 0.0830078125, "learning_rate": 7.061389096340552e-05, "loss": 0.0078, "step": 19725 }, { "epoch": 29.47761194029851, "grad_norm": 0.0634765625, "learning_rate": 7.05765496639283e-05, "loss": 0.0043, "step": 19750 }, { "epoch": 29.51492537313433, "grad_norm": 0.08251953125, "learning_rate": 7.053920836445108e-05, "loss": 0.0047, "step": 19775 }, { "epoch": 29.55223880597015, "grad_norm": 0.212890625, "learning_rate": 7.050186706497386e-05, "loss": 0.0046, "step": 19800 }, { "epoch": 29.58955223880597, "grad_norm": 0.23046875, "learning_rate": 7.046452576549665e-05, "loss": 0.005, "step": 19825 }, { "epoch": 29.62686567164179, "grad_norm": 0.10595703125, "learning_rate": 7.042718446601943e-05, "loss": 0.0042, "step": 19850 }, { "epoch": 29.66417910447761, "grad_norm": 0.0888671875, "learning_rate": 7.038984316654221e-05, "loss": 0.0045, "step": 19875 }, { "epoch": 29.701492537313435, "grad_norm": 0.21875, "learning_rate": 7.035250186706498e-05, "loss": 0.004, "step": 19900 }, { "epoch": 29.738805970149254, "grad_norm": 0.09375, "learning_rate": 7.031516056758776e-05, "loss": 0.0037, "step": 19925 }, { "epoch": 29.776119402985074, "grad_norm": 0.13671875, "learning_rate": 7.027781926811054e-05, "loss": 0.004, "step": 19950 }, { "epoch": 29.813432835820894, "grad_norm": 0.0986328125, "learning_rate": 7.02404779686333e-05, "loss": 0.0039, "step": 19975 }, { "epoch": 29.850746268656717, "grad_norm": 0.09033203125, "learning_rate": 7.020313666915609e-05, "loss": 0.004, "step": 20000 }, { "epoch": 29.888059701492537, "grad_norm": 0.0888671875, "learning_rate": 7.016579536967887e-05, "loss": 0.0041, "step": 20025 }, { "epoch": 29.925373134328357, "grad_norm": 0.119140625, "learning_rate": 7.012845407020165e-05, "loss": 0.0038, "step": 20050 }, { "epoch": 29.96268656716418, "grad_norm": 0.05029296875, "learning_rate": 7.009111277072442e-05, "loss": 0.003, "step": 20075 }, { "epoch": 30.0, "grad_norm": 0.14453125, "learning_rate": 7.00537714712472e-05, "loss": 0.0036, "step": 20100 }, { "epoch": 30.03731343283582, "grad_norm": 0.189453125, "learning_rate": 7.001643017176998e-05, "loss": 0.0028, "step": 20125 }, { "epoch": 30.074626865671643, "grad_norm": 0.171875, "learning_rate": 6.997908887229276e-05, "loss": 0.0023, "step": 20150 }, { "epoch": 30.111940298507463, "grad_norm": 0.1298828125, "learning_rate": 6.994174757281554e-05, "loss": 0.0022, "step": 20175 }, { "epoch": 30.149253731343283, "grad_norm": 0.138671875, "learning_rate": 6.990440627333832e-05, "loss": 0.0021, "step": 20200 }, { "epoch": 30.186567164179106, "grad_norm": 0.04736328125, "learning_rate": 6.986706497386109e-05, "loss": 0.0026, "step": 20225 }, { "epoch": 30.223880597014926, "grad_norm": 0.060302734375, "learning_rate": 6.982972367438387e-05, "loss": 0.003, "step": 20250 }, { "epoch": 30.261194029850746, "grad_norm": 0.048095703125, "learning_rate": 6.979238237490665e-05, "loss": 0.0022, "step": 20275 }, { "epoch": 30.298507462686565, "grad_norm": 0.04638671875, "learning_rate": 6.975504107542943e-05, "loss": 0.0025, "step": 20300 }, { "epoch": 30.33582089552239, "grad_norm": 0.09375, "learning_rate": 6.97176997759522e-05, "loss": 0.0028, "step": 20325 }, { "epoch": 30.37313432835821, "grad_norm": 0.04248046875, "learning_rate": 6.968035847647498e-05, "loss": 0.0025, "step": 20350 }, { "epoch": 30.41044776119403, "grad_norm": 0.09228515625, "learning_rate": 6.964301717699776e-05, "loss": 0.0028, "step": 20375 }, { "epoch": 30.44776119402985, "grad_norm": 0.06884765625, "learning_rate": 6.960567587752053e-05, "loss": 0.0023, "step": 20400 }, { "epoch": 30.48507462686567, "grad_norm": 0.06494140625, "learning_rate": 6.956833457804331e-05, "loss": 0.0027, "step": 20425 }, { "epoch": 30.52238805970149, "grad_norm": 0.049072265625, "learning_rate": 6.95309932785661e-05, "loss": 0.0031, "step": 20450 }, { "epoch": 30.559701492537314, "grad_norm": 0.035888671875, "learning_rate": 6.949365197908887e-05, "loss": 0.0032, "step": 20475 }, { "epoch": 30.597014925373134, "grad_norm": 0.0556640625, "learning_rate": 6.945631067961166e-05, "loss": 0.0023, "step": 20500 }, { "epoch": 30.634328358208954, "grad_norm": 0.0537109375, "learning_rate": 6.941896938013444e-05, "loss": 0.0026, "step": 20525 }, { "epoch": 30.671641791044777, "grad_norm": 0.1552734375, "learning_rate": 6.938162808065722e-05, "loss": 0.0026, "step": 20550 }, { "epoch": 30.708955223880597, "grad_norm": 0.044921875, "learning_rate": 6.934428678117999e-05, "loss": 0.0027, "step": 20575 }, { "epoch": 30.746268656716417, "grad_norm": 0.07421875, "learning_rate": 6.930694548170277e-05, "loss": 0.0033, "step": 20600 }, { "epoch": 30.78358208955224, "grad_norm": 0.07763671875, "learning_rate": 6.926960418222555e-05, "loss": 0.003, "step": 20625 }, { "epoch": 30.82089552238806, "grad_norm": 0.054931640625, "learning_rate": 6.923226288274832e-05, "loss": 0.003, "step": 20650 }, { "epoch": 30.85820895522388, "grad_norm": 0.044921875, "learning_rate": 6.91949215832711e-05, "loss": 0.0028, "step": 20675 }, { "epoch": 30.895522388059703, "grad_norm": 0.12353515625, "learning_rate": 6.915758028379388e-05, "loss": 0.0034, "step": 20700 }, { "epoch": 30.932835820895523, "grad_norm": 0.07177734375, "learning_rate": 6.912023898431666e-05, "loss": 0.0026, "step": 20725 }, { "epoch": 30.970149253731343, "grad_norm": 0.08154296875, "learning_rate": 6.908289768483943e-05, "loss": 0.003, "step": 20750 }, { "epoch": 31.007462686567163, "grad_norm": 0.0400390625, "learning_rate": 6.904555638536221e-05, "loss": 0.0024, "step": 20775 }, { "epoch": 31.044776119402986, "grad_norm": 0.040771484375, "learning_rate": 6.9008215085885e-05, "loss": 0.0018, "step": 20800 }, { "epoch": 31.082089552238806, "grad_norm": 0.03759765625, "learning_rate": 6.897087378640777e-05, "loss": 0.0023, "step": 20825 }, { "epoch": 31.119402985074625, "grad_norm": 0.047119140625, "learning_rate": 6.893353248693055e-05, "loss": 0.0019, "step": 20850 }, { "epoch": 31.15671641791045, "grad_norm": 0.035400390625, "learning_rate": 6.889619118745333e-05, "loss": 0.0021, "step": 20875 }, { "epoch": 31.19402985074627, "grad_norm": 0.0281982421875, "learning_rate": 6.88588498879761e-05, "loss": 0.0019, "step": 20900 }, { "epoch": 31.23134328358209, "grad_norm": 0.0390625, "learning_rate": 6.882150858849888e-05, "loss": 0.0019, "step": 20925 }, { "epoch": 31.26865671641791, "grad_norm": 0.04345703125, "learning_rate": 6.878416728902166e-05, "loss": 0.0019, "step": 20950 }, { "epoch": 31.30597014925373, "grad_norm": 0.044189453125, "learning_rate": 6.874682598954444e-05, "loss": 0.0024, "step": 20975 }, { "epoch": 31.34328358208955, "grad_norm": 0.02685546875, "learning_rate": 6.870948469006721e-05, "loss": 0.002, "step": 21000 }, { "epoch": 31.380597014925375, "grad_norm": 0.041015625, "learning_rate": 6.867214339058999e-05, "loss": 0.0019, "step": 21025 }, { "epoch": 31.417910447761194, "grad_norm": 0.042724609375, "learning_rate": 6.863480209111277e-05, "loss": 0.0024, "step": 21050 }, { "epoch": 31.455223880597014, "grad_norm": 0.0751953125, "learning_rate": 6.859746079163555e-05, "loss": 0.0025, "step": 21075 }, { "epoch": 31.492537313432837, "grad_norm": 0.061279296875, "learning_rate": 6.856011949215832e-05, "loss": 0.002, "step": 21100 }, { "epoch": 31.529850746268657, "grad_norm": 0.046875, "learning_rate": 6.852277819268112e-05, "loss": 0.0025, "step": 21125 }, { "epoch": 31.567164179104477, "grad_norm": 0.059814453125, "learning_rate": 6.848543689320388e-05, "loss": 0.0026, "step": 21150 }, { "epoch": 31.604477611940297, "grad_norm": 0.01446533203125, "learning_rate": 6.844809559372667e-05, "loss": 0.0018, "step": 21175 }, { "epoch": 31.64179104477612, "grad_norm": 0.0206298828125, "learning_rate": 6.841075429424945e-05, "loss": 0.0022, "step": 21200 }, { "epoch": 31.67910447761194, "grad_norm": 0.04345703125, "learning_rate": 6.837341299477223e-05, "loss": 0.0021, "step": 21225 }, { "epoch": 31.71641791044776, "grad_norm": 0.185546875, "learning_rate": 6.8336071695295e-05, "loss": 0.0023, "step": 21250 }, { "epoch": 31.753731343283583, "grad_norm": 0.039794921875, "learning_rate": 6.829873039581778e-05, "loss": 0.002, "step": 21275 }, { "epoch": 31.791044776119403, "grad_norm": 0.03466796875, "learning_rate": 6.826138909634056e-05, "loss": 0.0024, "step": 21300 }, { "epoch": 31.828358208955223, "grad_norm": 0.06884765625, "learning_rate": 6.822404779686334e-05, "loss": 0.0022, "step": 21325 }, { "epoch": 31.865671641791046, "grad_norm": 0.031494140625, "learning_rate": 6.818670649738611e-05, "loss": 0.0024, "step": 21350 }, { "epoch": 31.902985074626866, "grad_norm": 0.037109375, "learning_rate": 6.814936519790889e-05, "loss": 0.0026, "step": 21375 }, { "epoch": 31.940298507462686, "grad_norm": 0.0517578125, "learning_rate": 6.811202389843167e-05, "loss": 0.0023, "step": 21400 }, { "epoch": 31.97761194029851, "grad_norm": 0.015625, "learning_rate": 6.807468259895444e-05, "loss": 0.002, "step": 21425 }, { "epoch": 32.014925373134325, "grad_norm": 0.0218505859375, "learning_rate": 6.803734129947722e-05, "loss": 0.0024, "step": 21450 }, { "epoch": 32.05223880597015, "grad_norm": 0.038818359375, "learning_rate": 6.800000000000001e-05, "loss": 0.0018, "step": 21475 }, { "epoch": 32.08955223880597, "grad_norm": 0.0189208984375, "learning_rate": 6.796265870052278e-05, "loss": 0.0016, "step": 21500 }, { "epoch": 32.12686567164179, "grad_norm": 0.037109375, "learning_rate": 6.792531740104556e-05, "loss": 0.0018, "step": 21525 }, { "epoch": 32.16417910447761, "grad_norm": 0.039306640625, "learning_rate": 6.788797610156834e-05, "loss": 0.0018, "step": 21550 }, { "epoch": 32.201492537313435, "grad_norm": 0.035888671875, "learning_rate": 6.785063480209112e-05, "loss": 0.0021, "step": 21575 }, { "epoch": 32.23880597014925, "grad_norm": 0.028076171875, "learning_rate": 6.781329350261389e-05, "loss": 0.0019, "step": 21600 }, { "epoch": 32.276119402985074, "grad_norm": 0.040283203125, "learning_rate": 6.777595220313667e-05, "loss": 0.002, "step": 21625 }, { "epoch": 32.3134328358209, "grad_norm": 0.03271484375, "learning_rate": 6.773861090365945e-05, "loss": 0.002, "step": 21650 }, { "epoch": 32.350746268656714, "grad_norm": 0.046630859375, "learning_rate": 6.770126960418222e-05, "loss": 0.0021, "step": 21675 }, { "epoch": 32.38805970149254, "grad_norm": 0.1201171875, "learning_rate": 6.7663928304705e-05, "loss": 0.0016, "step": 21700 }, { "epoch": 32.42537313432836, "grad_norm": 0.0284423828125, "learning_rate": 6.762658700522778e-05, "loss": 0.0018, "step": 21725 }, { "epoch": 32.46268656716418, "grad_norm": 0.05712890625, "learning_rate": 6.758924570575057e-05, "loss": 0.0018, "step": 21750 }, { "epoch": 32.5, "grad_norm": 0.033447265625, "learning_rate": 6.755190440627333e-05, "loss": 0.0021, "step": 21775 }, { "epoch": 32.53731343283582, "grad_norm": 0.0498046875, "learning_rate": 6.751456310679613e-05, "loss": 0.0018, "step": 21800 }, { "epoch": 32.57462686567164, "grad_norm": 0.111328125, "learning_rate": 6.747722180731891e-05, "loss": 0.0023, "step": 21825 }, { "epoch": 32.61194029850746, "grad_norm": 0.048583984375, "learning_rate": 6.743988050784168e-05, "loss": 0.002, "step": 21850 }, { "epoch": 32.649253731343286, "grad_norm": 0.04248046875, "learning_rate": 6.740253920836446e-05, "loss": 0.0021, "step": 21875 }, { "epoch": 32.6865671641791, "grad_norm": 0.046142578125, "learning_rate": 6.736519790888724e-05, "loss": 0.0021, "step": 21900 }, { "epoch": 32.723880597014926, "grad_norm": 0.038818359375, "learning_rate": 6.732785660941e-05, "loss": 0.0022, "step": 21925 }, { "epoch": 32.76119402985075, "grad_norm": 0.03564453125, "learning_rate": 6.729051530993279e-05, "loss": 0.0022, "step": 21950 }, { "epoch": 32.798507462686565, "grad_norm": 0.0093994140625, "learning_rate": 6.725317401045557e-05, "loss": 0.0024, "step": 21975 }, { "epoch": 32.83582089552239, "grad_norm": 0.040283203125, "learning_rate": 6.721583271097835e-05, "loss": 0.0021, "step": 22000 }, { "epoch": 32.87313432835821, "grad_norm": 0.0390625, "learning_rate": 6.717849141150112e-05, "loss": 0.002, "step": 22025 }, { "epoch": 32.91044776119403, "grad_norm": 0.1435546875, "learning_rate": 6.71411501120239e-05, "loss": 0.0024, "step": 22050 }, { "epoch": 32.94776119402985, "grad_norm": 0.053466796875, "learning_rate": 6.710380881254668e-05, "loss": 0.0021, "step": 22075 }, { "epoch": 32.985074626865675, "grad_norm": 0.04541015625, "learning_rate": 6.706646751306945e-05, "loss": 0.0023, "step": 22100 }, { "epoch": 33.02238805970149, "grad_norm": 0.02685546875, "learning_rate": 6.702912621359224e-05, "loss": 0.0022, "step": 22125 }, { "epoch": 33.059701492537314, "grad_norm": 0.03466796875, "learning_rate": 6.699178491411502e-05, "loss": 0.0022, "step": 22150 }, { "epoch": 33.09701492537314, "grad_norm": 0.031982421875, "learning_rate": 6.695444361463779e-05, "loss": 0.0017, "step": 22175 }, { "epoch": 33.134328358208954, "grad_norm": 0.0712890625, "learning_rate": 6.691710231516057e-05, "loss": 0.0017, "step": 22200 }, { "epoch": 33.17164179104478, "grad_norm": 0.02783203125, "learning_rate": 6.687976101568335e-05, "loss": 0.0018, "step": 22225 }, { "epoch": 33.208955223880594, "grad_norm": 0.017333984375, "learning_rate": 6.684241971620613e-05, "loss": 0.0016, "step": 22250 }, { "epoch": 33.24626865671642, "grad_norm": 0.048583984375, "learning_rate": 6.68050784167289e-05, "loss": 0.002, "step": 22275 }, { "epoch": 33.28358208955224, "grad_norm": 0.054931640625, "learning_rate": 6.676773711725168e-05, "loss": 0.0019, "step": 22300 }, { "epoch": 33.32089552238806, "grad_norm": 0.0517578125, "learning_rate": 6.673039581777446e-05, "loss": 0.0019, "step": 22325 }, { "epoch": 33.35820895522388, "grad_norm": 0.0213623046875, "learning_rate": 6.669305451829723e-05, "loss": 0.0019, "step": 22350 }, { "epoch": 33.3955223880597, "grad_norm": 0.0262451171875, "learning_rate": 6.665571321882001e-05, "loss": 0.002, "step": 22375 }, { "epoch": 33.43283582089552, "grad_norm": 0.034423828125, "learning_rate": 6.66183719193428e-05, "loss": 0.0022, "step": 22400 }, { "epoch": 33.47014925373134, "grad_norm": 0.061279296875, "learning_rate": 6.658103061986558e-05, "loss": 0.0022, "step": 22425 }, { "epoch": 33.507462686567166, "grad_norm": 0.034912109375, "learning_rate": 6.654368932038836e-05, "loss": 0.0019, "step": 22450 }, { "epoch": 33.54477611940298, "grad_norm": 0.14453125, "learning_rate": 6.650634802091114e-05, "loss": 0.0029, "step": 22475 }, { "epoch": 33.582089552238806, "grad_norm": 0.04541015625, "learning_rate": 6.646900672143392e-05, "loss": 0.0028, "step": 22500 }, { "epoch": 33.61940298507463, "grad_norm": 0.0732421875, "learning_rate": 6.643166542195669e-05, "loss": 0.0029, "step": 22525 }, { "epoch": 33.656716417910445, "grad_norm": 0.043212890625, "learning_rate": 6.639432412247947e-05, "loss": 0.003, "step": 22550 }, { "epoch": 33.69402985074627, "grad_norm": 0.051513671875, "learning_rate": 6.635698282300225e-05, "loss": 0.003, "step": 22575 }, { "epoch": 33.73134328358209, "grad_norm": 0.1064453125, "learning_rate": 6.631964152352502e-05, "loss": 0.0033, "step": 22600 }, { "epoch": 33.76865671641791, "grad_norm": 0.052001953125, "learning_rate": 6.62823002240478e-05, "loss": 0.0028, "step": 22625 }, { "epoch": 33.80597014925373, "grad_norm": 0.0673828125, "learning_rate": 6.624495892457058e-05, "loss": 0.0039, "step": 22650 }, { "epoch": 33.843283582089555, "grad_norm": 0.08349609375, "learning_rate": 6.620761762509336e-05, "loss": 0.0032, "step": 22675 }, { "epoch": 33.88059701492537, "grad_norm": 0.09033203125, "learning_rate": 6.617027632561613e-05, "loss": 0.0031, "step": 22700 }, { "epoch": 33.917910447761194, "grad_norm": 0.302734375, "learning_rate": 6.613293502613891e-05, "loss": 0.0039, "step": 22725 }, { "epoch": 33.95522388059702, "grad_norm": 0.203125, "learning_rate": 6.609559372666169e-05, "loss": 0.0064, "step": 22750 }, { "epoch": 33.992537313432834, "grad_norm": 0.12158203125, "learning_rate": 6.605825242718446e-05, "loss": 0.0061, "step": 22775 }, { "epoch": 34.02985074626866, "grad_norm": 0.2412109375, "learning_rate": 6.602091112770725e-05, "loss": 0.0067, "step": 22800 }, { "epoch": 34.06716417910448, "grad_norm": 0.25390625, "learning_rate": 6.598356982823003e-05, "loss": 0.0059, "step": 22825 }, { "epoch": 34.1044776119403, "grad_norm": 0.1630859375, "learning_rate": 6.59462285287528e-05, "loss": 0.0063, "step": 22850 }, { "epoch": 34.14179104477612, "grad_norm": 0.1796875, "learning_rate": 6.590888722927558e-05, "loss": 0.0064, "step": 22875 }, { "epoch": 34.17910447761194, "grad_norm": 0.265625, "learning_rate": 6.587154592979836e-05, "loss": 0.0069, "step": 22900 }, { "epoch": 34.21641791044776, "grad_norm": 0.1982421875, "learning_rate": 6.583420463032114e-05, "loss": 0.0058, "step": 22925 }, { "epoch": 34.25373134328358, "grad_norm": 0.0888671875, "learning_rate": 6.579686333084391e-05, "loss": 0.0048, "step": 22950 }, { "epoch": 34.291044776119406, "grad_norm": 0.1494140625, "learning_rate": 6.57595220313667e-05, "loss": 0.0049, "step": 22975 }, { "epoch": 34.32835820895522, "grad_norm": 0.1484375, "learning_rate": 6.572218073188947e-05, "loss": 0.0049, "step": 23000 }, { "epoch": 34.365671641791046, "grad_norm": 0.173828125, "learning_rate": 6.568483943241224e-05, "loss": 0.0051, "step": 23025 }, { "epoch": 34.40298507462686, "grad_norm": 0.1083984375, "learning_rate": 6.564749813293502e-05, "loss": 0.0047, "step": 23050 }, { "epoch": 34.440298507462686, "grad_norm": 0.27734375, "learning_rate": 6.56101568334578e-05, "loss": 0.0042, "step": 23075 }, { "epoch": 34.47761194029851, "grad_norm": 0.2216796875, "learning_rate": 6.557281553398059e-05, "loss": 0.0043, "step": 23100 }, { "epoch": 34.514925373134325, "grad_norm": 0.04248046875, "learning_rate": 6.553547423450337e-05, "loss": 0.005, "step": 23125 }, { "epoch": 34.55223880597015, "grad_norm": 0.126953125, "learning_rate": 6.549813293502615e-05, "loss": 0.0051, "step": 23150 }, { "epoch": 34.58955223880597, "grad_norm": 0.1708984375, "learning_rate": 6.546079163554893e-05, "loss": 0.0043, "step": 23175 }, { "epoch": 34.62686567164179, "grad_norm": 0.400390625, "learning_rate": 6.54234503360717e-05, "loss": 0.0048, "step": 23200 }, { "epoch": 34.66417910447761, "grad_norm": 0.1025390625, "learning_rate": 6.538610903659448e-05, "loss": 0.004, "step": 23225 }, { "epoch": 34.701492537313435, "grad_norm": 0.08837890625, "learning_rate": 6.534876773711726e-05, "loss": 0.0043, "step": 23250 }, { "epoch": 34.73880597014925, "grad_norm": 0.2119140625, "learning_rate": 6.531142643764003e-05, "loss": 0.0038, "step": 23275 }, { "epoch": 34.776119402985074, "grad_norm": 0.08837890625, "learning_rate": 6.527408513816281e-05, "loss": 0.004, "step": 23300 }, { "epoch": 34.8134328358209, "grad_norm": 0.058837890625, "learning_rate": 6.523674383868559e-05, "loss": 0.0038, "step": 23325 }, { "epoch": 34.850746268656714, "grad_norm": 0.0556640625, "learning_rate": 6.519940253920837e-05, "loss": 0.0034, "step": 23350 }, { "epoch": 34.88805970149254, "grad_norm": 0.095703125, "learning_rate": 6.516206123973114e-05, "loss": 0.004, "step": 23375 }, { "epoch": 34.92537313432836, "grad_norm": 0.150390625, "learning_rate": 6.512471994025392e-05, "loss": 0.0042, "step": 23400 }, { "epoch": 34.96268656716418, "grad_norm": 0.048583984375, "learning_rate": 6.50873786407767e-05, "loss": 0.0045, "step": 23425 }, { "epoch": 35.0, "grad_norm": 0.1396484375, "learning_rate": 6.505003734129948e-05, "loss": 0.0038, "step": 23450 }, { "epoch": 35.03731343283582, "grad_norm": 0.036376953125, "learning_rate": 6.501269604182226e-05, "loss": 0.0026, "step": 23475 }, { "epoch": 35.07462686567164, "grad_norm": 0.3046875, "learning_rate": 6.497535474234504e-05, "loss": 0.0025, "step": 23500 }, { "epoch": 35.11194029850746, "grad_norm": 0.061767578125, "learning_rate": 6.493801344286781e-05, "loss": 0.0023, "step": 23525 }, { "epoch": 35.149253731343286, "grad_norm": 0.0400390625, "learning_rate": 6.490067214339059e-05, "loss": 0.0023, "step": 23550 }, { "epoch": 35.1865671641791, "grad_norm": 0.03857421875, "learning_rate": 6.486333084391337e-05, "loss": 0.0026, "step": 23575 }, { "epoch": 35.223880597014926, "grad_norm": 0.1015625, "learning_rate": 6.482598954443616e-05, "loss": 0.0023, "step": 23600 }, { "epoch": 35.26119402985075, "grad_norm": 0.03125, "learning_rate": 6.478864824495892e-05, "loss": 0.0026, "step": 23625 }, { "epoch": 35.298507462686565, "grad_norm": 0.0712890625, "learning_rate": 6.47513069454817e-05, "loss": 0.0026, "step": 23650 }, { "epoch": 35.33582089552239, "grad_norm": 0.06982421875, "learning_rate": 6.471396564600449e-05, "loss": 0.0022, "step": 23675 }, { "epoch": 35.37313432835821, "grad_norm": 0.029296875, "learning_rate": 6.467662434652725e-05, "loss": 0.0022, "step": 23700 }, { "epoch": 35.41044776119403, "grad_norm": 0.0291748046875, "learning_rate": 6.463928304705003e-05, "loss": 0.0022, "step": 23725 }, { "epoch": 35.44776119402985, "grad_norm": 0.05078125, "learning_rate": 6.460194174757282e-05, "loss": 0.0027, "step": 23750 }, { "epoch": 35.485074626865675, "grad_norm": 0.051513671875, "learning_rate": 6.45646004480956e-05, "loss": 0.0025, "step": 23775 }, { "epoch": 35.52238805970149, "grad_norm": 0.1484375, "learning_rate": 6.452725914861838e-05, "loss": 0.0025, "step": 23800 }, { "epoch": 35.559701492537314, "grad_norm": 0.032958984375, "learning_rate": 6.448991784914116e-05, "loss": 0.0026, "step": 23825 }, { "epoch": 35.59701492537313, "grad_norm": 0.03271484375, "learning_rate": 6.445257654966394e-05, "loss": 0.0025, "step": 23850 }, { "epoch": 35.634328358208954, "grad_norm": 0.033935546875, "learning_rate": 6.441523525018671e-05, "loss": 0.0027, "step": 23875 }, { "epoch": 35.67164179104478, "grad_norm": 0.0751953125, "learning_rate": 6.437789395070949e-05, "loss": 0.0025, "step": 23900 }, { "epoch": 35.708955223880594, "grad_norm": 0.0289306640625, "learning_rate": 6.434055265123227e-05, "loss": 0.0022, "step": 23925 }, { "epoch": 35.74626865671642, "grad_norm": 0.16796875, "learning_rate": 6.430321135175504e-05, "loss": 0.0026, "step": 23950 }, { "epoch": 35.78358208955224, "grad_norm": 0.1923828125, "learning_rate": 6.426587005227782e-05, "loss": 0.0025, "step": 23975 }, { "epoch": 35.82089552238806, "grad_norm": 0.0478515625, "learning_rate": 6.42285287528006e-05, "loss": 0.0025, "step": 24000 }, { "epoch": 35.85820895522388, "grad_norm": 0.0439453125, "learning_rate": 6.419118745332338e-05, "loss": 0.0022, "step": 24025 }, { "epoch": 35.8955223880597, "grad_norm": 0.05322265625, "learning_rate": 6.415384615384615e-05, "loss": 0.0024, "step": 24050 }, { "epoch": 35.93283582089552, "grad_norm": 0.09716796875, "learning_rate": 6.411650485436893e-05, "loss": 0.0024, "step": 24075 }, { "epoch": 35.97014925373134, "grad_norm": 0.10546875, "learning_rate": 6.407916355489171e-05, "loss": 0.0029, "step": 24100 }, { "epoch": 36.007462686567166, "grad_norm": 0.032470703125, "learning_rate": 6.404182225541449e-05, "loss": 0.0027, "step": 24125 }, { "epoch": 36.04477611940298, "grad_norm": 0.037353515625, "learning_rate": 6.400448095593727e-05, "loss": 0.0018, "step": 24150 }, { "epoch": 36.082089552238806, "grad_norm": 0.083984375, "learning_rate": 6.396713965646005e-05, "loss": 0.0018, "step": 24175 }, { "epoch": 36.11940298507463, "grad_norm": 0.03955078125, "learning_rate": 6.392979835698282e-05, "loss": 0.0019, "step": 24200 }, { "epoch": 36.156716417910445, "grad_norm": 0.040771484375, "learning_rate": 6.38924570575056e-05, "loss": 0.0018, "step": 24225 }, { "epoch": 36.19402985074627, "grad_norm": 0.042724609375, "learning_rate": 6.385511575802838e-05, "loss": 0.0018, "step": 24250 }, { "epoch": 36.23134328358209, "grad_norm": 0.0322265625, "learning_rate": 6.381777445855117e-05, "loss": 0.0022, "step": 24275 }, { "epoch": 36.26865671641791, "grad_norm": 0.040771484375, "learning_rate": 6.378043315907393e-05, "loss": 0.0021, "step": 24300 }, { "epoch": 36.30597014925373, "grad_norm": 0.02490234375, "learning_rate": 6.374309185959671e-05, "loss": 0.0017, "step": 24325 }, { "epoch": 36.343283582089555, "grad_norm": 0.01409912109375, "learning_rate": 6.37057505601195e-05, "loss": 0.0018, "step": 24350 }, { "epoch": 36.38059701492537, "grad_norm": 0.05419921875, "learning_rate": 6.366840926064228e-05, "loss": 0.0023, "step": 24375 }, { "epoch": 36.417910447761194, "grad_norm": 0.038330078125, "learning_rate": 6.363106796116504e-05, "loss": 0.0022, "step": 24400 }, { "epoch": 36.45522388059702, "grad_norm": 0.043212890625, "learning_rate": 6.359372666168783e-05, "loss": 0.0021, "step": 24425 }, { "epoch": 36.492537313432834, "grad_norm": 0.046142578125, "learning_rate": 6.35563853622106e-05, "loss": 0.002, "step": 24450 }, { "epoch": 36.52985074626866, "grad_norm": 0.042724609375, "learning_rate": 6.351904406273339e-05, "loss": 0.0019, "step": 24475 }, { "epoch": 36.56716417910448, "grad_norm": 0.025146484375, "learning_rate": 6.348170276325617e-05, "loss": 0.0019, "step": 24500 }, { "epoch": 36.6044776119403, "grad_norm": 0.0927734375, "learning_rate": 6.344436146377895e-05, "loss": 0.0024, "step": 24525 }, { "epoch": 36.64179104477612, "grad_norm": 0.0341796875, "learning_rate": 6.340702016430172e-05, "loss": 0.0019, "step": 24550 }, { "epoch": 36.67910447761194, "grad_norm": 0.0286865234375, "learning_rate": 6.33696788648245e-05, "loss": 0.002, "step": 24575 }, { "epoch": 36.71641791044776, "grad_norm": 0.043701171875, "learning_rate": 6.333233756534728e-05, "loss": 0.0018, "step": 24600 }, { "epoch": 36.75373134328358, "grad_norm": 0.029541015625, "learning_rate": 6.329499626587006e-05, "loss": 0.0021, "step": 24625 }, { "epoch": 36.791044776119406, "grad_norm": 0.0260009765625, "learning_rate": 6.325765496639283e-05, "loss": 0.0019, "step": 24650 }, { "epoch": 36.82835820895522, "grad_norm": 0.0257568359375, "learning_rate": 6.322031366691561e-05, "loss": 0.0018, "step": 24675 }, { "epoch": 36.865671641791046, "grad_norm": 0.0419921875, "learning_rate": 6.318297236743839e-05, "loss": 0.0023, "step": 24700 }, { "epoch": 36.90298507462687, "grad_norm": 0.040283203125, "learning_rate": 6.314563106796116e-05, "loss": 0.0028, "step": 24725 }, { "epoch": 36.940298507462686, "grad_norm": 0.048095703125, "learning_rate": 6.310828976848394e-05, "loss": 0.0022, "step": 24750 }, { "epoch": 36.97761194029851, "grad_norm": 0.0255126953125, "learning_rate": 6.307094846900673e-05, "loss": 0.0018, "step": 24775 }, { "epoch": 37.014925373134325, "grad_norm": 0.043701171875, "learning_rate": 6.30336071695295e-05, "loss": 0.0018, "step": 24800 }, { "epoch": 37.05223880597015, "grad_norm": 0.03955078125, "learning_rate": 6.299626587005228e-05, "loss": 0.0015, "step": 24825 }, { "epoch": 37.08955223880597, "grad_norm": 0.021728515625, "learning_rate": 6.295892457057506e-05, "loss": 0.0018, "step": 24850 }, { "epoch": 37.12686567164179, "grad_norm": 0.040771484375, "learning_rate": 6.292158327109785e-05, "loss": 0.0018, "step": 24875 }, { "epoch": 37.16417910447761, "grad_norm": 0.055419921875, "learning_rate": 6.288424197162061e-05, "loss": 0.0019, "step": 24900 }, { "epoch": 37.201492537313435, "grad_norm": 0.02734375, "learning_rate": 6.28469006721434e-05, "loss": 0.0018, "step": 24925 }, { "epoch": 37.23880597014925, "grad_norm": 0.041259765625, "learning_rate": 6.280955937266618e-05, "loss": 0.0019, "step": 24950 }, { "epoch": 37.276119402985074, "grad_norm": 0.03173828125, "learning_rate": 6.277221807318894e-05, "loss": 0.0017, "step": 24975 }, { "epoch": 37.3134328358209, "grad_norm": 0.036865234375, "learning_rate": 6.273487677371172e-05, "loss": 0.0019, "step": 25000 }, { "epoch": 37.350746268656714, "grad_norm": 0.04541015625, "learning_rate": 6.26975354742345e-05, "loss": 0.0019, "step": 25025 }, { "epoch": 37.38805970149254, "grad_norm": 0.038330078125, "learning_rate": 6.266019417475729e-05, "loss": 0.0019, "step": 25050 }, { "epoch": 37.42537313432836, "grad_norm": 0.03759765625, "learning_rate": 6.262285287528005e-05, "loss": 0.002, "step": 25075 }, { "epoch": 37.46268656716418, "grad_norm": 0.0301513671875, "learning_rate": 6.258551157580284e-05, "loss": 0.002, "step": 25100 }, { "epoch": 37.5, "grad_norm": 0.04931640625, "learning_rate": 6.254817027632563e-05, "loss": 0.0018, "step": 25125 }, { "epoch": 37.53731343283582, "grad_norm": 0.06103515625, "learning_rate": 6.25108289768484e-05, "loss": 0.0019, "step": 25150 }, { "epoch": 37.57462686567164, "grad_norm": 0.04052734375, "learning_rate": 6.247348767737118e-05, "loss": 0.0021, "step": 25175 }, { "epoch": 37.61194029850746, "grad_norm": 0.033447265625, "learning_rate": 6.243614637789396e-05, "loss": 0.0019, "step": 25200 }, { "epoch": 37.649253731343286, "grad_norm": 0.0390625, "learning_rate": 6.239880507841673e-05, "loss": 0.0018, "step": 25225 }, { "epoch": 37.6865671641791, "grad_norm": 0.0322265625, "learning_rate": 6.236146377893951e-05, "loss": 0.0018, "step": 25250 }, { "epoch": 37.723880597014926, "grad_norm": 0.04052734375, "learning_rate": 6.232412247946229e-05, "loss": 0.0019, "step": 25275 }, { "epoch": 37.76119402985075, "grad_norm": 0.0400390625, "learning_rate": 6.228678117998507e-05, "loss": 0.0021, "step": 25300 }, { "epoch": 37.798507462686565, "grad_norm": 0.038330078125, "learning_rate": 6.224943988050784e-05, "loss": 0.0018, "step": 25325 }, { "epoch": 37.83582089552239, "grad_norm": 0.0299072265625, "learning_rate": 6.221209858103062e-05, "loss": 0.0021, "step": 25350 }, { "epoch": 37.87313432835821, "grad_norm": 0.0380859375, "learning_rate": 6.21747572815534e-05, "loss": 0.0019, "step": 25375 }, { "epoch": 37.91044776119403, "grad_norm": 0.033447265625, "learning_rate": 6.213741598207617e-05, "loss": 0.0018, "step": 25400 }, { "epoch": 37.94776119402985, "grad_norm": 0.04296875, "learning_rate": 6.210007468259895e-05, "loss": 0.002, "step": 25425 }, { "epoch": 37.985074626865675, "grad_norm": 0.0172119140625, "learning_rate": 6.206273338312174e-05, "loss": 0.0022, "step": 25450 }, { "epoch": 38.02238805970149, "grad_norm": 0.03466796875, "learning_rate": 6.202539208364451e-05, "loss": 0.002, "step": 25475 }, { "epoch": 38.059701492537314, "grad_norm": 0.0286865234375, "learning_rate": 6.19880507841673e-05, "loss": 0.0018, "step": 25500 }, { "epoch": 38.09701492537314, "grad_norm": 0.0155029296875, "learning_rate": 6.195070948469007e-05, "loss": 0.0019, "step": 25525 }, { "epoch": 38.134328358208954, "grad_norm": 0.0301513671875, "learning_rate": 6.191336818521286e-05, "loss": 0.0018, "step": 25550 }, { "epoch": 38.17164179104478, "grad_norm": 0.030029296875, "learning_rate": 6.187602688573562e-05, "loss": 0.0016, "step": 25575 }, { "epoch": 38.208955223880594, "grad_norm": 0.045654296875, "learning_rate": 6.18386855862584e-05, "loss": 0.0017, "step": 25600 }, { "epoch": 38.24626865671642, "grad_norm": 0.022216796875, "learning_rate": 6.180134428678119e-05, "loss": 0.0017, "step": 25625 }, { "epoch": 38.28358208955224, "grad_norm": 0.03564453125, "learning_rate": 6.176400298730395e-05, "loss": 0.002, "step": 25650 }, { "epoch": 38.32089552238806, "grad_norm": 0.013427734375, "learning_rate": 6.172666168782673e-05, "loss": 0.0021, "step": 25675 }, { "epoch": 38.35820895522388, "grad_norm": 0.038818359375, "learning_rate": 6.168932038834952e-05, "loss": 0.0021, "step": 25700 }, { "epoch": 38.3955223880597, "grad_norm": 0.04150390625, "learning_rate": 6.16519790888723e-05, "loss": 0.0018, "step": 25725 }, { "epoch": 38.43283582089552, "grad_norm": 0.0211181640625, "learning_rate": 6.161463778939506e-05, "loss": 0.0019, "step": 25750 }, { "epoch": 38.47014925373134, "grad_norm": 0.03662109375, "learning_rate": 6.157729648991786e-05, "loss": 0.0019, "step": 25775 }, { "epoch": 38.507462686567166, "grad_norm": 0.0272216796875, "learning_rate": 6.153995519044064e-05, "loss": 0.0016, "step": 25800 }, { "epoch": 38.54477611940298, "grad_norm": 0.046142578125, "learning_rate": 6.150261389096341e-05, "loss": 0.0016, "step": 25825 }, { "epoch": 38.582089552238806, "grad_norm": 0.050048828125, "learning_rate": 6.146527259148619e-05, "loss": 0.0021, "step": 25850 }, { "epoch": 38.61940298507463, "grad_norm": 0.043212890625, "learning_rate": 6.142793129200897e-05, "loss": 0.0018, "step": 25875 }, { "epoch": 38.656716417910445, "grad_norm": 0.02978515625, "learning_rate": 6.139058999253174e-05, "loss": 0.002, "step": 25900 }, { "epoch": 38.69402985074627, "grad_norm": 0.031005859375, "learning_rate": 6.135324869305452e-05, "loss": 0.0022, "step": 25925 }, { "epoch": 38.73134328358209, "grad_norm": 0.0400390625, "learning_rate": 6.13159073935773e-05, "loss": 0.0019, "step": 25950 }, { "epoch": 38.76865671641791, "grad_norm": 0.045166015625, "learning_rate": 6.127856609410008e-05, "loss": 0.0017, "step": 25975 }, { "epoch": 38.80597014925373, "grad_norm": 0.03271484375, "learning_rate": 6.124122479462285e-05, "loss": 0.0021, "step": 26000 }, { "epoch": 38.843283582089555, "grad_norm": 0.03173828125, "learning_rate": 6.120388349514563e-05, "loss": 0.0019, "step": 26025 }, { "epoch": 38.88059701492537, "grad_norm": 0.049072265625, "learning_rate": 6.116654219566841e-05, "loss": 0.002, "step": 26050 }, { "epoch": 38.917910447761194, "grad_norm": 0.046630859375, "learning_rate": 6.112920089619118e-05, "loss": 0.0022, "step": 26075 }, { "epoch": 38.95522388059702, "grad_norm": 0.032470703125, "learning_rate": 6.109185959671397e-05, "loss": 0.0025, "step": 26100 }, { "epoch": 38.992537313432834, "grad_norm": 0.06982421875, "learning_rate": 6.105451829723676e-05, "loss": 0.0023, "step": 26125 }, { "epoch": 39.02985074626866, "grad_norm": 0.035400390625, "learning_rate": 6.101717699775953e-05, "loss": 0.0021, "step": 26150 }, { "epoch": 39.06716417910448, "grad_norm": 0.035888671875, "learning_rate": 6.0979835698282304e-05, "loss": 0.002, "step": 26175 }, { "epoch": 39.1044776119403, "grad_norm": 0.042236328125, "learning_rate": 6.0942494398805085e-05, "loss": 0.0017, "step": 26200 }, { "epoch": 39.14179104477612, "grad_norm": 0.0341796875, "learning_rate": 6.090515309932786e-05, "loss": 0.0016, "step": 26225 }, { "epoch": 39.17910447761194, "grad_norm": 0.053466796875, "learning_rate": 6.086781179985064e-05, "loss": 0.0016, "step": 26250 }, { "epoch": 39.21641791044776, "grad_norm": 0.0255126953125, "learning_rate": 6.0830470500373415e-05, "loss": 0.0021, "step": 26275 }, { "epoch": 39.25373134328358, "grad_norm": 0.0279541015625, "learning_rate": 6.079312920089619e-05, "loss": 0.002, "step": 26300 }, { "epoch": 39.291044776119406, "grad_norm": 0.052978515625, "learning_rate": 6.075578790141897e-05, "loss": 0.002, "step": 26325 }, { "epoch": 39.32835820895522, "grad_norm": 0.0263671875, "learning_rate": 6.0718446601941745e-05, "loss": 0.0016, "step": 26350 }, { "epoch": 39.365671641791046, "grad_norm": 0.01904296875, "learning_rate": 6.0681105302464526e-05, "loss": 0.0018, "step": 26375 }, { "epoch": 39.40298507462686, "grad_norm": 0.038818359375, "learning_rate": 6.06437640029873e-05, "loss": 0.0021, "step": 26400 }, { "epoch": 39.440298507462686, "grad_norm": 0.03955078125, "learning_rate": 6.060642270351008e-05, "loss": 0.0019, "step": 26425 }, { "epoch": 39.47761194029851, "grad_norm": 0.03466796875, "learning_rate": 6.056908140403287e-05, "loss": 0.0019, "step": 26450 }, { "epoch": 39.514925373134325, "grad_norm": 0.041015625, "learning_rate": 6.0531740104555644e-05, "loss": 0.0021, "step": 26475 }, { "epoch": 39.55223880597015, "grad_norm": 0.0299072265625, "learning_rate": 6.0494398805078425e-05, "loss": 0.0017, "step": 26500 }, { "epoch": 39.58955223880597, "grad_norm": 0.0240478515625, "learning_rate": 6.04570575056012e-05, "loss": 0.0022, "step": 26525 }, { "epoch": 39.62686567164179, "grad_norm": 0.03369140625, "learning_rate": 6.0419716206123974e-05, "loss": 0.0021, "step": 26550 }, { "epoch": 39.66417910447761, "grad_norm": 0.0189208984375, "learning_rate": 6.0382374906646755e-05, "loss": 0.0018, "step": 26575 }, { "epoch": 39.701492537313435, "grad_norm": 0.04296875, "learning_rate": 6.034503360716953e-05, "loss": 0.0019, "step": 26600 }, { "epoch": 39.73880597014925, "grad_norm": 0.032470703125, "learning_rate": 6.030769230769231e-05, "loss": 0.0018, "step": 26625 }, { "epoch": 39.776119402985074, "grad_norm": 0.0299072265625, "learning_rate": 6.0270351008215085e-05, "loss": 0.0022, "step": 26650 }, { "epoch": 39.8134328358209, "grad_norm": 0.0279541015625, "learning_rate": 6.0233009708737867e-05, "loss": 0.0022, "step": 26675 }, { "epoch": 39.850746268656714, "grad_norm": 0.047119140625, "learning_rate": 6.019566840926064e-05, "loss": 0.0023, "step": 26700 }, { "epoch": 39.88805970149254, "grad_norm": 0.030517578125, "learning_rate": 6.015832710978342e-05, "loss": 0.0018, "step": 26725 }, { "epoch": 39.92537313432836, "grad_norm": 0.04248046875, "learning_rate": 6.0120985810306196e-05, "loss": 0.0021, "step": 26750 }, { "epoch": 39.96268656716418, "grad_norm": 0.044677734375, "learning_rate": 6.0083644510828984e-05, "loss": 0.0022, "step": 26775 }, { "epoch": 40.0, "grad_norm": 0.01904296875, "learning_rate": 6.004630321135176e-05, "loss": 0.0021, "step": 26800 }, { "epoch": 40.03731343283582, "grad_norm": 0.030517578125, "learning_rate": 6.000896191187454e-05, "loss": 0.002, "step": 26825 }, { "epoch": 40.07462686567164, "grad_norm": 0.03173828125, "learning_rate": 5.9971620612397314e-05, "loss": 0.0016, "step": 26850 }, { "epoch": 40.11194029850746, "grad_norm": 0.03662109375, "learning_rate": 5.9934279312920096e-05, "loss": 0.0018, "step": 26875 }, { "epoch": 40.149253731343286, "grad_norm": 0.03662109375, "learning_rate": 5.989693801344287e-05, "loss": 0.0016, "step": 26900 }, { "epoch": 40.1865671641791, "grad_norm": 0.037109375, "learning_rate": 5.985959671396565e-05, "loss": 0.0019, "step": 26925 }, { "epoch": 40.223880597014926, "grad_norm": 0.041748046875, "learning_rate": 5.9822255414488426e-05, "loss": 0.0019, "step": 26950 }, { "epoch": 40.26119402985075, "grad_norm": 0.0390625, "learning_rate": 5.978491411501121e-05, "loss": 0.0019, "step": 26975 }, { "epoch": 40.298507462686565, "grad_norm": 0.0196533203125, "learning_rate": 5.974757281553398e-05, "loss": 0.0019, "step": 27000 }, { "epoch": 40.33582089552239, "grad_norm": 0.037353515625, "learning_rate": 5.9710231516056755e-05, "loss": 0.002, "step": 27025 }, { "epoch": 40.37313432835821, "grad_norm": 0.040283203125, "learning_rate": 5.967289021657954e-05, "loss": 0.0021, "step": 27050 }, { "epoch": 40.41044776119403, "grad_norm": 0.028076171875, "learning_rate": 5.963554891710231e-05, "loss": 0.0021, "step": 27075 }, { "epoch": 40.44776119402985, "grad_norm": 0.0302734375, "learning_rate": 5.95982076176251e-05, "loss": 0.002, "step": 27100 }, { "epoch": 40.485074626865675, "grad_norm": 0.0361328125, "learning_rate": 5.956086631814788e-05, "loss": 0.0019, "step": 27125 }, { "epoch": 40.52238805970149, "grad_norm": 0.02880859375, "learning_rate": 5.9523525018670655e-05, "loss": 0.002, "step": 27150 }, { "epoch": 40.559701492537314, "grad_norm": 0.03955078125, "learning_rate": 5.9486183719193436e-05, "loss": 0.0018, "step": 27175 }, { "epoch": 40.59701492537313, "grad_norm": 0.012451171875, "learning_rate": 5.944884241971621e-05, "loss": 0.0018, "step": 27200 }, { "epoch": 40.634328358208954, "grad_norm": 0.048828125, "learning_rate": 5.941150112023899e-05, "loss": 0.0018, "step": 27225 }, { "epoch": 40.67164179104478, "grad_norm": 0.056884765625, "learning_rate": 5.9374159820761766e-05, "loss": 0.0022, "step": 27250 }, { "epoch": 40.708955223880594, "grad_norm": 0.05859375, "learning_rate": 5.933681852128454e-05, "loss": 0.0022, "step": 27275 }, { "epoch": 40.74626865671642, "grad_norm": 0.036376953125, "learning_rate": 5.929947722180732e-05, "loss": 0.0021, "step": 27300 }, { "epoch": 40.78358208955224, "grad_norm": 0.0294189453125, "learning_rate": 5.9262135922330096e-05, "loss": 0.0022, "step": 27325 }, { "epoch": 40.82089552238806, "grad_norm": 0.0517578125, "learning_rate": 5.922479462285288e-05, "loss": 0.0024, "step": 27350 }, { "epoch": 40.85820895522388, "grad_norm": 0.03466796875, "learning_rate": 5.918745332337565e-05, "loss": 0.0023, "step": 27375 }, { "epoch": 40.8955223880597, "grad_norm": 0.03515625, "learning_rate": 5.915011202389843e-05, "loss": 0.0023, "step": 27400 }, { "epoch": 40.93283582089552, "grad_norm": 0.0262451171875, "learning_rate": 5.911277072442122e-05, "loss": 0.0022, "step": 27425 }, { "epoch": 40.97014925373134, "grad_norm": 0.050048828125, "learning_rate": 5.9075429424943995e-05, "loss": 0.0019, "step": 27450 }, { "epoch": 41.007462686567166, "grad_norm": 0.0289306640625, "learning_rate": 5.9038088125466776e-05, "loss": 0.0021, "step": 27475 }, { "epoch": 41.04477611940298, "grad_norm": 0.0234375, "learning_rate": 5.900074682598955e-05, "loss": 0.0018, "step": 27500 }, { "epoch": 41.082089552238806, "grad_norm": 0.06201171875, "learning_rate": 5.8963405526512325e-05, "loss": 0.0017, "step": 27525 }, { "epoch": 41.11940298507463, "grad_norm": 0.036376953125, "learning_rate": 5.8926064227035106e-05, "loss": 0.0017, "step": 27550 }, { "epoch": 41.156716417910445, "grad_norm": 0.03466796875, "learning_rate": 5.888872292755788e-05, "loss": 0.0018, "step": 27575 }, { "epoch": 41.19402985074627, "grad_norm": 0.033447265625, "learning_rate": 5.885138162808066e-05, "loss": 0.0019, "step": 27600 }, { "epoch": 41.23134328358209, "grad_norm": 0.051513671875, "learning_rate": 5.8814040328603436e-05, "loss": 0.0019, "step": 27625 }, { "epoch": 41.26865671641791, "grad_norm": 0.0233154296875, "learning_rate": 5.877669902912622e-05, "loss": 0.0017, "step": 27650 }, { "epoch": 41.30597014925373, "grad_norm": 0.0198974609375, "learning_rate": 5.873935772964899e-05, "loss": 0.0021, "step": 27675 }, { "epoch": 41.343283582089555, "grad_norm": 0.02734375, "learning_rate": 5.8702016430171766e-05, "loss": 0.0023, "step": 27700 }, { "epoch": 41.38059701492537, "grad_norm": 0.0306396484375, "learning_rate": 5.866467513069455e-05, "loss": 0.0024, "step": 27725 }, { "epoch": 41.417910447761194, "grad_norm": 0.044677734375, "learning_rate": 5.862733383121732e-05, "loss": 0.0021, "step": 27750 }, { "epoch": 41.45522388059702, "grad_norm": 0.05224609375, "learning_rate": 5.858999253174011e-05, "loss": 0.0022, "step": 27775 }, { "epoch": 41.492537313432834, "grad_norm": 0.034912109375, "learning_rate": 5.855265123226289e-05, "loss": 0.0018, "step": 27800 }, { "epoch": 41.52985074626866, "grad_norm": 0.1171875, "learning_rate": 5.8515309932785665e-05, "loss": 0.0021, "step": 27825 }, { "epoch": 41.56716417910448, "grad_norm": 0.043212890625, "learning_rate": 5.8477968633308446e-05, "loss": 0.0024, "step": 27850 }, { "epoch": 41.6044776119403, "grad_norm": 0.0279541015625, "learning_rate": 5.844062733383122e-05, "loss": 0.0023, "step": 27875 }, { "epoch": 41.64179104477612, "grad_norm": 0.171875, "learning_rate": 5.8403286034354e-05, "loss": 0.0038, "step": 27900 }, { "epoch": 41.67910447761194, "grad_norm": 0.357421875, "learning_rate": 5.8365944734876776e-05, "loss": 0.0109, "step": 27925 }, { "epoch": 41.71641791044776, "grad_norm": 0.201171875, "learning_rate": 5.832860343539955e-05, "loss": 0.0136, "step": 27950 }, { "epoch": 41.75373134328358, "grad_norm": 0.23046875, "learning_rate": 5.829126213592233e-05, "loss": 0.0126, "step": 27975 }, { "epoch": 41.791044776119406, "grad_norm": 0.232421875, "learning_rate": 5.8253920836445106e-05, "loss": 0.0142, "step": 28000 }, { "epoch": 41.82835820895522, "grad_norm": 0.26953125, "learning_rate": 5.821657953696789e-05, "loss": 0.0139, "step": 28025 }, { "epoch": 41.865671641791046, "grad_norm": 0.1728515625, "learning_rate": 5.817923823749066e-05, "loss": 0.0101, "step": 28050 }, { "epoch": 41.90298507462687, "grad_norm": 0.248046875, "learning_rate": 5.814189693801344e-05, "loss": 0.0097, "step": 28075 }, { "epoch": 41.940298507462686, "grad_norm": 0.1630859375, "learning_rate": 5.810455563853623e-05, "loss": 0.0092, "step": 28100 }, { "epoch": 41.97761194029851, "grad_norm": 0.1630859375, "learning_rate": 5.8067214339059005e-05, "loss": 0.0074, "step": 28125 }, { "epoch": 42.014925373134325, "grad_norm": 0.2177734375, "learning_rate": 5.8029873039581786e-05, "loss": 0.0066, "step": 28150 }, { "epoch": 42.05223880597015, "grad_norm": 0.1103515625, "learning_rate": 5.799253174010456e-05, "loss": 0.0055, "step": 28175 }, { "epoch": 42.08955223880597, "grad_norm": 0.1318359375, "learning_rate": 5.7955190440627335e-05, "loss": 0.0053, "step": 28200 }, { "epoch": 42.12686567164179, "grad_norm": 0.12060546875, "learning_rate": 5.7917849141150116e-05, "loss": 0.0054, "step": 28225 }, { "epoch": 42.16417910447761, "grad_norm": 0.07470703125, "learning_rate": 5.788050784167289e-05, "loss": 0.0056, "step": 28250 }, { "epoch": 42.201492537313435, "grad_norm": 0.16796875, "learning_rate": 5.784316654219567e-05, "loss": 0.0042, "step": 28275 }, { "epoch": 42.23880597014925, "grad_norm": 0.07568359375, "learning_rate": 5.7805825242718446e-05, "loss": 0.0046, "step": 28300 }, { "epoch": 42.276119402985074, "grad_norm": 0.1689453125, "learning_rate": 5.776848394324123e-05, "loss": 0.005, "step": 28325 }, { "epoch": 42.3134328358209, "grad_norm": 0.1826171875, "learning_rate": 5.7731142643764e-05, "loss": 0.0046, "step": 28350 }, { "epoch": 42.350746268656714, "grad_norm": 0.053955078125, "learning_rate": 5.769380134428678e-05, "loss": 0.0042, "step": 28375 }, { "epoch": 42.38805970149254, "grad_norm": 0.146484375, "learning_rate": 5.765646004480956e-05, "loss": 0.0049, "step": 28400 }, { "epoch": 42.42537313432836, "grad_norm": 0.11376953125, "learning_rate": 5.7619118745332345e-05, "loss": 0.0044, "step": 28425 }, { "epoch": 42.46268656716418, "grad_norm": 0.11328125, "learning_rate": 5.758177744585512e-05, "loss": 0.0046, "step": 28450 }, { "epoch": 42.5, "grad_norm": 0.056640625, "learning_rate": 5.75444361463779e-05, "loss": 0.0047, "step": 28475 }, { "epoch": 42.53731343283582, "grad_norm": 0.20703125, "learning_rate": 5.7507094846900675e-05, "loss": 0.0046, "step": 28500 }, { "epoch": 42.57462686567164, "grad_norm": 0.11474609375, "learning_rate": 5.7469753547423456e-05, "loss": 0.004, "step": 28525 }, { "epoch": 42.61194029850746, "grad_norm": 0.1142578125, "learning_rate": 5.743241224794623e-05, "loss": 0.0036, "step": 28550 }, { "epoch": 42.649253731343286, "grad_norm": 0.162109375, "learning_rate": 5.739507094846901e-05, "loss": 0.0044, "step": 28575 }, { "epoch": 42.6865671641791, "grad_norm": 0.11279296875, "learning_rate": 5.7357729648991786e-05, "loss": 0.005, "step": 28600 }, { "epoch": 42.723880597014926, "grad_norm": 0.208984375, "learning_rate": 5.732038834951457e-05, "loss": 0.0041, "step": 28625 }, { "epoch": 42.76119402985075, "grad_norm": 0.185546875, "learning_rate": 5.728304705003734e-05, "loss": 0.0041, "step": 28650 }, { "epoch": 42.798507462686565, "grad_norm": 0.05419921875, "learning_rate": 5.7245705750560116e-05, "loss": 0.0046, "step": 28675 }, { "epoch": 42.83582089552239, "grad_norm": 0.039306640625, "learning_rate": 5.72083644510829e-05, "loss": 0.004, "step": 28700 }, { "epoch": 42.87313432835821, "grad_norm": 0.193359375, "learning_rate": 5.717102315160567e-05, "loss": 0.0047, "step": 28725 }, { "epoch": 42.91044776119403, "grad_norm": 0.138671875, "learning_rate": 5.713368185212846e-05, "loss": 0.0042, "step": 28750 }, { "epoch": 42.94776119402985, "grad_norm": 0.076171875, "learning_rate": 5.709634055265124e-05, "loss": 0.0041, "step": 28775 }, { "epoch": 42.985074626865675, "grad_norm": 0.1201171875, "learning_rate": 5.7058999253174015e-05, "loss": 0.0038, "step": 28800 }, { "epoch": 43.02238805970149, "grad_norm": 0.037841796875, "learning_rate": 5.7021657953696797e-05, "loss": 0.0025, "step": 28825 }, { "epoch": 43.059701492537314, "grad_norm": 0.047607421875, "learning_rate": 5.698431665421957e-05, "loss": 0.0022, "step": 28850 }, { "epoch": 43.09701492537314, "grad_norm": 0.037841796875, "learning_rate": 5.694697535474235e-05, "loss": 0.0018, "step": 28875 }, { "epoch": 43.134328358208954, "grad_norm": 0.034912109375, "learning_rate": 5.6909634055265127e-05, "loss": 0.0021, "step": 28900 }, { "epoch": 43.17164179104478, "grad_norm": 0.03369140625, "learning_rate": 5.68722927557879e-05, "loss": 0.0019, "step": 28925 }, { "epoch": 43.208955223880594, "grad_norm": 0.0245361328125, "learning_rate": 5.683495145631068e-05, "loss": 0.0019, "step": 28950 }, { "epoch": 43.24626865671642, "grad_norm": 0.09228515625, "learning_rate": 5.6797610156833456e-05, "loss": 0.0019, "step": 28975 }, { "epoch": 43.28358208955224, "grad_norm": 0.037841796875, "learning_rate": 5.676026885735624e-05, "loss": 0.0017, "step": 29000 }, { "epoch": 43.32089552238806, "grad_norm": 0.10205078125, "learning_rate": 5.672292755787901e-05, "loss": 0.0022, "step": 29025 }, { "epoch": 43.35820895522388, "grad_norm": 0.051513671875, "learning_rate": 5.668558625840179e-05, "loss": 0.002, "step": 29050 }, { "epoch": 43.3955223880597, "grad_norm": 0.232421875, "learning_rate": 5.664824495892457e-05, "loss": 0.0023, "step": 29075 }, { "epoch": 43.43283582089552, "grad_norm": 0.0255126953125, "learning_rate": 5.6610903659447356e-05, "loss": 0.002, "step": 29100 }, { "epoch": 43.47014925373134, "grad_norm": 0.0230712890625, "learning_rate": 5.657356235997014e-05, "loss": 0.0021, "step": 29125 }, { "epoch": 43.507462686567166, "grad_norm": 0.037353515625, "learning_rate": 5.653622106049291e-05, "loss": 0.0022, "step": 29150 }, { "epoch": 43.54477611940298, "grad_norm": 0.03662109375, "learning_rate": 5.6498879761015686e-05, "loss": 0.0019, "step": 29175 }, { "epoch": 43.582089552238806, "grad_norm": 0.037109375, "learning_rate": 5.646153846153847e-05, "loss": 0.002, "step": 29200 }, { "epoch": 43.61940298507463, "grad_norm": 0.037353515625, "learning_rate": 5.642419716206124e-05, "loss": 0.0019, "step": 29225 }, { "epoch": 43.656716417910445, "grad_norm": 0.03125, "learning_rate": 5.638685586258402e-05, "loss": 0.0018, "step": 29250 }, { "epoch": 43.69402985074627, "grad_norm": 0.043212890625, "learning_rate": 5.63495145631068e-05, "loss": 0.0022, "step": 29275 }, { "epoch": 43.73134328358209, "grad_norm": 0.039794921875, "learning_rate": 5.631217326362958e-05, "loss": 0.0021, "step": 29300 }, { "epoch": 43.76865671641791, "grad_norm": 0.03662109375, "learning_rate": 5.627483196415235e-05, "loss": 0.0022, "step": 29325 }, { "epoch": 43.80597014925373, "grad_norm": 0.040771484375, "learning_rate": 5.623749066467513e-05, "loss": 0.0022, "step": 29350 }, { "epoch": 43.843283582089555, "grad_norm": 0.11865234375, "learning_rate": 5.620014936519791e-05, "loss": 0.0025, "step": 29375 }, { "epoch": 43.88059701492537, "grad_norm": 0.05029296875, "learning_rate": 5.616280806572068e-05, "loss": 0.0021, "step": 29400 }, { "epoch": 43.917910447761194, "grad_norm": 0.0247802734375, "learning_rate": 5.612546676624347e-05, "loss": 0.0019, "step": 29425 }, { "epoch": 43.95522388059702, "grad_norm": 0.03076171875, "learning_rate": 5.608812546676625e-05, "loss": 0.0021, "step": 29450 }, { "epoch": 43.992537313432834, "grad_norm": 0.044189453125, "learning_rate": 5.6050784167289026e-05, "loss": 0.0021, "step": 29475 }, { "epoch": 44.02985074626866, "grad_norm": 0.038330078125, "learning_rate": 5.601344286781181e-05, "loss": 0.0018, "step": 29500 }, { "epoch": 44.06716417910448, "grad_norm": 0.0311279296875, "learning_rate": 5.597610156833458e-05, "loss": 0.0016, "step": 29525 }, { "epoch": 44.1044776119403, "grad_norm": 0.038330078125, "learning_rate": 5.593876026885736e-05, "loss": 0.0019, "step": 29550 }, { "epoch": 44.14179104477612, "grad_norm": 0.045166015625, "learning_rate": 5.590141896938014e-05, "loss": 0.0018, "step": 29575 }, { "epoch": 44.17910447761194, "grad_norm": 0.027099609375, "learning_rate": 5.586407766990291e-05, "loss": 0.0017, "step": 29600 }, { "epoch": 44.21641791044776, "grad_norm": 0.0186767578125, "learning_rate": 5.582673637042569e-05, "loss": 0.0022, "step": 29625 }, { "epoch": 44.25373134328358, "grad_norm": 0.032958984375, "learning_rate": 5.578939507094847e-05, "loss": 0.0017, "step": 29650 }, { "epoch": 44.291044776119406, "grad_norm": 0.040771484375, "learning_rate": 5.575205377147125e-05, "loss": 0.0019, "step": 29675 }, { "epoch": 44.32835820895522, "grad_norm": 0.041748046875, "learning_rate": 5.571471247199402e-05, "loss": 0.0017, "step": 29700 }, { "epoch": 44.365671641791046, "grad_norm": 0.05078125, "learning_rate": 5.5677371172516804e-05, "loss": 0.0022, "step": 29725 }, { "epoch": 44.40298507462686, "grad_norm": 0.0322265625, "learning_rate": 5.564002987303959e-05, "loss": 0.0018, "step": 29750 }, { "epoch": 44.440298507462686, "grad_norm": 0.028076171875, "learning_rate": 5.5602688573562366e-05, "loss": 0.0019, "step": 29775 }, { "epoch": 44.47761194029851, "grad_norm": 0.03662109375, "learning_rate": 5.556534727408515e-05, "loss": 0.0018, "step": 29800 }, { "epoch": 44.514925373134325, "grad_norm": 0.05126953125, "learning_rate": 5.552800597460792e-05, "loss": 0.0019, "step": 29825 }, { "epoch": 44.55223880597015, "grad_norm": 0.0206298828125, "learning_rate": 5.5490664675130696e-05, "loss": 0.0015, "step": 29850 }, { "epoch": 44.58955223880597, "grad_norm": 0.028076171875, "learning_rate": 5.545332337565348e-05, "loss": 0.0016, "step": 29875 }, { "epoch": 44.62686567164179, "grad_norm": 0.0361328125, "learning_rate": 5.541598207617625e-05, "loss": 0.0017, "step": 29900 }, { "epoch": 44.66417910447761, "grad_norm": 0.044677734375, "learning_rate": 5.537864077669903e-05, "loss": 0.0022, "step": 29925 }, { "epoch": 44.701492537313435, "grad_norm": 0.03369140625, "learning_rate": 5.534129947722181e-05, "loss": 0.0022, "step": 29950 }, { "epoch": 44.73880597014925, "grad_norm": 0.0272216796875, "learning_rate": 5.530395817774459e-05, "loss": 0.0017, "step": 29975 }, { "epoch": 44.776119402985074, "grad_norm": 0.052001953125, "learning_rate": 5.526661687826736e-05, "loss": 0.0022, "step": 30000 }, { "epoch": 44.8134328358209, "grad_norm": 0.05224609375, "learning_rate": 5.5229275578790144e-05, "loss": 0.0019, "step": 30025 }, { "epoch": 44.850746268656714, "grad_norm": 0.04833984375, "learning_rate": 5.519193427931292e-05, "loss": 0.0022, "step": 30050 }, { "epoch": 44.88805970149254, "grad_norm": 0.046875, "learning_rate": 5.5154592979835706e-05, "loss": 0.0021, "step": 30075 }, { "epoch": 44.92537313432836, "grad_norm": 0.031494140625, "learning_rate": 5.511725168035848e-05, "loss": 0.0022, "step": 30100 }, { "epoch": 44.96268656716418, "grad_norm": 0.0206298828125, "learning_rate": 5.507991038088126e-05, "loss": 0.0019, "step": 30125 }, { "epoch": 45.0, "grad_norm": 0.049560546875, "learning_rate": 5.5042569081404036e-05, "loss": 0.0021, "step": 30150 }, { "epoch": 45.03731343283582, "grad_norm": 0.0296630859375, "learning_rate": 5.500522778192682e-05, "loss": 0.0016, "step": 30175 }, { "epoch": 45.07462686567164, "grad_norm": 0.0308837890625, "learning_rate": 5.496788648244959e-05, "loss": 0.0016, "step": 30200 }, { "epoch": 45.11194029850746, "grad_norm": 0.02587890625, "learning_rate": 5.493054518297237e-05, "loss": 0.0015, "step": 30225 }, { "epoch": 45.149253731343286, "grad_norm": 0.032958984375, "learning_rate": 5.489320388349515e-05, "loss": 0.002, "step": 30250 }, { "epoch": 45.1865671641791, "grad_norm": 0.0311279296875, "learning_rate": 5.485586258401793e-05, "loss": 0.0019, "step": 30275 }, { "epoch": 45.223880597014926, "grad_norm": 0.0177001953125, "learning_rate": 5.48185212845407e-05, "loss": 0.0019, "step": 30300 }, { "epoch": 45.26119402985075, "grad_norm": 0.036865234375, "learning_rate": 5.478117998506348e-05, "loss": 0.0021, "step": 30325 }, { "epoch": 45.298507462686565, "grad_norm": 0.031494140625, "learning_rate": 5.474383868558626e-05, "loss": 0.0019, "step": 30350 }, { "epoch": 45.33582089552239, "grad_norm": 0.061279296875, "learning_rate": 5.470649738610903e-05, "loss": 0.0019, "step": 30375 }, { "epoch": 45.37313432835821, "grad_norm": 0.045166015625, "learning_rate": 5.4669156086631814e-05, "loss": 0.002, "step": 30400 }, { "epoch": 45.41044776119403, "grad_norm": 0.0283203125, "learning_rate": 5.46318147871546e-05, "loss": 0.0016, "step": 30425 }, { "epoch": 45.44776119402985, "grad_norm": 0.036865234375, "learning_rate": 5.4594473487677376e-05, "loss": 0.002, "step": 30450 }, { "epoch": 45.485074626865675, "grad_norm": 0.03173828125, "learning_rate": 5.455713218820016e-05, "loss": 0.0017, "step": 30475 }, { "epoch": 45.52238805970149, "grad_norm": 0.041748046875, "learning_rate": 5.451979088872293e-05, "loss": 0.002, "step": 30500 }, { "epoch": 45.559701492537314, "grad_norm": 0.037109375, "learning_rate": 5.448244958924571e-05, "loss": 0.0018, "step": 30525 }, { "epoch": 45.59701492537313, "grad_norm": 0.0274658203125, "learning_rate": 5.444510828976849e-05, "loss": 0.0021, "step": 30550 }, { "epoch": 45.634328358208954, "grad_norm": 0.048583984375, "learning_rate": 5.440776699029126e-05, "loss": 0.0019, "step": 30575 }, { "epoch": 45.67164179104478, "grad_norm": 0.0361328125, "learning_rate": 5.437042569081404e-05, "loss": 0.0021, "step": 30600 }, { "epoch": 45.708955223880594, "grad_norm": 0.050048828125, "learning_rate": 5.433308439133682e-05, "loss": 0.002, "step": 30625 }, { "epoch": 45.74626865671642, "grad_norm": 0.031982421875, "learning_rate": 5.42957430918596e-05, "loss": 0.0021, "step": 30650 }, { "epoch": 45.78358208955224, "grad_norm": 0.047119140625, "learning_rate": 5.425840179238237e-05, "loss": 0.002, "step": 30675 }, { "epoch": 45.82089552238806, "grad_norm": 0.0244140625, "learning_rate": 5.4221060492905154e-05, "loss": 0.0018, "step": 30700 }, { "epoch": 45.85820895522388, "grad_norm": 0.04443359375, "learning_rate": 5.418371919342793e-05, "loss": 0.0021, "step": 30725 }, { "epoch": 45.8955223880597, "grad_norm": 0.056884765625, "learning_rate": 5.4146377893950716e-05, "loss": 0.0019, "step": 30750 }, { "epoch": 45.93283582089552, "grad_norm": 0.04443359375, "learning_rate": 5.41090365944735e-05, "loss": 0.002, "step": 30775 }, { "epoch": 45.97014925373134, "grad_norm": 0.025390625, "learning_rate": 5.407169529499627e-05, "loss": 0.0021, "step": 30800 }, { "epoch": 46.007462686567166, "grad_norm": 0.0283203125, "learning_rate": 5.4034353995519046e-05, "loss": 0.0017, "step": 30825 }, { "epoch": 46.04477611940298, "grad_norm": 0.0303955078125, "learning_rate": 5.399701269604183e-05, "loss": 0.0017, "step": 30850 }, { "epoch": 46.082089552238806, "grad_norm": 0.031494140625, "learning_rate": 5.39596713965646e-05, "loss": 0.0017, "step": 30875 }, { "epoch": 46.11940298507463, "grad_norm": 0.0615234375, "learning_rate": 5.392233009708738e-05, "loss": 0.0019, "step": 30900 }, { "epoch": 46.156716417910445, "grad_norm": 0.034912109375, "learning_rate": 5.388498879761016e-05, "loss": 0.0016, "step": 30925 }, { "epoch": 46.19402985074627, "grad_norm": 0.0302734375, "learning_rate": 5.384764749813294e-05, "loss": 0.0015, "step": 30950 }, { "epoch": 46.23134328358209, "grad_norm": 0.0191650390625, "learning_rate": 5.381030619865571e-05, "loss": 0.0016, "step": 30975 }, { "epoch": 46.26865671641791, "grad_norm": 0.0303955078125, "learning_rate": 5.377296489917849e-05, "loss": 0.0017, "step": 31000 }, { "epoch": 46.30597014925373, "grad_norm": 0.033935546875, "learning_rate": 5.373562359970127e-05, "loss": 0.0016, "step": 31025 }, { "epoch": 46.343283582089555, "grad_norm": 0.024658203125, "learning_rate": 5.369828230022404e-05, "loss": 0.0019, "step": 31050 }, { "epoch": 46.38059701492537, "grad_norm": 0.0252685546875, "learning_rate": 5.366094100074683e-05, "loss": 0.002, "step": 31075 }, { "epoch": 46.417910447761194, "grad_norm": 0.041015625, "learning_rate": 5.362359970126961e-05, "loss": 0.0021, "step": 31100 }, { "epoch": 46.45522388059702, "grad_norm": 0.050537109375, "learning_rate": 5.3586258401792387e-05, "loss": 0.002, "step": 31125 }, { "epoch": 46.492537313432834, "grad_norm": 0.030517578125, "learning_rate": 5.354891710231517e-05, "loss": 0.0019, "step": 31150 }, { "epoch": 46.52985074626866, "grad_norm": 0.0242919921875, "learning_rate": 5.351157580283794e-05, "loss": 0.0019, "step": 31175 }, { "epoch": 46.56716417910448, "grad_norm": 0.038818359375, "learning_rate": 5.347423450336072e-05, "loss": 0.0022, "step": 31200 }, { "epoch": 46.6044776119403, "grad_norm": 0.054931640625, "learning_rate": 5.34368932038835e-05, "loss": 0.002, "step": 31225 }, { "epoch": 46.64179104477612, "grad_norm": 0.0308837890625, "learning_rate": 5.339955190440627e-05, "loss": 0.0018, "step": 31250 }, { "epoch": 46.67910447761194, "grad_norm": 0.049072265625, "learning_rate": 5.336221060492905e-05, "loss": 0.0022, "step": 31275 }, { "epoch": 46.71641791044776, "grad_norm": 0.06005859375, "learning_rate": 5.332486930545183e-05, "loss": 0.0019, "step": 31300 }, { "epoch": 46.75373134328358, "grad_norm": 0.042236328125, "learning_rate": 5.328752800597461e-05, "loss": 0.0018, "step": 31325 }, { "epoch": 46.791044776119406, "grad_norm": 0.03857421875, "learning_rate": 5.325018670649738e-05, "loss": 0.0021, "step": 31350 }, { "epoch": 46.82835820895522, "grad_norm": 0.05126953125, "learning_rate": 5.3212845407020164e-05, "loss": 0.0022, "step": 31375 }, { "epoch": 46.865671641791046, "grad_norm": 0.03271484375, "learning_rate": 5.317550410754295e-05, "loss": 0.002, "step": 31400 }, { "epoch": 46.90298507462687, "grad_norm": 0.01708984375, "learning_rate": 5.313816280806573e-05, "loss": 0.0021, "step": 31425 }, { "epoch": 46.940298507462686, "grad_norm": 0.040771484375, "learning_rate": 5.310082150858851e-05, "loss": 0.0024, "step": 31450 }, { "epoch": 46.97761194029851, "grad_norm": 0.0294189453125, "learning_rate": 5.306348020911128e-05, "loss": 0.0022, "step": 31475 }, { "epoch": 47.014925373134325, "grad_norm": 0.0191650390625, "learning_rate": 5.302613890963406e-05, "loss": 0.0018, "step": 31500 }, { "epoch": 47.05223880597015, "grad_norm": 0.0380859375, "learning_rate": 5.298879761015684e-05, "loss": 0.002, "step": 31525 }, { "epoch": 47.08955223880597, "grad_norm": 0.032958984375, "learning_rate": 5.295145631067961e-05, "loss": 0.0018, "step": 31550 }, { "epoch": 47.12686567164179, "grad_norm": 0.045166015625, "learning_rate": 5.2914115011202393e-05, "loss": 0.0018, "step": 31575 }, { "epoch": 47.16417910447761, "grad_norm": 0.0400390625, "learning_rate": 5.287677371172517e-05, "loss": 0.0016, "step": 31600 }, { "epoch": 47.201492537313435, "grad_norm": 0.024658203125, "learning_rate": 5.283943241224795e-05, "loss": 0.0018, "step": 31625 }, { "epoch": 47.23880597014925, "grad_norm": 0.037841796875, "learning_rate": 5.280209111277072e-05, "loss": 0.0019, "step": 31650 }, { "epoch": 47.276119402985074, "grad_norm": 0.037109375, "learning_rate": 5.2764749813293505e-05, "loss": 0.0016, "step": 31675 }, { "epoch": 47.3134328358209, "grad_norm": 0.0250244140625, "learning_rate": 5.272740851381628e-05, "loss": 0.0016, "step": 31700 }, { "epoch": 47.350746268656714, "grad_norm": 0.05224609375, "learning_rate": 5.269006721433905e-05, "loss": 0.002, "step": 31725 }, { "epoch": 47.38805970149254, "grad_norm": 0.04541015625, "learning_rate": 5.265272591486184e-05, "loss": 0.0019, "step": 31750 }, { "epoch": 47.42537313432836, "grad_norm": 0.0279541015625, "learning_rate": 5.261538461538462e-05, "loss": 0.0019, "step": 31775 }, { "epoch": 47.46268656716418, "grad_norm": 0.0419921875, "learning_rate": 5.25780433159074e-05, "loss": 0.002, "step": 31800 }, { "epoch": 47.5, "grad_norm": 0.0167236328125, "learning_rate": 5.254070201643018e-05, "loss": 0.0018, "step": 31825 }, { "epoch": 47.53731343283582, "grad_norm": 0.034423828125, "learning_rate": 5.250336071695295e-05, "loss": 0.002, "step": 31850 }, { "epoch": 47.57462686567164, "grad_norm": 0.041748046875, "learning_rate": 5.2466019417475734e-05, "loss": 0.0022, "step": 31875 }, { "epoch": 47.61194029850746, "grad_norm": 0.031494140625, "learning_rate": 5.242867811799851e-05, "loss": 0.0021, "step": 31900 }, { "epoch": 47.649253731343286, "grad_norm": 0.045166015625, "learning_rate": 5.239133681852129e-05, "loss": 0.0019, "step": 31925 }, { "epoch": 47.6865671641791, "grad_norm": 0.032470703125, "learning_rate": 5.2353995519044064e-05, "loss": 0.002, "step": 31950 }, { "epoch": 47.723880597014926, "grad_norm": 0.051025390625, "learning_rate": 5.231665421956684e-05, "loss": 0.0022, "step": 31975 }, { "epoch": 47.76119402985075, "grad_norm": 0.050048828125, "learning_rate": 5.227931292008962e-05, "loss": 0.0021, "step": 32000 }, { "epoch": 47.798507462686565, "grad_norm": 0.048828125, "learning_rate": 5.2241971620612394e-05, "loss": 0.0021, "step": 32025 }, { "epoch": 47.83582089552239, "grad_norm": 0.037353515625, "learning_rate": 5.2204630321135175e-05, "loss": 0.0022, "step": 32050 }, { "epoch": 47.87313432835821, "grad_norm": 0.0322265625, "learning_rate": 5.216728902165796e-05, "loss": 0.0019, "step": 32075 }, { "epoch": 47.91044776119403, "grad_norm": 0.029541015625, "learning_rate": 5.212994772218074e-05, "loss": 0.0023, "step": 32100 }, { "epoch": 47.94776119402985, "grad_norm": 0.04248046875, "learning_rate": 5.209260642270352e-05, "loss": 0.0019, "step": 32125 }, { "epoch": 47.985074626865675, "grad_norm": 0.057373046875, "learning_rate": 5.205526512322629e-05, "loss": 0.002, "step": 32150 }, { "epoch": 48.02238805970149, "grad_norm": 0.0198974609375, "learning_rate": 5.2017923823749074e-05, "loss": 0.0019, "step": 32175 }, { "epoch": 48.059701492537314, "grad_norm": 0.0205078125, "learning_rate": 5.198058252427185e-05, "loss": 0.0015, "step": 32200 }, { "epoch": 48.09701492537314, "grad_norm": 0.0380859375, "learning_rate": 5.194324122479462e-05, "loss": 0.0013, "step": 32225 }, { "epoch": 48.134328358208954, "grad_norm": 0.033203125, "learning_rate": 5.1905899925317404e-05, "loss": 0.0016, "step": 32250 }, { "epoch": 48.17164179104478, "grad_norm": 0.0284423828125, "learning_rate": 5.186855862584018e-05, "loss": 0.0016, "step": 32275 }, { "epoch": 48.208955223880594, "grad_norm": 0.038818359375, "learning_rate": 5.183121732636296e-05, "loss": 0.0019, "step": 32300 }, { "epoch": 48.24626865671642, "grad_norm": 0.041748046875, "learning_rate": 5.1793876026885734e-05, "loss": 0.0018, "step": 32325 }, { "epoch": 48.28358208955224, "grad_norm": 0.048095703125, "learning_rate": 5.1756534727408515e-05, "loss": 0.0019, "step": 32350 }, { "epoch": 48.32089552238806, "grad_norm": 0.0233154296875, "learning_rate": 5.171919342793129e-05, "loss": 0.0018, "step": 32375 }, { "epoch": 48.35820895522388, "grad_norm": 0.0341796875, "learning_rate": 5.168185212845408e-05, "loss": 0.0018, "step": 32400 }, { "epoch": 48.3955223880597, "grad_norm": 0.0247802734375, "learning_rate": 5.164451082897686e-05, "loss": 0.0019, "step": 32425 }, { "epoch": 48.43283582089552, "grad_norm": 0.03759765625, "learning_rate": 5.160716952949963e-05, "loss": 0.0019, "step": 32450 }, { "epoch": 48.47014925373134, "grad_norm": 0.037353515625, "learning_rate": 5.156982823002241e-05, "loss": 0.002, "step": 32475 }, { "epoch": 48.507462686567166, "grad_norm": 0.03466796875, "learning_rate": 5.153248693054519e-05, "loss": 0.002, "step": 32500 }, { "epoch": 48.54477611940298, "grad_norm": 0.0252685546875, "learning_rate": 5.149514563106796e-05, "loss": 0.0021, "step": 32525 }, { "epoch": 48.582089552238806, "grad_norm": 0.034912109375, "learning_rate": 5.1457804331590744e-05, "loss": 0.0021, "step": 32550 }, { "epoch": 48.61940298507463, "grad_norm": 0.0223388671875, "learning_rate": 5.142046303211352e-05, "loss": 0.0021, "step": 32575 }, { "epoch": 48.656716417910445, "grad_norm": 0.0458984375, "learning_rate": 5.13831217326363e-05, "loss": 0.0019, "step": 32600 }, { "epoch": 48.69402985074627, "grad_norm": 0.037841796875, "learning_rate": 5.1345780433159074e-05, "loss": 0.0024, "step": 32625 }, { "epoch": 48.73134328358209, "grad_norm": 0.0303955078125, "learning_rate": 5.130843913368185e-05, "loss": 0.0018, "step": 32650 }, { "epoch": 48.76865671641791, "grad_norm": 0.044677734375, "learning_rate": 5.127109783420463e-05, "loss": 0.0023, "step": 32675 }, { "epoch": 48.80597014925373, "grad_norm": 0.015625, "learning_rate": 5.1233756534727404e-05, "loss": 0.0021, "step": 32700 }, { "epoch": 48.843283582089555, "grad_norm": 0.0419921875, "learning_rate": 5.119641523525019e-05, "loss": 0.0021, "step": 32725 }, { "epoch": 48.88059701492537, "grad_norm": 0.047119140625, "learning_rate": 5.115907393577297e-05, "loss": 0.002, "step": 32750 }, { "epoch": 48.917910447761194, "grad_norm": 0.037109375, "learning_rate": 5.112173263629575e-05, "loss": 0.0022, "step": 32775 }, { "epoch": 48.95522388059702, "grad_norm": 0.034423828125, "learning_rate": 5.108439133681853e-05, "loss": 0.0026, "step": 32800 }, { "epoch": 48.992537313432834, "grad_norm": 0.0625, "learning_rate": 5.10470500373413e-05, "loss": 0.0024, "step": 32825 }, { "epoch": 49.02985074626866, "grad_norm": 0.027587890625, "learning_rate": 5.1009708737864084e-05, "loss": 0.0016, "step": 32850 }, { "epoch": 49.06716417910448, "grad_norm": 0.0289306640625, "learning_rate": 5.097236743838686e-05, "loss": 0.0018, "step": 32875 }, { "epoch": 49.1044776119403, "grad_norm": 0.0439453125, "learning_rate": 5.093502613890963e-05, "loss": 0.0017, "step": 32900 }, { "epoch": 49.14179104477612, "grad_norm": 0.0301513671875, "learning_rate": 5.0897684839432414e-05, "loss": 0.0021, "step": 32925 }, { "epoch": 49.17910447761194, "grad_norm": 0.042236328125, "learning_rate": 5.086034353995519e-05, "loss": 0.0018, "step": 32950 }, { "epoch": 49.21641791044776, "grad_norm": 0.0267333984375, "learning_rate": 5.082300224047797e-05, "loss": 0.0018, "step": 32975 }, { "epoch": 49.25373134328358, "grad_norm": 0.03515625, "learning_rate": 5.0785660941000744e-05, "loss": 0.0019, "step": 33000 }, { "epoch": 49.291044776119406, "grad_norm": 0.03466796875, "learning_rate": 5.0748319641523525e-05, "loss": 0.0018, "step": 33025 }, { "epoch": 49.32835820895522, "grad_norm": 0.036376953125, "learning_rate": 5.07109783420463e-05, "loss": 0.0018, "step": 33050 }, { "epoch": 49.365671641791046, "grad_norm": 0.03955078125, "learning_rate": 5.067363704256909e-05, "loss": 0.0018, "step": 33075 }, { "epoch": 49.40298507462686, "grad_norm": 0.041259765625, "learning_rate": 5.063629574309187e-05, "loss": 0.0021, "step": 33100 }, { "epoch": 49.440298507462686, "grad_norm": 0.0284423828125, "learning_rate": 5.059895444361464e-05, "loss": 0.0018, "step": 33125 }, { "epoch": 49.47761194029851, "grad_norm": 0.03369140625, "learning_rate": 5.056161314413742e-05, "loss": 0.002, "step": 33150 }, { "epoch": 49.514925373134325, "grad_norm": 0.0390625, "learning_rate": 5.05242718446602e-05, "loss": 0.002, "step": 33175 }, { "epoch": 49.55223880597015, "grad_norm": 0.04150390625, "learning_rate": 5.048693054518297e-05, "loss": 0.0021, "step": 33200 }, { "epoch": 49.58955223880597, "grad_norm": 0.04443359375, "learning_rate": 5.0449589245705754e-05, "loss": 0.0021, "step": 33225 }, { "epoch": 49.62686567164179, "grad_norm": 0.0361328125, "learning_rate": 5.041224794622853e-05, "loss": 0.0021, "step": 33250 }, { "epoch": 49.66417910447761, "grad_norm": 0.021484375, "learning_rate": 5.037490664675131e-05, "loss": 0.0021, "step": 33275 }, { "epoch": 49.701492537313435, "grad_norm": 0.05126953125, "learning_rate": 5.0337565347274084e-05, "loss": 0.002, "step": 33300 }, { "epoch": 49.73880597014925, "grad_norm": 0.02783203125, "learning_rate": 5.0300224047796865e-05, "loss": 0.0021, "step": 33325 }, { "epoch": 49.776119402985074, "grad_norm": 0.0439453125, "learning_rate": 5.026288274831964e-05, "loss": 0.0022, "step": 33350 }, { "epoch": 49.8134328358209, "grad_norm": 0.03564453125, "learning_rate": 5.0225541448842414e-05, "loss": 0.0019, "step": 33375 }, { "epoch": 49.850746268656714, "grad_norm": 0.03271484375, "learning_rate": 5.01882001493652e-05, "loss": 0.0021, "step": 33400 }, { "epoch": 49.88805970149254, "grad_norm": 0.053466796875, "learning_rate": 5.015085884988798e-05, "loss": 0.0022, "step": 33425 }, { "epoch": 49.92537313432836, "grad_norm": 0.0390625, "learning_rate": 5.011351755041076e-05, "loss": 0.0022, "step": 33450 }, { "epoch": 49.96268656716418, "grad_norm": 0.015869140625, "learning_rate": 5.007617625093354e-05, "loss": 0.002, "step": 33475 }, { "epoch": 50.0, "grad_norm": 0.0205078125, "learning_rate": 5.003883495145631e-05, "loss": 0.0021, "step": 33500 }, { "epoch": 50.03731343283582, "grad_norm": 0.0654296875, "learning_rate": 5.0001493651979094e-05, "loss": 0.002, "step": 33525 }, { "epoch": 50.07462686567164, "grad_norm": 0.036865234375, "learning_rate": 4.996415235250187e-05, "loss": 0.0018, "step": 33550 }, { "epoch": 50.11194029850746, "grad_norm": 0.0308837890625, "learning_rate": 4.992681105302465e-05, "loss": 0.0017, "step": 33575 }, { "epoch": 50.149253731343286, "grad_norm": 0.0576171875, "learning_rate": 4.9889469753547424e-05, "loss": 0.0015, "step": 33600 }, { "epoch": 50.1865671641791, "grad_norm": 0.0400390625, "learning_rate": 4.98521284540702e-05, "loss": 0.0015, "step": 33625 }, { "epoch": 50.223880597014926, "grad_norm": 0.01275634765625, "learning_rate": 4.981478715459299e-05, "loss": 0.0019, "step": 33650 }, { "epoch": 50.26119402985075, "grad_norm": 0.046875, "learning_rate": 4.977744585511576e-05, "loss": 0.0018, "step": 33675 }, { "epoch": 50.298507462686565, "grad_norm": 0.020751953125, "learning_rate": 4.974010455563854e-05, "loss": 0.0017, "step": 33700 }, { "epoch": 50.33582089552239, "grad_norm": 0.0267333984375, "learning_rate": 4.970276325616132e-05, "loss": 0.0017, "step": 33725 }, { "epoch": 50.37313432835821, "grad_norm": 0.036865234375, "learning_rate": 4.966542195668409e-05, "loss": 0.0016, "step": 33750 }, { "epoch": 50.41044776119403, "grad_norm": 0.043212890625, "learning_rate": 4.962808065720687e-05, "loss": 0.0019, "step": 33775 }, { "epoch": 50.44776119402985, "grad_norm": 0.0439453125, "learning_rate": 4.959073935772965e-05, "loss": 0.002, "step": 33800 }, { "epoch": 50.485074626865675, "grad_norm": 0.035888671875, "learning_rate": 4.9553398058252435e-05, "loss": 0.0018, "step": 33825 }, { "epoch": 50.52238805970149, "grad_norm": 0.03076171875, "learning_rate": 4.951605675877521e-05, "loss": 0.002, "step": 33850 }, { "epoch": 50.559701492537314, "grad_norm": 0.0260009765625, "learning_rate": 4.947871545929798e-05, "loss": 0.0019, "step": 33875 }, { "epoch": 50.59701492537313, "grad_norm": 0.044921875, "learning_rate": 4.9441374159820765e-05, "loss": 0.0021, "step": 33900 }, { "epoch": 50.634328358208954, "grad_norm": 0.037109375, "learning_rate": 4.940403286034354e-05, "loss": 0.0022, "step": 33925 }, { "epoch": 50.67164179104478, "grad_norm": 0.036376953125, "learning_rate": 4.936669156086632e-05, "loss": 0.0024, "step": 33950 }, { "epoch": 50.708955223880594, "grad_norm": 0.0242919921875, "learning_rate": 4.93293502613891e-05, "loss": 0.002, "step": 33975 }, { "epoch": 50.74626865671642, "grad_norm": 0.056884765625, "learning_rate": 4.9292008961911876e-05, "loss": 0.0021, "step": 34000 }, { "epoch": 50.78358208955224, "grad_norm": 0.03369140625, "learning_rate": 4.925466766243466e-05, "loss": 0.002, "step": 34025 }, { "epoch": 50.82089552238806, "grad_norm": 0.025390625, "learning_rate": 4.921732636295743e-05, "loss": 0.0021, "step": 34050 }, { "epoch": 50.85820895522388, "grad_norm": 0.037353515625, "learning_rate": 4.917998506348021e-05, "loss": 0.0024, "step": 34075 }, { "epoch": 50.8955223880597, "grad_norm": 0.072265625, "learning_rate": 4.914264376400299e-05, "loss": 0.0025, "step": 34100 }, { "epoch": 50.93283582089552, "grad_norm": 0.04833984375, "learning_rate": 4.910530246452577e-05, "loss": 0.0023, "step": 34125 }, { "epoch": 50.97014925373134, "grad_norm": 0.03271484375, "learning_rate": 4.906796116504855e-05, "loss": 0.0023, "step": 34150 }, { "epoch": 51.007462686567166, "grad_norm": 0.02490234375, "learning_rate": 4.9030619865571324e-05, "loss": 0.002, "step": 34175 }, { "epoch": 51.04477611940298, "grad_norm": 0.038330078125, "learning_rate": 4.8993278566094105e-05, "loss": 0.0015, "step": 34200 }, { "epoch": 51.082089552238806, "grad_norm": 0.041259765625, "learning_rate": 4.895593726661688e-05, "loss": 0.0018, "step": 34225 }, { "epoch": 51.11940298507463, "grad_norm": 0.04296875, "learning_rate": 4.891859596713966e-05, "loss": 0.002, "step": 34250 }, { "epoch": 51.156716417910445, "grad_norm": 0.0673828125, "learning_rate": 4.8881254667662435e-05, "loss": 0.002, "step": 34275 }, { "epoch": 51.19402985074627, "grad_norm": 0.041259765625, "learning_rate": 4.8843913368185216e-05, "loss": 0.0022, "step": 34300 }, { "epoch": 51.23134328358209, "grad_norm": 0.2158203125, "learning_rate": 4.8806572068708e-05, "loss": 0.0044, "step": 34325 }, { "epoch": 51.26865671641791, "grad_norm": 0.25390625, "learning_rate": 4.876923076923077e-05, "loss": 0.0094, "step": 34350 }, { "epoch": 51.30597014925373, "grad_norm": 0.2216796875, "learning_rate": 4.873188946975355e-05, "loss": 0.0096, "step": 34375 }, { "epoch": 51.343283582089555, "grad_norm": 0.19140625, "learning_rate": 4.869454817027633e-05, "loss": 0.0086, "step": 34400 }, { "epoch": 51.38059701492537, "grad_norm": 0.232421875, "learning_rate": 4.86572068707991e-05, "loss": 0.0071, "step": 34425 }, { "epoch": 51.417910447761194, "grad_norm": 0.236328125, "learning_rate": 4.861986557132188e-05, "loss": 0.0068, "step": 34450 }, { "epoch": 51.45522388059702, "grad_norm": 0.11767578125, "learning_rate": 4.8582524271844664e-05, "loss": 0.0063, "step": 34475 }, { "epoch": 51.492537313432834, "grad_norm": 0.1875, "learning_rate": 4.8545182972367445e-05, "loss": 0.0058, "step": 34500 }, { "epoch": 51.52985074626866, "grad_norm": 0.1259765625, "learning_rate": 4.850784167289022e-05, "loss": 0.0058, "step": 34525 }, { "epoch": 51.56716417910448, "grad_norm": 0.1240234375, "learning_rate": 4.8470500373412994e-05, "loss": 0.0052, "step": 34550 }, { "epoch": 51.6044776119403, "grad_norm": 0.08837890625, "learning_rate": 4.8433159073935775e-05, "loss": 0.0048, "step": 34575 }, { "epoch": 51.64179104477612, "grad_norm": 0.111328125, "learning_rate": 4.839581777445855e-05, "loss": 0.0053, "step": 34600 }, { "epoch": 51.67910447761194, "grad_norm": 0.10107421875, "learning_rate": 4.835847647498133e-05, "loss": 0.0042, "step": 34625 }, { "epoch": 51.71641791044776, "grad_norm": 0.1728515625, "learning_rate": 4.832113517550411e-05, "loss": 0.0042, "step": 34650 }, { "epoch": 51.75373134328358, "grad_norm": 0.15625, "learning_rate": 4.8283793876026886e-05, "loss": 0.004, "step": 34675 }, { "epoch": 51.791044776119406, "grad_norm": 0.123046875, "learning_rate": 4.824645257654967e-05, "loss": 0.0047, "step": 34700 }, { "epoch": 51.82835820895522, "grad_norm": 0.1044921875, "learning_rate": 4.820911127707244e-05, "loss": 0.0044, "step": 34725 }, { "epoch": 51.865671641791046, "grad_norm": 0.1826171875, "learning_rate": 4.817176997759522e-05, "loss": 0.0043, "step": 34750 }, { "epoch": 51.90298507462687, "grad_norm": 0.064453125, "learning_rate": 4.8134428678118e-05, "loss": 0.0038, "step": 34775 }, { "epoch": 51.940298507462686, "grad_norm": 0.1552734375, "learning_rate": 4.809708737864078e-05, "loss": 0.0037, "step": 34800 }, { "epoch": 51.97761194029851, "grad_norm": 0.1474609375, "learning_rate": 4.805974607916356e-05, "loss": 0.0036, "step": 34825 }, { "epoch": 52.014925373134325, "grad_norm": 0.044921875, "learning_rate": 4.8022404779686334e-05, "loss": 0.0032, "step": 34850 }, { "epoch": 52.05223880597015, "grad_norm": 0.181640625, "learning_rate": 4.7985063480209115e-05, "loss": 0.0022, "step": 34875 }, { "epoch": 52.08955223880597, "grad_norm": 0.0791015625, "learning_rate": 4.794772218073189e-05, "loss": 0.0029, "step": 34900 }, { "epoch": 52.12686567164179, "grad_norm": 0.2001953125, "learning_rate": 4.791038088125467e-05, "loss": 0.0024, "step": 34925 }, { "epoch": 52.16417910447761, "grad_norm": 0.07763671875, "learning_rate": 4.7873039581777445e-05, "loss": 0.0026, "step": 34950 }, { "epoch": 52.201492537313435, "grad_norm": 0.263671875, "learning_rate": 4.7835698282300226e-05, "loss": 0.0025, "step": 34975 }, { "epoch": 52.23880597014925, "grad_norm": 0.08544921875, "learning_rate": 4.779835698282301e-05, "loss": 0.0026, "step": 35000 }, { "epoch": 52.276119402985074, "grad_norm": 0.0296630859375, "learning_rate": 4.776101568334578e-05, "loss": 0.0022, "step": 35025 }, { "epoch": 52.3134328358209, "grad_norm": 0.03662109375, "learning_rate": 4.772367438386856e-05, "loss": 0.0028, "step": 35050 }, { "epoch": 52.350746268656714, "grad_norm": 0.0303955078125, "learning_rate": 4.768633308439134e-05, "loss": 0.0024, "step": 35075 }, { "epoch": 52.38805970149254, "grad_norm": 0.04052734375, "learning_rate": 4.764899178491412e-05, "loss": 0.0027, "step": 35100 }, { "epoch": 52.42537313432836, "grad_norm": 0.07470703125, "learning_rate": 4.761165048543689e-05, "loss": 0.0025, "step": 35125 }, { "epoch": 52.46268656716418, "grad_norm": 0.0439453125, "learning_rate": 4.7574309185959674e-05, "loss": 0.0022, "step": 35150 }, { "epoch": 52.5, "grad_norm": 0.1123046875, "learning_rate": 4.7536967886482455e-05, "loss": 0.0027, "step": 35175 }, { "epoch": 52.53731343283582, "grad_norm": 0.044677734375, "learning_rate": 4.749962658700523e-05, "loss": 0.0024, "step": 35200 }, { "epoch": 52.57462686567164, "grad_norm": 0.087890625, "learning_rate": 4.746228528752801e-05, "loss": 0.0021, "step": 35225 }, { "epoch": 52.61194029850746, "grad_norm": 0.052001953125, "learning_rate": 4.7424943988050785e-05, "loss": 0.0026, "step": 35250 }, { "epoch": 52.649253731343286, "grad_norm": 0.0257568359375, "learning_rate": 4.738760268857356e-05, "loss": 0.0027, "step": 35275 }, { "epoch": 52.6865671641791, "grad_norm": 0.036376953125, "learning_rate": 4.735026138909635e-05, "loss": 0.0025, "step": 35300 }, { "epoch": 52.723880597014926, "grad_norm": 0.041259765625, "learning_rate": 4.731292008961912e-05, "loss": 0.0029, "step": 35325 }, { "epoch": 52.76119402985075, "grad_norm": 0.0615234375, "learning_rate": 4.72755787901419e-05, "loss": 0.0023, "step": 35350 }, { "epoch": 52.798507462686565, "grad_norm": 0.06640625, "learning_rate": 4.723823749066468e-05, "loss": 0.0029, "step": 35375 }, { "epoch": 52.83582089552239, "grad_norm": 0.041259765625, "learning_rate": 4.720089619118745e-05, "loss": 0.0027, "step": 35400 }, { "epoch": 52.87313432835821, "grad_norm": 0.0634765625, "learning_rate": 4.716355489171023e-05, "loss": 0.0029, "step": 35425 }, { "epoch": 52.91044776119403, "grad_norm": 0.05078125, "learning_rate": 4.712621359223301e-05, "loss": 0.003, "step": 35450 }, { "epoch": 52.94776119402985, "grad_norm": 0.03125, "learning_rate": 4.7088872292755795e-05, "loss": 0.0024, "step": 35475 }, { "epoch": 52.985074626865675, "grad_norm": 0.04296875, "learning_rate": 4.705153099327857e-05, "loss": 0.0024, "step": 35500 }, { "epoch": 53.02238805970149, "grad_norm": 0.044921875, "learning_rate": 4.7014189693801344e-05, "loss": 0.002, "step": 35525 }, { "epoch": 53.059701492537314, "grad_norm": 0.0302734375, "learning_rate": 4.6976848394324125e-05, "loss": 0.0021, "step": 35550 }, { "epoch": 53.09701492537314, "grad_norm": 0.1591796875, "learning_rate": 4.69395070948469e-05, "loss": 0.0018, "step": 35575 }, { "epoch": 53.134328358208954, "grad_norm": 0.0361328125, "learning_rate": 4.690216579536968e-05, "loss": 0.0019, "step": 35600 }, { "epoch": 53.17164179104478, "grad_norm": 0.031982421875, "learning_rate": 4.686482449589246e-05, "loss": 0.0017, "step": 35625 }, { "epoch": 53.208955223880594, "grad_norm": 0.051513671875, "learning_rate": 4.6827483196415236e-05, "loss": 0.002, "step": 35650 }, { "epoch": 53.24626865671642, "grad_norm": 0.024169921875, "learning_rate": 4.679014189693802e-05, "loss": 0.0019, "step": 35675 }, { "epoch": 53.28358208955224, "grad_norm": 0.034423828125, "learning_rate": 4.675280059746079e-05, "loss": 0.002, "step": 35700 }, { "epoch": 53.32089552238806, "grad_norm": 0.058349609375, "learning_rate": 4.671545929798357e-05, "loss": 0.0024, "step": 35725 }, { "epoch": 53.35820895522388, "grad_norm": 0.040283203125, "learning_rate": 4.667811799850635e-05, "loss": 0.002, "step": 35750 }, { "epoch": 53.3955223880597, "grad_norm": 0.0301513671875, "learning_rate": 4.664077669902913e-05, "loss": 0.002, "step": 35775 }, { "epoch": 53.43283582089552, "grad_norm": 0.03564453125, "learning_rate": 4.660343539955191e-05, "loss": 0.002, "step": 35800 }, { "epoch": 53.47014925373134, "grad_norm": 0.0361328125, "learning_rate": 4.6566094100074684e-05, "loss": 0.002, "step": 35825 }, { "epoch": 53.507462686567166, "grad_norm": 0.041259765625, "learning_rate": 4.6528752800597466e-05, "loss": 0.0021, "step": 35850 }, { "epoch": 53.54477611940298, "grad_norm": 0.018310546875, "learning_rate": 4.649141150112024e-05, "loss": 0.0015, "step": 35875 }, { "epoch": 53.582089552238806, "grad_norm": 0.031494140625, "learning_rate": 4.645407020164302e-05, "loss": 0.0019, "step": 35900 }, { "epoch": 53.61940298507463, "grad_norm": 0.040771484375, "learning_rate": 4.6416728902165795e-05, "loss": 0.0017, "step": 35925 }, { "epoch": 53.656716417910445, "grad_norm": 0.0208740234375, "learning_rate": 4.637938760268857e-05, "loss": 0.0018, "step": 35950 }, { "epoch": 53.69402985074627, "grad_norm": 0.0299072265625, "learning_rate": 4.634204630321136e-05, "loss": 0.0018, "step": 35975 }, { "epoch": 53.73134328358209, "grad_norm": 0.0235595703125, "learning_rate": 4.630470500373413e-05, "loss": 0.002, "step": 36000 }, { "epoch": 53.76865671641791, "grad_norm": 0.032470703125, "learning_rate": 4.6267363704256913e-05, "loss": 0.0019, "step": 36025 }, { "epoch": 53.80597014925373, "grad_norm": 0.04052734375, "learning_rate": 4.623002240477969e-05, "loss": 0.0019, "step": 36050 }, { "epoch": 53.843283582089555, "grad_norm": 0.048828125, "learning_rate": 4.619268110530246e-05, "loss": 0.0019, "step": 36075 }, { "epoch": 53.88059701492537, "grad_norm": 0.032958984375, "learning_rate": 4.615533980582524e-05, "loss": 0.0018, "step": 36100 }, { "epoch": 53.917910447761194, "grad_norm": 0.031982421875, "learning_rate": 4.6117998506348025e-05, "loss": 0.0016, "step": 36125 }, { "epoch": 53.95522388059702, "grad_norm": 0.0142822265625, "learning_rate": 4.6080657206870806e-05, "loss": 0.002, "step": 36150 }, { "epoch": 53.992537313432834, "grad_norm": 0.052490234375, "learning_rate": 4.604331590739358e-05, "loss": 0.0022, "step": 36175 }, { "epoch": 54.02985074626866, "grad_norm": 0.0244140625, "learning_rate": 4.6005974607916355e-05, "loss": 0.0019, "step": 36200 }, { "epoch": 54.06716417910448, "grad_norm": 0.04296875, "learning_rate": 4.5968633308439136e-05, "loss": 0.0018, "step": 36225 }, { "epoch": 54.1044776119403, "grad_norm": 0.04541015625, "learning_rate": 4.593129200896191e-05, "loss": 0.0017, "step": 36250 }, { "epoch": 54.14179104477612, "grad_norm": 0.0203857421875, "learning_rate": 4.589395070948469e-05, "loss": 0.0016, "step": 36275 }, { "epoch": 54.17910447761194, "grad_norm": 0.03369140625, "learning_rate": 4.585660941000747e-05, "loss": 0.0017, "step": 36300 }, { "epoch": 54.21641791044776, "grad_norm": 0.04443359375, "learning_rate": 4.581926811053025e-05, "loss": 0.0015, "step": 36325 }, { "epoch": 54.25373134328358, "grad_norm": 0.0380859375, "learning_rate": 4.578192681105303e-05, "loss": 0.0017, "step": 36350 }, { "epoch": 54.291044776119406, "grad_norm": 0.044677734375, "learning_rate": 4.57445855115758e-05, "loss": 0.0017, "step": 36375 }, { "epoch": 54.32835820895522, "grad_norm": 0.03076171875, "learning_rate": 4.5707244212098584e-05, "loss": 0.0018, "step": 36400 }, { "epoch": 54.365671641791046, "grad_norm": 0.050537109375, "learning_rate": 4.566990291262136e-05, "loss": 0.002, "step": 36425 }, { "epoch": 54.40298507462686, "grad_norm": 0.0224609375, "learning_rate": 4.563256161314414e-05, "loss": 0.0018, "step": 36450 }, { "epoch": 54.440298507462686, "grad_norm": 0.046142578125, "learning_rate": 4.559522031366692e-05, "loss": 0.0018, "step": 36475 }, { "epoch": 54.47761194029851, "grad_norm": 0.037109375, "learning_rate": 4.5557879014189695e-05, "loss": 0.0019, "step": 36500 }, { "epoch": 54.514925373134325, "grad_norm": 0.03564453125, "learning_rate": 4.5520537714712476e-05, "loss": 0.0019, "step": 36525 }, { "epoch": 54.55223880597015, "grad_norm": 0.0439453125, "learning_rate": 4.548319641523525e-05, "loss": 0.0021, "step": 36550 }, { "epoch": 54.58955223880597, "grad_norm": 0.041748046875, "learning_rate": 4.544585511575803e-05, "loss": 0.002, "step": 36575 }, { "epoch": 54.62686567164179, "grad_norm": 0.04248046875, "learning_rate": 4.5408513816280806e-05, "loss": 0.0019, "step": 36600 }, { "epoch": 54.66417910447761, "grad_norm": 0.02685546875, "learning_rate": 4.537117251680359e-05, "loss": 0.0021, "step": 36625 }, { "epoch": 54.701492537313435, "grad_norm": 0.03955078125, "learning_rate": 4.533383121732637e-05, "loss": 0.0019, "step": 36650 }, { "epoch": 54.73880597014925, "grad_norm": 0.037353515625, "learning_rate": 4.529648991784914e-05, "loss": 0.0021, "step": 36675 }, { "epoch": 54.776119402985074, "grad_norm": 0.0269775390625, "learning_rate": 4.5259148618371924e-05, "loss": 0.0018, "step": 36700 }, { "epoch": 54.8134328358209, "grad_norm": 0.03369140625, "learning_rate": 4.52218073188947e-05, "loss": 0.0021, "step": 36725 }, { "epoch": 54.850746268656714, "grad_norm": 0.022216796875, "learning_rate": 4.518446601941748e-05, "loss": 0.0017, "step": 36750 }, { "epoch": 54.88805970149254, "grad_norm": 0.041015625, "learning_rate": 4.5147124719940254e-05, "loss": 0.0018, "step": 36775 }, { "epoch": 54.92537313432836, "grad_norm": 0.04052734375, "learning_rate": 4.5109783420463035e-05, "loss": 0.002, "step": 36800 }, { "epoch": 54.96268656716418, "grad_norm": 0.01611328125, "learning_rate": 4.5072442120985816e-05, "loss": 0.0018, "step": 36825 }, { "epoch": 55.0, "grad_norm": 0.062255859375, "learning_rate": 4.503510082150859e-05, "loss": 0.0019, "step": 36850 }, { "epoch": 55.03731343283582, "grad_norm": 0.03125, "learning_rate": 4.499775952203137e-05, "loss": 0.0019, "step": 36875 }, { "epoch": 55.07462686567164, "grad_norm": 0.0380859375, "learning_rate": 4.4960418222554146e-05, "loss": 0.0018, "step": 36900 }, { "epoch": 55.11194029850746, "grad_norm": 0.041748046875, "learning_rate": 4.492307692307692e-05, "loss": 0.0016, "step": 36925 }, { "epoch": 55.149253731343286, "grad_norm": 0.0361328125, "learning_rate": 4.488573562359971e-05, "loss": 0.0017, "step": 36950 }, { "epoch": 55.1865671641791, "grad_norm": 0.031494140625, "learning_rate": 4.484839432412248e-05, "loss": 0.0017, "step": 36975 }, { "epoch": 55.223880597014926, "grad_norm": 0.054443359375, "learning_rate": 4.4811053024645264e-05, "loss": 0.0017, "step": 37000 }, { "epoch": 55.26119402985075, "grad_norm": 0.055419921875, "learning_rate": 4.477371172516804e-05, "loss": 0.0019, "step": 37025 }, { "epoch": 55.298507462686565, "grad_norm": 0.04296875, "learning_rate": 4.473637042569081e-05, "loss": 0.0018, "step": 37050 }, { "epoch": 55.33582089552239, "grad_norm": 0.0244140625, "learning_rate": 4.4699029126213594e-05, "loss": 0.002, "step": 37075 }, { "epoch": 55.37313432835821, "grad_norm": 0.04296875, "learning_rate": 4.466168782673637e-05, "loss": 0.0019, "step": 37100 }, { "epoch": 55.41044776119403, "grad_norm": 0.05029296875, "learning_rate": 4.4624346527259156e-05, "loss": 0.0017, "step": 37125 }, { "epoch": 55.44776119402985, "grad_norm": 0.040771484375, "learning_rate": 4.458700522778193e-05, "loss": 0.0018, "step": 37150 }, { "epoch": 55.485074626865675, "grad_norm": 0.0380859375, "learning_rate": 4.4549663928304705e-05, "loss": 0.0019, "step": 37175 }, { "epoch": 55.52238805970149, "grad_norm": 0.025390625, "learning_rate": 4.4512322628827486e-05, "loss": 0.0018, "step": 37200 }, { "epoch": 55.559701492537314, "grad_norm": 0.033935546875, "learning_rate": 4.447498132935026e-05, "loss": 0.0018, "step": 37225 }, { "epoch": 55.59701492537313, "grad_norm": 0.050537109375, "learning_rate": 4.443764002987304e-05, "loss": 0.0022, "step": 37250 }, { "epoch": 55.634328358208954, "grad_norm": 0.049072265625, "learning_rate": 4.4400298730395816e-05, "loss": 0.002, "step": 37275 }, { "epoch": 55.67164179104478, "grad_norm": 0.0225830078125, "learning_rate": 4.43629574309186e-05, "loss": 0.0019, "step": 37300 }, { "epoch": 55.708955223880594, "grad_norm": 0.029541015625, "learning_rate": 4.432561613144138e-05, "loss": 0.0018, "step": 37325 }, { "epoch": 55.74626865671642, "grad_norm": 0.0439453125, "learning_rate": 4.428827483196415e-05, "loss": 0.0017, "step": 37350 }, { "epoch": 55.78358208955224, "grad_norm": 0.0625, "learning_rate": 4.4250933532486934e-05, "loss": 0.0023, "step": 37375 }, { "epoch": 55.82089552238806, "grad_norm": 0.045654296875, "learning_rate": 4.421359223300971e-05, "loss": 0.0018, "step": 37400 }, { "epoch": 55.85820895522388, "grad_norm": 0.041259765625, "learning_rate": 4.417625093353249e-05, "loss": 0.0019, "step": 37425 }, { "epoch": 55.8955223880597, "grad_norm": 0.039306640625, "learning_rate": 4.413890963405527e-05, "loss": 0.0017, "step": 37450 }, { "epoch": 55.93283582089552, "grad_norm": 0.039794921875, "learning_rate": 4.4101568334578045e-05, "loss": 0.002, "step": 37475 }, { "epoch": 55.97014925373134, "grad_norm": 0.044189453125, "learning_rate": 4.4064227035100826e-05, "loss": 0.002, "step": 37500 }, { "epoch": 56.007462686567166, "grad_norm": 0.03076171875, "learning_rate": 4.40268857356236e-05, "loss": 0.0019, "step": 37525 }, { "epoch": 56.04477611940298, "grad_norm": 0.0341796875, "learning_rate": 4.398954443614638e-05, "loss": 0.0017, "step": 37550 }, { "epoch": 56.082089552238806, "grad_norm": 0.032958984375, "learning_rate": 4.3952203136669156e-05, "loss": 0.0016, "step": 37575 }, { "epoch": 56.11940298507463, "grad_norm": 0.028564453125, "learning_rate": 4.391486183719193e-05, "loss": 0.0017, "step": 37600 }, { "epoch": 56.156716417910445, "grad_norm": 0.0322265625, "learning_rate": 4.387752053771472e-05, "loss": 0.0017, "step": 37625 }, { "epoch": 56.19402985074627, "grad_norm": 0.0274658203125, "learning_rate": 4.384017923823749e-05, "loss": 0.0017, "step": 37650 }, { "epoch": 56.23134328358209, "grad_norm": 0.0220947265625, "learning_rate": 4.3802837938760274e-05, "loss": 0.0017, "step": 37675 }, { "epoch": 56.26865671641791, "grad_norm": 0.044921875, "learning_rate": 4.376549663928305e-05, "loss": 0.0016, "step": 37700 }, { "epoch": 56.30597014925373, "grad_norm": 0.0177001953125, "learning_rate": 4.372815533980582e-05, "loss": 0.0016, "step": 37725 }, { "epoch": 56.343283582089555, "grad_norm": 0.033447265625, "learning_rate": 4.3690814040328604e-05, "loss": 0.0018, "step": 37750 }, { "epoch": 56.38059701492537, "grad_norm": 0.04345703125, "learning_rate": 4.365347274085138e-05, "loss": 0.002, "step": 37775 }, { "epoch": 56.417910447761194, "grad_norm": 0.0390625, "learning_rate": 4.3616131441374167e-05, "loss": 0.002, "step": 37800 }, { "epoch": 56.45522388059702, "grad_norm": 0.0264892578125, "learning_rate": 4.357879014189694e-05, "loss": 0.0018, "step": 37825 }, { "epoch": 56.492537313432834, "grad_norm": 0.032470703125, "learning_rate": 4.3541448842419715e-05, "loss": 0.0018, "step": 37850 }, { "epoch": 56.52985074626866, "grad_norm": 0.02978515625, "learning_rate": 4.3504107542942496e-05, "loss": 0.0018, "step": 37875 }, { "epoch": 56.56716417910448, "grad_norm": 0.035400390625, "learning_rate": 4.346676624346527e-05, "loss": 0.002, "step": 37900 }, { "epoch": 56.6044776119403, "grad_norm": 0.052978515625, "learning_rate": 4.342942494398805e-05, "loss": 0.002, "step": 37925 }, { "epoch": 56.64179104477612, "grad_norm": 0.03955078125, "learning_rate": 4.339208364451083e-05, "loss": 0.002, "step": 37950 }, { "epoch": 56.67910447761194, "grad_norm": 0.0225830078125, "learning_rate": 4.335474234503361e-05, "loss": 0.0022, "step": 37975 }, { "epoch": 56.71641791044776, "grad_norm": 0.03564453125, "learning_rate": 4.331740104555639e-05, "loss": 0.002, "step": 38000 }, { "epoch": 56.75373134328358, "grad_norm": 0.0341796875, "learning_rate": 4.328005974607916e-05, "loss": 0.002, "step": 38025 }, { "epoch": 56.791044776119406, "grad_norm": 0.0303955078125, "learning_rate": 4.3242718446601944e-05, "loss": 0.002, "step": 38050 }, { "epoch": 56.82835820895522, "grad_norm": 0.0361328125, "learning_rate": 4.320537714712472e-05, "loss": 0.002, "step": 38075 }, { "epoch": 56.865671641791046, "grad_norm": 0.036376953125, "learning_rate": 4.31680358476475e-05, "loss": 0.002, "step": 38100 }, { "epoch": 56.90298507462687, "grad_norm": 0.01953125, "learning_rate": 4.313069454817028e-05, "loss": 0.002, "step": 38125 }, { "epoch": 56.940298507462686, "grad_norm": 0.0263671875, "learning_rate": 4.3093353248693056e-05, "loss": 0.0018, "step": 38150 }, { "epoch": 56.97761194029851, "grad_norm": 0.03857421875, "learning_rate": 4.305601194921584e-05, "loss": 0.0018, "step": 38175 }, { "epoch": 57.014925373134325, "grad_norm": 0.0308837890625, "learning_rate": 4.301867064973861e-05, "loss": 0.0021, "step": 38200 }, { "epoch": 57.05223880597015, "grad_norm": 0.033935546875, "learning_rate": 4.298132935026139e-05, "loss": 0.0019, "step": 38225 }, { "epoch": 57.08955223880597, "grad_norm": 0.0289306640625, "learning_rate": 4.294398805078417e-05, "loss": 0.0016, "step": 38250 }, { "epoch": 57.12686567164179, "grad_norm": 0.0341796875, "learning_rate": 4.290664675130695e-05, "loss": 0.0017, "step": 38275 }, { "epoch": 57.16417910447761, "grad_norm": 0.0224609375, "learning_rate": 4.286930545182973e-05, "loss": 0.0017, "step": 38300 }, { "epoch": 57.201492537313435, "grad_norm": 0.036865234375, "learning_rate": 4.28319641523525e-05, "loss": 0.0018, "step": 38325 }, { "epoch": 57.23880597014925, "grad_norm": 0.044189453125, "learning_rate": 4.2794622852875285e-05, "loss": 0.0018, "step": 38350 }, { "epoch": 57.276119402985074, "grad_norm": 0.041748046875, "learning_rate": 4.275728155339806e-05, "loss": 0.0017, "step": 38375 }, { "epoch": 57.3134328358209, "grad_norm": 0.0296630859375, "learning_rate": 4.271994025392084e-05, "loss": 0.0017, "step": 38400 }, { "epoch": 57.350746268656714, "grad_norm": 0.023193359375, "learning_rate": 4.2682598954443615e-05, "loss": 0.0018, "step": 38425 }, { "epoch": 57.38805970149254, "grad_norm": 0.0225830078125, "learning_rate": 4.2645257654966396e-05, "loss": 0.0019, "step": 38450 }, { "epoch": 57.42537313432836, "grad_norm": 0.035400390625, "learning_rate": 4.260791635548918e-05, "loss": 0.0017, "step": 38475 }, { "epoch": 57.46268656716418, "grad_norm": 0.020751953125, "learning_rate": 4.257057505601195e-05, "loss": 0.002, "step": 38500 }, { "epoch": 57.5, "grad_norm": 0.0341796875, "learning_rate": 4.253323375653473e-05, "loss": 0.0019, "step": 38525 }, { "epoch": 57.53731343283582, "grad_norm": 0.015869140625, "learning_rate": 4.249589245705751e-05, "loss": 0.002, "step": 38550 }, { "epoch": 57.57462686567164, "grad_norm": 0.00787353515625, "learning_rate": 4.245855115758028e-05, "loss": 0.0017, "step": 38575 }, { "epoch": 57.61194029850746, "grad_norm": 0.0361328125, "learning_rate": 4.242120985810306e-05, "loss": 0.0018, "step": 38600 }, { "epoch": 57.649253731343286, "grad_norm": 0.04345703125, "learning_rate": 4.2383868558625844e-05, "loss": 0.0021, "step": 38625 }, { "epoch": 57.6865671641791, "grad_norm": 0.0380859375, "learning_rate": 4.2346527259148625e-05, "loss": 0.0019, "step": 38650 }, { "epoch": 57.723880597014926, "grad_norm": 0.026123046875, "learning_rate": 4.23091859596714e-05, "loss": 0.0019, "step": 38675 }, { "epoch": 57.76119402985075, "grad_norm": 0.03857421875, "learning_rate": 4.2271844660194174e-05, "loss": 0.002, "step": 38700 }, { "epoch": 57.798507462686565, "grad_norm": 0.03125, "learning_rate": 4.2234503360716955e-05, "loss": 0.0021, "step": 38725 }, { "epoch": 57.83582089552239, "grad_norm": 0.040283203125, "learning_rate": 4.219716206123973e-05, "loss": 0.0021, "step": 38750 }, { "epoch": 57.87313432835821, "grad_norm": 0.031982421875, "learning_rate": 4.215982076176252e-05, "loss": 0.002, "step": 38775 }, { "epoch": 57.91044776119403, "grad_norm": 0.04345703125, "learning_rate": 4.212247946228529e-05, "loss": 0.0022, "step": 38800 }, { "epoch": 57.94776119402985, "grad_norm": 0.047607421875, "learning_rate": 4.2085138162808066e-05, "loss": 0.0018, "step": 38825 }, { "epoch": 57.985074626865675, "grad_norm": 0.050537109375, "learning_rate": 4.204779686333085e-05, "loss": 0.0017, "step": 38850 }, { "epoch": 58.02238805970149, "grad_norm": 0.030029296875, "learning_rate": 4.201045556385362e-05, "loss": 0.0019, "step": 38875 }, { "epoch": 58.059701492537314, "grad_norm": 0.0311279296875, "learning_rate": 4.19731142643764e-05, "loss": 0.0018, "step": 38900 }, { "epoch": 58.09701492537314, "grad_norm": 0.06689453125, "learning_rate": 4.193577296489918e-05, "loss": 0.0019, "step": 38925 }, { "epoch": 58.134328358208954, "grad_norm": 0.03564453125, "learning_rate": 4.189843166542196e-05, "loss": 0.0017, "step": 38950 }, { "epoch": 58.17164179104478, "grad_norm": 0.0439453125, "learning_rate": 4.186109036594474e-05, "loss": 0.0017, "step": 38975 }, { "epoch": 58.208955223880594, "grad_norm": 0.041015625, "learning_rate": 4.1823749066467514e-05, "loss": 0.0017, "step": 39000 }, { "epoch": 58.24626865671642, "grad_norm": 0.0478515625, "learning_rate": 4.1786407766990295e-05, "loss": 0.0016, "step": 39025 }, { "epoch": 58.28358208955224, "grad_norm": 0.0478515625, "learning_rate": 4.174906646751307e-05, "loss": 0.0019, "step": 39050 }, { "epoch": 58.32089552238806, "grad_norm": 0.0264892578125, "learning_rate": 4.171172516803585e-05, "loss": 0.0019, "step": 39075 }, { "epoch": 58.35820895522388, "grad_norm": 0.033447265625, "learning_rate": 4.1674383868558625e-05, "loss": 0.0019, "step": 39100 }, { "epoch": 58.3955223880597, "grad_norm": 0.039306640625, "learning_rate": 4.1637042569081406e-05, "loss": 0.002, "step": 39125 }, { "epoch": 58.43283582089552, "grad_norm": 0.046630859375, "learning_rate": 4.159970126960419e-05, "loss": 0.0017, "step": 39150 }, { "epoch": 58.47014925373134, "grad_norm": 0.0206298828125, "learning_rate": 4.156235997012696e-05, "loss": 0.0018, "step": 39175 }, { "epoch": 58.507462686567166, "grad_norm": 0.05810546875, "learning_rate": 4.152501867064974e-05, "loss": 0.0018, "step": 39200 }, { "epoch": 58.54477611940298, "grad_norm": 0.050048828125, "learning_rate": 4.148767737117252e-05, "loss": 0.0017, "step": 39225 }, { "epoch": 58.582089552238806, "grad_norm": 0.03125, "learning_rate": 4.145033607169529e-05, "loss": 0.0018, "step": 39250 }, { "epoch": 58.61940298507463, "grad_norm": 0.01806640625, "learning_rate": 4.141299477221808e-05, "loss": 0.0019, "step": 39275 }, { "epoch": 58.656716417910445, "grad_norm": 0.028564453125, "learning_rate": 4.1375653472740854e-05, "loss": 0.002, "step": 39300 }, { "epoch": 58.69402985074627, "grad_norm": 0.037109375, "learning_rate": 4.1338312173263635e-05, "loss": 0.0019, "step": 39325 }, { "epoch": 58.73134328358209, "grad_norm": 0.002349853515625, "learning_rate": 4.130097087378641e-05, "loss": 0.0019, "step": 39350 }, { "epoch": 58.76865671641791, "grad_norm": 0.0439453125, "learning_rate": 4.1263629574309184e-05, "loss": 0.0019, "step": 39375 }, { "epoch": 58.80597014925373, "grad_norm": 0.041748046875, "learning_rate": 4.1226288274831965e-05, "loss": 0.0023, "step": 39400 }, { "epoch": 58.843283582089555, "grad_norm": 0.05126953125, "learning_rate": 4.118894697535474e-05, "loss": 0.0021, "step": 39425 }, { "epoch": 58.88059701492537, "grad_norm": 0.045166015625, "learning_rate": 4.115160567587753e-05, "loss": 0.002, "step": 39450 }, { "epoch": 58.917910447761194, "grad_norm": 0.031982421875, "learning_rate": 4.11142643764003e-05, "loss": 0.0022, "step": 39475 }, { "epoch": 58.95522388059702, "grad_norm": 0.03125, "learning_rate": 4.1076923076923076e-05, "loss": 0.0019, "step": 39500 }, { "epoch": 58.992537313432834, "grad_norm": 0.0419921875, "learning_rate": 4.103958177744586e-05, "loss": 0.002, "step": 39525 }, { "epoch": 59.02985074626866, "grad_norm": 0.0289306640625, "learning_rate": 4.100224047796863e-05, "loss": 0.0017, "step": 39550 }, { "epoch": 59.06716417910448, "grad_norm": 0.02001953125, "learning_rate": 4.096489917849141e-05, "loss": 0.0017, "step": 39575 }, { "epoch": 59.1044776119403, "grad_norm": 0.0361328125, "learning_rate": 4.092755787901419e-05, "loss": 0.0017, "step": 39600 }, { "epoch": 59.14179104477612, "grad_norm": 0.0189208984375, "learning_rate": 4.089021657953697e-05, "loss": 0.0018, "step": 39625 }, { "epoch": 59.17910447761194, "grad_norm": 0.0216064453125, "learning_rate": 4.085287528005975e-05, "loss": 0.0015, "step": 39650 }, { "epoch": 59.21641791044776, "grad_norm": 0.038818359375, "learning_rate": 4.0815533980582524e-05, "loss": 0.0017, "step": 39675 }, { "epoch": 59.25373134328358, "grad_norm": 0.025390625, "learning_rate": 4.0778192681105305e-05, "loss": 0.0019, "step": 39700 }, { "epoch": 59.291044776119406, "grad_norm": 0.040771484375, "learning_rate": 4.074085138162808e-05, "loss": 0.0017, "step": 39725 }, { "epoch": 59.32835820895522, "grad_norm": 0.0230712890625, "learning_rate": 4.070351008215086e-05, "loss": 0.0016, "step": 39750 }, { "epoch": 59.365671641791046, "grad_norm": 0.0240478515625, "learning_rate": 4.066616878267364e-05, "loss": 0.0018, "step": 39775 }, { "epoch": 59.40298507462686, "grad_norm": 0.041748046875, "learning_rate": 4.0628827483196416e-05, "loss": 0.0017, "step": 39800 }, { "epoch": 59.440298507462686, "grad_norm": 0.042236328125, "learning_rate": 4.05914861837192e-05, "loss": 0.0019, "step": 39825 }, { "epoch": 59.47761194029851, "grad_norm": 0.0439453125, "learning_rate": 4.055414488424197e-05, "loss": 0.0018, "step": 39850 }, { "epoch": 59.514925373134325, "grad_norm": 0.04443359375, "learning_rate": 4.051680358476475e-05, "loss": 0.0019, "step": 39875 }, { "epoch": 59.55223880597015, "grad_norm": 0.041259765625, "learning_rate": 4.047946228528753e-05, "loss": 0.0017, "step": 39900 }, { "epoch": 59.58955223880597, "grad_norm": 0.047119140625, "learning_rate": 4.044212098581031e-05, "loss": 0.002, "step": 39925 }, { "epoch": 59.62686567164179, "grad_norm": 0.02783203125, "learning_rate": 4.040477968633309e-05, "loss": 0.0023, "step": 39950 }, { "epoch": 59.66417910447761, "grad_norm": 0.06640625, "learning_rate": 4.0367438386855864e-05, "loss": 0.0021, "step": 39975 }, { "epoch": 59.701492537313435, "grad_norm": 0.033447265625, "learning_rate": 4.0330097087378645e-05, "loss": 0.0019, "step": 40000 }, { "epoch": 59.73880597014925, "grad_norm": 0.0302734375, "learning_rate": 4.029275578790142e-05, "loss": 0.0021, "step": 40025 }, { "epoch": 59.776119402985074, "grad_norm": 0.015625, "learning_rate": 4.02554144884242e-05, "loss": 0.0019, "step": 40050 }, { "epoch": 59.8134328358209, "grad_norm": 0.032958984375, "learning_rate": 4.0218073188946975e-05, "loss": 0.0021, "step": 40075 }, { "epoch": 59.850746268656714, "grad_norm": 0.04443359375, "learning_rate": 4.0180731889469756e-05, "loss": 0.002, "step": 40100 }, { "epoch": 59.88805970149254, "grad_norm": 0.0238037109375, "learning_rate": 4.014339058999254e-05, "loss": 0.0018, "step": 40125 }, { "epoch": 59.92537313432836, "grad_norm": 0.0390625, "learning_rate": 4.010604929051531e-05, "loss": 0.0018, "step": 40150 }, { "epoch": 59.96268656716418, "grad_norm": 0.015625, "learning_rate": 4.006870799103809e-05, "loss": 0.002, "step": 40175 }, { "epoch": 60.0, "grad_norm": 0.0361328125, "learning_rate": 4.003136669156087e-05, "loss": 0.0022, "step": 40200 }, { "epoch": 60.03731343283582, "grad_norm": 0.0311279296875, "learning_rate": 3.999402539208364e-05, "loss": 0.0017, "step": 40225 }, { "epoch": 60.07462686567164, "grad_norm": 0.019287109375, "learning_rate": 3.995668409260642e-05, "loss": 0.0018, "step": 40250 }, { "epoch": 60.11194029850746, "grad_norm": 0.028564453125, "learning_rate": 3.9919342793129204e-05, "loss": 0.0017, "step": 40275 }, { "epoch": 60.149253731343286, "grad_norm": 0.0294189453125, "learning_rate": 3.9882001493651986e-05, "loss": 0.002, "step": 40300 }, { "epoch": 60.1865671641791, "grad_norm": 0.017333984375, "learning_rate": 3.984466019417476e-05, "loss": 0.0016, "step": 40325 }, { "epoch": 60.223880597014926, "grad_norm": 0.02734375, "learning_rate": 3.9807318894697534e-05, "loss": 0.0015, "step": 40350 }, { "epoch": 60.26119402985075, "grad_norm": 0.0361328125, "learning_rate": 3.9769977595220316e-05, "loss": 0.002, "step": 40375 }, { "epoch": 60.298507462686565, "grad_norm": 0.026611328125, "learning_rate": 3.973263629574309e-05, "loss": 0.0017, "step": 40400 }, { "epoch": 60.33582089552239, "grad_norm": 0.03515625, "learning_rate": 3.969529499626587e-05, "loss": 0.0019, "step": 40425 }, { "epoch": 60.37313432835821, "grad_norm": 0.024169921875, "learning_rate": 3.965795369678865e-05, "loss": 0.0019, "step": 40450 }, { "epoch": 60.41044776119403, "grad_norm": 0.0291748046875, "learning_rate": 3.962061239731143e-05, "loss": 0.0021, "step": 40475 }, { "epoch": 60.44776119402985, "grad_norm": 0.0341796875, "learning_rate": 3.958327109783421e-05, "loss": 0.0019, "step": 40500 }, { "epoch": 60.485074626865675, "grad_norm": 0.03466796875, "learning_rate": 3.954592979835698e-05, "loss": 0.0016, "step": 40525 }, { "epoch": 60.52238805970149, "grad_norm": 0.05078125, "learning_rate": 3.950858849887976e-05, "loss": 0.0018, "step": 40550 }, { "epoch": 60.559701492537314, "grad_norm": 0.028564453125, "learning_rate": 3.947124719940254e-05, "loss": 0.0022, "step": 40575 }, { "epoch": 60.59701492537313, "grad_norm": 0.037841796875, "learning_rate": 3.943390589992532e-05, "loss": 0.0018, "step": 40600 }, { "epoch": 60.634328358208954, "grad_norm": 0.0225830078125, "learning_rate": 3.93965646004481e-05, "loss": 0.0019, "step": 40625 }, { "epoch": 60.67164179104478, "grad_norm": 0.03759765625, "learning_rate": 3.9359223300970875e-05, "loss": 0.0018, "step": 40650 }, { "epoch": 60.708955223880594, "grad_norm": 0.0390625, "learning_rate": 3.9321882001493656e-05, "loss": 0.002, "step": 40675 }, { "epoch": 60.74626865671642, "grad_norm": 0.017822265625, "learning_rate": 3.928454070201643e-05, "loss": 0.0022, "step": 40700 }, { "epoch": 60.78358208955224, "grad_norm": 0.017578125, "learning_rate": 3.924719940253921e-05, "loss": 0.002, "step": 40725 }, { "epoch": 60.82089552238806, "grad_norm": 0.0458984375, "learning_rate": 3.9209858103061986e-05, "loss": 0.0018, "step": 40750 }, { "epoch": 60.85820895522388, "grad_norm": 0.023681640625, "learning_rate": 3.917251680358477e-05, "loss": 0.002, "step": 40775 }, { "epoch": 60.8955223880597, "grad_norm": 0.02734375, "learning_rate": 3.913517550410755e-05, "loss": 0.0018, "step": 40800 }, { "epoch": 60.93283582089552, "grad_norm": 0.01123046875, "learning_rate": 3.909783420463032e-05, "loss": 0.0017, "step": 40825 }, { "epoch": 60.97014925373134, "grad_norm": 0.057373046875, "learning_rate": 3.9060492905153104e-05, "loss": 0.0021, "step": 40850 }, { "epoch": 61.007462686567166, "grad_norm": 0.024658203125, "learning_rate": 3.902315160567588e-05, "loss": 0.0022, "step": 40875 }, { "epoch": 61.04477611940298, "grad_norm": 0.033447265625, "learning_rate": 3.898581030619865e-05, "loss": 0.0015, "step": 40900 }, { "epoch": 61.082089552238806, "grad_norm": 0.03076171875, "learning_rate": 3.8948469006721434e-05, "loss": 0.0018, "step": 40925 }, { "epoch": 61.11940298507463, "grad_norm": 0.022216796875, "learning_rate": 3.8911127707244215e-05, "loss": 0.0017, "step": 40950 }, { "epoch": 61.156716417910445, "grad_norm": 0.04150390625, "learning_rate": 3.8873786407766996e-05, "loss": 0.0019, "step": 40975 }, { "epoch": 61.19402985074627, "grad_norm": 0.068359375, "learning_rate": 3.883644510828977e-05, "loss": 0.0016, "step": 41000 }, { "epoch": 61.23134328358209, "grad_norm": 0.0264892578125, "learning_rate": 3.8799103808812545e-05, "loss": 0.0019, "step": 41025 }, { "epoch": 61.26865671641791, "grad_norm": 0.0400390625, "learning_rate": 3.8761762509335326e-05, "loss": 0.0016, "step": 41050 }, { "epoch": 61.30597014925373, "grad_norm": 0.0380859375, "learning_rate": 3.87244212098581e-05, "loss": 0.0019, "step": 41075 }, { "epoch": 61.343283582089555, "grad_norm": 0.0322265625, "learning_rate": 3.868707991038089e-05, "loss": 0.002, "step": 41100 }, { "epoch": 61.38059701492537, "grad_norm": 0.042236328125, "learning_rate": 3.864973861090366e-05, "loss": 0.002, "step": 41125 }, { "epoch": 61.417910447761194, "grad_norm": 0.0238037109375, "learning_rate": 3.8612397311426444e-05, "loss": 0.002, "step": 41150 }, { "epoch": 61.45522388059702, "grad_norm": 0.013916015625, "learning_rate": 3.857505601194922e-05, "loss": 0.0019, "step": 41175 }, { "epoch": 61.492537313432834, "grad_norm": 0.02880859375, "learning_rate": 3.853771471247199e-05, "loss": 0.0018, "step": 41200 }, { "epoch": 61.52985074626866, "grad_norm": 0.046142578125, "learning_rate": 3.8500373412994774e-05, "loss": 0.0017, "step": 41225 }, { "epoch": 61.56716417910448, "grad_norm": 0.0302734375, "learning_rate": 3.846303211351755e-05, "loss": 0.0017, "step": 41250 }, { "epoch": 61.6044776119403, "grad_norm": 0.044921875, "learning_rate": 3.8425690814040336e-05, "loss": 0.0019, "step": 41275 }, { "epoch": 61.64179104477612, "grad_norm": 0.028564453125, "learning_rate": 3.838834951456311e-05, "loss": 0.002, "step": 41300 }, { "epoch": 61.67910447761194, "grad_norm": 0.0311279296875, "learning_rate": 3.8351008215085885e-05, "loss": 0.0018, "step": 41325 }, { "epoch": 61.71641791044776, "grad_norm": 0.06982421875, "learning_rate": 3.8313666915608666e-05, "loss": 0.0022, "step": 41350 }, { "epoch": 61.75373134328358, "grad_norm": 0.032470703125, "learning_rate": 3.827632561613144e-05, "loss": 0.0018, "step": 41375 }, { "epoch": 61.791044776119406, "grad_norm": 0.03759765625, "learning_rate": 3.823898431665422e-05, "loss": 0.0018, "step": 41400 }, { "epoch": 61.82835820895522, "grad_norm": 0.02099609375, "learning_rate": 3.8201643017177e-05, "loss": 0.0016, "step": 41425 }, { "epoch": 61.865671641791046, "grad_norm": 0.0301513671875, "learning_rate": 3.816430171769978e-05, "loss": 0.0019, "step": 41450 }, { "epoch": 61.90298507462687, "grad_norm": 0.040283203125, "learning_rate": 3.812696041822256e-05, "loss": 0.0022, "step": 41475 }, { "epoch": 61.940298507462686, "grad_norm": 0.04052734375, "learning_rate": 3.808961911874533e-05, "loss": 0.002, "step": 41500 }, { "epoch": 61.97761194029851, "grad_norm": 0.042724609375, "learning_rate": 3.8052277819268114e-05, "loss": 0.0023, "step": 41525 }, { "epoch": 62.014925373134325, "grad_norm": 0.0255126953125, "learning_rate": 3.801493651979089e-05, "loss": 0.0018, "step": 41550 }, { "epoch": 62.05223880597015, "grad_norm": 0.0302734375, "learning_rate": 3.797759522031367e-05, "loss": 0.002, "step": 41575 }, { "epoch": 62.08955223880597, "grad_norm": 0.028076171875, "learning_rate": 3.794025392083645e-05, "loss": 0.0017, "step": 41600 }, { "epoch": 62.12686567164179, "grad_norm": 0.02001953125, "learning_rate": 3.7902912621359225e-05, "loss": 0.0017, "step": 41625 }, { "epoch": 62.16417910447761, "grad_norm": 0.0191650390625, "learning_rate": 3.7865571321882006e-05, "loss": 0.0019, "step": 41650 }, { "epoch": 62.201492537313435, "grad_norm": 0.034423828125, "learning_rate": 3.782823002240478e-05, "loss": 0.0018, "step": 41675 }, { "epoch": 62.23880597014925, "grad_norm": 0.034423828125, "learning_rate": 3.779088872292756e-05, "loss": 0.0018, "step": 41700 }, { "epoch": 62.276119402985074, "grad_norm": 0.03466796875, "learning_rate": 3.7753547423450336e-05, "loss": 0.0019, "step": 41725 }, { "epoch": 62.3134328358209, "grad_norm": 0.0164794921875, "learning_rate": 3.771620612397311e-05, "loss": 0.0016, "step": 41750 }, { "epoch": 62.350746268656714, "grad_norm": 0.0247802734375, "learning_rate": 3.76788648244959e-05, "loss": 0.0019, "step": 41775 }, { "epoch": 62.38805970149254, "grad_norm": 0.03564453125, "learning_rate": 3.764152352501867e-05, "loss": 0.0018, "step": 41800 }, { "epoch": 62.42537313432836, "grad_norm": 0.04541015625, "learning_rate": 3.7604182225541454e-05, "loss": 0.0019, "step": 41825 }, { "epoch": 62.46268656716418, "grad_norm": 0.051513671875, "learning_rate": 3.756684092606423e-05, "loss": 0.002, "step": 41850 }, { "epoch": 62.5, "grad_norm": 0.0213623046875, "learning_rate": 3.7529499626587e-05, "loss": 0.0019, "step": 41875 }, { "epoch": 62.53731343283582, "grad_norm": 0.0361328125, "learning_rate": 3.7492158327109784e-05, "loss": 0.0021, "step": 41900 }, { "epoch": 62.57462686567164, "grad_norm": 0.04052734375, "learning_rate": 3.7454817027632565e-05, "loss": 0.0023, "step": 41925 }, { "epoch": 62.61194029850746, "grad_norm": 0.0250244140625, "learning_rate": 3.7417475728155346e-05, "loss": 0.0018, "step": 41950 }, { "epoch": 62.649253731343286, "grad_norm": 0.031005859375, "learning_rate": 3.738013442867812e-05, "loss": 0.0019, "step": 41975 }, { "epoch": 62.6865671641791, "grad_norm": 0.033203125, "learning_rate": 3.7342793129200895e-05, "loss": 0.0021, "step": 42000 }, { "epoch": 62.723880597014926, "grad_norm": 0.02099609375, "learning_rate": 3.7305451829723676e-05, "loss": 0.0019, "step": 42025 }, { "epoch": 62.76119402985075, "grad_norm": 0.040283203125, "learning_rate": 3.726811053024645e-05, "loss": 0.002, "step": 42050 }, { "epoch": 62.798507462686565, "grad_norm": 0.031982421875, "learning_rate": 3.723076923076923e-05, "loss": 0.0019, "step": 42075 }, { "epoch": 62.83582089552239, "grad_norm": 0.0380859375, "learning_rate": 3.719342793129201e-05, "loss": 0.0022, "step": 42100 }, { "epoch": 62.87313432835821, "grad_norm": 0.038330078125, "learning_rate": 3.715608663181479e-05, "loss": 0.0019, "step": 42125 }, { "epoch": 62.91044776119403, "grad_norm": 0.019287109375, "learning_rate": 3.711874533233757e-05, "loss": 0.0021, "step": 42150 }, { "epoch": 62.94776119402985, "grad_norm": 0.0250244140625, "learning_rate": 3.708140403286034e-05, "loss": 0.0019, "step": 42175 }, { "epoch": 62.985074626865675, "grad_norm": 0.0291748046875, "learning_rate": 3.7044062733383124e-05, "loss": 0.0019, "step": 42200 }, { "epoch": 63.02238805970149, "grad_norm": 0.0245361328125, "learning_rate": 3.70067214339059e-05, "loss": 0.002, "step": 42225 }, { "epoch": 63.059701492537314, "grad_norm": 0.0216064453125, "learning_rate": 3.696938013442868e-05, "loss": 0.0017, "step": 42250 }, { "epoch": 63.09701492537314, "grad_norm": 0.038818359375, "learning_rate": 3.693203883495146e-05, "loss": 0.002, "step": 42275 }, { "epoch": 63.134328358208954, "grad_norm": 0.026611328125, "learning_rate": 3.6894697535474235e-05, "loss": 0.0014, "step": 42300 }, { "epoch": 63.17164179104478, "grad_norm": 0.0380859375, "learning_rate": 3.6857356235997016e-05, "loss": 0.0019, "step": 42325 }, { "epoch": 63.208955223880594, "grad_norm": 0.0233154296875, "learning_rate": 3.682001493651979e-05, "loss": 0.0016, "step": 42350 }, { "epoch": 63.24626865671642, "grad_norm": 0.052490234375, "learning_rate": 3.678267363704257e-05, "loss": 0.0022, "step": 42375 }, { "epoch": 63.28358208955224, "grad_norm": 0.039794921875, "learning_rate": 3.6745332337565346e-05, "loss": 0.0016, "step": 42400 }, { "epoch": 63.32089552238806, "grad_norm": 0.050537109375, "learning_rate": 3.670799103808813e-05, "loss": 0.002, "step": 42425 }, { "epoch": 63.35820895522388, "grad_norm": 0.0194091796875, "learning_rate": 3.667064973861091e-05, "loss": 0.0015, "step": 42450 }, { "epoch": 63.3955223880597, "grad_norm": 0.0198974609375, "learning_rate": 3.663330843913368e-05, "loss": 0.0018, "step": 42475 }, { "epoch": 63.43283582089552, "grad_norm": 0.0306396484375, "learning_rate": 3.6595967139656464e-05, "loss": 0.0016, "step": 42500 }, { "epoch": 63.47014925373134, "grad_norm": 0.041015625, "learning_rate": 3.655862584017924e-05, "loss": 0.0019, "step": 42525 }, { "epoch": 63.507462686567166, "grad_norm": 0.19921875, "learning_rate": 3.652128454070201e-05, "loss": 0.0024, "step": 42550 }, { "epoch": 63.54477611940298, "grad_norm": 0.05810546875, "learning_rate": 3.6483943241224794e-05, "loss": 0.0026, "step": 42575 }, { "epoch": 63.582089552238806, "grad_norm": 0.17578125, "learning_rate": 3.6446601941747576e-05, "loss": 0.0032, "step": 42600 }, { "epoch": 63.61940298507463, "grad_norm": 0.197265625, "learning_rate": 3.640926064227036e-05, "loss": 0.003, "step": 42625 }, { "epoch": 63.656716417910445, "grad_norm": 0.05224609375, "learning_rate": 3.637191934279313e-05, "loss": 0.0028, "step": 42650 }, { "epoch": 63.69402985074627, "grad_norm": 0.087890625, "learning_rate": 3.633457804331591e-05, "loss": 0.0031, "step": 42675 }, { "epoch": 63.73134328358209, "grad_norm": 0.2060546875, "learning_rate": 3.629723674383869e-05, "loss": 0.0026, "step": 42700 }, { "epoch": 63.76865671641791, "grad_norm": 0.1298828125, "learning_rate": 3.625989544436146e-05, "loss": 0.0027, "step": 42725 }, { "epoch": 63.80597014925373, "grad_norm": 0.1396484375, "learning_rate": 3.622255414488425e-05, "loss": 0.003, "step": 42750 }, { "epoch": 63.843283582089555, "grad_norm": 0.041748046875, "learning_rate": 3.618521284540702e-05, "loss": 0.0024, "step": 42775 }, { "epoch": 63.88059701492537, "grad_norm": 0.037353515625, "learning_rate": 3.6147871545929805e-05, "loss": 0.0026, "step": 42800 }, { "epoch": 63.917910447761194, "grad_norm": 0.036865234375, "learning_rate": 3.611053024645258e-05, "loss": 0.0026, "step": 42825 }, { "epoch": 63.95522388059702, "grad_norm": 0.029296875, "learning_rate": 3.607318894697535e-05, "loss": 0.0025, "step": 42850 }, { "epoch": 63.992537313432834, "grad_norm": 0.0247802734375, "learning_rate": 3.6035847647498135e-05, "loss": 0.0027, "step": 42875 }, { "epoch": 64.02985074626865, "grad_norm": 0.0732421875, "learning_rate": 3.599850634802091e-05, "loss": 0.0021, "step": 42900 }, { "epoch": 64.06716417910448, "grad_norm": 0.0303955078125, "learning_rate": 3.59611650485437e-05, "loss": 0.0022, "step": 42925 }, { "epoch": 64.1044776119403, "grad_norm": 0.034423828125, "learning_rate": 3.592382374906647e-05, "loss": 0.0017, "step": 42950 }, { "epoch": 64.14179104477611, "grad_norm": 0.027099609375, "learning_rate": 3.5886482449589246e-05, "loss": 0.0018, "step": 42975 }, { "epoch": 64.17910447761194, "grad_norm": 0.02294921875, "learning_rate": 3.584914115011203e-05, "loss": 0.0018, "step": 43000 }, { "epoch": 64.21641791044776, "grad_norm": 0.03857421875, "learning_rate": 3.58117998506348e-05, "loss": 0.0022, "step": 43025 }, { "epoch": 64.25373134328358, "grad_norm": 0.134765625, "learning_rate": 3.577445855115758e-05, "loss": 0.0023, "step": 43050 }, { "epoch": 64.2910447761194, "grad_norm": 0.162109375, "learning_rate": 3.573711725168036e-05, "loss": 0.0021, "step": 43075 }, { "epoch": 64.32835820895522, "grad_norm": 0.057373046875, "learning_rate": 3.569977595220314e-05, "loss": 0.0021, "step": 43100 }, { "epoch": 64.36567164179104, "grad_norm": 0.029296875, "learning_rate": 3.566243465272592e-05, "loss": 0.0017, "step": 43125 }, { "epoch": 64.40298507462687, "grad_norm": 0.1669921875, "learning_rate": 3.5625093353248694e-05, "loss": 0.002, "step": 43150 }, { "epoch": 64.44029850746269, "grad_norm": 0.0830078125, "learning_rate": 3.5587752053771475e-05, "loss": 0.0021, "step": 43175 }, { "epoch": 64.4776119402985, "grad_norm": 0.07421875, "learning_rate": 3.555041075429425e-05, "loss": 0.0021, "step": 43200 }, { "epoch": 64.51492537313433, "grad_norm": 0.0390625, "learning_rate": 3.551306945481703e-05, "loss": 0.0021, "step": 43225 }, { "epoch": 64.55223880597015, "grad_norm": 0.07421875, "learning_rate": 3.547572815533981e-05, "loss": 0.0019, "step": 43250 }, { "epoch": 64.58955223880596, "grad_norm": 0.04931640625, "learning_rate": 3.5438386855862586e-05, "loss": 0.0022, "step": 43275 }, { "epoch": 64.6268656716418, "grad_norm": 0.0302734375, "learning_rate": 3.540104555638537e-05, "loss": 0.0021, "step": 43300 }, { "epoch": 64.66417910447761, "grad_norm": 0.0294189453125, "learning_rate": 3.536370425690814e-05, "loss": 0.002, "step": 43325 }, { "epoch": 64.70149253731343, "grad_norm": 0.055419921875, "learning_rate": 3.532636295743092e-05, "loss": 0.0021, "step": 43350 }, { "epoch": 64.73880597014926, "grad_norm": 0.048095703125, "learning_rate": 3.52890216579537e-05, "loss": 0.002, "step": 43375 }, { "epoch": 64.77611940298507, "grad_norm": 0.09619140625, "learning_rate": 3.525168035847647e-05, "loss": 0.0022, "step": 43400 }, { "epoch": 64.81343283582089, "grad_norm": 0.032470703125, "learning_rate": 3.521433905899926e-05, "loss": 0.0021, "step": 43425 }, { "epoch": 64.85074626865672, "grad_norm": 0.0498046875, "learning_rate": 3.5176997759522034e-05, "loss": 0.002, "step": 43450 }, { "epoch": 64.88805970149254, "grad_norm": 0.033447265625, "learning_rate": 3.5139656460044815e-05, "loss": 0.0018, "step": 43475 }, { "epoch": 64.92537313432835, "grad_norm": 0.041015625, "learning_rate": 3.510231516056759e-05, "loss": 0.0021, "step": 43500 }, { "epoch": 64.96268656716418, "grad_norm": 0.040771484375, "learning_rate": 3.5064973861090364e-05, "loss": 0.0021, "step": 43525 }, { "epoch": 65.0, "grad_norm": 0.038330078125, "learning_rate": 3.5027632561613145e-05, "loss": 0.002, "step": 43550 }, { "epoch": 65.03731343283582, "grad_norm": 0.0390625, "learning_rate": 3.499029126213592e-05, "loss": 0.0017, "step": 43575 }, { "epoch": 65.07462686567165, "grad_norm": 0.027099609375, "learning_rate": 3.495294996265871e-05, "loss": 0.0016, "step": 43600 }, { "epoch": 65.11194029850746, "grad_norm": 0.032470703125, "learning_rate": 3.491560866318148e-05, "loss": 0.0017, "step": 43625 }, { "epoch": 65.14925373134328, "grad_norm": 0.031494140625, "learning_rate": 3.4878267363704256e-05, "loss": 0.0017, "step": 43650 }, { "epoch": 65.18656716417911, "grad_norm": 0.02490234375, "learning_rate": 3.484092606422704e-05, "loss": 0.0016, "step": 43675 }, { "epoch": 65.22388059701493, "grad_norm": 0.044921875, "learning_rate": 3.480358476474981e-05, "loss": 0.0018, "step": 43700 }, { "epoch": 65.26119402985074, "grad_norm": 0.0341796875, "learning_rate": 3.476624346527259e-05, "loss": 0.0017, "step": 43725 }, { "epoch": 65.29850746268657, "grad_norm": 0.035400390625, "learning_rate": 3.4728902165795374e-05, "loss": 0.0017, "step": 43750 }, { "epoch": 65.33582089552239, "grad_norm": 0.0205078125, "learning_rate": 3.469156086631815e-05, "loss": 0.0017, "step": 43775 }, { "epoch": 65.3731343283582, "grad_norm": 0.023681640625, "learning_rate": 3.465421956684093e-05, "loss": 0.0017, "step": 43800 }, { "epoch": 65.41044776119404, "grad_norm": 0.033203125, "learning_rate": 3.4616878267363704e-05, "loss": 0.0017, "step": 43825 }, { "epoch": 65.44776119402985, "grad_norm": 0.017578125, "learning_rate": 3.4579536967886485e-05, "loss": 0.0018, "step": 43850 }, { "epoch": 65.48507462686567, "grad_norm": 0.03515625, "learning_rate": 3.454219566840926e-05, "loss": 0.0017, "step": 43875 }, { "epoch": 65.5223880597015, "grad_norm": 0.0390625, "learning_rate": 3.450485436893204e-05, "loss": 0.0019, "step": 43900 }, { "epoch": 65.55970149253731, "grad_norm": 0.01202392578125, "learning_rate": 3.446751306945482e-05, "loss": 0.0017, "step": 43925 }, { "epoch": 65.59701492537313, "grad_norm": 0.039306640625, "learning_rate": 3.4430171769977596e-05, "loss": 0.0017, "step": 43950 }, { "epoch": 65.63432835820896, "grad_norm": 0.034423828125, "learning_rate": 3.439283047050038e-05, "loss": 0.0016, "step": 43975 }, { "epoch": 65.67164179104478, "grad_norm": 0.03564453125, "learning_rate": 3.435548917102315e-05, "loss": 0.0018, "step": 44000 }, { "epoch": 65.7089552238806, "grad_norm": 0.0198974609375, "learning_rate": 3.431814787154593e-05, "loss": 0.0018, "step": 44025 }, { "epoch": 65.74626865671642, "grad_norm": 0.029052734375, "learning_rate": 3.428080657206871e-05, "loss": 0.0018, "step": 44050 }, { "epoch": 65.78358208955224, "grad_norm": 0.03759765625, "learning_rate": 3.424346527259149e-05, "loss": 0.002, "step": 44075 }, { "epoch": 65.82089552238806, "grad_norm": 0.038818359375, "learning_rate": 3.420612397311427e-05, "loss": 0.0017, "step": 44100 }, { "epoch": 65.85820895522389, "grad_norm": 0.020263671875, "learning_rate": 3.4168782673637044e-05, "loss": 0.0017, "step": 44125 }, { "epoch": 65.8955223880597, "grad_norm": 0.046875, "learning_rate": 3.4131441374159825e-05, "loss": 0.0017, "step": 44150 }, { "epoch": 65.93283582089552, "grad_norm": 0.039794921875, "learning_rate": 3.40941000746826e-05, "loss": 0.002, "step": 44175 }, { "epoch": 65.97014925373135, "grad_norm": 0.045166015625, "learning_rate": 3.405675877520538e-05, "loss": 0.0018, "step": 44200 }, { "epoch": 66.00746268656717, "grad_norm": 0.02783203125, "learning_rate": 3.4019417475728155e-05, "loss": 0.0017, "step": 44225 }, { "epoch": 66.04477611940298, "grad_norm": 0.038330078125, "learning_rate": 3.3982076176250936e-05, "loss": 0.0016, "step": 44250 }, { "epoch": 66.08208955223881, "grad_norm": 0.0302734375, "learning_rate": 3.394473487677372e-05, "loss": 0.0017, "step": 44275 }, { "epoch": 66.11940298507463, "grad_norm": 0.02392578125, "learning_rate": 3.390739357729649e-05, "loss": 0.0016, "step": 44300 }, { "epoch": 66.15671641791045, "grad_norm": 0.0257568359375, "learning_rate": 3.387005227781927e-05, "loss": 0.0018, "step": 44325 }, { "epoch": 66.19402985074628, "grad_norm": 0.0245361328125, "learning_rate": 3.383271097834205e-05, "loss": 0.0015, "step": 44350 }, { "epoch": 66.23134328358209, "grad_norm": 0.0260009765625, "learning_rate": 3.379536967886482e-05, "loss": 0.0015, "step": 44375 }, { "epoch": 66.26865671641791, "grad_norm": 0.035888671875, "learning_rate": 3.37580283793876e-05, "loss": 0.0018, "step": 44400 }, { "epoch": 66.30597014925372, "grad_norm": 0.0380859375, "learning_rate": 3.3720687079910384e-05, "loss": 0.0018, "step": 44425 }, { "epoch": 66.34328358208955, "grad_norm": 0.049560546875, "learning_rate": 3.3683345780433165e-05, "loss": 0.0015, "step": 44450 }, { "epoch": 66.38059701492537, "grad_norm": 0.03125, "learning_rate": 3.364600448095594e-05, "loss": 0.0017, "step": 44475 }, { "epoch": 66.41791044776119, "grad_norm": 0.0242919921875, "learning_rate": 3.3608663181478714e-05, "loss": 0.0017, "step": 44500 }, { "epoch": 66.45522388059702, "grad_norm": 0.028564453125, "learning_rate": 3.3571321882001495e-05, "loss": 0.0018, "step": 44525 }, { "epoch": 66.49253731343283, "grad_norm": 0.0211181640625, "learning_rate": 3.353398058252427e-05, "loss": 0.0021, "step": 44550 }, { "epoch": 66.52985074626865, "grad_norm": 0.01422119140625, "learning_rate": 3.349663928304706e-05, "loss": 0.0018, "step": 44575 }, { "epoch": 66.56716417910448, "grad_norm": 0.017578125, "learning_rate": 3.345929798356983e-05, "loss": 0.0017, "step": 44600 }, { "epoch": 66.6044776119403, "grad_norm": 0.0079345703125, "learning_rate": 3.3421956684092606e-05, "loss": 0.0019, "step": 44625 }, { "epoch": 66.64179104477611, "grad_norm": 0.02978515625, "learning_rate": 3.338461538461539e-05, "loss": 0.0021, "step": 44650 }, { "epoch": 66.67910447761194, "grad_norm": 0.03759765625, "learning_rate": 3.334727408513816e-05, "loss": 0.0017, "step": 44675 }, { "epoch": 66.71641791044776, "grad_norm": 0.04150390625, "learning_rate": 3.330993278566094e-05, "loss": 0.0019, "step": 44700 }, { "epoch": 66.75373134328358, "grad_norm": 0.0303955078125, "learning_rate": 3.327259148618372e-05, "loss": 0.0019, "step": 44725 }, { "epoch": 66.7910447761194, "grad_norm": 0.01953125, "learning_rate": 3.32352501867065e-05, "loss": 0.0016, "step": 44750 }, { "epoch": 66.82835820895522, "grad_norm": 0.0179443359375, "learning_rate": 3.319790888722928e-05, "loss": 0.0019, "step": 44775 }, { "epoch": 66.86567164179104, "grad_norm": 0.040283203125, "learning_rate": 3.3160567587752054e-05, "loss": 0.0018, "step": 44800 }, { "epoch": 66.90298507462687, "grad_norm": 0.0255126953125, "learning_rate": 3.3123226288274836e-05, "loss": 0.0021, "step": 44825 }, { "epoch": 66.94029850746269, "grad_norm": 0.028564453125, "learning_rate": 3.308588498879761e-05, "loss": 0.0018, "step": 44850 }, { "epoch": 66.9776119402985, "grad_norm": 0.035888671875, "learning_rate": 3.304854368932039e-05, "loss": 0.0021, "step": 44875 }, { "epoch": 67.01492537313433, "grad_norm": 0.032470703125, "learning_rate": 3.3011202389843165e-05, "loss": 0.0017, "step": 44900 }, { "epoch": 67.05223880597015, "grad_norm": 0.032958984375, "learning_rate": 3.297386109036595e-05, "loss": 0.0014, "step": 44925 }, { "epoch": 67.08955223880596, "grad_norm": 0.02978515625, "learning_rate": 3.293651979088873e-05, "loss": 0.0016, "step": 44950 }, { "epoch": 67.1268656716418, "grad_norm": 0.031982421875, "learning_rate": 3.28991784914115e-05, "loss": 0.0018, "step": 44975 }, { "epoch": 67.16417910447761, "grad_norm": 0.046630859375, "learning_rate": 3.286183719193428e-05, "loss": 0.0017, "step": 45000 }, { "epoch": 67.20149253731343, "grad_norm": 0.033447265625, "learning_rate": 3.282449589245706e-05, "loss": 0.0019, "step": 45025 }, { "epoch": 67.23880597014926, "grad_norm": 0.031494140625, "learning_rate": 3.278715459297983e-05, "loss": 0.0018, "step": 45050 }, { "epoch": 67.27611940298507, "grad_norm": 0.034423828125, "learning_rate": 3.274981329350262e-05, "loss": 0.0018, "step": 45075 }, { "epoch": 67.31343283582089, "grad_norm": 0.0400390625, "learning_rate": 3.2712471994025395e-05, "loss": 0.0018, "step": 45100 }, { "epoch": 67.35074626865672, "grad_norm": 0.029541015625, "learning_rate": 3.2675130694548176e-05, "loss": 0.0021, "step": 45125 }, { "epoch": 67.38805970149254, "grad_norm": 0.047607421875, "learning_rate": 3.263778939507095e-05, "loss": 0.002, "step": 45150 }, { "epoch": 67.42537313432835, "grad_norm": 0.04296875, "learning_rate": 3.2600448095593724e-05, "loss": 0.0018, "step": 45175 }, { "epoch": 67.46268656716418, "grad_norm": 0.024169921875, "learning_rate": 3.2563106796116506e-05, "loss": 0.0018, "step": 45200 }, { "epoch": 67.5, "grad_norm": 0.0272216796875, "learning_rate": 3.252576549663928e-05, "loss": 0.0016, "step": 45225 }, { "epoch": 67.53731343283582, "grad_norm": 0.03125, "learning_rate": 3.248842419716207e-05, "loss": 0.0017, "step": 45250 }, { "epoch": 67.57462686567165, "grad_norm": 0.022216796875, "learning_rate": 3.245108289768484e-05, "loss": 0.0016, "step": 45275 }, { "epoch": 67.61194029850746, "grad_norm": 0.030517578125, "learning_rate": 3.241374159820762e-05, "loss": 0.0019, "step": 45300 }, { "epoch": 67.64925373134328, "grad_norm": 0.0216064453125, "learning_rate": 3.23764002987304e-05, "loss": 0.002, "step": 45325 }, { "epoch": 67.68656716417911, "grad_norm": 0.03271484375, "learning_rate": 3.233905899925317e-05, "loss": 0.0017, "step": 45350 }, { "epoch": 67.72388059701493, "grad_norm": 0.03857421875, "learning_rate": 3.2301717699775954e-05, "loss": 0.0019, "step": 45375 }, { "epoch": 67.76119402985074, "grad_norm": 0.03125, "learning_rate": 3.226437640029873e-05, "loss": 0.0018, "step": 45400 }, { "epoch": 67.79850746268657, "grad_norm": 0.0306396484375, "learning_rate": 3.222703510082151e-05, "loss": 0.0017, "step": 45425 }, { "epoch": 67.83582089552239, "grad_norm": 0.048095703125, "learning_rate": 3.218969380134429e-05, "loss": 0.002, "step": 45450 }, { "epoch": 67.8731343283582, "grad_norm": 0.08544921875, "learning_rate": 3.2152352501867065e-05, "loss": 0.0016, "step": 45475 }, { "epoch": 67.91044776119404, "grad_norm": 0.0537109375, "learning_rate": 3.2115011202389846e-05, "loss": 0.0017, "step": 45500 }, { "epoch": 67.94776119402985, "grad_norm": 0.058837890625, "learning_rate": 3.207766990291262e-05, "loss": 0.002, "step": 45525 }, { "epoch": 67.98507462686567, "grad_norm": 0.04345703125, "learning_rate": 3.20403286034354e-05, "loss": 0.0016, "step": 45550 }, { "epoch": 68.0223880597015, "grad_norm": 0.0301513671875, "learning_rate": 3.200298730395818e-05, "loss": 0.0019, "step": 45575 }, { "epoch": 68.05970149253731, "grad_norm": 0.032958984375, "learning_rate": 3.196564600448096e-05, "loss": 0.0018, "step": 45600 }, { "epoch": 68.09701492537313, "grad_norm": 0.036376953125, "learning_rate": 3.192830470500374e-05, "loss": 0.0019, "step": 45625 }, { "epoch": 68.13432835820896, "grad_norm": 0.0130615234375, "learning_rate": 3.189096340552651e-05, "loss": 0.0017, "step": 45650 }, { "epoch": 68.17164179104478, "grad_norm": 0.044921875, "learning_rate": 3.1853622106049294e-05, "loss": 0.0016, "step": 45675 }, { "epoch": 68.2089552238806, "grad_norm": 0.0216064453125, "learning_rate": 3.181628080657207e-05, "loss": 0.0015, "step": 45700 }, { "epoch": 68.24626865671642, "grad_norm": 0.03515625, "learning_rate": 3.177893950709485e-05, "loss": 0.0016, "step": 45725 }, { "epoch": 68.28358208955224, "grad_norm": 0.0206298828125, "learning_rate": 3.174159820761763e-05, "loss": 0.0017, "step": 45750 }, { "epoch": 68.32089552238806, "grad_norm": 0.032958984375, "learning_rate": 3.1704256908140405e-05, "loss": 0.0017, "step": 45775 }, { "epoch": 68.35820895522389, "grad_norm": 0.03173828125, "learning_rate": 3.1666915608663186e-05, "loss": 0.0015, "step": 45800 }, { "epoch": 68.3955223880597, "grad_norm": 0.025390625, "learning_rate": 3.162957430918596e-05, "loss": 0.0015, "step": 45825 }, { "epoch": 68.43283582089552, "grad_norm": 0.042236328125, "learning_rate": 3.159223300970874e-05, "loss": 0.0018, "step": 45850 }, { "epoch": 68.47014925373135, "grad_norm": 0.033447265625, "learning_rate": 3.1554891710231516e-05, "loss": 0.0018, "step": 45875 }, { "epoch": 68.50746268656717, "grad_norm": 0.0234375, "learning_rate": 3.15175504107543e-05, "loss": 0.0018, "step": 45900 }, { "epoch": 68.54477611940298, "grad_norm": 0.006622314453125, "learning_rate": 3.148020911127708e-05, "loss": 0.0019, "step": 45925 }, { "epoch": 68.58208955223881, "grad_norm": 0.038330078125, "learning_rate": 3.144286781179985e-05, "loss": 0.0015, "step": 45950 }, { "epoch": 68.61940298507463, "grad_norm": 0.0419921875, "learning_rate": 3.1405526512322634e-05, "loss": 0.0019, "step": 45975 }, { "epoch": 68.65671641791045, "grad_norm": 0.01507568359375, "learning_rate": 3.136818521284541e-05, "loss": 0.0017, "step": 46000 }, { "epoch": 68.69402985074628, "grad_norm": 0.041259765625, "learning_rate": 3.133084391336818e-05, "loss": 0.0016, "step": 46025 }, { "epoch": 68.73134328358209, "grad_norm": 0.037841796875, "learning_rate": 3.1293502613890964e-05, "loss": 0.0017, "step": 46050 }, { "epoch": 68.76865671641791, "grad_norm": 0.031982421875, "learning_rate": 3.1256161314413745e-05, "loss": 0.002, "step": 46075 }, { "epoch": 68.80597014925372, "grad_norm": 0.03662109375, "learning_rate": 3.1218820014936526e-05, "loss": 0.002, "step": 46100 }, { "epoch": 68.84328358208955, "grad_norm": 0.0361328125, "learning_rate": 3.11814787154593e-05, "loss": 0.0019, "step": 46125 }, { "epoch": 68.88059701492537, "grad_norm": 0.0286865234375, "learning_rate": 3.1144137415982075e-05, "loss": 0.002, "step": 46150 }, { "epoch": 68.91791044776119, "grad_norm": 0.0498046875, "learning_rate": 3.1106796116504856e-05, "loss": 0.0017, "step": 46175 }, { "epoch": 68.95522388059702, "grad_norm": 0.029296875, "learning_rate": 3.106945481702763e-05, "loss": 0.002, "step": 46200 }, { "epoch": 68.99253731343283, "grad_norm": 0.0252685546875, "learning_rate": 3.103211351755041e-05, "loss": 0.0016, "step": 46225 }, { "epoch": 69.02985074626865, "grad_norm": 0.0380859375, "learning_rate": 3.099477221807319e-05, "loss": 0.0018, "step": 46250 }, { "epoch": 69.06716417910448, "grad_norm": 0.044189453125, "learning_rate": 3.095743091859597e-05, "loss": 0.0018, "step": 46275 }, { "epoch": 69.1044776119403, "grad_norm": 0.03857421875, "learning_rate": 3.092008961911875e-05, "loss": 0.002, "step": 46300 }, { "epoch": 69.14179104477611, "grad_norm": 0.035888671875, "learning_rate": 3.088274831964152e-05, "loss": 0.0017, "step": 46325 }, { "epoch": 69.17910447761194, "grad_norm": 0.0244140625, "learning_rate": 3.0845407020164304e-05, "loss": 0.0018, "step": 46350 }, { "epoch": 69.21641791044776, "grad_norm": 0.037109375, "learning_rate": 3.080806572068708e-05, "loss": 0.0019, "step": 46375 }, { "epoch": 69.25373134328358, "grad_norm": 0.0289306640625, "learning_rate": 3.077072442120986e-05, "loss": 0.0015, "step": 46400 }, { "epoch": 69.2910447761194, "grad_norm": 0.036865234375, "learning_rate": 3.073338312173264e-05, "loss": 0.0016, "step": 46425 }, { "epoch": 69.32835820895522, "grad_norm": 0.032470703125, "learning_rate": 3.0696041822255415e-05, "loss": 0.0017, "step": 46450 }, { "epoch": 69.36567164179104, "grad_norm": 0.0361328125, "learning_rate": 3.0658700522778196e-05, "loss": 0.0018, "step": 46475 }, { "epoch": 69.40298507462687, "grad_norm": 0.0380859375, "learning_rate": 3.062135922330097e-05, "loss": 0.0017, "step": 46500 }, { "epoch": 69.44029850746269, "grad_norm": 0.03662109375, "learning_rate": 3.058401792382375e-05, "loss": 0.0019, "step": 46525 }, { "epoch": 69.4776119402985, "grad_norm": 0.0260009765625, "learning_rate": 3.0546676624346526e-05, "loss": 0.0017, "step": 46550 }, { "epoch": 69.51492537313433, "grad_norm": 0.037353515625, "learning_rate": 3.050933532486931e-05, "loss": 0.002, "step": 46575 }, { "epoch": 69.55223880597015, "grad_norm": 0.037841796875, "learning_rate": 3.0471994025392085e-05, "loss": 0.0017, "step": 46600 }, { "epoch": 69.58955223880596, "grad_norm": 0.0286865234375, "learning_rate": 3.0434652725914863e-05, "loss": 0.0018, "step": 46625 }, { "epoch": 69.6268656716418, "grad_norm": 0.02685546875, "learning_rate": 3.039731142643764e-05, "loss": 0.0019, "step": 46650 }, { "epoch": 69.66417910447761, "grad_norm": 0.042236328125, "learning_rate": 3.035997012696042e-05, "loss": 0.0017, "step": 46675 }, { "epoch": 69.70149253731343, "grad_norm": 0.024658203125, "learning_rate": 3.0322628827483196e-05, "loss": 0.0016, "step": 46700 }, { "epoch": 69.73880597014926, "grad_norm": 0.0279541015625, "learning_rate": 3.0285287528005974e-05, "loss": 0.0016, "step": 46725 }, { "epoch": 69.77611940298507, "grad_norm": 0.029052734375, "learning_rate": 3.0247946228528755e-05, "loss": 0.0021, "step": 46750 }, { "epoch": 69.81343283582089, "grad_norm": 0.03955078125, "learning_rate": 3.0210604929051533e-05, "loss": 0.0017, "step": 46775 }, { "epoch": 69.85074626865672, "grad_norm": 0.03466796875, "learning_rate": 3.017326362957431e-05, "loss": 0.0015, "step": 46800 }, { "epoch": 69.88805970149254, "grad_norm": 0.03466796875, "learning_rate": 3.013592233009709e-05, "loss": 0.0016, "step": 46825 }, { "epoch": 69.92537313432835, "grad_norm": 0.0390625, "learning_rate": 3.0098581030619866e-05, "loss": 0.0017, "step": 46850 }, { "epoch": 69.96268656716418, "grad_norm": 0.050537109375, "learning_rate": 3.0061239731142644e-05, "loss": 0.0017, "step": 46875 }, { "epoch": 70.0, "grad_norm": 0.05615234375, "learning_rate": 3.0023898431665425e-05, "loss": 0.0018, "step": 46900 }, { "epoch": 70.03731343283582, "grad_norm": 0.0208740234375, "learning_rate": 2.9986557132188203e-05, "loss": 0.0016, "step": 46925 }, { "epoch": 70.07462686567165, "grad_norm": 0.03271484375, "learning_rate": 2.994921583271098e-05, "loss": 0.0016, "step": 46950 }, { "epoch": 70.11194029850746, "grad_norm": 0.0223388671875, "learning_rate": 2.991187453323376e-05, "loss": 0.0015, "step": 46975 }, { "epoch": 70.14925373134328, "grad_norm": 0.0279541015625, "learning_rate": 2.9874533233756537e-05, "loss": 0.0018, "step": 47000 }, { "epoch": 70.18656716417911, "grad_norm": 0.03173828125, "learning_rate": 2.9837191934279314e-05, "loss": 0.0018, "step": 47025 }, { "epoch": 70.22388059701493, "grad_norm": 0.01025390625, "learning_rate": 2.979985063480209e-05, "loss": 0.0017, "step": 47050 }, { "epoch": 70.26119402985074, "grad_norm": 0.034423828125, "learning_rate": 2.9762509335324873e-05, "loss": 0.0018, "step": 47075 }, { "epoch": 70.29850746268657, "grad_norm": 0.0247802734375, "learning_rate": 2.972516803584765e-05, "loss": 0.0017, "step": 47100 }, { "epoch": 70.33582089552239, "grad_norm": 0.04296875, "learning_rate": 2.968782673637043e-05, "loss": 0.0018, "step": 47125 }, { "epoch": 70.3731343283582, "grad_norm": 0.02099609375, "learning_rate": 2.9650485436893207e-05, "loss": 0.0019, "step": 47150 }, { "epoch": 70.41044776119404, "grad_norm": 0.021240234375, "learning_rate": 2.961314413741598e-05, "loss": 0.0018, "step": 47175 }, { "epoch": 70.44776119402985, "grad_norm": 0.031494140625, "learning_rate": 2.957580283793876e-05, "loss": 0.0019, "step": 47200 }, { "epoch": 70.48507462686567, "grad_norm": 0.0306396484375, "learning_rate": 2.9538461538461543e-05, "loss": 0.0017, "step": 47225 }, { "epoch": 70.5223880597015, "grad_norm": 0.0140380859375, "learning_rate": 2.950112023898432e-05, "loss": 0.0015, "step": 47250 }, { "epoch": 70.55970149253731, "grad_norm": 0.032470703125, "learning_rate": 2.94637789395071e-05, "loss": 0.0013, "step": 47275 }, { "epoch": 70.59701492537313, "grad_norm": 0.02783203125, "learning_rate": 2.9426437640029873e-05, "loss": 0.0018, "step": 47300 }, { "epoch": 70.63432835820896, "grad_norm": 0.033447265625, "learning_rate": 2.938909634055265e-05, "loss": 0.0017, "step": 47325 }, { "epoch": 70.67164179104478, "grad_norm": 0.01953125, "learning_rate": 2.935175504107543e-05, "loss": 0.0016, "step": 47350 }, { "epoch": 70.7089552238806, "grad_norm": 0.0159912109375, "learning_rate": 2.9314413741598207e-05, "loss": 0.0016, "step": 47375 }, { "epoch": 70.74626865671642, "grad_norm": 0.033935546875, "learning_rate": 2.927707244212099e-05, "loss": 0.002, "step": 47400 }, { "epoch": 70.78358208955224, "grad_norm": 0.0306396484375, "learning_rate": 2.9239731142643766e-05, "loss": 0.0019, "step": 47425 }, { "epoch": 70.82089552238806, "grad_norm": 0.034912109375, "learning_rate": 2.9202389843166543e-05, "loss": 0.0018, "step": 47450 }, { "epoch": 70.85820895522389, "grad_norm": 0.044189453125, "learning_rate": 2.916504854368932e-05, "loss": 0.002, "step": 47475 }, { "epoch": 70.8955223880597, "grad_norm": 0.035888671875, "learning_rate": 2.91277072442121e-05, "loss": 0.002, "step": 47500 }, { "epoch": 70.93283582089552, "grad_norm": 0.038330078125, "learning_rate": 2.9090365944734877e-05, "loss": 0.0018, "step": 47525 }, { "epoch": 70.97014925373135, "grad_norm": 0.03271484375, "learning_rate": 2.9053024645257655e-05, "loss": 0.0019, "step": 47550 }, { "epoch": 71.00746268656717, "grad_norm": 0.04150390625, "learning_rate": 2.9015683345780436e-05, "loss": 0.0019, "step": 47575 }, { "epoch": 71.04477611940298, "grad_norm": 0.0262451171875, "learning_rate": 2.8978342046303214e-05, "loss": 0.0018, "step": 47600 }, { "epoch": 71.08208955223881, "grad_norm": 0.029052734375, "learning_rate": 2.894100074682599e-05, "loss": 0.0017, "step": 47625 }, { "epoch": 71.11940298507463, "grad_norm": 0.052001953125, "learning_rate": 2.890365944734877e-05, "loss": 0.0018, "step": 47650 }, { "epoch": 71.15671641791045, "grad_norm": 0.03173828125, "learning_rate": 2.8866318147871547e-05, "loss": 0.002, "step": 47675 }, { "epoch": 71.19402985074628, "grad_norm": 0.02685546875, "learning_rate": 2.8828976848394325e-05, "loss": 0.0019, "step": 47700 }, { "epoch": 71.23134328358209, "grad_norm": 0.0267333984375, "learning_rate": 2.8791635548917106e-05, "loss": 0.0018, "step": 47725 }, { "epoch": 71.26865671641791, "grad_norm": 0.037353515625, "learning_rate": 2.8754294249439884e-05, "loss": 0.0017, "step": 47750 }, { "epoch": 71.30597014925372, "grad_norm": 0.0194091796875, "learning_rate": 2.871695294996266e-05, "loss": 0.0015, "step": 47775 }, { "epoch": 71.34328358208955, "grad_norm": 0.028076171875, "learning_rate": 2.867961165048544e-05, "loss": 0.0015, "step": 47800 }, { "epoch": 71.38059701492537, "grad_norm": 0.026611328125, "learning_rate": 2.8642270351008217e-05, "loss": 0.0015, "step": 47825 }, { "epoch": 71.41791044776119, "grad_norm": 0.0238037109375, "learning_rate": 2.8604929051530995e-05, "loss": 0.0017, "step": 47850 }, { "epoch": 71.45522388059702, "grad_norm": 0.0242919921875, "learning_rate": 2.856758775205377e-05, "loss": 0.0015, "step": 47875 }, { "epoch": 71.49253731343283, "grad_norm": 0.05517578125, "learning_rate": 2.8530246452576554e-05, "loss": 0.0017, "step": 47900 }, { "epoch": 71.52985074626865, "grad_norm": 0.036865234375, "learning_rate": 2.849290515309933e-05, "loss": 0.0018, "step": 47925 }, { "epoch": 71.56716417910448, "grad_norm": 0.028076171875, "learning_rate": 2.845556385362211e-05, "loss": 0.0016, "step": 47950 }, { "epoch": 71.6044776119403, "grad_norm": 0.0390625, "learning_rate": 2.8418222554144887e-05, "loss": 0.0018, "step": 47975 }, { "epoch": 71.64179104477611, "grad_norm": 0.0299072265625, "learning_rate": 2.838088125466766e-05, "loss": 0.0017, "step": 48000 }, { "epoch": 71.67910447761194, "grad_norm": 0.0277099609375, "learning_rate": 2.834353995519044e-05, "loss": 0.002, "step": 48025 }, { "epoch": 71.71641791044776, "grad_norm": 0.036865234375, "learning_rate": 2.8306198655713217e-05, "loss": 0.0017, "step": 48050 }, { "epoch": 71.75373134328358, "grad_norm": 0.031494140625, "learning_rate": 2.8268857356236e-05, "loss": 0.0018, "step": 48075 }, { "epoch": 71.7910447761194, "grad_norm": 0.02587890625, "learning_rate": 2.823151605675878e-05, "loss": 0.0015, "step": 48100 }, { "epoch": 71.82835820895522, "grad_norm": 0.035400390625, "learning_rate": 2.8194174757281554e-05, "loss": 0.0018, "step": 48125 }, { "epoch": 71.86567164179104, "grad_norm": 0.0223388671875, "learning_rate": 2.815683345780433e-05, "loss": 0.0016, "step": 48150 }, { "epoch": 71.90298507462687, "grad_norm": 0.0419921875, "learning_rate": 2.811949215832711e-05, "loss": 0.0018, "step": 48175 }, { "epoch": 71.94029850746269, "grad_norm": 0.036376953125, "learning_rate": 2.8082150858849887e-05, "loss": 0.002, "step": 48200 }, { "epoch": 71.9776119402985, "grad_norm": 0.06640625, "learning_rate": 2.804480955937267e-05, "loss": 0.0021, "step": 48225 }, { "epoch": 72.01492537313433, "grad_norm": 0.0303955078125, "learning_rate": 2.8007468259895446e-05, "loss": 0.0019, "step": 48250 }, { "epoch": 72.05223880597015, "grad_norm": 0.041748046875, "learning_rate": 2.7970126960418224e-05, "loss": 0.0016, "step": 48275 }, { "epoch": 72.08955223880596, "grad_norm": 0.006927490234375, "learning_rate": 2.7932785660941e-05, "loss": 0.0014, "step": 48300 }, { "epoch": 72.1268656716418, "grad_norm": 0.035888671875, "learning_rate": 2.789544436146378e-05, "loss": 0.0017, "step": 48325 }, { "epoch": 72.16417910447761, "grad_norm": 0.032958984375, "learning_rate": 2.7858103061986557e-05, "loss": 0.0018, "step": 48350 }, { "epoch": 72.20149253731343, "grad_norm": 0.032470703125, "learning_rate": 2.7820761762509335e-05, "loss": 0.0018, "step": 48375 }, { "epoch": 72.23880597014926, "grad_norm": 0.03564453125, "learning_rate": 2.7783420463032116e-05, "loss": 0.0019, "step": 48400 }, { "epoch": 72.27611940298507, "grad_norm": 0.035888671875, "learning_rate": 2.7746079163554894e-05, "loss": 0.0014, "step": 48425 }, { "epoch": 72.31343283582089, "grad_norm": 0.024658203125, "learning_rate": 2.7708737864077672e-05, "loss": 0.0019, "step": 48450 }, { "epoch": 72.35074626865672, "grad_norm": 0.02685546875, "learning_rate": 2.767139656460045e-05, "loss": 0.0016, "step": 48475 }, { "epoch": 72.38805970149254, "grad_norm": 0.026611328125, "learning_rate": 2.7634055265123227e-05, "loss": 0.0019, "step": 48500 }, { "epoch": 72.42537313432835, "grad_norm": 0.0011138916015625, "learning_rate": 2.7596713965646005e-05, "loss": 0.0016, "step": 48525 }, { "epoch": 72.46268656716418, "grad_norm": 0.0595703125, "learning_rate": 2.7559372666168786e-05, "loss": 0.0019, "step": 48550 }, { "epoch": 72.5, "grad_norm": 0.0322265625, "learning_rate": 2.7522031366691564e-05, "loss": 0.0019, "step": 48575 }, { "epoch": 72.53731343283582, "grad_norm": 0.040283203125, "learning_rate": 2.7484690067214342e-05, "loss": 0.0016, "step": 48600 }, { "epoch": 72.57462686567165, "grad_norm": 0.0341796875, "learning_rate": 2.744734876773712e-05, "loss": 0.0018, "step": 48625 }, { "epoch": 72.61194029850746, "grad_norm": 0.031494140625, "learning_rate": 2.7410007468259897e-05, "loss": 0.0017, "step": 48650 }, { "epoch": 72.64925373134328, "grad_norm": 0.042724609375, "learning_rate": 2.7372666168782675e-05, "loss": 0.0017, "step": 48675 }, { "epoch": 72.68656716417911, "grad_norm": 0.030517578125, "learning_rate": 2.733532486930545e-05, "loss": 0.002, "step": 48700 }, { "epoch": 72.72388059701493, "grad_norm": 0.024658203125, "learning_rate": 2.7297983569828234e-05, "loss": 0.002, "step": 48725 }, { "epoch": 72.76119402985074, "grad_norm": 0.02685546875, "learning_rate": 2.7260642270351012e-05, "loss": 0.0018, "step": 48750 }, { "epoch": 72.79850746268657, "grad_norm": 0.028564453125, "learning_rate": 2.722330097087379e-05, "loss": 0.0019, "step": 48775 }, { "epoch": 72.83582089552239, "grad_norm": 0.052490234375, "learning_rate": 2.7185959671396567e-05, "loss": 0.0016, "step": 48800 }, { "epoch": 72.8731343283582, "grad_norm": 0.03369140625, "learning_rate": 2.7148618371919342e-05, "loss": 0.0019, "step": 48825 }, { "epoch": 72.91044776119404, "grad_norm": 0.02685546875, "learning_rate": 2.711127707244212e-05, "loss": 0.0019, "step": 48850 }, { "epoch": 72.94776119402985, "grad_norm": 0.041259765625, "learning_rate": 2.7073935772964897e-05, "loss": 0.0019, "step": 48875 }, { "epoch": 72.98507462686567, "grad_norm": 0.04248046875, "learning_rate": 2.7036594473487682e-05, "loss": 0.0016, "step": 48900 }, { "epoch": 73.0223880597015, "grad_norm": 0.02099609375, "learning_rate": 2.699925317401046e-05, "loss": 0.0017, "step": 48925 }, { "epoch": 73.05970149253731, "grad_norm": 0.0279541015625, "learning_rate": 2.6961911874533234e-05, "loss": 0.0018, "step": 48950 }, { "epoch": 73.09701492537313, "grad_norm": 0.0322265625, "learning_rate": 2.6924570575056012e-05, "loss": 0.0017, "step": 48975 }, { "epoch": 73.13432835820896, "grad_norm": 0.028564453125, "learning_rate": 2.688722927557879e-05, "loss": 0.0016, "step": 49000 }, { "epoch": 73.17164179104478, "grad_norm": 0.018798828125, "learning_rate": 2.6849887976101568e-05, "loss": 0.002, "step": 49025 }, { "epoch": 73.2089552238806, "grad_norm": 0.033203125, "learning_rate": 2.6812546676624352e-05, "loss": 0.0015, "step": 49050 }, { "epoch": 73.24626865671642, "grad_norm": 0.0260009765625, "learning_rate": 2.6775205377147126e-05, "loss": 0.0017, "step": 49075 }, { "epoch": 73.28358208955224, "grad_norm": 0.0277099609375, "learning_rate": 2.6737864077669904e-05, "loss": 0.0017, "step": 49100 }, { "epoch": 73.32089552238806, "grad_norm": 0.0172119140625, "learning_rate": 2.6700522778192682e-05, "loss": 0.0016, "step": 49125 }, { "epoch": 73.35820895522389, "grad_norm": 0.025634765625, "learning_rate": 2.666318147871546e-05, "loss": 0.0019, "step": 49150 }, { "epoch": 73.3955223880597, "grad_norm": 0.04931640625, "learning_rate": 2.6625840179238238e-05, "loss": 0.0019, "step": 49175 }, { "epoch": 73.43283582089552, "grad_norm": 0.0233154296875, "learning_rate": 2.6588498879761015e-05, "loss": 0.0016, "step": 49200 }, { "epoch": 73.47014925373135, "grad_norm": 0.038818359375, "learning_rate": 2.6551157580283797e-05, "loss": 0.0016, "step": 49225 }, { "epoch": 73.50746268656717, "grad_norm": 0.0185546875, "learning_rate": 2.6513816280806574e-05, "loss": 0.0017, "step": 49250 }, { "epoch": 73.54477611940298, "grad_norm": 0.0272216796875, "learning_rate": 2.6476474981329352e-05, "loss": 0.0016, "step": 49275 }, { "epoch": 73.58208955223881, "grad_norm": 0.0361328125, "learning_rate": 2.643913368185213e-05, "loss": 0.0016, "step": 49300 }, { "epoch": 73.61940298507463, "grad_norm": 0.025634765625, "learning_rate": 2.6401792382374908e-05, "loss": 0.0019, "step": 49325 }, { "epoch": 73.65671641791045, "grad_norm": 0.037841796875, "learning_rate": 2.6364451082897685e-05, "loss": 0.0017, "step": 49350 }, { "epoch": 73.69402985074628, "grad_norm": 0.03369140625, "learning_rate": 2.6327109783420463e-05, "loss": 0.0019, "step": 49375 }, { "epoch": 73.73134328358209, "grad_norm": 0.030517578125, "learning_rate": 2.6289768483943244e-05, "loss": 0.0019, "step": 49400 }, { "epoch": 73.76865671641791, "grad_norm": 0.04150390625, "learning_rate": 2.6252427184466022e-05, "loss": 0.0017, "step": 49425 }, { "epoch": 73.80597014925372, "grad_norm": 0.015625, "learning_rate": 2.62150858849888e-05, "loss": 0.0017, "step": 49450 }, { "epoch": 73.84328358208955, "grad_norm": 0.0303955078125, "learning_rate": 2.6177744585511578e-05, "loss": 0.0021, "step": 49475 }, { "epoch": 73.88059701492537, "grad_norm": 0.046875, "learning_rate": 2.6140403286034356e-05, "loss": 0.0018, "step": 49500 }, { "epoch": 73.91791044776119, "grad_norm": 0.0230712890625, "learning_rate": 2.610306198655713e-05, "loss": 0.0019, "step": 49525 }, { "epoch": 73.95522388059702, "grad_norm": 0.033447265625, "learning_rate": 2.6065720687079915e-05, "loss": 0.0018, "step": 49550 }, { "epoch": 73.99253731343283, "grad_norm": 0.0289306640625, "learning_rate": 2.6028379387602692e-05, "loss": 0.0018, "step": 49575 }, { "epoch": 74.02985074626865, "grad_norm": 0.035888671875, "learning_rate": 2.599103808812547e-05, "loss": 0.0018, "step": 49600 }, { "epoch": 74.06716417910448, "grad_norm": 0.0284423828125, "learning_rate": 2.5953696788648248e-05, "loss": 0.0017, "step": 49625 }, { "epoch": 74.1044776119403, "grad_norm": 0.0225830078125, "learning_rate": 2.5916355489171022e-05, "loss": 0.0019, "step": 49650 }, { "epoch": 74.14179104477611, "grad_norm": 0.042236328125, "learning_rate": 2.58790141896938e-05, "loss": 0.0016, "step": 49675 }, { "epoch": 74.17910447761194, "grad_norm": 0.027099609375, "learning_rate": 2.5841672890216578e-05, "loss": 0.0015, "step": 49700 }, { "epoch": 74.21641791044776, "grad_norm": 0.046142578125, "learning_rate": 2.5804331590739362e-05, "loss": 0.0015, "step": 49725 }, { "epoch": 74.25373134328358, "grad_norm": 0.052734375, "learning_rate": 2.576699029126214e-05, "loss": 0.0018, "step": 49750 }, { "epoch": 74.2910447761194, "grad_norm": 0.032958984375, "learning_rate": 2.5729648991784915e-05, "loss": 0.0015, "step": 49775 }, { "epoch": 74.32835820895522, "grad_norm": 0.044921875, "learning_rate": 2.5692307692307692e-05, "loss": 0.0016, "step": 49800 }, { "epoch": 74.36567164179104, "grad_norm": 0.033447265625, "learning_rate": 2.565496639283047e-05, "loss": 0.002, "step": 49825 }, { "epoch": 74.40298507462687, "grad_norm": 0.025146484375, "learning_rate": 2.5617625093353248e-05, "loss": 0.0017, "step": 49850 }, { "epoch": 74.44029850746269, "grad_norm": 0.036376953125, "learning_rate": 2.5580283793876026e-05, "loss": 0.0017, "step": 49875 }, { "epoch": 74.4776119402985, "grad_norm": 0.03759765625, "learning_rate": 2.5542942494398807e-05, "loss": 0.0016, "step": 49900 }, { "epoch": 74.51492537313433, "grad_norm": 0.028564453125, "learning_rate": 2.5505601194921585e-05, "loss": 0.0018, "step": 49925 }, { "epoch": 74.55223880597015, "grad_norm": 0.0250244140625, "learning_rate": 2.5468259895444362e-05, "loss": 0.0015, "step": 49950 }, { "epoch": 74.58955223880596, "grad_norm": 0.0244140625, "learning_rate": 2.543091859596714e-05, "loss": 0.0018, "step": 49975 }, { "epoch": 74.6268656716418, "grad_norm": 0.031494140625, "learning_rate": 2.5393577296489918e-05, "loss": 0.0016, "step": 50000 }, { "epoch": 74.66417910447761, "grad_norm": 0.04296875, "learning_rate": 2.5356235997012696e-05, "loss": 0.0021, "step": 50025 }, { "epoch": 74.70149253731343, "grad_norm": 0.046630859375, "learning_rate": 2.5318894697535477e-05, "loss": 0.0018, "step": 50050 }, { "epoch": 74.73880597014926, "grad_norm": 0.041748046875, "learning_rate": 2.5281553398058255e-05, "loss": 0.0018, "step": 50075 }, { "epoch": 74.77611940298507, "grad_norm": 0.0269775390625, "learning_rate": 2.5244212098581033e-05, "loss": 0.0018, "step": 50100 }, { "epoch": 74.81343283582089, "grad_norm": 0.039306640625, "learning_rate": 2.520687079910381e-05, "loss": 0.0016, "step": 50125 }, { "epoch": 74.85074626865672, "grad_norm": 0.03173828125, "learning_rate": 2.5169529499626588e-05, "loss": 0.0018, "step": 50150 }, { "epoch": 74.88805970149254, "grad_norm": 0.03369140625, "learning_rate": 2.5132188200149366e-05, "loss": 0.0018, "step": 50175 }, { "epoch": 74.92537313432835, "grad_norm": 0.036376953125, "learning_rate": 2.5094846900672144e-05, "loss": 0.0019, "step": 50200 }, { "epoch": 74.96268656716418, "grad_norm": 0.0206298828125, "learning_rate": 2.5057505601194925e-05, "loss": 0.0018, "step": 50225 }, { "epoch": 75.0, "grad_norm": 0.0286865234375, "learning_rate": 2.5020164301717703e-05, "loss": 0.0018, "step": 50250 }, { "epoch": 75.03731343283582, "grad_norm": 0.027099609375, "learning_rate": 2.498282300224048e-05, "loss": 0.0015, "step": 50275 }, { "epoch": 75.07462686567165, "grad_norm": 0.03857421875, "learning_rate": 2.4945481702763258e-05, "loss": 0.0016, "step": 50300 }, { "epoch": 75.11194029850746, "grad_norm": 0.030517578125, "learning_rate": 2.4908140403286036e-05, "loss": 0.002, "step": 50325 }, { "epoch": 75.14925373134328, "grad_norm": 0.0380859375, "learning_rate": 2.4870799103808814e-05, "loss": 0.0016, "step": 50350 }, { "epoch": 75.18656716417911, "grad_norm": 0.025146484375, "learning_rate": 2.483345780433159e-05, "loss": 0.0018, "step": 50375 }, { "epoch": 75.22388059701493, "grad_norm": 0.033203125, "learning_rate": 2.479611650485437e-05, "loss": 0.0017, "step": 50400 }, { "epoch": 75.26119402985074, "grad_norm": 0.0191650390625, "learning_rate": 2.475877520537715e-05, "loss": 0.0018, "step": 50425 }, { "epoch": 75.29850746268657, "grad_norm": 0.0303955078125, "learning_rate": 2.4721433905899928e-05, "loss": 0.0016, "step": 50450 }, { "epoch": 75.33582089552239, "grad_norm": 0.03955078125, "learning_rate": 2.4684092606422703e-05, "loss": 0.0016, "step": 50475 }, { "epoch": 75.3731343283582, "grad_norm": 0.0203857421875, "learning_rate": 2.4646751306945484e-05, "loss": 0.0019, "step": 50500 }, { "epoch": 75.41044776119404, "grad_norm": 0.03955078125, "learning_rate": 2.460941000746826e-05, "loss": 0.0016, "step": 50525 }, { "epoch": 75.44776119402985, "grad_norm": 0.03271484375, "learning_rate": 2.457206870799104e-05, "loss": 0.0016, "step": 50550 }, { "epoch": 75.48507462686567, "grad_norm": 0.0267333984375, "learning_rate": 2.4534727408513817e-05, "loss": 0.002, "step": 50575 }, { "epoch": 75.5223880597015, "grad_norm": 0.035888671875, "learning_rate": 2.4497386109036595e-05, "loss": 0.0019, "step": 50600 }, { "epoch": 75.55970149253731, "grad_norm": 0.006500244140625, "learning_rate": 2.4460044809559373e-05, "loss": 0.0019, "step": 50625 }, { "epoch": 75.59701492537313, "grad_norm": 0.03564453125, "learning_rate": 2.442270351008215e-05, "loss": 0.0018, "step": 50650 }, { "epoch": 75.63432835820896, "grad_norm": 0.0220947265625, "learning_rate": 2.4385362210604932e-05, "loss": 0.0017, "step": 50675 }, { "epoch": 75.67164179104478, "grad_norm": 0.033935546875, "learning_rate": 2.434802091112771e-05, "loss": 0.0015, "step": 50700 }, { "epoch": 75.7089552238806, "grad_norm": 0.0233154296875, "learning_rate": 2.4310679611650487e-05, "loss": 0.0019, "step": 50725 }, { "epoch": 75.74626865671642, "grad_norm": 0.0308837890625, "learning_rate": 2.4273338312173265e-05, "loss": 0.0018, "step": 50750 }, { "epoch": 75.78358208955224, "grad_norm": 0.0283203125, "learning_rate": 2.4235997012696043e-05, "loss": 0.0015, "step": 50775 }, { "epoch": 75.82089552238806, "grad_norm": 0.0303955078125, "learning_rate": 2.419865571321882e-05, "loss": 0.0018, "step": 50800 }, { "epoch": 75.85820895522389, "grad_norm": 0.038330078125, "learning_rate": 2.41613144137416e-05, "loss": 0.0016, "step": 50825 }, { "epoch": 75.8955223880597, "grad_norm": 0.01904296875, "learning_rate": 2.412397311426438e-05, "loss": 0.0016, "step": 50850 }, { "epoch": 75.93283582089552, "grad_norm": 0.042236328125, "learning_rate": 2.4086631814787154e-05, "loss": 0.0018, "step": 50875 }, { "epoch": 75.97014925373135, "grad_norm": 0.0303955078125, "learning_rate": 2.4049290515309932e-05, "loss": 0.0019, "step": 50900 }, { "epoch": 76.00746268656717, "grad_norm": 0.0296630859375, "learning_rate": 2.4011949215832713e-05, "loss": 0.0017, "step": 50925 }, { "epoch": 76.04477611940298, "grad_norm": 0.025390625, "learning_rate": 2.397460791635549e-05, "loss": 0.0018, "step": 50950 }, { "epoch": 76.08208955223881, "grad_norm": 0.0361328125, "learning_rate": 2.393726661687827e-05, "loss": 0.0016, "step": 50975 }, { "epoch": 76.11940298507463, "grad_norm": 0.033447265625, "learning_rate": 2.3899925317401046e-05, "loss": 0.0018, "step": 51000 }, { "epoch": 76.15671641791045, "grad_norm": 0.035888671875, "learning_rate": 2.3862584017923824e-05, "loss": 0.0017, "step": 51025 }, { "epoch": 76.19402985074628, "grad_norm": 0.03515625, "learning_rate": 2.3825242718446602e-05, "loss": 0.0016, "step": 51050 }, { "epoch": 76.23134328358209, "grad_norm": 0.041259765625, "learning_rate": 2.378790141896938e-05, "loss": 0.0016, "step": 51075 }, { "epoch": 76.26865671641791, "grad_norm": 0.03076171875, "learning_rate": 2.375056011949216e-05, "loss": 0.002, "step": 51100 }, { "epoch": 76.30597014925372, "grad_norm": 0.02880859375, "learning_rate": 2.371321882001494e-05, "loss": 0.0016, "step": 51125 }, { "epoch": 76.34328358208955, "grad_norm": 0.04541015625, "learning_rate": 2.3675877520537716e-05, "loss": 0.002, "step": 51150 }, { "epoch": 76.38059701492537, "grad_norm": 0.0115966796875, "learning_rate": 2.3638536221060494e-05, "loss": 0.0014, "step": 51175 }, { "epoch": 76.41791044776119, "grad_norm": 0.03564453125, "learning_rate": 2.3601194921583272e-05, "loss": 0.0019, "step": 51200 }, { "epoch": 76.45522388059702, "grad_norm": 0.017822265625, "learning_rate": 2.356385362210605e-05, "loss": 0.0016, "step": 51225 }, { "epoch": 76.49253731343283, "grad_norm": 0.03564453125, "learning_rate": 2.352651232262883e-05, "loss": 0.0016, "step": 51250 }, { "epoch": 76.52985074626865, "grad_norm": 0.040283203125, "learning_rate": 2.348917102315161e-05, "loss": 0.0017, "step": 51275 }, { "epoch": 76.56716417910448, "grad_norm": 0.018798828125, "learning_rate": 2.3451829723674383e-05, "loss": 0.0016, "step": 51300 }, { "epoch": 76.6044776119403, "grad_norm": 0.039794921875, "learning_rate": 2.341448842419716e-05, "loss": 0.0016, "step": 51325 }, { "epoch": 76.64179104477611, "grad_norm": 0.0255126953125, "learning_rate": 2.3377147124719942e-05, "loss": 0.0016, "step": 51350 }, { "epoch": 76.67910447761194, "grad_norm": 0.030029296875, "learning_rate": 2.333980582524272e-05, "loss": 0.0018, "step": 51375 }, { "epoch": 76.71641791044776, "grad_norm": 0.036865234375, "learning_rate": 2.3302464525765498e-05, "loss": 0.0019, "step": 51400 }, { "epoch": 76.75373134328358, "grad_norm": 0.0235595703125, "learning_rate": 2.3265123226288275e-05, "loss": 0.0016, "step": 51425 }, { "epoch": 76.7910447761194, "grad_norm": 0.0308837890625, "learning_rate": 2.3227781926811053e-05, "loss": 0.0017, "step": 51450 }, { "epoch": 76.82835820895522, "grad_norm": 0.02490234375, "learning_rate": 2.319044062733383e-05, "loss": 0.0017, "step": 51475 }, { "epoch": 76.86567164179104, "grad_norm": 0.03955078125, "learning_rate": 2.3153099327856612e-05, "loss": 0.0019, "step": 51500 }, { "epoch": 76.90298507462687, "grad_norm": 0.03955078125, "learning_rate": 2.311575802837939e-05, "loss": 0.0017, "step": 51525 }, { "epoch": 76.94029850746269, "grad_norm": 0.013916015625, "learning_rate": 2.3078416728902168e-05, "loss": 0.0019, "step": 51550 }, { "epoch": 76.9776119402985, "grad_norm": 0.0205078125, "learning_rate": 2.3041075429424945e-05, "loss": 0.0018, "step": 51575 }, { "epoch": 77.01492537313433, "grad_norm": 0.0263671875, "learning_rate": 2.3003734129947723e-05, "loss": 0.0016, "step": 51600 }, { "epoch": 77.05223880597015, "grad_norm": 0.03369140625, "learning_rate": 2.29663928304705e-05, "loss": 0.0018, "step": 51625 }, { "epoch": 77.08955223880596, "grad_norm": 0.03955078125, "learning_rate": 2.292905153099328e-05, "loss": 0.0017, "step": 51650 }, { "epoch": 77.1268656716418, "grad_norm": 0.0322265625, "learning_rate": 2.289171023151606e-05, "loss": 0.0017, "step": 51675 }, { "epoch": 77.16417910447761, "grad_norm": 0.02880859375, "learning_rate": 2.2854368932038834e-05, "loss": 0.0017, "step": 51700 }, { "epoch": 77.20149253731343, "grad_norm": 0.0220947265625, "learning_rate": 2.2817027632561612e-05, "loss": 0.0017, "step": 51725 }, { "epoch": 77.23880597014926, "grad_norm": 0.018310546875, "learning_rate": 2.2779686333084393e-05, "loss": 0.0014, "step": 51750 }, { "epoch": 77.27611940298507, "grad_norm": 0.046142578125, "learning_rate": 2.274234503360717e-05, "loss": 0.0016, "step": 51775 }, { "epoch": 77.31343283582089, "grad_norm": 0.0439453125, "learning_rate": 2.270500373412995e-05, "loss": 0.0016, "step": 51800 }, { "epoch": 77.35074626865672, "grad_norm": 0.035400390625, "learning_rate": 2.2667662434652727e-05, "loss": 0.0018, "step": 51825 }, { "epoch": 77.38805970149254, "grad_norm": 0.0303955078125, "learning_rate": 2.2630321135175504e-05, "loss": 0.0017, "step": 51850 }, { "epoch": 77.42537313432835, "grad_norm": 0.0458984375, "learning_rate": 2.2592979835698282e-05, "loss": 0.0018, "step": 51875 }, { "epoch": 77.46268656716418, "grad_norm": 0.031494140625, "learning_rate": 2.255563853622106e-05, "loss": 0.0017, "step": 51900 }, { "epoch": 77.5, "grad_norm": 0.0272216796875, "learning_rate": 2.251829723674384e-05, "loss": 0.0017, "step": 51925 }, { "epoch": 77.53731343283582, "grad_norm": 0.042724609375, "learning_rate": 2.248095593726662e-05, "loss": 0.0018, "step": 51950 }, { "epoch": 77.57462686567165, "grad_norm": 0.036865234375, "learning_rate": 2.2443614637789397e-05, "loss": 0.0017, "step": 51975 }, { "epoch": 77.61194029850746, "grad_norm": 0.029052734375, "learning_rate": 2.2406273338312175e-05, "loss": 0.0018, "step": 52000 }, { "epoch": 77.64925373134328, "grad_norm": 0.035400390625, "learning_rate": 2.2368932038834952e-05, "loss": 0.0016, "step": 52025 }, { "epoch": 77.68656716417911, "grad_norm": 0.0245361328125, "learning_rate": 2.233159073935773e-05, "loss": 0.0017, "step": 52050 }, { "epoch": 77.72388059701493, "grad_norm": 0.032470703125, "learning_rate": 2.229424943988051e-05, "loss": 0.0016, "step": 52075 }, { "epoch": 77.76119402985074, "grad_norm": 0.012939453125, "learning_rate": 2.225690814040329e-05, "loss": 0.0018, "step": 52100 }, { "epoch": 77.79850746268657, "grad_norm": 0.036376953125, "learning_rate": 2.2219566840926063e-05, "loss": 0.0017, "step": 52125 }, { "epoch": 77.83582089552239, "grad_norm": 0.03369140625, "learning_rate": 2.218222554144884e-05, "loss": 0.002, "step": 52150 }, { "epoch": 77.8731343283582, "grad_norm": 0.03369140625, "learning_rate": 2.2144884241971622e-05, "loss": 0.002, "step": 52175 }, { "epoch": 77.91044776119404, "grad_norm": 0.03271484375, "learning_rate": 2.21075429424944e-05, "loss": 0.0016, "step": 52200 }, { "epoch": 77.94776119402985, "grad_norm": 0.0257568359375, "learning_rate": 2.2070201643017178e-05, "loss": 0.0015, "step": 52225 }, { "epoch": 77.98507462686567, "grad_norm": 0.0361328125, "learning_rate": 2.2032860343539956e-05, "loss": 0.0018, "step": 52250 }, { "epoch": 78.0223880597015, "grad_norm": 0.031982421875, "learning_rate": 2.1995519044062734e-05, "loss": 0.002, "step": 52275 }, { "epoch": 78.05970149253731, "grad_norm": 0.0306396484375, "learning_rate": 2.195817774458551e-05, "loss": 0.0017, "step": 52300 }, { "epoch": 78.09701492537313, "grad_norm": 0.025634765625, "learning_rate": 2.1920836445108293e-05, "loss": 0.0016, "step": 52325 }, { "epoch": 78.13432835820896, "grad_norm": 0.034423828125, "learning_rate": 2.188349514563107e-05, "loss": 0.0015, "step": 52350 }, { "epoch": 78.17164179104478, "grad_norm": 0.043212890625, "learning_rate": 2.1846153846153848e-05, "loss": 0.0017, "step": 52375 }, { "epoch": 78.2089552238806, "grad_norm": 0.0115966796875, "learning_rate": 2.1808812546676622e-05, "loss": 0.0017, "step": 52400 }, { "epoch": 78.24626865671642, "grad_norm": 0.038818359375, "learning_rate": 2.1771471247199404e-05, "loss": 0.0016, "step": 52425 }, { "epoch": 78.28358208955224, "grad_norm": 0.0230712890625, "learning_rate": 2.173412994772218e-05, "loss": 0.0019, "step": 52450 }, { "epoch": 78.32089552238806, "grad_norm": 0.0205078125, "learning_rate": 2.169678864824496e-05, "loss": 0.0017, "step": 52475 }, { "epoch": 78.35820895522389, "grad_norm": 0.027099609375, "learning_rate": 2.165944734876774e-05, "loss": 0.0017, "step": 52500 }, { "epoch": 78.3955223880597, "grad_norm": 0.0220947265625, "learning_rate": 2.1622106049290515e-05, "loss": 0.0018, "step": 52525 }, { "epoch": 78.43283582089552, "grad_norm": 0.028076171875, "learning_rate": 2.1584764749813293e-05, "loss": 0.0018, "step": 52550 }, { "epoch": 78.47014925373135, "grad_norm": 0.023681640625, "learning_rate": 2.1547423450336074e-05, "loss": 0.0017, "step": 52575 }, { "epoch": 78.50746268656717, "grad_norm": 0.0341796875, "learning_rate": 2.151008215085885e-05, "loss": 0.0016, "step": 52600 }, { "epoch": 78.54477611940298, "grad_norm": 0.024169921875, "learning_rate": 2.147274085138163e-05, "loss": 0.0017, "step": 52625 }, { "epoch": 78.58208955223881, "grad_norm": 0.023193359375, "learning_rate": 2.1435399551904407e-05, "loss": 0.0019, "step": 52650 }, { "epoch": 78.61940298507463, "grad_norm": 0.032958984375, "learning_rate": 2.1398058252427185e-05, "loss": 0.0015, "step": 52675 }, { "epoch": 78.65671641791045, "grad_norm": 0.033203125, "learning_rate": 2.1360716952949963e-05, "loss": 0.0019, "step": 52700 }, { "epoch": 78.69402985074628, "grad_norm": 0.0322265625, "learning_rate": 2.132337565347274e-05, "loss": 0.0018, "step": 52725 }, { "epoch": 78.73134328358209, "grad_norm": 0.0439453125, "learning_rate": 2.128603435399552e-05, "loss": 0.0019, "step": 52750 }, { "epoch": 78.76865671641791, "grad_norm": 0.024658203125, "learning_rate": 2.12486930545183e-05, "loss": 0.0019, "step": 52775 }, { "epoch": 78.80597014925372, "grad_norm": 0.02783203125, "learning_rate": 2.1211351755041077e-05, "loss": 0.0016, "step": 52800 }, { "epoch": 78.84328358208955, "grad_norm": 0.026611328125, "learning_rate": 2.1174010455563855e-05, "loss": 0.0013, "step": 52825 }, { "epoch": 78.88059701492537, "grad_norm": 0.0267333984375, "learning_rate": 2.1136669156086633e-05, "loss": 0.0016, "step": 52850 }, { "epoch": 78.91791044776119, "grad_norm": 0.0400390625, "learning_rate": 2.109932785660941e-05, "loss": 0.0018, "step": 52875 }, { "epoch": 78.95522388059702, "grad_norm": 0.01544189453125, "learning_rate": 2.106198655713219e-05, "loss": 0.0016, "step": 52900 }, { "epoch": 78.99253731343283, "grad_norm": 0.052490234375, "learning_rate": 2.102464525765497e-05, "loss": 0.0017, "step": 52925 }, { "epoch": 79.02985074626865, "grad_norm": 0.033447265625, "learning_rate": 2.0987303958177744e-05, "loss": 0.0015, "step": 52950 }, { "epoch": 79.06716417910448, "grad_norm": 0.037841796875, "learning_rate": 2.094996265870052e-05, "loss": 0.0015, "step": 52975 }, { "epoch": 79.1044776119403, "grad_norm": 0.036376953125, "learning_rate": 2.0912621359223303e-05, "loss": 0.0016, "step": 53000 }, { "epoch": 79.14179104477611, "grad_norm": 0.020263671875, "learning_rate": 2.087528005974608e-05, "loss": 0.0014, "step": 53025 }, { "epoch": 79.17910447761194, "grad_norm": 0.03857421875, "learning_rate": 2.083793876026886e-05, "loss": 0.0015, "step": 53050 }, { "epoch": 79.21641791044776, "grad_norm": 0.0400390625, "learning_rate": 2.0800597460791636e-05, "loss": 0.0017, "step": 53075 }, { "epoch": 79.25373134328358, "grad_norm": 0.0224609375, "learning_rate": 2.0763256161314414e-05, "loss": 0.0017, "step": 53100 }, { "epoch": 79.2910447761194, "grad_norm": 0.029052734375, "learning_rate": 2.0725914861837192e-05, "loss": 0.0017, "step": 53125 }, { "epoch": 79.32835820895522, "grad_norm": 0.04052734375, "learning_rate": 2.0688573562359973e-05, "loss": 0.0018, "step": 53150 }, { "epoch": 79.36567164179104, "grad_norm": 0.044921875, "learning_rate": 2.065123226288275e-05, "loss": 0.0016, "step": 53175 }, { "epoch": 79.40298507462687, "grad_norm": 0.031982421875, "learning_rate": 2.061389096340553e-05, "loss": 0.0018, "step": 53200 }, { "epoch": 79.44029850746269, "grad_norm": 0.025146484375, "learning_rate": 2.0576549663928303e-05, "loss": 0.0017, "step": 53225 }, { "epoch": 79.4776119402985, "grad_norm": 0.031982421875, "learning_rate": 2.0539208364451084e-05, "loss": 0.0017, "step": 53250 }, { "epoch": 79.51492537313433, "grad_norm": 0.0257568359375, "learning_rate": 2.0501867064973862e-05, "loss": 0.0018, "step": 53275 }, { "epoch": 79.55223880597015, "grad_norm": 0.0238037109375, "learning_rate": 2.046452576549664e-05, "loss": 0.002, "step": 53300 }, { "epoch": 79.58955223880596, "grad_norm": 0.02978515625, "learning_rate": 2.042718446601942e-05, "loss": 0.0016, "step": 53325 }, { "epoch": 79.6268656716418, "grad_norm": 0.0289306640625, "learning_rate": 2.0389843166542195e-05, "loss": 0.0018, "step": 53350 }, { "epoch": 79.66417910447761, "grad_norm": 0.026123046875, "learning_rate": 2.0352501867064973e-05, "loss": 0.0016, "step": 53375 }, { "epoch": 79.70149253731343, "grad_norm": 0.03173828125, "learning_rate": 2.0315160567587754e-05, "loss": 0.0017, "step": 53400 }, { "epoch": 79.73880597014926, "grad_norm": 0.02490234375, "learning_rate": 2.0277819268110532e-05, "loss": 0.0017, "step": 53425 }, { "epoch": 79.77611940298507, "grad_norm": 0.015625, "learning_rate": 2.024047796863331e-05, "loss": 0.0017, "step": 53450 }, { "epoch": 79.81343283582089, "grad_norm": 0.03076171875, "learning_rate": 2.0203136669156088e-05, "loss": 0.0018, "step": 53475 }, { "epoch": 79.85074626865672, "grad_norm": 0.021240234375, "learning_rate": 2.0165795369678865e-05, "loss": 0.0015, "step": 53500 }, { "epoch": 79.88805970149254, "grad_norm": 0.026123046875, "learning_rate": 2.0128454070201643e-05, "loss": 0.0018, "step": 53525 }, { "epoch": 79.92537313432835, "grad_norm": 0.04345703125, "learning_rate": 2.009111277072442e-05, "loss": 0.0019, "step": 53550 }, { "epoch": 79.96268656716418, "grad_norm": 0.01519775390625, "learning_rate": 2.0053771471247202e-05, "loss": 0.0018, "step": 53575 }, { "epoch": 80.0, "grad_norm": 0.0576171875, "learning_rate": 2.001643017176998e-05, "loss": 0.0018, "step": 53600 }, { "epoch": 80.03731343283582, "grad_norm": 0.04833984375, "learning_rate": 1.9979088872292758e-05, "loss": 0.0016, "step": 53625 }, { "epoch": 80.07462686567165, "grad_norm": 0.032470703125, "learning_rate": 1.9941747572815535e-05, "loss": 0.0018, "step": 53650 }, { "epoch": 80.11194029850746, "grad_norm": 0.030029296875, "learning_rate": 1.9904406273338313e-05, "loss": 0.0015, "step": 53675 }, { "epoch": 80.14925373134328, "grad_norm": 0.039794921875, "learning_rate": 1.986706497386109e-05, "loss": 0.0014, "step": 53700 }, { "epoch": 80.18656716417911, "grad_norm": 0.03857421875, "learning_rate": 1.982972367438387e-05, "loss": 0.0016, "step": 53725 }, { "epoch": 80.22388059701493, "grad_norm": 0.04736328125, "learning_rate": 1.979238237490665e-05, "loss": 0.0019, "step": 53750 }, { "epoch": 80.26119402985074, "grad_norm": 0.0303955078125, "learning_rate": 1.9755041075429424e-05, "loss": 0.0016, "step": 53775 }, { "epoch": 80.29850746268657, "grad_norm": 0.025390625, "learning_rate": 1.9717699775952202e-05, "loss": 0.0016, "step": 53800 }, { "epoch": 80.33582089552239, "grad_norm": 0.0291748046875, "learning_rate": 1.9680358476474983e-05, "loss": 0.0016, "step": 53825 }, { "epoch": 80.3731343283582, "grad_norm": 0.01953125, "learning_rate": 1.964301717699776e-05, "loss": 0.0019, "step": 53850 }, { "epoch": 80.41044776119404, "grad_norm": 0.0245361328125, "learning_rate": 1.960567587752054e-05, "loss": 0.0018, "step": 53875 }, { "epoch": 80.44776119402985, "grad_norm": 0.035400390625, "learning_rate": 1.9568334578043317e-05, "loss": 0.0016, "step": 53900 }, { "epoch": 80.48507462686567, "grad_norm": 0.031494140625, "learning_rate": 1.9530993278566094e-05, "loss": 0.0016, "step": 53925 }, { "epoch": 80.5223880597015, "grad_norm": 0.033935546875, "learning_rate": 1.9493651979088872e-05, "loss": 0.0018, "step": 53950 }, { "epoch": 80.55970149253731, "grad_norm": 0.0257568359375, "learning_rate": 1.945631067961165e-05, "loss": 0.0016, "step": 53975 }, { "epoch": 80.59701492537313, "grad_norm": 0.06298828125, "learning_rate": 1.941896938013443e-05, "loss": 0.0018, "step": 54000 }, { "epoch": 80.63432835820896, "grad_norm": 0.0478515625, "learning_rate": 1.938162808065721e-05, "loss": 0.0015, "step": 54025 }, { "epoch": 80.67164179104478, "grad_norm": 0.038818359375, "learning_rate": 1.9344286781179983e-05, "loss": 0.0018, "step": 54050 }, { "epoch": 80.7089552238806, "grad_norm": 0.055908203125, "learning_rate": 1.9306945481702764e-05, "loss": 0.0018, "step": 54075 }, { "epoch": 80.74626865671642, "grad_norm": 0.02734375, "learning_rate": 1.9269604182225542e-05, "loss": 0.0014, "step": 54100 }, { "epoch": 80.78358208955224, "grad_norm": 0.017822265625, "learning_rate": 1.923226288274832e-05, "loss": 0.0018, "step": 54125 }, { "epoch": 80.82089552238806, "grad_norm": 0.047607421875, "learning_rate": 1.91949215832711e-05, "loss": 0.0016, "step": 54150 }, { "epoch": 80.85820895522389, "grad_norm": 0.025146484375, "learning_rate": 1.9157580283793876e-05, "loss": 0.0019, "step": 54175 }, { "epoch": 80.8955223880597, "grad_norm": 0.052734375, "learning_rate": 1.9120238984316653e-05, "loss": 0.0018, "step": 54200 }, { "epoch": 80.93283582089552, "grad_norm": 0.030029296875, "learning_rate": 1.908289768483943e-05, "loss": 0.0016, "step": 54225 }, { "epoch": 80.97014925373135, "grad_norm": 0.0262451171875, "learning_rate": 1.9045556385362212e-05, "loss": 0.0019, "step": 54250 }, { "epoch": 81.00746268656717, "grad_norm": 0.028076171875, "learning_rate": 1.900821508588499e-05, "loss": 0.0017, "step": 54275 }, { "epoch": 81.04477611940298, "grad_norm": 0.028564453125, "learning_rate": 1.8970873786407768e-05, "loss": 0.0018, "step": 54300 }, { "epoch": 81.08208955223881, "grad_norm": 0.034912109375, "learning_rate": 1.8933532486930546e-05, "loss": 0.0018, "step": 54325 }, { "epoch": 81.11940298507463, "grad_norm": 0.0262451171875, "learning_rate": 1.8896191187453323e-05, "loss": 0.0016, "step": 54350 }, { "epoch": 81.15671641791045, "grad_norm": 0.0306396484375, "learning_rate": 1.88588498879761e-05, "loss": 0.0017, "step": 54375 }, { "epoch": 81.19402985074628, "grad_norm": 0.035888671875, "learning_rate": 1.8821508588498882e-05, "loss": 0.0017, "step": 54400 }, { "epoch": 81.23134328358209, "grad_norm": 0.02392578125, "learning_rate": 1.878416728902166e-05, "loss": 0.0019, "step": 54425 }, { "epoch": 81.26865671641791, "grad_norm": 0.02392578125, "learning_rate": 1.8746825989544438e-05, "loss": 0.0017, "step": 54450 }, { "epoch": 81.30597014925372, "grad_norm": 0.0286865234375, "learning_rate": 1.8709484690067216e-05, "loss": 0.0016, "step": 54475 }, { "epoch": 81.34328358208955, "grad_norm": 0.0478515625, "learning_rate": 1.8672143390589994e-05, "loss": 0.0017, "step": 54500 }, { "epoch": 81.38059701492537, "grad_norm": 0.0281982421875, "learning_rate": 1.863480209111277e-05, "loss": 0.0017, "step": 54525 }, { "epoch": 81.41791044776119, "grad_norm": 0.0228271484375, "learning_rate": 1.859746079163555e-05, "loss": 0.0018, "step": 54550 }, { "epoch": 81.45522388059702, "grad_norm": 0.025390625, "learning_rate": 1.856011949215833e-05, "loss": 0.0017, "step": 54575 }, { "epoch": 81.49253731343283, "grad_norm": 0.03076171875, "learning_rate": 1.8522778192681105e-05, "loss": 0.0015, "step": 54600 }, { "epoch": 81.52985074626865, "grad_norm": 0.036865234375, "learning_rate": 1.8485436893203882e-05, "loss": 0.0017, "step": 54625 }, { "epoch": 81.56716417910448, "grad_norm": 0.021728515625, "learning_rate": 1.8448095593726664e-05, "loss": 0.0018, "step": 54650 }, { "epoch": 81.6044776119403, "grad_norm": 0.03173828125, "learning_rate": 1.841075429424944e-05, "loss": 0.0016, "step": 54675 }, { "epoch": 81.64179104477611, "grad_norm": 0.03271484375, "learning_rate": 1.837341299477222e-05, "loss": 0.0018, "step": 54700 }, { "epoch": 81.67910447761194, "grad_norm": 0.049072265625, "learning_rate": 1.8336071695294997e-05, "loss": 0.0014, "step": 54725 }, { "epoch": 81.71641791044776, "grad_norm": 0.048583984375, "learning_rate": 1.8298730395817775e-05, "loss": 0.0018, "step": 54750 }, { "epoch": 81.75373134328358, "grad_norm": 0.036376953125, "learning_rate": 1.8261389096340553e-05, "loss": 0.0016, "step": 54775 }, { "epoch": 81.7910447761194, "grad_norm": 0.035888671875, "learning_rate": 1.822404779686333e-05, "loss": 0.0015, "step": 54800 }, { "epoch": 81.82835820895522, "grad_norm": 0.0264892578125, "learning_rate": 1.818670649738611e-05, "loss": 0.0013, "step": 54825 }, { "epoch": 81.86567164179104, "grad_norm": 0.032958984375, "learning_rate": 1.814936519790889e-05, "loss": 0.0017, "step": 54850 }, { "epoch": 81.90298507462687, "grad_norm": 0.0218505859375, "learning_rate": 1.8112023898431664e-05, "loss": 0.0017, "step": 54875 }, { "epoch": 81.94029850746269, "grad_norm": 0.05078125, "learning_rate": 1.8074682598954445e-05, "loss": 0.0018, "step": 54900 }, { "epoch": 81.9776119402985, "grad_norm": 0.025634765625, "learning_rate": 1.8037341299477223e-05, "loss": 0.0016, "step": 54925 }, { "epoch": 82.01492537313433, "grad_norm": 0.024658203125, "learning_rate": 1.8e-05, "loss": 0.0018, "step": 54950 }, { "epoch": 82.05223880597015, "grad_norm": 0.0341796875, "learning_rate": 1.796265870052278e-05, "loss": 0.0017, "step": 54975 }, { "epoch": 82.08955223880596, "grad_norm": 0.031494140625, "learning_rate": 1.7925317401045556e-05, "loss": 0.0015, "step": 55000 }, { "epoch": 82.1268656716418, "grad_norm": 0.0380859375, "learning_rate": 1.7887976101568334e-05, "loss": 0.0016, "step": 55025 }, { "epoch": 82.16417910447761, "grad_norm": 0.024169921875, "learning_rate": 1.785063480209111e-05, "loss": 0.0016, "step": 55050 }, { "epoch": 82.20149253731343, "grad_norm": 0.0277099609375, "learning_rate": 1.7813293502613893e-05, "loss": 0.0017, "step": 55075 }, { "epoch": 82.23880597014926, "grad_norm": 0.030517578125, "learning_rate": 1.777595220313667e-05, "loss": 0.0014, "step": 55100 }, { "epoch": 82.27611940298507, "grad_norm": 0.0284423828125, "learning_rate": 1.773861090365945e-05, "loss": 0.0017, "step": 55125 }, { "epoch": 82.31343283582089, "grad_norm": 0.041259765625, "learning_rate": 1.7701269604182226e-05, "loss": 0.0016, "step": 55150 }, { "epoch": 82.35074626865672, "grad_norm": 0.036376953125, "learning_rate": 1.7663928304705004e-05, "loss": 0.0016, "step": 55175 }, { "epoch": 82.38805970149254, "grad_norm": 0.031982421875, "learning_rate": 1.762658700522778e-05, "loss": 0.0018, "step": 55200 }, { "epoch": 82.42537313432835, "grad_norm": 0.02587890625, "learning_rate": 1.7589245705750563e-05, "loss": 0.0018, "step": 55225 }, { "epoch": 82.46268656716418, "grad_norm": 0.035400390625, "learning_rate": 1.755190440627334e-05, "loss": 0.0016, "step": 55250 }, { "epoch": 82.5, "grad_norm": 0.038330078125, "learning_rate": 1.751456310679612e-05, "loss": 0.0014, "step": 55275 }, { "epoch": 82.53731343283582, "grad_norm": 0.0289306640625, "learning_rate": 1.7477221807318893e-05, "loss": 0.0015, "step": 55300 }, { "epoch": 82.57462686567165, "grad_norm": 0.033447265625, "learning_rate": 1.7439880507841674e-05, "loss": 0.0016, "step": 55325 }, { "epoch": 82.61194029850746, "grad_norm": 0.03515625, "learning_rate": 1.7402539208364452e-05, "loss": 0.0018, "step": 55350 }, { "epoch": 82.64925373134328, "grad_norm": 0.03515625, "learning_rate": 1.736519790888723e-05, "loss": 0.0017, "step": 55375 }, { "epoch": 82.68656716417911, "grad_norm": 0.023681640625, "learning_rate": 1.732785660941001e-05, "loss": 0.0018, "step": 55400 }, { "epoch": 82.72388059701493, "grad_norm": 0.0228271484375, "learning_rate": 1.7290515309932785e-05, "loss": 0.0018, "step": 55425 }, { "epoch": 82.76119402985074, "grad_norm": 0.0255126953125, "learning_rate": 1.7253174010455563e-05, "loss": 0.0015, "step": 55450 }, { "epoch": 82.79850746268657, "grad_norm": 0.040771484375, "learning_rate": 1.7215832710978344e-05, "loss": 0.0017, "step": 55475 }, { "epoch": 82.83582089552239, "grad_norm": 0.030029296875, "learning_rate": 1.7178491411501122e-05, "loss": 0.0018, "step": 55500 }, { "epoch": 82.8731343283582, "grad_norm": 0.032958984375, "learning_rate": 1.71411501120239e-05, "loss": 0.0019, "step": 55525 }, { "epoch": 82.91044776119404, "grad_norm": 0.031982421875, "learning_rate": 1.7103808812546677e-05, "loss": 0.0016, "step": 55550 }, { "epoch": 82.94776119402985, "grad_norm": 0.039794921875, "learning_rate": 1.7066467513069455e-05, "loss": 0.0016, "step": 55575 }, { "epoch": 82.98507462686567, "grad_norm": 0.030029296875, "learning_rate": 1.7029126213592233e-05, "loss": 0.0018, "step": 55600 }, { "epoch": 83.0223880597015, "grad_norm": 0.0257568359375, "learning_rate": 1.699178491411501e-05, "loss": 0.0017, "step": 55625 }, { "epoch": 83.05970149253731, "grad_norm": 0.03759765625, "learning_rate": 1.6954443614637792e-05, "loss": 0.0018, "step": 55650 }, { "epoch": 83.09701492537313, "grad_norm": 0.038818359375, "learning_rate": 1.691710231516057e-05, "loss": 0.0016, "step": 55675 }, { "epoch": 83.13432835820896, "grad_norm": 0.0289306640625, "learning_rate": 1.6879761015683344e-05, "loss": 0.0014, "step": 55700 }, { "epoch": 83.17164179104478, "grad_norm": 0.0322265625, "learning_rate": 1.6842419716206125e-05, "loss": 0.0017, "step": 55725 }, { "epoch": 83.2089552238806, "grad_norm": 0.0233154296875, "learning_rate": 1.6805078416728903e-05, "loss": 0.002, "step": 55750 }, { "epoch": 83.24626865671642, "grad_norm": 0.034912109375, "learning_rate": 1.676773711725168e-05, "loss": 0.0019, "step": 55775 }, { "epoch": 83.28358208955224, "grad_norm": 0.0201416015625, "learning_rate": 1.673039581777446e-05, "loss": 0.0017, "step": 55800 }, { "epoch": 83.32089552238806, "grad_norm": 0.045654296875, "learning_rate": 1.6693054518297236e-05, "loss": 0.0017, "step": 55825 }, { "epoch": 83.35820895522389, "grad_norm": 0.047607421875, "learning_rate": 1.6655713218820014e-05, "loss": 0.0017, "step": 55850 }, { "epoch": 83.3955223880597, "grad_norm": 0.031494140625, "learning_rate": 1.6618371919342792e-05, "loss": 0.0016, "step": 55875 }, { "epoch": 83.43283582089552, "grad_norm": 0.030517578125, "learning_rate": 1.6581030619865573e-05, "loss": 0.0015, "step": 55900 }, { "epoch": 83.47014925373135, "grad_norm": 0.031494140625, "learning_rate": 1.654368932038835e-05, "loss": 0.0017, "step": 55925 }, { "epoch": 83.50746268656717, "grad_norm": 0.0206298828125, "learning_rate": 1.650634802091113e-05, "loss": 0.0013, "step": 55950 }, { "epoch": 83.54477611940298, "grad_norm": 0.037109375, "learning_rate": 1.6469006721433907e-05, "loss": 0.0014, "step": 55975 }, { "epoch": 83.58208955223881, "grad_norm": 0.03515625, "learning_rate": 1.6431665421956684e-05, "loss": 0.0017, "step": 56000 }, { "epoch": 83.61940298507463, "grad_norm": 0.032470703125, "learning_rate": 1.6394324122479462e-05, "loss": 0.0017, "step": 56025 }, { "epoch": 83.65671641791045, "grad_norm": 0.0308837890625, "learning_rate": 1.6356982823002243e-05, "loss": 0.0017, "step": 56050 }, { "epoch": 83.69402985074628, "grad_norm": 0.03125, "learning_rate": 1.631964152352502e-05, "loss": 0.0016, "step": 56075 }, { "epoch": 83.73134328358209, "grad_norm": 0.04052734375, "learning_rate": 1.62823002240478e-05, "loss": 0.0015, "step": 56100 }, { "epoch": 83.76865671641791, "grad_norm": 0.031982421875, "learning_rate": 1.6244958924570573e-05, "loss": 0.0017, "step": 56125 }, { "epoch": 83.80597014925372, "grad_norm": 0.038330078125, "learning_rate": 1.6207617625093354e-05, "loss": 0.0018, "step": 56150 }, { "epoch": 83.84328358208955, "grad_norm": 0.037841796875, "learning_rate": 1.6170276325616132e-05, "loss": 0.0017, "step": 56175 }, { "epoch": 83.88059701492537, "grad_norm": 0.042724609375, "learning_rate": 1.613293502613891e-05, "loss": 0.0017, "step": 56200 }, { "epoch": 83.91791044776119, "grad_norm": 0.035888671875, "learning_rate": 1.609559372666169e-05, "loss": 0.0018, "step": 56225 }, { "epoch": 83.95522388059702, "grad_norm": 0.0296630859375, "learning_rate": 1.6058252427184466e-05, "loss": 0.0018, "step": 56250 }, { "epoch": 83.99253731343283, "grad_norm": 0.031494140625, "learning_rate": 1.6020911127707243e-05, "loss": 0.0018, "step": 56275 }, { "epoch": 84.02985074626865, "grad_norm": 0.03857421875, "learning_rate": 1.5983569828230024e-05, "loss": 0.0016, "step": 56300 }, { "epoch": 84.06716417910448, "grad_norm": 0.043701171875, "learning_rate": 1.5946228528752802e-05, "loss": 0.0017, "step": 56325 }, { "epoch": 84.1044776119403, "grad_norm": 0.031982421875, "learning_rate": 1.590888722927558e-05, "loss": 0.0015, "step": 56350 }, { "epoch": 84.14179104477611, "grad_norm": 0.036376953125, "learning_rate": 1.5871545929798358e-05, "loss": 0.0016, "step": 56375 }, { "epoch": 84.17910447761194, "grad_norm": 0.041259765625, "learning_rate": 1.5834204630321136e-05, "loss": 0.0015, "step": 56400 }, { "epoch": 84.21641791044776, "grad_norm": 0.045166015625, "learning_rate": 1.5796863330843913e-05, "loss": 0.0018, "step": 56425 }, { "epoch": 84.25373134328358, "grad_norm": 0.047119140625, "learning_rate": 1.575952203136669e-05, "loss": 0.0019, "step": 56450 }, { "epoch": 84.2910447761194, "grad_norm": 0.031494140625, "learning_rate": 1.5722180731889472e-05, "loss": 0.0016, "step": 56475 }, { "epoch": 84.32835820895522, "grad_norm": 0.03125, "learning_rate": 1.568483943241225e-05, "loss": 0.0017, "step": 56500 }, { "epoch": 84.36567164179104, "grad_norm": 0.036865234375, "learning_rate": 1.5647498132935025e-05, "loss": 0.0016, "step": 56525 }, { "epoch": 84.40298507462687, "grad_norm": 0.04443359375, "learning_rate": 1.5610156833457806e-05, "loss": 0.0018, "step": 56550 }, { "epoch": 84.44029850746269, "grad_norm": 0.0546875, "learning_rate": 1.5572815533980583e-05, "loss": 0.0017, "step": 56575 }, { "epoch": 84.4776119402985, "grad_norm": 0.032470703125, "learning_rate": 1.553547423450336e-05, "loss": 0.0017, "step": 56600 }, { "epoch": 84.51492537313433, "grad_norm": 0.0206298828125, "learning_rate": 1.549813293502614e-05, "loss": 0.0016, "step": 56625 }, { "epoch": 84.55223880597015, "grad_norm": 0.0625, "learning_rate": 1.5460791635548917e-05, "loss": 0.0016, "step": 56650 }, { "epoch": 84.58955223880596, "grad_norm": 0.043701171875, "learning_rate": 1.5423450336071695e-05, "loss": 0.0016, "step": 56675 }, { "epoch": 84.6268656716418, "grad_norm": 0.02783203125, "learning_rate": 1.5386109036594472e-05, "loss": 0.0016, "step": 56700 }, { "epoch": 84.66417910447761, "grad_norm": 0.044921875, "learning_rate": 1.5348767737117254e-05, "loss": 0.0016, "step": 56725 }, { "epoch": 84.70149253731343, "grad_norm": 0.03173828125, "learning_rate": 1.531142643764003e-05, "loss": 0.0017, "step": 56750 }, { "epoch": 84.73880597014926, "grad_norm": 0.02392578125, "learning_rate": 1.527408513816281e-05, "loss": 0.0016, "step": 56775 }, { "epoch": 84.77611940298507, "grad_norm": 0.056640625, "learning_rate": 1.5236743838685589e-05, "loss": 0.0015, "step": 56800 }, { "epoch": 84.81343283582089, "grad_norm": 0.0255126953125, "learning_rate": 1.5199402539208365e-05, "loss": 0.0016, "step": 56825 }, { "epoch": 84.85074626865672, "grad_norm": 0.034912109375, "learning_rate": 1.5162061239731142e-05, "loss": 0.0016, "step": 56850 }, { "epoch": 84.88805970149254, "grad_norm": 0.03662109375, "learning_rate": 1.512471994025392e-05, "loss": 0.0018, "step": 56875 }, { "epoch": 84.92537313432835, "grad_norm": 0.034423828125, "learning_rate": 1.50873786407767e-05, "loss": 0.0016, "step": 56900 }, { "epoch": 84.96268656716418, "grad_norm": 0.02978515625, "learning_rate": 1.5050037341299478e-05, "loss": 0.0019, "step": 56925 }, { "epoch": 85.0, "grad_norm": 0.03173828125, "learning_rate": 1.5012696041822255e-05, "loss": 0.0018, "step": 56950 }, { "epoch": 85.03731343283582, "grad_norm": 0.03515625, "learning_rate": 1.4975354742345035e-05, "loss": 0.0017, "step": 56975 }, { "epoch": 85.07462686567165, "grad_norm": 0.040771484375, "learning_rate": 1.4938013442867813e-05, "loss": 0.0015, "step": 57000 }, { "epoch": 85.11194029850746, "grad_norm": 0.0242919921875, "learning_rate": 1.490067214339059e-05, "loss": 0.0015, "step": 57025 }, { "epoch": 85.14925373134328, "grad_norm": 0.0289306640625, "learning_rate": 1.486333084391337e-05, "loss": 0.0016, "step": 57050 }, { "epoch": 85.18656716417911, "grad_norm": 0.03466796875, "learning_rate": 1.4825989544436148e-05, "loss": 0.0016, "step": 57075 }, { "epoch": 85.22388059701493, "grad_norm": 0.034423828125, "learning_rate": 1.4788648244958925e-05, "loss": 0.0017, "step": 57100 }, { "epoch": 85.26119402985074, "grad_norm": 0.0220947265625, "learning_rate": 1.4751306945481702e-05, "loss": 0.0017, "step": 57125 }, { "epoch": 85.29850746268657, "grad_norm": 0.0240478515625, "learning_rate": 1.4713965646004483e-05, "loss": 0.0012, "step": 57150 }, { "epoch": 85.33582089552239, "grad_norm": 0.03515625, "learning_rate": 1.4676624346527259e-05, "loss": 0.0019, "step": 57175 }, { "epoch": 85.3731343283582, "grad_norm": 0.036865234375, "learning_rate": 1.4639283047050037e-05, "loss": 0.0017, "step": 57200 }, { "epoch": 85.41044776119404, "grad_norm": 0.03515625, "learning_rate": 1.4601941747572818e-05, "loss": 0.0016, "step": 57225 }, { "epoch": 85.44776119402985, "grad_norm": 0.030517578125, "learning_rate": 1.4564600448095594e-05, "loss": 0.0016, "step": 57250 }, { "epoch": 85.48507462686567, "grad_norm": 0.031494140625, "learning_rate": 1.4527259148618372e-05, "loss": 0.0016, "step": 57275 }, { "epoch": 85.5223880597015, "grad_norm": 0.03125, "learning_rate": 1.4489917849141151e-05, "loss": 0.0014, "step": 57300 }, { "epoch": 85.55970149253731, "grad_norm": 0.0341796875, "learning_rate": 1.4452576549663929e-05, "loss": 0.0016, "step": 57325 }, { "epoch": 85.59701492537313, "grad_norm": 0.031982421875, "learning_rate": 1.4415235250186707e-05, "loss": 0.0016, "step": 57350 }, { "epoch": 85.63432835820896, "grad_norm": 0.0361328125, "learning_rate": 1.4377893950709486e-05, "loss": 0.0016, "step": 57375 }, { "epoch": 85.67164179104478, "grad_norm": 0.040771484375, "learning_rate": 1.4340552651232264e-05, "loss": 0.0014, "step": 57400 }, { "epoch": 85.7089552238806, "grad_norm": 0.0274658203125, "learning_rate": 1.4303211351755042e-05, "loss": 0.0018, "step": 57425 }, { "epoch": 85.74626865671642, "grad_norm": 0.0322265625, "learning_rate": 1.426587005227782e-05, "loss": 0.0016, "step": 57450 }, { "epoch": 85.78358208955224, "grad_norm": 0.02685546875, "learning_rate": 1.4228528752800599e-05, "loss": 0.0018, "step": 57475 }, { "epoch": 85.82089552238806, "grad_norm": 0.03369140625, "learning_rate": 1.4191187453323377e-05, "loss": 0.0016, "step": 57500 }, { "epoch": 85.85820895522389, "grad_norm": 0.03369140625, "learning_rate": 1.4153846153846153e-05, "loss": 0.0016, "step": 57525 }, { "epoch": 85.8955223880597, "grad_norm": 0.0439453125, "learning_rate": 1.4116504854368934e-05, "loss": 0.0019, "step": 57550 }, { "epoch": 85.93283582089552, "grad_norm": 0.042236328125, "learning_rate": 1.4079163554891712e-05, "loss": 0.002, "step": 57575 }, { "epoch": 85.97014925373135, "grad_norm": 0.03857421875, "learning_rate": 1.4041822255414488e-05, "loss": 0.0021, "step": 57600 }, { "epoch": 86.00746268656717, "grad_norm": 0.038818359375, "learning_rate": 1.4004480955937269e-05, "loss": 0.0015, "step": 57625 }, { "epoch": 86.04477611940298, "grad_norm": 0.0201416015625, "learning_rate": 1.3967139656460045e-05, "loss": 0.0015, "step": 57650 }, { "epoch": 86.08208955223881, "grad_norm": 0.0390625, "learning_rate": 1.3929798356982823e-05, "loss": 0.0017, "step": 57675 }, { "epoch": 86.11940298507463, "grad_norm": 0.044677734375, "learning_rate": 1.38924570575056e-05, "loss": 0.0016, "step": 57700 }, { "epoch": 86.15671641791045, "grad_norm": 0.01397705078125, "learning_rate": 1.385511575802838e-05, "loss": 0.0015, "step": 57725 }, { "epoch": 86.19402985074628, "grad_norm": 0.0279541015625, "learning_rate": 1.3817774458551158e-05, "loss": 0.0016, "step": 57750 }, { "epoch": 86.23134328358209, "grad_norm": 0.0361328125, "learning_rate": 1.3780433159073936e-05, "loss": 0.0016, "step": 57775 }, { "epoch": 86.26865671641791, "grad_norm": 0.030029296875, "learning_rate": 1.3743091859596715e-05, "loss": 0.0016, "step": 57800 }, { "epoch": 86.30597014925372, "grad_norm": 0.021240234375, "learning_rate": 1.3705750560119493e-05, "loss": 0.0014, "step": 57825 }, { "epoch": 86.34328358208955, "grad_norm": 0.045654296875, "learning_rate": 1.366840926064227e-05, "loss": 0.0016, "step": 57850 }, { "epoch": 86.38059701492537, "grad_norm": 0.031494140625, "learning_rate": 1.363106796116505e-05, "loss": 0.0018, "step": 57875 }, { "epoch": 86.41791044776119, "grad_norm": 0.03759765625, "learning_rate": 1.3593726661687828e-05, "loss": 0.0019, "step": 57900 }, { "epoch": 86.45522388059702, "grad_norm": 0.030029296875, "learning_rate": 1.3556385362210606e-05, "loss": 0.0017, "step": 57925 }, { "epoch": 86.49253731343283, "grad_norm": 0.032470703125, "learning_rate": 1.3519044062733382e-05, "loss": 0.0016, "step": 57950 }, { "epoch": 86.52985074626865, "grad_norm": 0.029541015625, "learning_rate": 1.3481702763256163e-05, "loss": 0.0015, "step": 57975 }, { "epoch": 86.56716417910448, "grad_norm": 0.03759765625, "learning_rate": 1.344436146377894e-05, "loss": 0.0017, "step": 58000 }, { "epoch": 86.6044776119403, "grad_norm": 0.0205078125, "learning_rate": 1.3407020164301717e-05, "loss": 0.0019, "step": 58025 }, { "epoch": 86.64179104477611, "grad_norm": 0.03369140625, "learning_rate": 1.3369678864824498e-05, "loss": 0.0016, "step": 58050 }, { "epoch": 86.67910447761194, "grad_norm": 0.04296875, "learning_rate": 1.3332337565347274e-05, "loss": 0.0017, "step": 58075 }, { "epoch": 86.71641791044776, "grad_norm": 0.0257568359375, "learning_rate": 1.3294996265870052e-05, "loss": 0.0016, "step": 58100 }, { "epoch": 86.75373134328358, "grad_norm": 0.0322265625, "learning_rate": 1.3257654966392833e-05, "loss": 0.0018, "step": 58125 }, { "epoch": 86.7910447761194, "grad_norm": 0.01904296875, "learning_rate": 1.322031366691561e-05, "loss": 0.0016, "step": 58150 }, { "epoch": 86.82835820895522, "grad_norm": 0.038330078125, "learning_rate": 1.3182972367438387e-05, "loss": 0.0015, "step": 58175 }, { "epoch": 86.86567164179104, "grad_norm": 0.02685546875, "learning_rate": 1.3145631067961165e-05, "loss": 0.0019, "step": 58200 }, { "epoch": 86.90298507462687, "grad_norm": 0.01092529296875, "learning_rate": 1.3108289768483944e-05, "loss": 0.0016, "step": 58225 }, { "epoch": 86.94029850746269, "grad_norm": 0.044677734375, "learning_rate": 1.3070948469006722e-05, "loss": 0.0015, "step": 58250 }, { "epoch": 86.9776119402985, "grad_norm": 0.02685546875, "learning_rate": 1.30336071695295e-05, "loss": 0.0017, "step": 58275 }, { "epoch": 87.01492537313433, "grad_norm": 0.03857421875, "learning_rate": 1.299626587005228e-05, "loss": 0.0016, "step": 58300 }, { "epoch": 87.05223880597015, "grad_norm": 0.02685546875, "learning_rate": 1.2958924570575057e-05, "loss": 0.0015, "step": 58325 }, { "epoch": 87.08955223880596, "grad_norm": 0.0194091796875, "learning_rate": 1.2921583271097833e-05, "loss": 0.0018, "step": 58350 }, { "epoch": 87.1268656716418, "grad_norm": 0.0281982421875, "learning_rate": 1.2884241971620614e-05, "loss": 0.0016, "step": 58375 }, { "epoch": 87.16417910447761, "grad_norm": 0.0361328125, "learning_rate": 1.2846900672143392e-05, "loss": 0.0015, "step": 58400 }, { "epoch": 87.20149253731343, "grad_norm": 0.031982421875, "learning_rate": 1.2809559372666168e-05, "loss": 0.0018, "step": 58425 }, { "epoch": 87.23880597014926, "grad_norm": 0.049072265625, "learning_rate": 1.2772218073188946e-05, "loss": 0.0014, "step": 58450 }, { "epoch": 87.27611940298507, "grad_norm": 0.03173828125, "learning_rate": 1.2734876773711727e-05, "loss": 0.0015, "step": 58475 }, { "epoch": 87.31343283582089, "grad_norm": 0.0255126953125, "learning_rate": 1.2697535474234503e-05, "loss": 0.0016, "step": 58500 }, { "epoch": 87.35074626865672, "grad_norm": 0.03369140625, "learning_rate": 1.2660194174757281e-05, "loss": 0.0017, "step": 58525 }, { "epoch": 87.38805970149254, "grad_norm": 0.0277099609375, "learning_rate": 1.262285287528006e-05, "loss": 0.0017, "step": 58550 }, { "epoch": 87.42537313432835, "grad_norm": 0.0264892578125, "learning_rate": 1.2585511575802838e-05, "loss": 0.0017, "step": 58575 }, { "epoch": 87.46268656716418, "grad_norm": 0.028076171875, "learning_rate": 1.2548170276325616e-05, "loss": 0.0016, "step": 58600 }, { "epoch": 87.5, "grad_norm": 0.03076171875, "learning_rate": 1.2510828976848396e-05, "loss": 0.0017, "step": 58625 }, { "epoch": 87.53731343283582, "grad_norm": 0.03125, "learning_rate": 1.2473487677371173e-05, "loss": 0.0016, "step": 58650 }, { "epoch": 87.57462686567165, "grad_norm": 0.035400390625, "learning_rate": 1.2436146377893951e-05, "loss": 0.0019, "step": 58675 }, { "epoch": 87.61194029850746, "grad_norm": 0.0308837890625, "learning_rate": 1.2398805078416729e-05, "loss": 0.0016, "step": 58700 }, { "epoch": 87.64925373134328, "grad_norm": 0.04150390625, "learning_rate": 1.2361463778939508e-05, "loss": 0.0016, "step": 58725 }, { "epoch": 87.68656716417911, "grad_norm": 0.03369140625, "learning_rate": 1.2324122479462286e-05, "loss": 0.0018, "step": 58750 }, { "epoch": 87.72388059701493, "grad_norm": 0.0380859375, "learning_rate": 1.2286781179985064e-05, "loss": 0.0016, "step": 58775 }, { "epoch": 87.76119402985074, "grad_norm": 0.0264892578125, "learning_rate": 1.2249439880507842e-05, "loss": 0.0018, "step": 58800 }, { "epoch": 87.79850746268657, "grad_norm": 0.0380859375, "learning_rate": 1.2212098581030621e-05, "loss": 0.0016, "step": 58825 }, { "epoch": 87.83582089552239, "grad_norm": 0.0216064453125, "learning_rate": 1.2174757281553399e-05, "loss": 0.0016, "step": 58850 }, { "epoch": 87.8731343283582, "grad_norm": 0.04541015625, "learning_rate": 1.2137415982076177e-05, "loss": 0.0016, "step": 58875 }, { "epoch": 87.91044776119404, "grad_norm": 0.02783203125, "learning_rate": 1.2100074682598955e-05, "loss": 0.0017, "step": 58900 }, { "epoch": 87.94776119402985, "grad_norm": 0.0289306640625, "learning_rate": 1.2062733383121732e-05, "loss": 0.0016, "step": 58925 }, { "epoch": 87.98507462686567, "grad_norm": 0.035888671875, "learning_rate": 1.2025392083644512e-05, "loss": 0.0017, "step": 58950 }, { "epoch": 88.0223880597015, "grad_norm": 0.021484375, "learning_rate": 1.198805078416729e-05, "loss": 0.0015, "step": 58975 }, { "epoch": 88.05970149253731, "grad_norm": 0.0341796875, "learning_rate": 1.1950709484690067e-05, "loss": 0.0018, "step": 59000 }, { "epoch": 88.09701492537313, "grad_norm": 0.033935546875, "learning_rate": 1.1913368185212847e-05, "loss": 0.0016, "step": 59025 }, { "epoch": 88.13432835820896, "grad_norm": 0.03369140625, "learning_rate": 1.1876026885735623e-05, "loss": 0.0013, "step": 59050 }, { "epoch": 88.17164179104478, "grad_norm": 0.05126953125, "learning_rate": 1.1838685586258402e-05, "loss": 0.0018, "step": 59075 }, { "epoch": 88.2089552238806, "grad_norm": 0.032958984375, "learning_rate": 1.180134428678118e-05, "loss": 0.0018, "step": 59100 }, { "epoch": 88.24626865671642, "grad_norm": 0.025634765625, "learning_rate": 1.1764002987303958e-05, "loss": 0.0014, "step": 59125 }, { "epoch": 88.28358208955224, "grad_norm": 0.0240478515625, "learning_rate": 1.1726661687826738e-05, "loss": 0.0017, "step": 59150 }, { "epoch": 88.32089552238806, "grad_norm": 0.02294921875, "learning_rate": 1.1689320388349515e-05, "loss": 0.0017, "step": 59175 }, { "epoch": 88.35820895522389, "grad_norm": 0.0208740234375, "learning_rate": 1.1651979088872293e-05, "loss": 0.0015, "step": 59200 }, { "epoch": 88.3955223880597, "grad_norm": 0.042236328125, "learning_rate": 1.1614637789395073e-05, "loss": 0.0018, "step": 59225 }, { "epoch": 88.43283582089552, "grad_norm": 0.0191650390625, "learning_rate": 1.1577296489917849e-05, "loss": 0.0016, "step": 59250 }, { "epoch": 88.47014925373135, "grad_norm": 0.03271484375, "learning_rate": 1.1539955190440628e-05, "loss": 0.0015, "step": 59275 }, { "epoch": 88.50746268656717, "grad_norm": 0.045654296875, "learning_rate": 1.1502613890963406e-05, "loss": 0.0017, "step": 59300 }, { "epoch": 88.54477611940298, "grad_norm": 0.0211181640625, "learning_rate": 1.1465272591486184e-05, "loss": 0.0015, "step": 59325 }, { "epoch": 88.58208955223881, "grad_norm": 0.0235595703125, "learning_rate": 1.1427931292008963e-05, "loss": 0.0018, "step": 59350 }, { "epoch": 88.61940298507463, "grad_norm": 0.0284423828125, "learning_rate": 1.1390589992531741e-05, "loss": 0.0014, "step": 59375 }, { "epoch": 88.65671641791045, "grad_norm": 0.041748046875, "learning_rate": 1.1353248693054519e-05, "loss": 0.0015, "step": 59400 }, { "epoch": 88.69402985074628, "grad_norm": 0.031982421875, "learning_rate": 1.1315907393577297e-05, "loss": 0.0016, "step": 59425 }, { "epoch": 88.73134328358209, "grad_norm": 0.0242919921875, "learning_rate": 1.1278566094100074e-05, "loss": 0.0019, "step": 59450 }, { "epoch": 88.76865671641791, "grad_norm": 0.01495361328125, "learning_rate": 1.1241224794622854e-05, "loss": 0.0017, "step": 59475 }, { "epoch": 88.80597014925372, "grad_norm": 0.036865234375, "learning_rate": 1.1203883495145632e-05, "loss": 0.0016, "step": 59500 }, { "epoch": 88.84328358208955, "grad_norm": 0.04541015625, "learning_rate": 1.116654219566841e-05, "loss": 0.0018, "step": 59525 }, { "epoch": 88.88059701492537, "grad_norm": 0.0301513671875, "learning_rate": 1.1129200896191189e-05, "loss": 0.0019, "step": 59550 }, { "epoch": 88.91791044776119, "grad_norm": 0.03173828125, "learning_rate": 1.1091859596713967e-05, "loss": 0.0016, "step": 59575 }, { "epoch": 88.95522388059702, "grad_norm": 0.03125, "learning_rate": 1.1054518297236744e-05, "loss": 0.0017, "step": 59600 }, { "epoch": 88.99253731343283, "grad_norm": 0.0279541015625, "learning_rate": 1.1017176997759522e-05, "loss": 0.0014, "step": 59625 }, { "epoch": 89.02985074626865, "grad_norm": 0.0205078125, "learning_rate": 1.0979835698282302e-05, "loss": 0.0014, "step": 59650 }, { "epoch": 89.06716417910448, "grad_norm": 0.017578125, "learning_rate": 1.094249439880508e-05, "loss": 0.0013, "step": 59675 }, { "epoch": 89.1044776119403, "grad_norm": 0.0299072265625, "learning_rate": 1.0905153099327857e-05, "loss": 0.002, "step": 59700 }, { "epoch": 89.14179104477611, "grad_norm": 0.0169677734375, "learning_rate": 1.0867811799850635e-05, "loss": 0.0013, "step": 59725 }, { "epoch": 89.17910447761194, "grad_norm": 0.02880859375, "learning_rate": 1.0830470500373413e-05, "loss": 0.0018, "step": 59750 }, { "epoch": 89.21641791044776, "grad_norm": 0.025146484375, "learning_rate": 1.0793129200896192e-05, "loss": 0.0017, "step": 59775 }, { "epoch": 89.25373134328358, "grad_norm": 0.0267333984375, "learning_rate": 1.075578790141897e-05, "loss": 0.0015, "step": 59800 }, { "epoch": 89.2910447761194, "grad_norm": 0.0257568359375, "learning_rate": 1.0718446601941748e-05, "loss": 0.002, "step": 59825 }, { "epoch": 89.32835820895522, "grad_norm": 0.027099609375, "learning_rate": 1.0681105302464527e-05, "loss": 0.0016, "step": 59850 }, { "epoch": 89.36567164179104, "grad_norm": 0.0177001953125, "learning_rate": 1.0643764002987303e-05, "loss": 0.0016, "step": 59875 }, { "epoch": 89.40298507462687, "grad_norm": 0.043212890625, "learning_rate": 1.0606422703510083e-05, "loss": 0.0014, "step": 59900 }, { "epoch": 89.44029850746269, "grad_norm": 0.037841796875, "learning_rate": 1.056908140403286e-05, "loss": 0.0016, "step": 59925 }, { "epoch": 89.4776119402985, "grad_norm": 0.0233154296875, "learning_rate": 1.0531740104555638e-05, "loss": 0.0017, "step": 59950 }, { "epoch": 89.51492537313433, "grad_norm": 0.0361328125, "learning_rate": 1.0494398805078418e-05, "loss": 0.0016, "step": 59975 }, { "epoch": 89.55223880597015, "grad_norm": 0.0184326171875, "learning_rate": 1.0457057505601196e-05, "loss": 0.0015, "step": 60000 }, { "epoch": 89.58955223880596, "grad_norm": 0.0299072265625, "learning_rate": 1.0419716206123974e-05, "loss": 0.0016, "step": 60025 }, { "epoch": 89.6268656716418, "grad_norm": 0.026611328125, "learning_rate": 1.0382374906646753e-05, "loss": 0.0016, "step": 60050 }, { "epoch": 89.66417910447761, "grad_norm": 0.0240478515625, "learning_rate": 1.0345033607169529e-05, "loss": 0.0015, "step": 60075 }, { "epoch": 89.70149253731343, "grad_norm": 0.020751953125, "learning_rate": 1.0307692307692309e-05, "loss": 0.0015, "step": 60100 }, { "epoch": 89.73880597014926, "grad_norm": 0.0205078125, "learning_rate": 1.0270351008215086e-05, "loss": 0.0015, "step": 60125 }, { "epoch": 89.77611940298507, "grad_norm": 0.041015625, "learning_rate": 1.0233009708737864e-05, "loss": 0.0016, "step": 60150 }, { "epoch": 89.81343283582089, "grad_norm": 0.0257568359375, "learning_rate": 1.0195668409260644e-05, "loss": 0.0015, "step": 60175 }, { "epoch": 89.85074626865672, "grad_norm": 0.045654296875, "learning_rate": 1.0158327109783421e-05, "loss": 0.0021, "step": 60200 }, { "epoch": 89.88805970149254, "grad_norm": 0.028076171875, "learning_rate": 1.01209858103062e-05, "loss": 0.0017, "step": 60225 }, { "epoch": 89.92537313432835, "grad_norm": 0.0341796875, "learning_rate": 1.0083644510828977e-05, "loss": 0.0016, "step": 60250 }, { "epoch": 89.96268656716418, "grad_norm": 0.025146484375, "learning_rate": 1.0046303211351755e-05, "loss": 0.0017, "step": 60275 }, { "epoch": 90.0, "grad_norm": 0.041015625, "learning_rate": 1.0008961911874534e-05, "loss": 0.0018, "step": 60300 }, { "epoch": 90.03731343283582, "grad_norm": 0.042236328125, "learning_rate": 9.971620612397312e-06, "loss": 0.0015, "step": 60325 }, { "epoch": 90.07462686567165, "grad_norm": 0.0291748046875, "learning_rate": 9.93427931292009e-06, "loss": 0.0015, "step": 60350 }, { "epoch": 90.11194029850746, "grad_norm": 0.034423828125, "learning_rate": 9.896938013442868e-06, "loss": 0.0016, "step": 60375 }, { "epoch": 90.14925373134328, "grad_norm": 0.0238037109375, "learning_rate": 9.859596713965647e-06, "loss": 0.0016, "step": 60400 }, { "epoch": 90.18656716417911, "grad_norm": 0.0269775390625, "learning_rate": 9.822255414488425e-06, "loss": 0.0017, "step": 60425 }, { "epoch": 90.22388059701493, "grad_norm": 0.0296630859375, "learning_rate": 9.784914115011203e-06, "loss": 0.0017, "step": 60450 }, { "epoch": 90.26119402985074, "grad_norm": 0.0194091796875, "learning_rate": 9.747572815533982e-06, "loss": 0.0017, "step": 60475 }, { "epoch": 90.29850746268657, "grad_norm": 0.030029296875, "learning_rate": 9.710231516056758e-06, "loss": 0.0015, "step": 60500 }, { "epoch": 90.33582089552239, "grad_norm": 0.0252685546875, "learning_rate": 9.672890216579538e-06, "loss": 0.0018, "step": 60525 }, { "epoch": 90.3731343283582, "grad_norm": 0.0299072265625, "learning_rate": 9.635548917102315e-06, "loss": 0.0019, "step": 60550 }, { "epoch": 90.41044776119404, "grad_norm": 0.041015625, "learning_rate": 9.598207617625093e-06, "loss": 0.0017, "step": 60575 }, { "epoch": 90.44776119402985, "grad_norm": 0.0380859375, "learning_rate": 9.560866318147873e-06, "loss": 0.0017, "step": 60600 }, { "epoch": 90.48507462686567, "grad_norm": 0.034423828125, "learning_rate": 9.523525018670649e-06, "loss": 0.0016, "step": 60625 }, { "epoch": 90.5223880597015, "grad_norm": 0.037841796875, "learning_rate": 9.486183719193428e-06, "loss": 0.0018, "step": 60650 }, { "epoch": 90.55970149253731, "grad_norm": 0.029541015625, "learning_rate": 9.448842419716208e-06, "loss": 0.0015, "step": 60675 }, { "epoch": 90.59701492537313, "grad_norm": 0.0166015625, "learning_rate": 9.411501120238984e-06, "loss": 0.0015, "step": 60700 }, { "epoch": 90.63432835820896, "grad_norm": 0.020263671875, "learning_rate": 9.374159820761763e-06, "loss": 0.0014, "step": 60725 }, { "epoch": 90.67164179104478, "grad_norm": 0.021728515625, "learning_rate": 9.336818521284541e-06, "loss": 0.0016, "step": 60750 }, { "epoch": 90.7089552238806, "grad_norm": 0.03466796875, "learning_rate": 9.299477221807319e-06, "loss": 0.0016, "step": 60775 }, { "epoch": 90.74626865671642, "grad_norm": 0.04150390625, "learning_rate": 9.262135922330098e-06, "loss": 0.0015, "step": 60800 }, { "epoch": 90.78358208955224, "grad_norm": 0.0264892578125, "learning_rate": 9.224794622852876e-06, "loss": 0.0017, "step": 60825 }, { "epoch": 90.82089552238806, "grad_norm": 0.0281982421875, "learning_rate": 9.187453323375654e-06, "loss": 0.0016, "step": 60850 }, { "epoch": 90.85820895522389, "grad_norm": 0.023681640625, "learning_rate": 9.150112023898432e-06, "loss": 0.0018, "step": 60875 }, { "epoch": 90.8955223880597, "grad_norm": 0.025390625, "learning_rate": 9.11277072442121e-06, "loss": 0.0014, "step": 60900 }, { "epoch": 90.93283582089552, "grad_norm": 0.046630859375, "learning_rate": 9.075429424943989e-06, "loss": 0.0017, "step": 60925 }, { "epoch": 90.97014925373135, "grad_norm": 0.037109375, "learning_rate": 9.038088125466767e-06, "loss": 0.0016, "step": 60950 }, { "epoch": 91.00746268656717, "grad_norm": 0.039794921875, "learning_rate": 9.000746825989545e-06, "loss": 0.0016, "step": 60975 }, { "epoch": 91.04477611940298, "grad_norm": 0.0284423828125, "learning_rate": 8.963405526512324e-06, "loss": 0.0016, "step": 61000 }, { "epoch": 91.08208955223881, "grad_norm": 0.029541015625, "learning_rate": 8.926064227035102e-06, "loss": 0.0016, "step": 61025 }, { "epoch": 91.11940298507463, "grad_norm": 0.025146484375, "learning_rate": 8.88872292755788e-06, "loss": 0.0018, "step": 61050 }, { "epoch": 91.15671641791045, "grad_norm": 0.026611328125, "learning_rate": 8.851381628080657e-06, "loss": 0.0016, "step": 61075 }, { "epoch": 91.19402985074628, "grad_norm": 0.04052734375, "learning_rate": 8.814040328603435e-06, "loss": 0.0018, "step": 61100 }, { "epoch": 91.23134328358209, "grad_norm": 0.03466796875, "learning_rate": 8.776699029126215e-06, "loss": 0.0016, "step": 61125 }, { "epoch": 91.26865671641791, "grad_norm": 0.0211181640625, "learning_rate": 8.739357729648992e-06, "loss": 0.0013, "step": 61150 }, { "epoch": 91.30597014925372, "grad_norm": 0.0439453125, "learning_rate": 8.70201643017177e-06, "loss": 0.0018, "step": 61175 }, { "epoch": 91.34328358208955, "grad_norm": 0.03173828125, "learning_rate": 8.664675130694548e-06, "loss": 0.0013, "step": 61200 }, { "epoch": 91.38059701492537, "grad_norm": 0.0263671875, "learning_rate": 8.627333831217327e-06, "loss": 0.0015, "step": 61225 }, { "epoch": 91.41791044776119, "grad_norm": 0.0296630859375, "learning_rate": 8.589992531740105e-06, "loss": 0.0015, "step": 61250 }, { "epoch": 91.45522388059702, "grad_norm": 0.03125, "learning_rate": 8.552651232262883e-06, "loss": 0.0017, "step": 61275 }, { "epoch": 91.49253731343283, "grad_norm": 0.0279541015625, "learning_rate": 8.515309932785662e-06, "loss": 0.0017, "step": 61300 }, { "epoch": 91.52985074626865, "grad_norm": 0.027587890625, "learning_rate": 8.477968633308439e-06, "loss": 0.0018, "step": 61325 }, { "epoch": 91.56716417910448, "grad_norm": 0.01416015625, "learning_rate": 8.440627333831218e-06, "loss": 0.0015, "step": 61350 }, { "epoch": 91.6044776119403, "grad_norm": 0.021728515625, "learning_rate": 8.403286034353996e-06, "loss": 0.0014, "step": 61375 }, { "epoch": 91.64179104477611, "grad_norm": 0.02880859375, "learning_rate": 8.365944734876774e-06, "loss": 0.0016, "step": 61400 }, { "epoch": 91.67910447761194, "grad_norm": 0.0208740234375, "learning_rate": 8.328603435399553e-06, "loss": 0.0015, "step": 61425 }, { "epoch": 91.71641791044776, "grad_norm": 0.035400390625, "learning_rate": 8.29126213592233e-06, "loss": 0.0017, "step": 61450 }, { "epoch": 91.75373134328358, "grad_norm": 0.0244140625, "learning_rate": 8.253920836445109e-06, "loss": 0.0015, "step": 61475 }, { "epoch": 91.7910447761194, "grad_norm": 0.033935546875, "learning_rate": 8.216579536967888e-06, "loss": 0.0018, "step": 61500 }, { "epoch": 91.82835820895522, "grad_norm": 0.01348876953125, "learning_rate": 8.179238237490664e-06, "loss": 0.0016, "step": 61525 }, { "epoch": 91.86567164179104, "grad_norm": 0.0302734375, "learning_rate": 8.141896938013444e-06, "loss": 0.0016, "step": 61550 }, { "epoch": 91.90298507462687, "grad_norm": 0.038330078125, "learning_rate": 8.104555638536222e-06, "loss": 0.0018, "step": 61575 }, { "epoch": 91.94029850746269, "grad_norm": 0.0240478515625, "learning_rate": 8.067214339059e-06, "loss": 0.0017, "step": 61600 }, { "epoch": 91.9776119402985, "grad_norm": 0.03515625, "learning_rate": 8.029873039581779e-06, "loss": 0.0015, "step": 61625 }, { "epoch": 92.01492537313433, "grad_norm": 0.012451171875, "learning_rate": 7.992531740104557e-06, "loss": 0.0018, "step": 61650 }, { "epoch": 92.05223880597015, "grad_norm": 0.036865234375, "learning_rate": 7.955190440627334e-06, "loss": 0.0016, "step": 61675 }, { "epoch": 92.08955223880596, "grad_norm": 0.0498046875, "learning_rate": 7.917849141150112e-06, "loss": 0.0016, "step": 61700 }, { "epoch": 92.1268656716418, "grad_norm": 0.0281982421875, "learning_rate": 7.88050784167289e-06, "loss": 0.0016, "step": 61725 }, { "epoch": 92.16417910447761, "grad_norm": 0.03759765625, "learning_rate": 7.84316654219567e-06, "loss": 0.0016, "step": 61750 }, { "epoch": 92.20149253731343, "grad_norm": 0.02685546875, "learning_rate": 7.805825242718447e-06, "loss": 0.0016, "step": 61775 }, { "epoch": 92.23880597014926, "grad_norm": 0.033203125, "learning_rate": 7.768483943241225e-06, "loss": 0.0016, "step": 61800 }, { "epoch": 92.27611940298507, "grad_norm": 0.037109375, "learning_rate": 7.731142643764003e-06, "loss": 0.0016, "step": 61825 }, { "epoch": 92.31343283582089, "grad_norm": 0.032470703125, "learning_rate": 7.693801344286782e-06, "loss": 0.0016, "step": 61850 }, { "epoch": 92.35074626865672, "grad_norm": 0.0211181640625, "learning_rate": 7.65646004480956e-06, "loss": 0.0016, "step": 61875 }, { "epoch": 92.38805970149254, "grad_norm": 0.026123046875, "learning_rate": 7.619118745332338e-06, "loss": 0.0014, "step": 61900 }, { "epoch": 92.42537313432835, "grad_norm": 0.0302734375, "learning_rate": 7.581777445855116e-06, "loss": 0.0015, "step": 61925 }, { "epoch": 92.46268656716418, "grad_norm": 0.032958984375, "learning_rate": 7.544436146377893e-06, "loss": 0.0016, "step": 61950 }, { "epoch": 92.5, "grad_norm": 0.0234375, "learning_rate": 7.507094846900673e-06, "loss": 0.0017, "step": 61975 }, { "epoch": 92.53731343283582, "grad_norm": 0.0235595703125, "learning_rate": 7.4697535474234514e-06, "loss": 0.0019, "step": 62000 }, { "epoch": 92.57462686567165, "grad_norm": 0.033935546875, "learning_rate": 7.432412247946228e-06, "loss": 0.0016, "step": 62025 }, { "epoch": 92.61194029850746, "grad_norm": 0.0341796875, "learning_rate": 7.395070948469007e-06, "loss": 0.0015, "step": 62050 }, { "epoch": 92.64925373134328, "grad_norm": 0.0186767578125, "learning_rate": 7.357729648991785e-06, "loss": 0.0015, "step": 62075 }, { "epoch": 92.68656716417911, "grad_norm": 0.03466796875, "learning_rate": 7.3203883495145634e-06, "loss": 0.0015, "step": 62100 }, { "epoch": 92.72388059701493, "grad_norm": 0.051513671875, "learning_rate": 7.283047050037342e-06, "loss": 0.0015, "step": 62125 }, { "epoch": 92.76119402985074, "grad_norm": 0.0294189453125, "learning_rate": 7.24570575056012e-06, "loss": 0.0018, "step": 62150 }, { "epoch": 92.79850746268657, "grad_norm": 0.04541015625, "learning_rate": 7.2083644510828985e-06, "loss": 0.0018, "step": 62175 }, { "epoch": 92.83582089552239, "grad_norm": 0.031982421875, "learning_rate": 7.171023151605675e-06, "loss": 0.0017, "step": 62200 }, { "epoch": 92.8731343283582, "grad_norm": 0.042724609375, "learning_rate": 7.133681852128454e-06, "loss": 0.0017, "step": 62225 }, { "epoch": 92.91044776119404, "grad_norm": 0.039794921875, "learning_rate": 7.096340552651233e-06, "loss": 0.0017, "step": 62250 }, { "epoch": 92.94776119402985, "grad_norm": 0.06396484375, "learning_rate": 7.0589992531740105e-06, "loss": 0.0016, "step": 62275 }, { "epoch": 92.98507462686567, "grad_norm": 0.02587890625, "learning_rate": 7.021657953696789e-06, "loss": 0.0016, "step": 62300 }, { "epoch": 93.0223880597015, "grad_norm": 0.03466796875, "learning_rate": 6.984316654219567e-06, "loss": 0.0017, "step": 62325 }, { "epoch": 93.05970149253731, "grad_norm": 0.036376953125, "learning_rate": 6.9469753547423455e-06, "loss": 0.0016, "step": 62350 }, { "epoch": 93.09701492537313, "grad_norm": 0.0322265625, "learning_rate": 6.909634055265124e-06, "loss": 0.0017, "step": 62375 }, { "epoch": 93.13432835820896, "grad_norm": 0.041748046875, "learning_rate": 6.872292755787901e-06, "loss": 0.0017, "step": 62400 }, { "epoch": 93.17164179104478, "grad_norm": 0.0322265625, "learning_rate": 6.83495145631068e-06, "loss": 0.0015, "step": 62425 }, { "epoch": 93.2089552238806, "grad_norm": 0.01495361328125, "learning_rate": 6.797610156833459e-06, "loss": 0.0016, "step": 62450 }, { "epoch": 93.24626865671642, "grad_norm": 0.0308837890625, "learning_rate": 6.760268857356236e-06, "loss": 0.0017, "step": 62475 }, { "epoch": 93.28358208955224, "grad_norm": 0.04296875, "learning_rate": 6.722927557879015e-06, "loss": 0.0018, "step": 62500 }, { "epoch": 93.32089552238806, "grad_norm": 0.033447265625, "learning_rate": 6.6855862584017925e-06, "loss": 0.0015, "step": 62525 }, { "epoch": 93.35820895522389, "grad_norm": 0.033447265625, "learning_rate": 6.648244958924571e-06, "loss": 0.0016, "step": 62550 }, { "epoch": 93.3955223880597, "grad_norm": 0.033935546875, "learning_rate": 6.61090365944735e-06, "loss": 0.0013, "step": 62575 }, { "epoch": 93.43283582089552, "grad_norm": 0.0291748046875, "learning_rate": 6.573562359970127e-06, "loss": 0.0015, "step": 62600 }, { "epoch": 93.47014925373135, "grad_norm": 0.038818359375, "learning_rate": 6.536221060492906e-06, "loss": 0.0018, "step": 62625 }, { "epoch": 93.50746268656717, "grad_norm": 0.0240478515625, "learning_rate": 6.498879761015683e-06, "loss": 0.0015, "step": 62650 }, { "epoch": 93.54477611940298, "grad_norm": 0.0220947265625, "learning_rate": 6.461538461538462e-06, "loss": 0.0014, "step": 62675 }, { "epoch": 93.58208955223881, "grad_norm": 0.035888671875, "learning_rate": 6.42419716206124e-06, "loss": 0.0019, "step": 62700 }, { "epoch": 93.61940298507463, "grad_norm": 0.031494140625, "learning_rate": 6.386855862584018e-06, "loss": 0.0015, "step": 62725 }, { "epoch": 93.65671641791045, "grad_norm": 0.0191650390625, "learning_rate": 6.349514563106797e-06, "loss": 0.0016, "step": 62750 }, { "epoch": 93.69402985074628, "grad_norm": 0.042236328125, "learning_rate": 6.312173263629574e-06, "loss": 0.0016, "step": 62775 }, { "epoch": 93.73134328358209, "grad_norm": 0.035888671875, "learning_rate": 6.274831964152353e-06, "loss": 0.0015, "step": 62800 }, { "epoch": 93.76865671641791, "grad_norm": 0.030517578125, "learning_rate": 6.237490664675131e-06, "loss": 0.0018, "step": 62825 }, { "epoch": 93.80597014925372, "grad_norm": 0.0218505859375, "learning_rate": 6.200149365197909e-06, "loss": 0.0017, "step": 62850 }, { "epoch": 93.84328358208955, "grad_norm": 0.03564453125, "learning_rate": 6.162808065720687e-06, "loss": 0.0016, "step": 62875 }, { "epoch": 93.88059701492537, "grad_norm": 0.0286865234375, "learning_rate": 6.125466766243466e-06, "loss": 0.0015, "step": 62900 }, { "epoch": 93.91791044776119, "grad_norm": 0.02880859375, "learning_rate": 6.088125466766244e-06, "loss": 0.0015, "step": 62925 }, { "epoch": 93.95522388059702, "grad_norm": 0.033447265625, "learning_rate": 6.050784167289022e-06, "loss": 0.0018, "step": 62950 }, { "epoch": 93.99253731343283, "grad_norm": 0.03076171875, "learning_rate": 6.0134428678118e-06, "loss": 0.0015, "step": 62975 }, { "epoch": 94.02985074626865, "grad_norm": 0.0260009765625, "learning_rate": 5.976101568334579e-06, "loss": 0.0015, "step": 63000 }, { "epoch": 94.06716417910448, "grad_norm": 0.0250244140625, "learning_rate": 5.938760268857357e-06, "loss": 0.0015, "step": 63025 }, { "epoch": 94.1044776119403, "grad_norm": 0.044677734375, "learning_rate": 5.9014189693801344e-06, "loss": 0.0018, "step": 63050 }, { "epoch": 94.14179104477611, "grad_norm": 0.0003147125244140625, "learning_rate": 5.864077669902913e-06, "loss": 0.0014, "step": 63075 }, { "epoch": 94.17910447761194, "grad_norm": 0.02880859375, "learning_rate": 5.826736370425691e-06, "loss": 0.0016, "step": 63100 }, { "epoch": 94.21641791044776, "grad_norm": 0.040283203125, "learning_rate": 5.7893950709484695e-06, "loss": 0.0015, "step": 63125 }, { "epoch": 94.25373134328358, "grad_norm": 0.031494140625, "learning_rate": 5.752053771471247e-06, "loss": 0.0016, "step": 63150 }, { "epoch": 94.2910447761194, "grad_norm": 0.051025390625, "learning_rate": 5.714712471994026e-06, "loss": 0.0017, "step": 63175 }, { "epoch": 94.32835820895522, "grad_norm": 0.03466796875, "learning_rate": 5.677371172516804e-06, "loss": 0.0014, "step": 63200 }, { "epoch": 94.36567164179104, "grad_norm": 0.01373291015625, "learning_rate": 5.6400298730395815e-06, "loss": 0.0015, "step": 63225 }, { "epoch": 94.40298507462687, "grad_norm": 0.0341796875, "learning_rate": 5.60268857356236e-06, "loss": 0.0015, "step": 63250 }, { "epoch": 94.44029850746269, "grad_norm": 0.021240234375, "learning_rate": 5.565347274085139e-06, "loss": 0.0015, "step": 63275 }, { "epoch": 94.4776119402985, "grad_norm": 0.0255126953125, "learning_rate": 5.5280059746079165e-06, "loss": 0.0016, "step": 63300 }, { "epoch": 94.51492537313433, "grad_norm": 0.041015625, "learning_rate": 5.490664675130694e-06, "loss": 0.0017, "step": 63325 }, { "epoch": 94.55223880597015, "grad_norm": 0.024658203125, "learning_rate": 5.453323375653473e-06, "loss": 0.0015, "step": 63350 }, { "epoch": 94.58955223880596, "grad_norm": 0.0244140625, "learning_rate": 5.4159820761762516e-06, "loss": 0.0016, "step": 63375 }, { "epoch": 94.6268656716418, "grad_norm": 0.0380859375, "learning_rate": 5.378640776699029e-06, "loss": 0.0019, "step": 63400 }, { "epoch": 94.66417910447761, "grad_norm": 0.0296630859375, "learning_rate": 5.341299477221807e-06, "loss": 0.0018, "step": 63425 }, { "epoch": 94.70149253731343, "grad_norm": 0.047607421875, "learning_rate": 5.303958177744586e-06, "loss": 0.0018, "step": 63450 }, { "epoch": 94.73880597014926, "grad_norm": 0.035400390625, "learning_rate": 5.266616878267364e-06, "loss": 0.0017, "step": 63475 }, { "epoch": 94.77611940298507, "grad_norm": 0.039794921875, "learning_rate": 5.229275578790142e-06, "loss": 0.0016, "step": 63500 }, { "epoch": 94.81343283582089, "grad_norm": 0.033447265625, "learning_rate": 5.191934279312921e-06, "loss": 0.0017, "step": 63525 }, { "epoch": 94.85074626865672, "grad_norm": 0.023193359375, "learning_rate": 5.154592979835699e-06, "loss": 0.0016, "step": 63550 }, { "epoch": 94.88805970149254, "grad_norm": 0.020263671875, "learning_rate": 5.117251680358476e-06, "loss": 0.0014, "step": 63575 }, { "epoch": 94.92537313432835, "grad_norm": 0.035888671875, "learning_rate": 5.079910380881255e-06, "loss": 0.0017, "step": 63600 }, { "epoch": 94.96268656716418, "grad_norm": 0.038818359375, "learning_rate": 5.042569081404034e-06, "loss": 0.0018, "step": 63625 }, { "epoch": 95.0, "grad_norm": 0.037841796875, "learning_rate": 5.005227781926811e-06, "loss": 0.0017, "step": 63650 }, { "epoch": 95.03731343283582, "grad_norm": 0.0196533203125, "learning_rate": 4.967886482449589e-06, "loss": 0.0015, "step": 63675 }, { "epoch": 95.07462686567165, "grad_norm": 0.0296630859375, "learning_rate": 4.930545182972368e-06, "loss": 0.0018, "step": 63700 }, { "epoch": 95.11194029850746, "grad_norm": 0.0322265625, "learning_rate": 4.8932038834951465e-06, "loss": 0.0015, "step": 63725 }, { "epoch": 95.14925373134328, "grad_norm": 0.033935546875, "learning_rate": 4.855862584017924e-06, "loss": 0.0014, "step": 63750 }, { "epoch": 95.18656716417911, "grad_norm": 0.041259765625, "learning_rate": 4.818521284540702e-06, "loss": 0.0017, "step": 63775 }, { "epoch": 95.22388059701493, "grad_norm": 0.0263671875, "learning_rate": 4.781179985063481e-06, "loss": 0.0014, "step": 63800 }, { "epoch": 95.26119402985074, "grad_norm": 0.027587890625, "learning_rate": 4.7438386855862584e-06, "loss": 0.0017, "step": 63825 }, { "epoch": 95.29850746268657, "grad_norm": 0.03076171875, "learning_rate": 4.706497386109037e-06, "loss": 0.0015, "step": 63850 }, { "epoch": 95.33582089552239, "grad_norm": 0.03369140625, "learning_rate": 4.669156086631815e-06, "loss": 0.0016, "step": 63875 }, { "epoch": 95.3731343283582, "grad_norm": 0.03125, "learning_rate": 4.6318147871545935e-06, "loss": 0.0015, "step": 63900 }, { "epoch": 95.41044776119404, "grad_norm": 0.0286865234375, "learning_rate": 4.594473487677371e-06, "loss": 0.0018, "step": 63925 }, { "epoch": 95.44776119402985, "grad_norm": 0.0250244140625, "learning_rate": 4.557132188200149e-06, "loss": 0.0017, "step": 63950 }, { "epoch": 95.48507462686567, "grad_norm": 0.029541015625, "learning_rate": 4.519790888722928e-06, "loss": 0.0016, "step": 63975 }, { "epoch": 95.5223880597015, "grad_norm": 0.0419921875, "learning_rate": 4.482449589245706e-06, "loss": 0.0018, "step": 64000 }, { "epoch": 95.55970149253731, "grad_norm": 0.036865234375, "learning_rate": 4.445108289768484e-06, "loss": 0.0016, "step": 64025 }, { "epoch": 95.59701492537313, "grad_norm": 0.04345703125, "learning_rate": 4.407766990291262e-06, "loss": 0.0014, "step": 64050 }, { "epoch": 95.63432835820896, "grad_norm": 0.0306396484375, "learning_rate": 4.3704256908140405e-06, "loss": 0.0018, "step": 64075 }, { "epoch": 95.67164179104478, "grad_norm": 0.03857421875, "learning_rate": 4.333084391336819e-06, "loss": 0.0014, "step": 64100 }, { "epoch": 95.7089552238806, "grad_norm": 0.0216064453125, "learning_rate": 4.295743091859597e-06, "loss": 0.0017, "step": 64125 }, { "epoch": 95.74626865671642, "grad_norm": 0.0291748046875, "learning_rate": 4.258401792382375e-06, "loss": 0.0016, "step": 64150 }, { "epoch": 95.78358208955224, "grad_norm": 0.051025390625, "learning_rate": 4.221060492905153e-06, "loss": 0.0019, "step": 64175 }, { "epoch": 95.82089552238806, "grad_norm": 0.025390625, "learning_rate": 4.183719193427932e-06, "loss": 0.0016, "step": 64200 }, { "epoch": 95.85820895522389, "grad_norm": 0.037109375, "learning_rate": 4.14637789395071e-06, "loss": 0.0016, "step": 64225 }, { "epoch": 95.8955223880597, "grad_norm": 0.0250244140625, "learning_rate": 4.1090365944734875e-06, "loss": 0.0015, "step": 64250 }, { "epoch": 95.93283582089552, "grad_norm": 0.053466796875, "learning_rate": 4.071695294996266e-06, "loss": 0.0016, "step": 64275 }, { "epoch": 95.97014925373135, "grad_norm": 0.043212890625, "learning_rate": 4.034353995519044e-06, "loss": 0.0015, "step": 64300 }, { "epoch": 96.00746268656717, "grad_norm": 0.020263671875, "learning_rate": 3.997012696041823e-06, "loss": 0.0015, "step": 64325 }, { "epoch": 96.04477611940298, "grad_norm": 0.03955078125, "learning_rate": 3.959671396564601e-06, "loss": 0.0014, "step": 64350 }, { "epoch": 96.08208955223881, "grad_norm": 0.0272216796875, "learning_rate": 3.922330097087379e-06, "loss": 0.0016, "step": 64375 }, { "epoch": 96.11940298507463, "grad_norm": 0.033935546875, "learning_rate": 3.884988797610157e-06, "loss": 0.0016, "step": 64400 }, { "epoch": 96.15671641791045, "grad_norm": 0.021728515625, "learning_rate": 3.8476474981329346e-06, "loss": 0.0016, "step": 64425 }, { "epoch": 96.19402985074628, "grad_norm": 0.0419921875, "learning_rate": 3.8103061986557136e-06, "loss": 0.0018, "step": 64450 }, { "epoch": 96.23134328358209, "grad_norm": 0.0272216796875, "learning_rate": 3.772964899178492e-06, "loss": 0.0018, "step": 64475 }, { "epoch": 96.26865671641791, "grad_norm": 0.02490234375, "learning_rate": 3.7356235997012696e-06, "loss": 0.0015, "step": 64500 }, { "epoch": 96.30597014925372, "grad_norm": 0.03759765625, "learning_rate": 3.698282300224048e-06, "loss": 0.0017, "step": 64525 }, { "epoch": 96.34328358208955, "grad_norm": 0.02685546875, "learning_rate": 3.660941000746826e-06, "loss": 0.0016, "step": 64550 }, { "epoch": 96.38059701492537, "grad_norm": 0.037109375, "learning_rate": 3.6235997012696047e-06, "loss": 0.0014, "step": 64575 }, { "epoch": 96.41791044776119, "grad_norm": 0.034912109375, "learning_rate": 3.586258401792383e-06, "loss": 0.0017, "step": 64600 }, { "epoch": 96.45522388059702, "grad_norm": 0.04248046875, "learning_rate": 3.5489171023151606e-06, "loss": 0.0015, "step": 64625 }, { "epoch": 96.49253731343283, "grad_norm": 0.02685546875, "learning_rate": 3.511575802837939e-06, "loss": 0.0017, "step": 64650 }, { "epoch": 96.52985074626865, "grad_norm": 0.042236328125, "learning_rate": 3.4742345033607166e-06, "loss": 0.0016, "step": 64675 }, { "epoch": 96.56716417910448, "grad_norm": 0.028564453125, "learning_rate": 3.4368932038834957e-06, "loss": 0.0016, "step": 64700 }, { "epoch": 96.6044776119403, "grad_norm": 0.0247802734375, "learning_rate": 3.3995519044062735e-06, "loss": 0.0016, "step": 64725 }, { "epoch": 96.64179104477611, "grad_norm": 0.0361328125, "learning_rate": 3.3622106049290517e-06, "loss": 0.0016, "step": 64750 }, { "epoch": 96.67910447761194, "grad_norm": 0.02490234375, "learning_rate": 3.32486930545183e-06, "loss": 0.0018, "step": 64775 }, { "epoch": 96.71641791044776, "grad_norm": 0.04296875, "learning_rate": 3.2875280059746077e-06, "loss": 0.0017, "step": 64800 }, { "epoch": 96.75373134328358, "grad_norm": 0.0390625, "learning_rate": 3.2501867064973863e-06, "loss": 0.0016, "step": 64825 }, { "epoch": 96.7910447761194, "grad_norm": 0.031494140625, "learning_rate": 3.2128454070201645e-06, "loss": 0.0016, "step": 64850 }, { "epoch": 96.82835820895522, "grad_norm": 0.0242919921875, "learning_rate": 3.1755041075429427e-06, "loss": 0.0016, "step": 64875 }, { "epoch": 96.86567164179104, "grad_norm": 0.03173828125, "learning_rate": 3.1381628080657205e-06, "loss": 0.0015, "step": 64900 }, { "epoch": 96.90298507462687, "grad_norm": 0.01348876953125, "learning_rate": 3.100821508588499e-06, "loss": 0.0017, "step": 64925 }, { "epoch": 96.94029850746269, "grad_norm": 0.036376953125, "learning_rate": 3.063480209111277e-06, "loss": 0.0016, "step": 64950 }, { "epoch": 96.9776119402985, "grad_norm": 0.0252685546875, "learning_rate": 3.0261389096340555e-06, "loss": 0.0017, "step": 64975 }, { "epoch": 97.01492537313433, "grad_norm": 0.0341796875, "learning_rate": 2.9887976101568333e-06, "loss": 0.0015, "step": 65000 }, { "epoch": 97.05223880597015, "grad_norm": 0.0224609375, "learning_rate": 2.951456310679612e-06, "loss": 0.0017, "step": 65025 }, { "epoch": 97.08955223880596, "grad_norm": 0.03173828125, "learning_rate": 2.91411501120239e-06, "loss": 0.0015, "step": 65050 }, { "epoch": 97.1268656716418, "grad_norm": 0.03662109375, "learning_rate": 2.876773711725168e-06, "loss": 0.0018, "step": 65075 }, { "epoch": 97.16417910447761, "grad_norm": 0.033203125, "learning_rate": 2.8394324122479466e-06, "loss": 0.0017, "step": 65100 }, { "epoch": 97.20149253731343, "grad_norm": 0.0234375, "learning_rate": 2.8020911127707244e-06, "loss": 0.0013, "step": 65125 }, { "epoch": 97.23880597014926, "grad_norm": 0.0234375, "learning_rate": 2.764749813293503e-06, "loss": 0.0015, "step": 65150 }, { "epoch": 97.27611940298507, "grad_norm": 0.0419921875, "learning_rate": 2.7274085138162808e-06, "loss": 0.0014, "step": 65175 }, { "epoch": 97.31343283582089, "grad_norm": 0.0322265625, "learning_rate": 2.6900672143390594e-06, "loss": 0.0019, "step": 65200 }, { "epoch": 97.35074626865672, "grad_norm": 0.037353515625, "learning_rate": 2.652725914861837e-06, "loss": 0.0017, "step": 65225 }, { "epoch": 97.38805970149254, "grad_norm": 0.048095703125, "learning_rate": 2.6153846153846154e-06, "loss": 0.0017, "step": 65250 }, { "epoch": 97.42537313432835, "grad_norm": 0.025390625, "learning_rate": 2.5780433159073936e-06, "loss": 0.0017, "step": 65275 }, { "epoch": 97.46268656716418, "grad_norm": 0.02734375, "learning_rate": 2.540702016430172e-06, "loss": 0.0014, "step": 65300 }, { "epoch": 97.5, "grad_norm": 0.032958984375, "learning_rate": 2.50336071695295e-06, "loss": 0.0016, "step": 65325 }, { "epoch": 97.53731343283582, "grad_norm": 0.0252685546875, "learning_rate": 2.4660194174757282e-06, "loss": 0.0016, "step": 65350 }, { "epoch": 97.57462686567165, "grad_norm": 0.047607421875, "learning_rate": 2.4286781179985064e-06, "loss": 0.0014, "step": 65375 }, { "epoch": 97.61194029850746, "grad_norm": 0.038818359375, "learning_rate": 2.3913368185212846e-06, "loss": 0.0015, "step": 65400 }, { "epoch": 97.64925373134328, "grad_norm": 0.040771484375, "learning_rate": 2.353995519044063e-06, "loss": 0.0014, "step": 65425 }, { "epoch": 97.68656716417911, "grad_norm": 0.0301513671875, "learning_rate": 2.316654219566841e-06, "loss": 0.0017, "step": 65450 }, { "epoch": 97.72388059701493, "grad_norm": 0.0264892578125, "learning_rate": 2.2793129200896193e-06, "loss": 0.0016, "step": 65475 }, { "epoch": 97.76119402985074, "grad_norm": 0.03271484375, "learning_rate": 2.2419716206123975e-06, "loss": 0.0016, "step": 65500 }, { "epoch": 97.79850746268657, "grad_norm": 0.0216064453125, "learning_rate": 2.2046303211351757e-06, "loss": 0.0016, "step": 65525 }, { "epoch": 97.83582089552239, "grad_norm": 0.0281982421875, "learning_rate": 2.167289021657954e-06, "loss": 0.0016, "step": 65550 }, { "epoch": 97.8731343283582, "grad_norm": 0.035400390625, "learning_rate": 2.129947722180732e-06, "loss": 0.0015, "step": 65575 }, { "epoch": 97.91044776119404, "grad_norm": 0.0400390625, "learning_rate": 2.0926064227035103e-06, "loss": 0.0016, "step": 65600 }, { "epoch": 97.94776119402985, "grad_norm": 0.04052734375, "learning_rate": 2.0552651232262885e-06, "loss": 0.0017, "step": 65625 }, { "epoch": 97.98507462686567, "grad_norm": 0.0279541015625, "learning_rate": 2.0179238237490667e-06, "loss": 0.0016, "step": 65650 }, { "epoch": 98.0223880597015, "grad_norm": 0.0302734375, "learning_rate": 1.9805825242718445e-06, "loss": 0.0016, "step": 65675 }, { "epoch": 98.05970149253731, "grad_norm": 0.03564453125, "learning_rate": 1.943241224794623e-06, "loss": 0.0016, "step": 65700 }, { "epoch": 98.09701492537313, "grad_norm": 0.0274658203125, "learning_rate": 1.9058999253174011e-06, "loss": 0.0018, "step": 65725 }, { "epoch": 98.13432835820896, "grad_norm": 0.034912109375, "learning_rate": 1.8685586258401795e-06, "loss": 0.0014, "step": 65750 }, { "epoch": 98.17164179104478, "grad_norm": 0.0272216796875, "learning_rate": 1.8312173263629575e-06, "loss": 0.0017, "step": 65775 }, { "epoch": 98.2089552238806, "grad_norm": 0.04736328125, "learning_rate": 1.7938760268857355e-06, "loss": 0.0016, "step": 65800 }, { "epoch": 98.24626865671642, "grad_norm": 0.03759765625, "learning_rate": 1.756534727408514e-06, "loss": 0.0016, "step": 65825 }, { "epoch": 98.28358208955224, "grad_norm": 0.04052734375, "learning_rate": 1.719193427931292e-06, "loss": 0.0016, "step": 65850 }, { "epoch": 98.32089552238806, "grad_norm": 0.01458740234375, "learning_rate": 1.6818521284540704e-06, "loss": 0.0016, "step": 65875 }, { "epoch": 98.35820895522389, "grad_norm": 0.0294189453125, "learning_rate": 1.6445108289768484e-06, "loss": 0.0018, "step": 65900 }, { "epoch": 98.3955223880597, "grad_norm": 0.048583984375, "learning_rate": 1.6071695294996268e-06, "loss": 0.0015, "step": 65925 }, { "epoch": 98.43283582089552, "grad_norm": 0.034423828125, "learning_rate": 1.5698282300224048e-06, "loss": 0.0015, "step": 65950 }, { "epoch": 98.47014925373135, "grad_norm": 0.031494140625, "learning_rate": 1.5324869305451832e-06, "loss": 0.0017, "step": 65975 }, { "epoch": 98.50746268656717, "grad_norm": 0.02392578125, "learning_rate": 1.4951456310679614e-06, "loss": 0.0017, "step": 66000 }, { "epoch": 98.54477611940298, "grad_norm": 0.032958984375, "learning_rate": 1.4578043315907394e-06, "loss": 0.0017, "step": 66025 }, { "epoch": 98.58208955223881, "grad_norm": 0.030517578125, "learning_rate": 1.4204630321135176e-06, "loss": 0.0015, "step": 66050 }, { "epoch": 98.61940298507463, "grad_norm": 0.034912109375, "learning_rate": 1.3831217326362958e-06, "loss": 0.0015, "step": 66075 }, { "epoch": 98.65671641791045, "grad_norm": 0.048095703125, "learning_rate": 1.345780433159074e-06, "loss": 0.0017, "step": 66100 }, { "epoch": 98.69402985074628, "grad_norm": 0.0301513671875, "learning_rate": 1.3084391336818522e-06, "loss": 0.0016, "step": 66125 }, { "epoch": 98.73134328358209, "grad_norm": 0.036376953125, "learning_rate": 1.2710978342046304e-06, "loss": 0.0014, "step": 66150 }, { "epoch": 98.76865671641791, "grad_norm": 0.0194091796875, "learning_rate": 1.2337565347274084e-06, "loss": 0.0015, "step": 66175 }, { "epoch": 98.80597014925372, "grad_norm": 0.033935546875, "learning_rate": 1.1964152352501866e-06, "loss": 0.0017, "step": 66200 }, { "epoch": 98.84328358208955, "grad_norm": 0.030029296875, "learning_rate": 1.159073935772965e-06, "loss": 0.0016, "step": 66225 }, { "epoch": 98.88059701492537, "grad_norm": 0.02392578125, "learning_rate": 1.1217326362957433e-06, "loss": 0.0019, "step": 66250 }, { "epoch": 98.91791044776119, "grad_norm": 0.0272216796875, "learning_rate": 1.0843913368185215e-06, "loss": 0.0015, "step": 66275 }, { "epoch": 98.95522388059702, "grad_norm": 0.0263671875, "learning_rate": 1.0470500373412997e-06, "loss": 0.0014, "step": 66300 }, { "epoch": 98.99253731343283, "grad_norm": 0.03466796875, "learning_rate": 1.0097087378640777e-06, "loss": 0.0018, "step": 66325 }, { "epoch": 99.02985074626865, "grad_norm": 0.041015625, "learning_rate": 9.723674383868559e-07, "loss": 0.0016, "step": 66350 }, { "epoch": 99.06716417910448, "grad_norm": 0.03662109375, "learning_rate": 9.350261389096341e-07, "loss": 0.0014, "step": 66375 }, { "epoch": 99.1044776119403, "grad_norm": 0.03466796875, "learning_rate": 8.976848394324123e-07, "loss": 0.0018, "step": 66400 }, { "epoch": 99.14179104477611, "grad_norm": 0.026611328125, "learning_rate": 8.603435399551905e-07, "loss": 0.0016, "step": 66425 }, { "epoch": 99.17910447761194, "grad_norm": 0.034912109375, "learning_rate": 8.230022404779686e-07, "loss": 0.0015, "step": 66450 }, { "epoch": 99.21641791044776, "grad_norm": 0.039794921875, "learning_rate": 7.856609410007468e-07, "loss": 0.0016, "step": 66475 }, { "epoch": 99.25373134328358, "grad_norm": 0.03515625, "learning_rate": 7.48319641523525e-07, "loss": 0.0014, "step": 66500 }, { "epoch": 99.2910447761194, "grad_norm": 0.0301513671875, "learning_rate": 7.109783420463032e-07, "loss": 0.0018, "step": 66525 }, { "epoch": 99.32835820895522, "grad_norm": 0.0252685546875, "learning_rate": 6.736370425690814e-07, "loss": 0.0018, "step": 66550 }, { "epoch": 99.36567164179104, "grad_norm": 0.038818359375, "learning_rate": 6.362957430918596e-07, "loss": 0.0018, "step": 66575 }, { "epoch": 99.40298507462687, "grad_norm": 0.0247802734375, "learning_rate": 5.989544436146378e-07, "loss": 0.0014, "step": 66600 }, { "epoch": 99.44029850746269, "grad_norm": 0.0250244140625, "learning_rate": 5.61613144137416e-07, "loss": 0.0016, "step": 66625 }, { "epoch": 99.4776119402985, "grad_norm": 0.01556396484375, "learning_rate": 5.242718446601941e-07, "loss": 0.0014, "step": 66650 }, { "epoch": 99.51492537313433, "grad_norm": 0.02294921875, "learning_rate": 4.869305451829725e-07, "loss": 0.0015, "step": 66675 }, { "epoch": 99.55223880597015, "grad_norm": 0.0181884765625, "learning_rate": 4.495892457057506e-07, "loss": 0.0015, "step": 66700 }, { "epoch": 99.58955223880596, "grad_norm": 0.03466796875, "learning_rate": 4.1224794622852876e-07, "loss": 0.0015, "step": 66725 }, { "epoch": 99.6268656716418, "grad_norm": 0.021728515625, "learning_rate": 3.7490664675130697e-07, "loss": 0.0018, "step": 66750 } ], "logging_steps": 25, "max_steps": 67000, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.8438794828522525e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }