{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 17560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0011396011396011395, "grad_norm": 0.051288899034261703, "learning_rate": 0.0, "loss": 1.2238, "step": 1 }, { "epoch": 0.002279202279202279, "grad_norm": 0.0594889298081398, "learning_rate": 7.112375533428165e-08, "loss": 1.2649, "step": 2 }, { "epoch": 0.003418803418803419, "grad_norm": 0.04890584945678711, "learning_rate": 1.422475106685633e-07, "loss": 1.3352, "step": 3 }, { "epoch": 0.004558404558404558, "grad_norm": 0.05049705132842064, "learning_rate": 2.1337126600284497e-07, "loss": 1.5098, "step": 4 }, { "epoch": 0.005698005698005698, "grad_norm": 0.05083613842725754, "learning_rate": 2.844950213371266e-07, "loss": 1.4295, "step": 5 }, { "epoch": 0.006837606837606838, "grad_norm": 0.044603075832128525, "learning_rate": 3.556187766714083e-07, "loss": 1.2458, "step": 6 }, { "epoch": 0.007977207977207978, "grad_norm": 0.047203429043293, "learning_rate": 4.2674253200568994e-07, "loss": 1.427, "step": 7 }, { "epoch": 0.009116809116809116, "grad_norm": 0.04564230889081955, "learning_rate": 4.978662873399715e-07, "loss": 1.3394, "step": 8 }, { "epoch": 0.010256410256410256, "grad_norm": 0.048336949199438095, "learning_rate": 5.689900426742532e-07, "loss": 1.358, "step": 9 }, { "epoch": 0.011396011396011397, "grad_norm": 0.04875385761260986, "learning_rate": 6.401137980085349e-07, "loss": 1.472, "step": 10 }, { "epoch": 0.012535612535612535, "grad_norm": 0.05315397307276726, "learning_rate": 7.112375533428166e-07, "loss": 1.3342, "step": 11 }, { "epoch": 0.013675213675213675, "grad_norm": 0.044489964842796326, "learning_rate": 7.823613086770982e-07, "loss": 1.3776, "step": 12 }, { "epoch": 0.014814814814814815, "grad_norm": 0.05502444505691528, "learning_rate": 8.534850640113799e-07, "loss": 1.2355, "step": 13 }, { "epoch": 0.015954415954415956, "grad_norm": 0.04652831330895424, "learning_rate": 9.246088193456616e-07, "loss": 1.4382, "step": 14 }, { "epoch": 0.017094017094017096, "grad_norm": 0.046115029603242874, "learning_rate": 9.95732574679943e-07, "loss": 1.358, "step": 15 }, { "epoch": 0.018233618233618232, "grad_norm": 0.048519786447286606, "learning_rate": 1.0668563300142247e-06, "loss": 1.2235, "step": 16 }, { "epoch": 0.019373219373219373, "grad_norm": 0.04774252325296402, "learning_rate": 1.1379800853485064e-06, "loss": 1.3017, "step": 17 }, { "epoch": 0.020512820512820513, "grad_norm": 0.05003888159990311, "learning_rate": 1.2091038406827881e-06, "loss": 1.4428, "step": 18 }, { "epoch": 0.021652421652421653, "grad_norm": 0.04985012859106064, "learning_rate": 1.2802275960170698e-06, "loss": 1.413, "step": 19 }, { "epoch": 0.022792022792022793, "grad_norm": 0.04075890779495239, "learning_rate": 1.3513513513513515e-06, "loss": 1.3317, "step": 20 }, { "epoch": 0.023931623931623933, "grad_norm": 0.048639338463544846, "learning_rate": 1.4224751066856332e-06, "loss": 1.4273, "step": 21 }, { "epoch": 0.02507122507122507, "grad_norm": 0.05434960871934891, "learning_rate": 1.4935988620199147e-06, "loss": 1.3705, "step": 22 }, { "epoch": 0.02621082621082621, "grad_norm": 0.05060447379946709, "learning_rate": 1.5647226173541964e-06, "loss": 1.2599, "step": 23 }, { "epoch": 0.02735042735042735, "grad_norm": 0.05357028543949127, "learning_rate": 1.6358463726884779e-06, "loss": 1.3756, "step": 24 }, { "epoch": 0.02849002849002849, "grad_norm": 0.0445236936211586, "learning_rate": 1.7069701280227598e-06, "loss": 1.5128, "step": 25 }, { "epoch": 0.02962962962962963, "grad_norm": 0.0478951521217823, "learning_rate": 1.7780938833570414e-06, "loss": 1.4071, "step": 26 }, { "epoch": 0.03076923076923077, "grad_norm": 0.05379122495651245, "learning_rate": 1.8492176386913231e-06, "loss": 1.3954, "step": 27 }, { "epoch": 0.03190883190883191, "grad_norm": 0.06472282856702805, "learning_rate": 1.9203413940256046e-06, "loss": 1.2589, "step": 28 }, { "epoch": 0.03304843304843305, "grad_norm": 0.05063191056251526, "learning_rate": 1.991465149359886e-06, "loss": 1.6055, "step": 29 }, { "epoch": 0.03418803418803419, "grad_norm": 0.04690183326601982, "learning_rate": 2.062588904694168e-06, "loss": 1.2893, "step": 30 }, { "epoch": 0.035327635327635325, "grad_norm": 0.04692545905709267, "learning_rate": 2.1337126600284495e-06, "loss": 1.4693, "step": 31 }, { "epoch": 0.036467236467236465, "grad_norm": 0.05206100642681122, "learning_rate": 2.2048364153627314e-06, "loss": 1.1352, "step": 32 }, { "epoch": 0.037606837606837605, "grad_norm": 0.0561656728386879, "learning_rate": 2.275960170697013e-06, "loss": 1.4754, "step": 33 }, { "epoch": 0.038746438746438745, "grad_norm": 0.048342589288949966, "learning_rate": 2.3470839260312948e-06, "loss": 1.4056, "step": 34 }, { "epoch": 0.039886039886039885, "grad_norm": 0.053837891668081284, "learning_rate": 2.4182076813655762e-06, "loss": 1.3333, "step": 35 }, { "epoch": 0.041025641025641026, "grad_norm": 0.07836456596851349, "learning_rate": 2.4893314366998577e-06, "loss": 1.2722, "step": 36 }, { "epoch": 0.042165242165242166, "grad_norm": 0.05720524862408638, "learning_rate": 2.5604551920341396e-06, "loss": 1.5504, "step": 37 }, { "epoch": 0.043304843304843306, "grad_norm": 0.06363226473331451, "learning_rate": 2.631578947368421e-06, "loss": 1.2183, "step": 38 }, { "epoch": 0.044444444444444446, "grad_norm": 0.06157976761460304, "learning_rate": 2.702702702702703e-06, "loss": 1.1364, "step": 39 }, { "epoch": 0.045584045584045586, "grad_norm": 0.051158804446458817, "learning_rate": 2.7738264580369845e-06, "loss": 1.4168, "step": 40 }, { "epoch": 0.046723646723646726, "grad_norm": 0.047691259533166885, "learning_rate": 2.8449502133712664e-06, "loss": 1.306, "step": 41 }, { "epoch": 0.04786324786324787, "grad_norm": 0.057260893285274506, "learning_rate": 2.916073968705548e-06, "loss": 1.4493, "step": 42 }, { "epoch": 0.049002849002849, "grad_norm": 0.05358852446079254, "learning_rate": 2.9871977240398294e-06, "loss": 1.4244, "step": 43 }, { "epoch": 0.05014245014245014, "grad_norm": 0.05413025990128517, "learning_rate": 3.058321479374111e-06, "loss": 1.4488, "step": 44 }, { "epoch": 0.05128205128205128, "grad_norm": 0.05922415107488632, "learning_rate": 3.1294452347083927e-06, "loss": 1.4817, "step": 45 }, { "epoch": 0.05242165242165242, "grad_norm": 0.06017250195145607, "learning_rate": 3.2005689900426746e-06, "loss": 1.4613, "step": 46 }, { "epoch": 0.05356125356125356, "grad_norm": 0.061094995588064194, "learning_rate": 3.2716927453769557e-06, "loss": 1.301, "step": 47 }, { "epoch": 0.0547008547008547, "grad_norm": 0.06163366138935089, "learning_rate": 3.3428165007112376e-06, "loss": 1.489, "step": 48 }, { "epoch": 0.05584045584045584, "grad_norm": 0.06326743960380554, "learning_rate": 3.4139402560455195e-06, "loss": 1.1924, "step": 49 }, { "epoch": 0.05698005698005698, "grad_norm": 0.062128033488988876, "learning_rate": 3.485064011379801e-06, "loss": 1.4623, "step": 50 }, { "epoch": 0.05811965811965812, "grad_norm": 0.05949904024600983, "learning_rate": 3.556187766714083e-06, "loss": 1.4597, "step": 51 }, { "epoch": 0.05925925925925926, "grad_norm": 0.05956437438726425, "learning_rate": 3.6273115220483644e-06, "loss": 1.3425, "step": 52 }, { "epoch": 0.0603988603988604, "grad_norm": 0.06518371403217316, "learning_rate": 3.6984352773826463e-06, "loss": 1.4659, "step": 53 }, { "epoch": 0.06153846153846154, "grad_norm": 0.07117999345064163, "learning_rate": 3.7695590327169273e-06, "loss": 1.3784, "step": 54 }, { "epoch": 0.06267806267806268, "grad_norm": 0.08100444823503494, "learning_rate": 3.840682788051209e-06, "loss": 1.2489, "step": 55 }, { "epoch": 0.06381766381766382, "grad_norm": 0.06958556175231934, "learning_rate": 3.9118065433854916e-06, "loss": 1.4346, "step": 56 }, { "epoch": 0.06495726495726496, "grad_norm": 0.06511950492858887, "learning_rate": 3.982930298719772e-06, "loss": 1.2399, "step": 57 }, { "epoch": 0.0660968660968661, "grad_norm": 0.08113803714513779, "learning_rate": 4.0540540540540545e-06, "loss": 1.3979, "step": 58 }, { "epoch": 0.06723646723646724, "grad_norm": 0.06632170081138611, "learning_rate": 4.125177809388336e-06, "loss": 1.2457, "step": 59 }, { "epoch": 0.06837606837606838, "grad_norm": 0.06847529858350754, "learning_rate": 4.1963015647226175e-06, "loss": 1.3739, "step": 60 }, { "epoch": 0.06951566951566951, "grad_norm": 0.08189208060503006, "learning_rate": 4.267425320056899e-06, "loss": 1.14, "step": 61 }, { "epoch": 0.07065527065527065, "grad_norm": 0.07065827399492264, "learning_rate": 4.338549075391181e-06, "loss": 1.2686, "step": 62 }, { "epoch": 0.07179487179487179, "grad_norm": 0.08157073706388474, "learning_rate": 4.409672830725463e-06, "loss": 1.2876, "step": 63 }, { "epoch": 0.07293447293447293, "grad_norm": 0.09051886945962906, "learning_rate": 4.480796586059744e-06, "loss": 1.4319, "step": 64 }, { "epoch": 0.07407407407407407, "grad_norm": 0.07851537317037582, "learning_rate": 4.551920341394026e-06, "loss": 1.4003, "step": 65 }, { "epoch": 0.07521367521367521, "grad_norm": 0.08588617295026779, "learning_rate": 4.623044096728307e-06, "loss": 1.315, "step": 66 }, { "epoch": 0.07635327635327635, "grad_norm": 0.0719929113984108, "learning_rate": 4.6941678520625895e-06, "loss": 1.2378, "step": 67 }, { "epoch": 0.07749287749287749, "grad_norm": 0.08943946659564972, "learning_rate": 4.76529160739687e-06, "loss": 1.2121, "step": 68 }, { "epoch": 0.07863247863247863, "grad_norm": 0.08529966324567795, "learning_rate": 4.8364153627311525e-06, "loss": 1.4736, "step": 69 }, { "epoch": 0.07977207977207977, "grad_norm": 0.08152305334806442, "learning_rate": 4.907539118065434e-06, "loss": 1.3737, "step": 70 }, { "epoch": 0.08091168091168091, "grad_norm": 0.09641026705503464, "learning_rate": 4.9786628733997155e-06, "loss": 1.3505, "step": 71 }, { "epoch": 0.08205128205128205, "grad_norm": 0.09638162702322006, "learning_rate": 5.049786628733998e-06, "loss": 1.4356, "step": 72 }, { "epoch": 0.08319088319088319, "grad_norm": 0.08865015208721161, "learning_rate": 5.120910384068279e-06, "loss": 1.4189, "step": 73 }, { "epoch": 0.08433048433048433, "grad_norm": 0.09091176092624664, "learning_rate": 5.192034139402561e-06, "loss": 1.208, "step": 74 }, { "epoch": 0.08547008547008547, "grad_norm": 0.09469577670097351, "learning_rate": 5.263157894736842e-06, "loss": 1.3863, "step": 75 }, { "epoch": 0.08660968660968661, "grad_norm": 0.08288253098726273, "learning_rate": 5.334281650071124e-06, "loss": 1.3364, "step": 76 }, { "epoch": 0.08774928774928775, "grad_norm": 0.08115946501493454, "learning_rate": 5.405405405405406e-06, "loss": 1.2981, "step": 77 }, { "epoch": 0.08888888888888889, "grad_norm": 0.09062476456165314, "learning_rate": 5.4765291607396875e-06, "loss": 1.4799, "step": 78 }, { "epoch": 0.09002849002849003, "grad_norm": 0.08192724734544754, "learning_rate": 5.547652916073969e-06, "loss": 1.3866, "step": 79 }, { "epoch": 0.09116809116809117, "grad_norm": 0.09433706849813461, "learning_rate": 5.6187766714082505e-06, "loss": 1.4301, "step": 80 }, { "epoch": 0.09230769230769231, "grad_norm": 0.09139500558376312, "learning_rate": 5.689900426742533e-06, "loss": 1.3532, "step": 81 }, { "epoch": 0.09344729344729345, "grad_norm": 0.09360858052968979, "learning_rate": 5.7610241820768134e-06, "loss": 1.3532, "step": 82 }, { "epoch": 0.0945868945868946, "grad_norm": 0.08104239404201508, "learning_rate": 5.832147937411096e-06, "loss": 1.2504, "step": 83 }, { "epoch": 0.09572649572649573, "grad_norm": 0.09012996405363083, "learning_rate": 5.903271692745377e-06, "loss": 1.3313, "step": 84 }, { "epoch": 0.09686609686609686, "grad_norm": 0.08825211226940155, "learning_rate": 5.974395448079659e-06, "loss": 1.2833, "step": 85 }, { "epoch": 0.098005698005698, "grad_norm": 0.08592044562101364, "learning_rate": 6.045519203413941e-06, "loss": 1.3009, "step": 86 }, { "epoch": 0.09914529914529914, "grad_norm": 0.0852126032114029, "learning_rate": 6.116642958748222e-06, "loss": 1.2452, "step": 87 }, { "epoch": 0.10028490028490028, "grad_norm": 0.11061551421880722, "learning_rate": 6.187766714082504e-06, "loss": 1.3683, "step": 88 }, { "epoch": 0.10142450142450142, "grad_norm": 0.0811811238527298, "learning_rate": 6.2588904694167855e-06, "loss": 1.1574, "step": 89 }, { "epoch": 0.10256410256410256, "grad_norm": 0.09463097155094147, "learning_rate": 6.330014224751067e-06, "loss": 1.3621, "step": 90 }, { "epoch": 0.1037037037037037, "grad_norm": 0.10568013787269592, "learning_rate": 6.401137980085349e-06, "loss": 1.284, "step": 91 }, { "epoch": 0.10484330484330484, "grad_norm": 0.08822279423475266, "learning_rate": 6.472261735419631e-06, "loss": 1.2283, "step": 92 }, { "epoch": 0.10598290598290598, "grad_norm": 0.08739876002073288, "learning_rate": 6.543385490753911e-06, "loss": 1.2791, "step": 93 }, { "epoch": 0.10712250712250712, "grad_norm": 0.09902703016996384, "learning_rate": 6.614509246088194e-06, "loss": 1.3719, "step": 94 }, { "epoch": 0.10826210826210826, "grad_norm": 0.07693609595298767, "learning_rate": 6.685633001422475e-06, "loss": 1.1944, "step": 95 }, { "epoch": 0.1094017094017094, "grad_norm": 0.0802607610821724, "learning_rate": 6.7567567567567575e-06, "loss": 1.2534, "step": 96 }, { "epoch": 0.11054131054131054, "grad_norm": 0.09589564800262451, "learning_rate": 6.827880512091039e-06, "loss": 1.2717, "step": 97 }, { "epoch": 0.11168091168091168, "grad_norm": 0.07705813646316528, "learning_rate": 6.89900426742532e-06, "loss": 1.1472, "step": 98 }, { "epoch": 0.11282051282051282, "grad_norm": 0.08257383108139038, "learning_rate": 6.970128022759602e-06, "loss": 1.1593, "step": 99 }, { "epoch": 0.11396011396011396, "grad_norm": 0.08320498466491699, "learning_rate": 7.0412517780938835e-06, "loss": 1.2375, "step": 100 }, { "epoch": 0.1150997150997151, "grad_norm": 0.07808973640203476, "learning_rate": 7.112375533428166e-06, "loss": 1.2104, "step": 101 }, { "epoch": 0.11623931623931624, "grad_norm": 0.07398729026317596, "learning_rate": 7.183499288762446e-06, "loss": 1.1418, "step": 102 }, { "epoch": 0.11737891737891738, "grad_norm": 0.07872660458087921, "learning_rate": 7.254623044096729e-06, "loss": 1.3381, "step": 103 }, { "epoch": 0.11851851851851852, "grad_norm": 0.07154995948076248, "learning_rate": 7.32574679943101e-06, "loss": 1.1774, "step": 104 }, { "epoch": 0.11965811965811966, "grad_norm": 0.08565530180931091, "learning_rate": 7.3968705547652926e-06, "loss": 1.2758, "step": 105 }, { "epoch": 0.1207977207977208, "grad_norm": 0.09029407799243927, "learning_rate": 7.467994310099574e-06, "loss": 1.2923, "step": 106 }, { "epoch": 0.12193732193732194, "grad_norm": 0.08215060830116272, "learning_rate": 7.539118065433855e-06, "loss": 1.2434, "step": 107 }, { "epoch": 0.12307692307692308, "grad_norm": 0.08336412906646729, "learning_rate": 7.610241820768137e-06, "loss": 1.3831, "step": 108 }, { "epoch": 0.12421652421652421, "grad_norm": 0.07238791882991791, "learning_rate": 7.681365576102418e-06, "loss": 1.2339, "step": 109 }, { "epoch": 0.12535612535612536, "grad_norm": 0.068402960896492, "learning_rate": 7.7524893314367e-06, "loss": 1.2756, "step": 110 }, { "epoch": 0.1264957264957265, "grad_norm": 0.07377321273088455, "learning_rate": 7.823613086770983e-06, "loss": 1.3356, "step": 111 }, { "epoch": 0.12763532763532764, "grad_norm": 0.06662782281637192, "learning_rate": 7.894736842105263e-06, "loss": 1.2729, "step": 112 }, { "epoch": 0.12877492877492877, "grad_norm": 0.061022158712148666, "learning_rate": 7.965860597439544e-06, "loss": 1.0995, "step": 113 }, { "epoch": 0.12991452991452992, "grad_norm": 0.06461257487535477, "learning_rate": 8.036984352773828e-06, "loss": 1.2617, "step": 114 }, { "epoch": 0.13105413105413105, "grad_norm": 0.06148713827133179, "learning_rate": 8.108108108108109e-06, "loss": 1.2552, "step": 115 }, { "epoch": 0.1321937321937322, "grad_norm": 0.06288463622331619, "learning_rate": 8.179231863442389e-06, "loss": 1.1786, "step": 116 }, { "epoch": 0.13333333333333333, "grad_norm": 0.06167013943195343, "learning_rate": 8.250355618776672e-06, "loss": 1.2099, "step": 117 }, { "epoch": 0.13447293447293449, "grad_norm": 0.0636247918009758, "learning_rate": 8.321479374110953e-06, "loss": 1.245, "step": 118 }, { "epoch": 0.1356125356125356, "grad_norm": 0.0682772696018219, "learning_rate": 8.392603129445235e-06, "loss": 1.118, "step": 119 }, { "epoch": 0.13675213675213677, "grad_norm": 0.06304428726434708, "learning_rate": 8.463726884779518e-06, "loss": 1.1541, "step": 120 }, { "epoch": 0.1378917378917379, "grad_norm": 0.06073963642120361, "learning_rate": 8.534850640113798e-06, "loss": 1.1833, "step": 121 }, { "epoch": 0.13903133903133902, "grad_norm": 0.06225947290658951, "learning_rate": 8.60597439544808e-06, "loss": 1.1945, "step": 122 }, { "epoch": 0.14017094017094017, "grad_norm": 0.06997359544038773, "learning_rate": 8.677098150782363e-06, "loss": 1.3949, "step": 123 }, { "epoch": 0.1413105413105413, "grad_norm": 0.06288664788007736, "learning_rate": 8.748221906116644e-06, "loss": 1.1181, "step": 124 }, { "epoch": 0.14245014245014245, "grad_norm": 0.061201855540275574, "learning_rate": 8.819345661450926e-06, "loss": 1.0568, "step": 125 }, { "epoch": 0.14358974358974358, "grad_norm": 0.06435884535312653, "learning_rate": 8.890469416785207e-06, "loss": 1.1947, "step": 126 }, { "epoch": 0.14472934472934473, "grad_norm": 0.06337955594062805, "learning_rate": 8.961593172119488e-06, "loss": 1.0872, "step": 127 }, { "epoch": 0.14586894586894586, "grad_norm": 0.07901694625616074, "learning_rate": 9.03271692745377e-06, "loss": 1.1121, "step": 128 }, { "epoch": 0.147008547008547, "grad_norm": 0.08116520941257477, "learning_rate": 9.103840682788051e-06, "loss": 1.1778, "step": 129 }, { "epoch": 0.14814814814814814, "grad_norm": 0.06507478654384613, "learning_rate": 9.174964438122333e-06, "loss": 1.2631, "step": 130 }, { "epoch": 0.1492877492877493, "grad_norm": 0.059173665940761566, "learning_rate": 9.246088193456614e-06, "loss": 1.0487, "step": 131 }, { "epoch": 0.15042735042735042, "grad_norm": 0.059220798313617706, "learning_rate": 9.317211948790896e-06, "loss": 1.2087, "step": 132 }, { "epoch": 0.15156695156695157, "grad_norm": 0.06327493488788605, "learning_rate": 9.388335704125179e-06, "loss": 1.1046, "step": 133 }, { "epoch": 0.1527065527065527, "grad_norm": 0.05295513942837715, "learning_rate": 9.45945945945946e-06, "loss": 1.1181, "step": 134 }, { "epoch": 0.15384615384615385, "grad_norm": 0.061009056866168976, "learning_rate": 9.53058321479374e-06, "loss": 1.305, "step": 135 }, { "epoch": 0.15498575498575498, "grad_norm": 0.06321488320827484, "learning_rate": 9.601706970128024e-06, "loss": 1.2587, "step": 136 }, { "epoch": 0.15612535612535614, "grad_norm": 0.05951569601893425, "learning_rate": 9.672830725462305e-06, "loss": 1.2959, "step": 137 }, { "epoch": 0.15726495726495726, "grad_norm": 0.0564689040184021, "learning_rate": 9.743954480796586e-06, "loss": 1.0689, "step": 138 }, { "epoch": 0.15840455840455842, "grad_norm": 0.06079903990030289, "learning_rate": 9.815078236130868e-06, "loss": 1.2022, "step": 139 }, { "epoch": 0.15954415954415954, "grad_norm": 0.056464362889528275, "learning_rate": 9.88620199146515e-06, "loss": 1.191, "step": 140 }, { "epoch": 0.1606837606837607, "grad_norm": 0.06172769144177437, "learning_rate": 9.957325746799431e-06, "loss": 1.2116, "step": 141 }, { "epoch": 0.16182336182336182, "grad_norm": 0.05989086627960205, "learning_rate": 1.0028449502133714e-05, "loss": 1.2683, "step": 142 }, { "epoch": 0.16296296296296298, "grad_norm": 0.059347882866859436, "learning_rate": 1.0099573257467996e-05, "loss": 1.1318, "step": 143 }, { "epoch": 0.1641025641025641, "grad_norm": 0.059646185487508774, "learning_rate": 1.0170697012802275e-05, "loss": 1.1439, "step": 144 }, { "epoch": 0.16524216524216523, "grad_norm": 0.0693233460187912, "learning_rate": 1.0241820768136559e-05, "loss": 1.0828, "step": 145 }, { "epoch": 0.16638176638176638, "grad_norm": 0.06251576542854309, "learning_rate": 1.031294452347084e-05, "loss": 1.1576, "step": 146 }, { "epoch": 0.1675213675213675, "grad_norm": 0.06578931957483292, "learning_rate": 1.0384068278805121e-05, "loss": 1.2977, "step": 147 }, { "epoch": 0.16866096866096866, "grad_norm": 0.060304999351501465, "learning_rate": 1.0455192034139403e-05, "loss": 1.1031, "step": 148 }, { "epoch": 0.1698005698005698, "grad_norm": 0.0691312775015831, "learning_rate": 1.0526315789473684e-05, "loss": 1.1003, "step": 149 }, { "epoch": 0.17094017094017094, "grad_norm": 0.05902580916881561, "learning_rate": 1.0597439544807966e-05, "loss": 1.1638, "step": 150 }, { "epoch": 0.17207977207977207, "grad_norm": 0.05804158374667168, "learning_rate": 1.0668563300142247e-05, "loss": 1.0829, "step": 151 }, { "epoch": 0.17321937321937322, "grad_norm": 0.06002949923276901, "learning_rate": 1.073968705547653e-05, "loss": 1.1821, "step": 152 }, { "epoch": 0.17435897435897435, "grad_norm": 0.052708450704813004, "learning_rate": 1.0810810810810812e-05, "loss": 1.1133, "step": 153 }, { "epoch": 0.1754985754985755, "grad_norm": 0.05245179310441017, "learning_rate": 1.0881934566145092e-05, "loss": 1.2376, "step": 154 }, { "epoch": 0.17663817663817663, "grad_norm": 0.05576658993959427, "learning_rate": 1.0953058321479375e-05, "loss": 1.1623, "step": 155 }, { "epoch": 0.17777777777777778, "grad_norm": 0.06146593019366264, "learning_rate": 1.1024182076813657e-05, "loss": 1.0804, "step": 156 }, { "epoch": 0.1789173789173789, "grad_norm": 0.05787685140967369, "learning_rate": 1.1095305832147938e-05, "loss": 1.2134, "step": 157 }, { "epoch": 0.18005698005698006, "grad_norm": 0.05638829246163368, "learning_rate": 1.116642958748222e-05, "loss": 1.2973, "step": 158 }, { "epoch": 0.1811965811965812, "grad_norm": 0.0531807541847229, "learning_rate": 1.1237553342816501e-05, "loss": 1.2086, "step": 159 }, { "epoch": 0.18233618233618235, "grad_norm": 0.058753449469804764, "learning_rate": 1.1308677098150782e-05, "loss": 1.2072, "step": 160 }, { "epoch": 0.18347578347578347, "grad_norm": 0.05654165521264076, "learning_rate": 1.1379800853485066e-05, "loss": 1.2832, "step": 161 }, { "epoch": 0.18461538461538463, "grad_norm": 0.06907007098197937, "learning_rate": 1.1450924608819347e-05, "loss": 1.0622, "step": 162 }, { "epoch": 0.18575498575498575, "grad_norm": 0.053910259157419205, "learning_rate": 1.1522048364153627e-05, "loss": 1.1624, "step": 163 }, { "epoch": 0.1868945868945869, "grad_norm": 0.0608098991215229, "learning_rate": 1.159317211948791e-05, "loss": 1.2696, "step": 164 }, { "epoch": 0.18803418803418803, "grad_norm": 0.05722050368785858, "learning_rate": 1.1664295874822192e-05, "loss": 1.1955, "step": 165 }, { "epoch": 0.1891737891737892, "grad_norm": 0.06585364788770676, "learning_rate": 1.1735419630156473e-05, "loss": 1.1414, "step": 166 }, { "epoch": 0.1903133903133903, "grad_norm": 0.055771175771951675, "learning_rate": 1.1806543385490754e-05, "loss": 1.2297, "step": 167 }, { "epoch": 0.19145299145299147, "grad_norm": 0.06641066074371338, "learning_rate": 1.1877667140825036e-05, "loss": 1.0115, "step": 168 }, { "epoch": 0.1925925925925926, "grad_norm": 0.05692877620458603, "learning_rate": 1.1948790896159317e-05, "loss": 1.1519, "step": 169 }, { "epoch": 0.19373219373219372, "grad_norm": 0.06644362956285477, "learning_rate": 1.2019914651493599e-05, "loss": 1.1783, "step": 170 }, { "epoch": 0.19487179487179487, "grad_norm": 0.06490065902471542, "learning_rate": 1.2091038406827882e-05, "loss": 1.1589, "step": 171 }, { "epoch": 0.196011396011396, "grad_norm": 0.0554165318608284, "learning_rate": 1.2162162162162164e-05, "loss": 1.1418, "step": 172 }, { "epoch": 0.19715099715099715, "grad_norm": 0.053892701864242554, "learning_rate": 1.2233285917496443e-05, "loss": 1.1291, "step": 173 }, { "epoch": 0.19829059829059828, "grad_norm": 0.05443727597594261, "learning_rate": 1.2304409672830727e-05, "loss": 1.1612, "step": 174 }, { "epoch": 0.19943019943019943, "grad_norm": 0.05521461367607117, "learning_rate": 1.2375533428165008e-05, "loss": 1.2097, "step": 175 }, { "epoch": 0.20056980056980056, "grad_norm": 0.060670554637908936, "learning_rate": 1.244665718349929e-05, "loss": 1.0893, "step": 176 }, { "epoch": 0.20170940170940171, "grad_norm": 0.06637254357337952, "learning_rate": 1.2517780938833571e-05, "loss": 1.2011, "step": 177 }, { "epoch": 0.20284900284900284, "grad_norm": 0.059149112552404404, "learning_rate": 1.2588904694167852e-05, "loss": 1.2528, "step": 178 }, { "epoch": 0.203988603988604, "grad_norm": 0.06459205597639084, "learning_rate": 1.2660028449502134e-05, "loss": 1.1478, "step": 179 }, { "epoch": 0.20512820512820512, "grad_norm": 0.05805118754506111, "learning_rate": 1.2731152204836415e-05, "loss": 1.1718, "step": 180 }, { "epoch": 0.20626780626780628, "grad_norm": 0.06261450052261353, "learning_rate": 1.2802275960170699e-05, "loss": 1.1708, "step": 181 }, { "epoch": 0.2074074074074074, "grad_norm": 0.06539241969585419, "learning_rate": 1.287339971550498e-05, "loss": 1.065, "step": 182 }, { "epoch": 0.20854700854700856, "grad_norm": 0.060775551944971085, "learning_rate": 1.2944523470839262e-05, "loss": 1.0918, "step": 183 }, { "epoch": 0.20968660968660968, "grad_norm": 0.07454755157232285, "learning_rate": 1.3015647226173541e-05, "loss": 1.0284, "step": 184 }, { "epoch": 0.21082621082621084, "grad_norm": 0.06023133546113968, "learning_rate": 1.3086770981507823e-05, "loss": 1.0995, "step": 185 }, { "epoch": 0.21196581196581196, "grad_norm": 0.06015422195196152, "learning_rate": 1.3157894736842106e-05, "loss": 1.1216, "step": 186 }, { "epoch": 0.21310541310541312, "grad_norm": 0.07257451117038727, "learning_rate": 1.3229018492176387e-05, "loss": 1.1048, "step": 187 }, { "epoch": 0.21424501424501424, "grad_norm": 0.08165715634822845, "learning_rate": 1.3300142247510669e-05, "loss": 1.0034, "step": 188 }, { "epoch": 0.2153846153846154, "grad_norm": 0.06942833214998245, "learning_rate": 1.337126600284495e-05, "loss": 1.1672, "step": 189 }, { "epoch": 0.21652421652421652, "grad_norm": 0.06412875652313232, "learning_rate": 1.3442389758179234e-05, "loss": 0.9804, "step": 190 }, { "epoch": 0.21766381766381768, "grad_norm": 0.0784495621919632, "learning_rate": 1.3513513513513515e-05, "loss": 1.0473, "step": 191 }, { "epoch": 0.2188034188034188, "grad_norm": 0.0785534530878067, "learning_rate": 1.3584637268847797e-05, "loss": 0.9704, "step": 192 }, { "epoch": 0.21994301994301993, "grad_norm": 0.06472057849168777, "learning_rate": 1.3655761024182078e-05, "loss": 1.0108, "step": 193 }, { "epoch": 0.22108262108262108, "grad_norm": 0.06423155218362808, "learning_rate": 1.3726884779516358e-05, "loss": 1.1732, "step": 194 }, { "epoch": 0.2222222222222222, "grad_norm": 0.06703535467386246, "learning_rate": 1.379800853485064e-05, "loss": 1.1763, "step": 195 }, { "epoch": 0.22336182336182336, "grad_norm": 0.07830598205327988, "learning_rate": 1.3869132290184922e-05, "loss": 1.096, "step": 196 }, { "epoch": 0.2245014245014245, "grad_norm": 0.06796954572200775, "learning_rate": 1.3940256045519204e-05, "loss": 1.2537, "step": 197 }, { "epoch": 0.22564102564102564, "grad_norm": 0.07354679703712463, "learning_rate": 1.4011379800853485e-05, "loss": 1.0042, "step": 198 }, { "epoch": 0.22678062678062677, "grad_norm": 0.06254465132951736, "learning_rate": 1.4082503556187767e-05, "loss": 1.1521, "step": 199 }, { "epoch": 0.22792022792022792, "grad_norm": 0.07095777243375778, "learning_rate": 1.415362731152205e-05, "loss": 0.9821, "step": 200 }, { "epoch": 0.22905982905982905, "grad_norm": 0.06401743739843369, "learning_rate": 1.4224751066856332e-05, "loss": 0.9513, "step": 201 }, { "epoch": 0.2301994301994302, "grad_norm": 0.07533596456050873, "learning_rate": 1.4295874822190613e-05, "loss": 1.0867, "step": 202 }, { "epoch": 0.23133903133903133, "grad_norm": 0.09399830549955368, "learning_rate": 1.4366998577524893e-05, "loss": 1.2624, "step": 203 }, { "epoch": 0.23247863247863249, "grad_norm": 0.07780613750219345, "learning_rate": 1.4438122332859174e-05, "loss": 1.1126, "step": 204 }, { "epoch": 0.2336182336182336, "grad_norm": 0.07109476625919342, "learning_rate": 1.4509246088193457e-05, "loss": 0.9715, "step": 205 }, { "epoch": 0.23475783475783477, "grad_norm": 0.06988522410392761, "learning_rate": 1.4580369843527739e-05, "loss": 1.2099, "step": 206 }, { "epoch": 0.2358974358974359, "grad_norm": 0.05662979930639267, "learning_rate": 1.465149359886202e-05, "loss": 1.1849, "step": 207 }, { "epoch": 0.23703703703703705, "grad_norm": 0.06986989825963974, "learning_rate": 1.4722617354196302e-05, "loss": 1.1834, "step": 208 }, { "epoch": 0.23817663817663817, "grad_norm": 0.0632546991109848, "learning_rate": 1.4793741109530585e-05, "loss": 1.0627, "step": 209 }, { "epoch": 0.23931623931623933, "grad_norm": 0.0932113379240036, "learning_rate": 1.4864864864864867e-05, "loss": 1.0043, "step": 210 }, { "epoch": 0.24045584045584045, "grad_norm": 0.06944374740123749, "learning_rate": 1.4935988620199148e-05, "loss": 1.1636, "step": 211 }, { "epoch": 0.2415954415954416, "grad_norm": 0.07065069675445557, "learning_rate": 1.5007112375533428e-05, "loss": 1.1861, "step": 212 }, { "epoch": 0.24273504273504273, "grad_norm": 0.06531589478254318, "learning_rate": 1.507823613086771e-05, "loss": 1.0869, "step": 213 }, { "epoch": 0.2438746438746439, "grad_norm": 0.07184288650751114, "learning_rate": 1.514935988620199e-05, "loss": 1.1161, "step": 214 }, { "epoch": 0.245014245014245, "grad_norm": 0.07377070188522339, "learning_rate": 1.5220483641536274e-05, "loss": 1.1395, "step": 215 }, { "epoch": 0.24615384615384617, "grad_norm": 0.06514596939086914, "learning_rate": 1.5291607396870555e-05, "loss": 1.2536, "step": 216 }, { "epoch": 0.2472934472934473, "grad_norm": 0.07799336314201355, "learning_rate": 1.5362731152204837e-05, "loss": 1.13, "step": 217 }, { "epoch": 0.24843304843304842, "grad_norm": 0.07933985441923141, "learning_rate": 1.543385490753912e-05, "loss": 1.1127, "step": 218 }, { "epoch": 0.24957264957264957, "grad_norm": 0.07211863994598389, "learning_rate": 1.55049786628734e-05, "loss": 1.1378, "step": 219 }, { "epoch": 0.25071225071225073, "grad_norm": 0.06666900962591171, "learning_rate": 1.5576102418207685e-05, "loss": 1.2198, "step": 220 }, { "epoch": 0.2518518518518518, "grad_norm": 0.07066565752029419, "learning_rate": 1.5647226173541966e-05, "loss": 0.9718, "step": 221 }, { "epoch": 0.252991452991453, "grad_norm": 0.07008588314056396, "learning_rate": 1.5718349928876244e-05, "loss": 1.0966, "step": 222 }, { "epoch": 0.25413105413105413, "grad_norm": 0.07466041296720505, "learning_rate": 1.5789473684210526e-05, "loss": 1.0437, "step": 223 }, { "epoch": 0.2552706552706553, "grad_norm": 0.07254885882139206, "learning_rate": 1.5860597439544807e-05, "loss": 1.1555, "step": 224 }, { "epoch": 0.2564102564102564, "grad_norm": 0.07276211678981781, "learning_rate": 1.593172119487909e-05, "loss": 1.0798, "step": 225 }, { "epoch": 0.25754985754985754, "grad_norm": 0.0700463205575943, "learning_rate": 1.6002844950213374e-05, "loss": 1.0579, "step": 226 }, { "epoch": 0.2586894586894587, "grad_norm": 0.07556290924549103, "learning_rate": 1.6073968705547655e-05, "loss": 1.024, "step": 227 }, { "epoch": 0.25982905982905985, "grad_norm": 0.06487169116735458, "learning_rate": 1.6145092460881937e-05, "loss": 1.1722, "step": 228 }, { "epoch": 0.26096866096866095, "grad_norm": 0.06638690829277039, "learning_rate": 1.6216216216216218e-05, "loss": 1.3604, "step": 229 }, { "epoch": 0.2621082621082621, "grad_norm": 0.0821375772356987, "learning_rate": 1.62873399715505e-05, "loss": 0.9631, "step": 230 }, { "epoch": 0.26324786324786326, "grad_norm": 0.07891014218330383, "learning_rate": 1.6358463726884778e-05, "loss": 1.0452, "step": 231 }, { "epoch": 0.2643874643874644, "grad_norm": 0.06632769107818604, "learning_rate": 1.6429587482219063e-05, "loss": 1.0205, "step": 232 }, { "epoch": 0.2655270655270655, "grad_norm": 0.07042209059000015, "learning_rate": 1.6500711237553344e-05, "loss": 1.1403, "step": 233 }, { "epoch": 0.26666666666666666, "grad_norm": 0.07950495928525925, "learning_rate": 1.6571834992887625e-05, "loss": 1.0552, "step": 234 }, { "epoch": 0.2678062678062678, "grad_norm": 0.07009637355804443, "learning_rate": 1.6642958748221907e-05, "loss": 1.0312, "step": 235 }, { "epoch": 0.26894586894586897, "grad_norm": 0.06583420187234879, "learning_rate": 1.671408250355619e-05, "loss": 1.1817, "step": 236 }, { "epoch": 0.27008547008547007, "grad_norm": 0.06250870227813721, "learning_rate": 1.678520625889047e-05, "loss": 1.1174, "step": 237 }, { "epoch": 0.2712250712250712, "grad_norm": 0.08097542822360992, "learning_rate": 1.685633001422475e-05, "loss": 1.0297, "step": 238 }, { "epoch": 0.2723646723646724, "grad_norm": 0.07908938825130463, "learning_rate": 1.6927453769559036e-05, "loss": 1.0175, "step": 239 }, { "epoch": 0.27350427350427353, "grad_norm": 0.07402563840150833, "learning_rate": 1.6998577524893318e-05, "loss": 1.0153, "step": 240 }, { "epoch": 0.27464387464387463, "grad_norm": 0.07554073631763458, "learning_rate": 1.7069701280227596e-05, "loss": 1.1444, "step": 241 }, { "epoch": 0.2757834757834758, "grad_norm": 0.07560619711875916, "learning_rate": 1.7140825035561877e-05, "loss": 1.0719, "step": 242 }, { "epoch": 0.27692307692307694, "grad_norm": 0.07081248611211777, "learning_rate": 1.721194879089616e-05, "loss": 1.0042, "step": 243 }, { "epoch": 0.27806267806267804, "grad_norm": 0.08599159121513367, "learning_rate": 1.728307254623044e-05, "loss": 0.9159, "step": 244 }, { "epoch": 0.2792022792022792, "grad_norm": 0.07577984780073166, "learning_rate": 1.7354196301564725e-05, "loss": 1.0697, "step": 245 }, { "epoch": 0.28034188034188035, "grad_norm": 0.07293561846017838, "learning_rate": 1.7425320056899007e-05, "loss": 0.9677, "step": 246 }, { "epoch": 0.2814814814814815, "grad_norm": 0.08174625039100647, "learning_rate": 1.7496443812233288e-05, "loss": 0.9469, "step": 247 }, { "epoch": 0.2826210826210826, "grad_norm": 0.084431491792202, "learning_rate": 1.756756756756757e-05, "loss": 1.0253, "step": 248 }, { "epoch": 0.28376068376068375, "grad_norm": 0.08393265306949615, "learning_rate": 1.763869132290185e-05, "loss": 1.1543, "step": 249 }, { "epoch": 0.2849002849002849, "grad_norm": 0.07045919448137283, "learning_rate": 1.770981507823613e-05, "loss": 1.0797, "step": 250 }, { "epoch": 0.28603988603988606, "grad_norm": 0.08641494810581207, "learning_rate": 1.7780938833570414e-05, "loss": 1.1084, "step": 251 }, { "epoch": 0.28717948717948716, "grad_norm": 0.08869914710521698, "learning_rate": 1.7852062588904696e-05, "loss": 1.1342, "step": 252 }, { "epoch": 0.2883190883190883, "grad_norm": 0.07686692476272583, "learning_rate": 1.7923186344238977e-05, "loss": 0.9679, "step": 253 }, { "epoch": 0.28945868945868947, "grad_norm": 0.07310675829648972, "learning_rate": 1.799431009957326e-05, "loss": 1.1295, "step": 254 }, { "epoch": 0.2905982905982906, "grad_norm": 0.09621311724185944, "learning_rate": 1.806543385490754e-05, "loss": 0.9402, "step": 255 }, { "epoch": 0.2917378917378917, "grad_norm": 0.08018381148576736, "learning_rate": 1.813655761024182e-05, "loss": 1.0738, "step": 256 }, { "epoch": 0.2928774928774929, "grad_norm": 0.11396273970603943, "learning_rate": 1.8207681365576103e-05, "loss": 1.0957, "step": 257 }, { "epoch": 0.294017094017094, "grad_norm": 0.06747505813837051, "learning_rate": 1.8278805120910388e-05, "loss": 1.1312, "step": 258 }, { "epoch": 0.2951566951566952, "grad_norm": 0.09053219109773636, "learning_rate": 1.8349928876244666e-05, "loss": 0.9447, "step": 259 }, { "epoch": 0.2962962962962963, "grad_norm": 0.08863767236471176, "learning_rate": 1.8421052631578947e-05, "loss": 1.1523, "step": 260 }, { "epoch": 0.29743589743589743, "grad_norm": 0.09314347803592682, "learning_rate": 1.849217638691323e-05, "loss": 1.0229, "step": 261 }, { "epoch": 0.2985754985754986, "grad_norm": 0.07961639761924744, "learning_rate": 1.856330014224751e-05, "loss": 1.0276, "step": 262 }, { "epoch": 0.29971509971509974, "grad_norm": 0.07756153494119644, "learning_rate": 1.8634423897581792e-05, "loss": 1.0418, "step": 263 }, { "epoch": 0.30085470085470084, "grad_norm": 0.1048012226819992, "learning_rate": 1.8705547652916077e-05, "loss": 0.8217, "step": 264 }, { "epoch": 0.301994301994302, "grad_norm": 0.09964671730995178, "learning_rate": 1.8776671408250358e-05, "loss": 0.9676, "step": 265 }, { "epoch": 0.30313390313390315, "grad_norm": 0.08192896097898483, "learning_rate": 1.884779516358464e-05, "loss": 1.0426, "step": 266 }, { "epoch": 0.30427350427350425, "grad_norm": 0.08869923651218414, "learning_rate": 1.891891891891892e-05, "loss": 0.9093, "step": 267 }, { "epoch": 0.3054131054131054, "grad_norm": 0.07854004949331284, "learning_rate": 1.8990042674253203e-05, "loss": 1.1405, "step": 268 }, { "epoch": 0.30655270655270656, "grad_norm": 0.08240702003240585, "learning_rate": 1.906116642958748e-05, "loss": 1.0521, "step": 269 }, { "epoch": 0.3076923076923077, "grad_norm": 0.09875097125768661, "learning_rate": 1.9132290184921766e-05, "loss": 0.9214, "step": 270 }, { "epoch": 0.3088319088319088, "grad_norm": 0.09866699576377869, "learning_rate": 1.9203413940256047e-05, "loss": 1.0171, "step": 271 }, { "epoch": 0.30997150997150996, "grad_norm": 0.07714200019836426, "learning_rate": 1.927453769559033e-05, "loss": 1.1135, "step": 272 }, { "epoch": 0.3111111111111111, "grad_norm": 0.09378238022327423, "learning_rate": 1.934566145092461e-05, "loss": 1.0331, "step": 273 }, { "epoch": 0.31225071225071227, "grad_norm": 0.10245545208454132, "learning_rate": 1.941678520625889e-05, "loss": 1.0428, "step": 274 }, { "epoch": 0.31339031339031337, "grad_norm": 0.07265407592058182, "learning_rate": 1.9487908961593173e-05, "loss": 1.1834, "step": 275 }, { "epoch": 0.3145299145299145, "grad_norm": 0.10271655768156052, "learning_rate": 1.9559032716927454e-05, "loss": 0.9892, "step": 276 }, { "epoch": 0.3156695156695157, "grad_norm": 0.0895722284913063, "learning_rate": 1.9630156472261736e-05, "loss": 1.1456, "step": 277 }, { "epoch": 0.31680911680911683, "grad_norm": 0.10416832566261292, "learning_rate": 1.9701280227596017e-05, "loss": 0.9345, "step": 278 }, { "epoch": 0.31794871794871793, "grad_norm": 0.10268007218837738, "learning_rate": 1.97724039829303e-05, "loss": 0.967, "step": 279 }, { "epoch": 0.3190883190883191, "grad_norm": 0.08895410597324371, "learning_rate": 1.984352773826458e-05, "loss": 1.0201, "step": 280 }, { "epoch": 0.32022792022792024, "grad_norm": 0.08917368203401566, "learning_rate": 1.9914651493598862e-05, "loss": 1.0123, "step": 281 }, { "epoch": 0.3213675213675214, "grad_norm": 0.1120944395661354, "learning_rate": 1.9985775248933143e-05, "loss": 0.9762, "step": 282 }, { "epoch": 0.3225071225071225, "grad_norm": 0.1041937842965126, "learning_rate": 2.0056899004267428e-05, "loss": 1.0374, "step": 283 }, { "epoch": 0.32364672364672364, "grad_norm": 0.15387901663780212, "learning_rate": 2.012802275960171e-05, "loss": 1.1121, "step": 284 }, { "epoch": 0.3247863247863248, "grad_norm": 0.08792157471179962, "learning_rate": 2.019914651493599e-05, "loss": 1.0123, "step": 285 }, { "epoch": 0.32592592592592595, "grad_norm": 0.08879154175519943, "learning_rate": 2.0270270270270273e-05, "loss": 1.0296, "step": 286 }, { "epoch": 0.32706552706552705, "grad_norm": 0.0798165574669838, "learning_rate": 2.034139402560455e-05, "loss": 0.9502, "step": 287 }, { "epoch": 0.3282051282051282, "grad_norm": 0.07392948120832443, "learning_rate": 2.0412517780938832e-05, "loss": 0.9606, "step": 288 }, { "epoch": 0.32934472934472936, "grad_norm": 0.1018989235162735, "learning_rate": 2.0483641536273117e-05, "loss": 1.0921, "step": 289 }, { "epoch": 0.33048433048433046, "grad_norm": 0.10552415996789932, "learning_rate": 2.05547652916074e-05, "loss": 1.0956, "step": 290 }, { "epoch": 0.3316239316239316, "grad_norm": 0.07778573781251907, "learning_rate": 2.062588904694168e-05, "loss": 1.1337, "step": 291 }, { "epoch": 0.33276353276353277, "grad_norm": 0.10200662165880203, "learning_rate": 2.069701280227596e-05, "loss": 1.0764, "step": 292 }, { "epoch": 0.3339031339031339, "grad_norm": 0.09001302719116211, "learning_rate": 2.0768136557610243e-05, "loss": 1.2007, "step": 293 }, { "epoch": 0.335042735042735, "grad_norm": 0.13165408372879028, "learning_rate": 2.0839260312944524e-05, "loss": 0.8175, "step": 294 }, { "epoch": 0.33618233618233617, "grad_norm": 0.10058224946260452, "learning_rate": 2.0910384068278806e-05, "loss": 1.0426, "step": 295 }, { "epoch": 0.3373219373219373, "grad_norm": 0.08600525557994843, "learning_rate": 2.0981507823613087e-05, "loss": 1.0916, "step": 296 }, { "epoch": 0.3384615384615385, "grad_norm": 0.1157507374882698, "learning_rate": 2.105263157894737e-05, "loss": 1.1185, "step": 297 }, { "epoch": 0.3396011396011396, "grad_norm": 0.10129654407501221, "learning_rate": 2.112375533428165e-05, "loss": 0.9682, "step": 298 }, { "epoch": 0.34074074074074073, "grad_norm": 0.09079531580209732, "learning_rate": 2.1194879089615932e-05, "loss": 1.1603, "step": 299 }, { "epoch": 0.3418803418803419, "grad_norm": 0.11449619382619858, "learning_rate": 2.1266002844950213e-05, "loss": 0.9804, "step": 300 }, { "epoch": 0.34301994301994304, "grad_norm": 0.07949551939964294, "learning_rate": 2.1337126600284495e-05, "loss": 1.1126, "step": 301 }, { "epoch": 0.34415954415954414, "grad_norm": 0.08972650021314621, "learning_rate": 2.140825035561878e-05, "loss": 1.1019, "step": 302 }, { "epoch": 0.3452991452991453, "grad_norm": 0.11598126590251923, "learning_rate": 2.147937411095306e-05, "loss": 0.9736, "step": 303 }, { "epoch": 0.34643874643874645, "grad_norm": 0.10045897960662842, "learning_rate": 2.1550497866287343e-05, "loss": 0.9885, "step": 304 }, { "epoch": 0.3475783475783476, "grad_norm": 0.10639733076095581, "learning_rate": 2.1621621621621624e-05, "loss": 1.0043, "step": 305 }, { "epoch": 0.3487179487179487, "grad_norm": 0.09556877613067627, "learning_rate": 2.1692745376955902e-05, "loss": 1.1844, "step": 306 }, { "epoch": 0.34985754985754985, "grad_norm": 0.1266985535621643, "learning_rate": 2.1763869132290184e-05, "loss": 0.936, "step": 307 }, { "epoch": 0.350997150997151, "grad_norm": 0.10811587423086166, "learning_rate": 2.183499288762447e-05, "loss": 0.9834, "step": 308 }, { "epoch": 0.35213675213675216, "grad_norm": 0.1331746131181717, "learning_rate": 2.190611664295875e-05, "loss": 0.9339, "step": 309 }, { "epoch": 0.35327635327635326, "grad_norm": 0.08796073496341705, "learning_rate": 2.197724039829303e-05, "loss": 1.1413, "step": 310 }, { "epoch": 0.3544159544159544, "grad_norm": 0.10409439355134964, "learning_rate": 2.2048364153627313e-05, "loss": 1.0269, "step": 311 }, { "epoch": 0.35555555555555557, "grad_norm": 0.08840052038431168, "learning_rate": 2.2119487908961594e-05, "loss": 1.0832, "step": 312 }, { "epoch": 0.35669515669515667, "grad_norm": 0.09507548063993454, "learning_rate": 2.2190611664295876e-05, "loss": 1.0578, "step": 313 }, { "epoch": 0.3578347578347578, "grad_norm": 0.08848431706428528, "learning_rate": 2.2261735419630157e-05, "loss": 1.1428, "step": 314 }, { "epoch": 0.358974358974359, "grad_norm": 0.11572188884019852, "learning_rate": 2.233285917496444e-05, "loss": 0.9553, "step": 315 }, { "epoch": 0.36011396011396013, "grad_norm": 0.11000053584575653, "learning_rate": 2.240398293029872e-05, "loss": 1.0818, "step": 316 }, { "epoch": 0.36125356125356123, "grad_norm": 0.0908002182841301, "learning_rate": 2.2475106685633002e-05, "loss": 1.0735, "step": 317 }, { "epoch": 0.3623931623931624, "grad_norm": 0.09504730254411697, "learning_rate": 2.2546230440967283e-05, "loss": 0.9909, "step": 318 }, { "epoch": 0.36353276353276354, "grad_norm": 0.11202896386384964, "learning_rate": 2.2617354196301565e-05, "loss": 0.858, "step": 319 }, { "epoch": 0.3646723646723647, "grad_norm": 0.11501264572143555, "learning_rate": 2.2688477951635846e-05, "loss": 1.0114, "step": 320 }, { "epoch": 0.3658119658119658, "grad_norm": 0.09337083995342255, "learning_rate": 2.275960170697013e-05, "loss": 1.1964, "step": 321 }, { "epoch": 0.36695156695156694, "grad_norm": 0.13113431632518768, "learning_rate": 2.2830725462304413e-05, "loss": 0.9994, "step": 322 }, { "epoch": 0.3680911680911681, "grad_norm": 0.12175516039133072, "learning_rate": 2.2901849217638694e-05, "loss": 1.0177, "step": 323 }, { "epoch": 0.36923076923076925, "grad_norm": 0.0918029248714447, "learning_rate": 2.2972972972972976e-05, "loss": 1.0749, "step": 324 }, { "epoch": 0.37037037037037035, "grad_norm": 0.10947254300117493, "learning_rate": 2.3044096728307254e-05, "loss": 0.9808, "step": 325 }, { "epoch": 0.3715099715099715, "grad_norm": 0.0879134014248848, "learning_rate": 2.3115220483641535e-05, "loss": 1.2289, "step": 326 }, { "epoch": 0.37264957264957266, "grad_norm": 0.11936598271131516, "learning_rate": 2.318634423897582e-05, "loss": 0.899, "step": 327 }, { "epoch": 0.3737891737891738, "grad_norm": 0.10722225904464722, "learning_rate": 2.32574679943101e-05, "loss": 0.9152, "step": 328 }, { "epoch": 0.3749287749287749, "grad_norm": 0.09743420034646988, "learning_rate": 2.3328591749644383e-05, "loss": 0.9961, "step": 329 }, { "epoch": 0.37606837606837606, "grad_norm": 0.11821708083152771, "learning_rate": 2.3399715504978665e-05, "loss": 0.9481, "step": 330 }, { "epoch": 0.3772079772079772, "grad_norm": 0.10703743994235992, "learning_rate": 2.3470839260312946e-05, "loss": 1.0505, "step": 331 }, { "epoch": 0.3783475783475784, "grad_norm": 0.11300036311149597, "learning_rate": 2.3541963015647227e-05, "loss": 0.9798, "step": 332 }, { "epoch": 0.37948717948717947, "grad_norm": 0.10210762917995453, "learning_rate": 2.361308677098151e-05, "loss": 1.0242, "step": 333 }, { "epoch": 0.3806267806267806, "grad_norm": 0.1075112596154213, "learning_rate": 2.368421052631579e-05, "loss": 1.1896, "step": 334 }, { "epoch": 0.3817663817663818, "grad_norm": 0.16434282064437866, "learning_rate": 2.3755334281650072e-05, "loss": 0.8707, "step": 335 }, { "epoch": 0.38290598290598293, "grad_norm": 0.15196382999420166, "learning_rate": 2.3826458036984353e-05, "loss": 0.8818, "step": 336 }, { "epoch": 0.38404558404558403, "grad_norm": 0.09512574970722198, "learning_rate": 2.3897581792318635e-05, "loss": 1.163, "step": 337 }, { "epoch": 0.3851851851851852, "grad_norm": 0.09455592930316925, "learning_rate": 2.3968705547652916e-05, "loss": 1.0461, "step": 338 }, { "epoch": 0.38632478632478634, "grad_norm": 0.1121646985411644, "learning_rate": 2.4039829302987198e-05, "loss": 1.0324, "step": 339 }, { "epoch": 0.38746438746438744, "grad_norm": 0.1113317534327507, "learning_rate": 2.411095305832148e-05, "loss": 1.1161, "step": 340 }, { "epoch": 0.3886039886039886, "grad_norm": 0.13179366290569305, "learning_rate": 2.4182076813655764e-05, "loss": 0.885, "step": 341 }, { "epoch": 0.38974358974358975, "grad_norm": 0.10948283970355988, "learning_rate": 2.4253200568990046e-05, "loss": 1.0989, "step": 342 }, { "epoch": 0.3908831908831909, "grad_norm": 0.10469900816679001, "learning_rate": 2.4324324324324327e-05, "loss": 0.9129, "step": 343 }, { "epoch": 0.392022792022792, "grad_norm": 0.11610392481088638, "learning_rate": 2.4395448079658605e-05, "loss": 1.0976, "step": 344 }, { "epoch": 0.39316239316239315, "grad_norm": 0.08694902807474136, "learning_rate": 2.4466571834992887e-05, "loss": 1.0665, "step": 345 }, { "epoch": 0.3943019943019943, "grad_norm": 0.13263843953609467, "learning_rate": 2.453769559032717e-05, "loss": 1.0275, "step": 346 }, { "epoch": 0.39544159544159546, "grad_norm": 0.11833056807518005, "learning_rate": 2.4608819345661453e-05, "loss": 1.1513, "step": 347 }, { "epoch": 0.39658119658119656, "grad_norm": 0.13152043521404266, "learning_rate": 2.4679943100995735e-05, "loss": 0.9383, "step": 348 }, { "epoch": 0.3977207977207977, "grad_norm": 0.12179744243621826, "learning_rate": 2.4751066856330016e-05, "loss": 1.0757, "step": 349 }, { "epoch": 0.39886039886039887, "grad_norm": 0.12765604257583618, "learning_rate": 2.4822190611664297e-05, "loss": 0.8138, "step": 350 }, { "epoch": 0.4, "grad_norm": 0.09365648031234741, "learning_rate": 2.489331436699858e-05, "loss": 1.032, "step": 351 }, { "epoch": 0.4011396011396011, "grad_norm": 0.11461018770933151, "learning_rate": 2.496443812233286e-05, "loss": 0.9753, "step": 352 }, { "epoch": 0.4022792022792023, "grad_norm": 0.11191020905971527, "learning_rate": 2.5035561877667142e-05, "loss": 0.9008, "step": 353 }, { "epoch": 0.40341880341880343, "grad_norm": 0.10496069490909576, "learning_rate": 2.5106685633001427e-05, "loss": 1.136, "step": 354 }, { "epoch": 0.4045584045584046, "grad_norm": 0.13463138043880463, "learning_rate": 2.5177809388335705e-05, "loss": 1.0126, "step": 355 }, { "epoch": 0.4056980056980057, "grad_norm": 0.10763318091630936, "learning_rate": 2.524893314366999e-05, "loss": 0.9897, "step": 356 }, { "epoch": 0.40683760683760684, "grad_norm": 0.12253414839506149, "learning_rate": 2.5320056899004268e-05, "loss": 0.8868, "step": 357 }, { "epoch": 0.407977207977208, "grad_norm": 0.1294226497411728, "learning_rate": 2.539118065433855e-05, "loss": 1.0077, "step": 358 }, { "epoch": 0.40911680911680914, "grad_norm": 0.08762025088071823, "learning_rate": 2.546230440967283e-05, "loss": 1.1424, "step": 359 }, { "epoch": 0.41025641025641024, "grad_norm": 0.12457738071680069, "learning_rate": 2.5533428165007112e-05, "loss": 0.9711, "step": 360 }, { "epoch": 0.4113960113960114, "grad_norm": 0.10666289180517197, "learning_rate": 2.5604551920341397e-05, "loss": 1.0167, "step": 361 }, { "epoch": 0.41253561253561255, "grad_norm": 0.11070402711629868, "learning_rate": 2.5675675675675675e-05, "loss": 1.0213, "step": 362 }, { "epoch": 0.41367521367521365, "grad_norm": 0.12932102382183075, "learning_rate": 2.574679943100996e-05, "loss": 0.9587, "step": 363 }, { "epoch": 0.4148148148148148, "grad_norm": 0.09432212263345718, "learning_rate": 2.5817923186344238e-05, "loss": 1.0806, "step": 364 }, { "epoch": 0.41595441595441596, "grad_norm": 0.09841327369213104, "learning_rate": 2.5889046941678523e-05, "loss": 1.0408, "step": 365 }, { "epoch": 0.4170940170940171, "grad_norm": 0.08794526755809784, "learning_rate": 2.5960170697012805e-05, "loss": 1.1998, "step": 366 }, { "epoch": 0.4182336182336182, "grad_norm": 0.11082588136196136, "learning_rate": 2.6031294452347083e-05, "loss": 0.8758, "step": 367 }, { "epoch": 0.41937321937321936, "grad_norm": 0.10996060073375702, "learning_rate": 2.6102418207681368e-05, "loss": 0.9663, "step": 368 }, { "epoch": 0.4205128205128205, "grad_norm": 0.14823630452156067, "learning_rate": 2.6173541963015646e-05, "loss": 0.9616, "step": 369 }, { "epoch": 0.42165242165242167, "grad_norm": 0.10944898426532745, "learning_rate": 2.624466571834993e-05, "loss": 0.9641, "step": 370 }, { "epoch": 0.42279202279202277, "grad_norm": 0.11472565680742264, "learning_rate": 2.6315789473684212e-05, "loss": 0.9897, "step": 371 }, { "epoch": 0.4239316239316239, "grad_norm": 0.09975427389144897, "learning_rate": 2.6386913229018493e-05, "loss": 1.201, "step": 372 }, { "epoch": 0.4250712250712251, "grad_norm": 0.13006269931793213, "learning_rate": 2.6458036984352775e-05, "loss": 0.9382, "step": 373 }, { "epoch": 0.42621082621082623, "grad_norm": 0.12275472283363342, "learning_rate": 2.652916073968706e-05, "loss": 0.865, "step": 374 }, { "epoch": 0.42735042735042733, "grad_norm": 0.12334656715393066, "learning_rate": 2.6600284495021338e-05, "loss": 0.8927, "step": 375 }, { "epoch": 0.4284900284900285, "grad_norm": 0.14135070145130157, "learning_rate": 2.6671408250355616e-05, "loss": 0.8875, "step": 376 }, { "epoch": 0.42962962962962964, "grad_norm": 0.1329450160264969, "learning_rate": 2.67425320056899e-05, "loss": 1.0661, "step": 377 }, { "epoch": 0.4307692307692308, "grad_norm": 0.09814272075891495, "learning_rate": 2.6813655761024182e-05, "loss": 1.1496, "step": 378 }, { "epoch": 0.4319088319088319, "grad_norm": 0.09715686738491058, "learning_rate": 2.6884779516358467e-05, "loss": 1.0726, "step": 379 }, { "epoch": 0.43304843304843305, "grad_norm": 0.1709275245666504, "learning_rate": 2.6955903271692745e-05, "loss": 0.7714, "step": 380 }, { "epoch": 0.4341880341880342, "grad_norm": 0.10183672606945038, "learning_rate": 2.702702702702703e-05, "loss": 1.0921, "step": 381 }, { "epoch": 0.43532763532763535, "grad_norm": 0.11255377531051636, "learning_rate": 2.7098150782361308e-05, "loss": 1.1506, "step": 382 }, { "epoch": 0.43646723646723645, "grad_norm": 0.11601104587316513, "learning_rate": 2.7169274537695593e-05, "loss": 0.9496, "step": 383 }, { "epoch": 0.4376068376068376, "grad_norm": 0.09490734338760376, "learning_rate": 2.724039829302987e-05, "loss": 1.0956, "step": 384 }, { "epoch": 0.43874643874643876, "grad_norm": 0.12753692269325256, "learning_rate": 2.7311522048364156e-05, "loss": 0.9264, "step": 385 }, { "epoch": 0.43988603988603986, "grad_norm": 0.15168429911136627, "learning_rate": 2.7382645803698438e-05, "loss": 0.9563, "step": 386 }, { "epoch": 0.441025641025641, "grad_norm": 0.11472778022289276, "learning_rate": 2.7453769559032716e-05, "loss": 1.0482, "step": 387 }, { "epoch": 0.44216524216524217, "grad_norm": 0.1089765802025795, "learning_rate": 2.7524893314367e-05, "loss": 1.1146, "step": 388 }, { "epoch": 0.4433048433048433, "grad_norm": 0.1261737048625946, "learning_rate": 2.759601706970128e-05, "loss": 1.0108, "step": 389 }, { "epoch": 0.4444444444444444, "grad_norm": 0.12204381823539734, "learning_rate": 2.7667140825035563e-05, "loss": 1.0517, "step": 390 }, { "epoch": 0.4455840455840456, "grad_norm": 0.13756977021694183, "learning_rate": 2.7738264580369845e-05, "loss": 0.942, "step": 391 }, { "epoch": 0.44672364672364673, "grad_norm": 0.10421758145093918, "learning_rate": 2.780938833570413e-05, "loss": 1.1082, "step": 392 }, { "epoch": 0.4478632478632479, "grad_norm": 0.12591853737831116, "learning_rate": 2.7880512091038408e-05, "loss": 1.0545, "step": 393 }, { "epoch": 0.449002849002849, "grad_norm": 0.1164020299911499, "learning_rate": 2.7951635846372693e-05, "loss": 1.1423, "step": 394 }, { "epoch": 0.45014245014245013, "grad_norm": 0.11195012927055359, "learning_rate": 2.802275960170697e-05, "loss": 0.9616, "step": 395 }, { "epoch": 0.4512820512820513, "grad_norm": 0.10594327747821808, "learning_rate": 2.8093883357041252e-05, "loss": 0.9514, "step": 396 }, { "epoch": 0.45242165242165244, "grad_norm": 0.10857772827148438, "learning_rate": 2.8165007112375534e-05, "loss": 0.9252, "step": 397 }, { "epoch": 0.45356125356125354, "grad_norm": 0.1399461030960083, "learning_rate": 2.8236130867709815e-05, "loss": 0.8931, "step": 398 }, { "epoch": 0.4547008547008547, "grad_norm": 0.1193869560956955, "learning_rate": 2.83072546230441e-05, "loss": 0.9956, "step": 399 }, { "epoch": 0.45584045584045585, "grad_norm": 0.14120057225227356, "learning_rate": 2.8378378378378378e-05, "loss": 1.0483, "step": 400 }, { "epoch": 0.456980056980057, "grad_norm": 0.13842757046222687, "learning_rate": 2.8449502133712663e-05, "loss": 0.9199, "step": 401 }, { "epoch": 0.4581196581196581, "grad_norm": 0.11650988459587097, "learning_rate": 2.852062588904694e-05, "loss": 1.1315, "step": 402 }, { "epoch": 0.45925925925925926, "grad_norm": 0.11457722634077072, "learning_rate": 2.8591749644381226e-05, "loss": 0.9666, "step": 403 }, { "epoch": 0.4603988603988604, "grad_norm": 0.10708250105381012, "learning_rate": 2.8662873399715508e-05, "loss": 1.2387, "step": 404 }, { "epoch": 0.46153846153846156, "grad_norm": 0.15783163905143738, "learning_rate": 2.8733997155049786e-05, "loss": 0.9538, "step": 405 }, { "epoch": 0.46267806267806266, "grad_norm": 0.11367695033550262, "learning_rate": 2.880512091038407e-05, "loss": 0.8866, "step": 406 }, { "epoch": 0.4638176638176638, "grad_norm": 0.12627817690372467, "learning_rate": 2.887624466571835e-05, "loss": 0.9254, "step": 407 }, { "epoch": 0.46495726495726497, "grad_norm": 0.1505798101425171, "learning_rate": 2.8947368421052634e-05, "loss": 0.7734, "step": 408 }, { "epoch": 0.46609686609686607, "grad_norm": 0.12150489538908005, "learning_rate": 2.9018492176386915e-05, "loss": 1.0682, "step": 409 }, { "epoch": 0.4672364672364672, "grad_norm": 0.11778156459331512, "learning_rate": 2.9089615931721196e-05, "loss": 0.9786, "step": 410 }, { "epoch": 0.4683760683760684, "grad_norm": 0.10478592664003372, "learning_rate": 2.9160739687055478e-05, "loss": 1.153, "step": 411 }, { "epoch": 0.46951566951566953, "grad_norm": 0.12088940292596817, "learning_rate": 2.9231863442389763e-05, "loss": 1.0392, "step": 412 }, { "epoch": 0.47065527065527063, "grad_norm": 0.11632979661226273, "learning_rate": 2.930298719772404e-05, "loss": 1.0859, "step": 413 }, { "epoch": 0.4717948717948718, "grad_norm": 0.11413583159446716, "learning_rate": 2.937411095305832e-05, "loss": 1.1095, "step": 414 }, { "epoch": 0.47293447293447294, "grad_norm": 0.13517241179943085, "learning_rate": 2.9445234708392604e-05, "loss": 0.9988, "step": 415 }, { "epoch": 0.4740740740740741, "grad_norm": 0.12538863718509674, "learning_rate": 2.9516358463726885e-05, "loss": 1.0073, "step": 416 }, { "epoch": 0.4752136752136752, "grad_norm": 0.13839219510555267, "learning_rate": 2.958748221906117e-05, "loss": 0.824, "step": 417 }, { "epoch": 0.47635327635327634, "grad_norm": 0.11742749810218811, "learning_rate": 2.9658605974395448e-05, "loss": 1.0421, "step": 418 }, { "epoch": 0.4774928774928775, "grad_norm": 0.11858450621366501, "learning_rate": 2.9729729729729733e-05, "loss": 0.918, "step": 419 }, { "epoch": 0.47863247863247865, "grad_norm": 0.13121211528778076, "learning_rate": 2.980085348506401e-05, "loss": 1.0158, "step": 420 }, { "epoch": 0.47977207977207975, "grad_norm": 0.11820319294929504, "learning_rate": 2.9871977240398296e-05, "loss": 1.0769, "step": 421 }, { "epoch": 0.4809116809116809, "grad_norm": 0.10469360649585724, "learning_rate": 2.9943100995732574e-05, "loss": 1.0986, "step": 422 }, { "epoch": 0.48205128205128206, "grad_norm": 0.12452942878007889, "learning_rate": 3.0014224751066856e-05, "loss": 1.0794, "step": 423 }, { "epoch": 0.4831908831908832, "grad_norm": 0.11977406591176987, "learning_rate": 3.008534850640114e-05, "loss": 1.0015, "step": 424 }, { "epoch": 0.4843304843304843, "grad_norm": 0.11374548822641373, "learning_rate": 3.015647226173542e-05, "loss": 1.0262, "step": 425 }, { "epoch": 0.48547008547008547, "grad_norm": 0.12518678605556488, "learning_rate": 3.0227596017069704e-05, "loss": 0.953, "step": 426 }, { "epoch": 0.4866096866096866, "grad_norm": 0.1479639708995819, "learning_rate": 3.029871977240398e-05, "loss": 0.7888, "step": 427 }, { "epoch": 0.4877492877492878, "grad_norm": 0.11877167969942093, "learning_rate": 3.0369843527738266e-05, "loss": 0.9604, "step": 428 }, { "epoch": 0.4888888888888889, "grad_norm": 0.12271634489297867, "learning_rate": 3.0440967283072548e-05, "loss": 0.9674, "step": 429 }, { "epoch": 0.49002849002849, "grad_norm": 0.10439113527536392, "learning_rate": 3.0512091038406833e-05, "loss": 1.0302, "step": 430 }, { "epoch": 0.4911680911680912, "grad_norm": 0.13118834793567657, "learning_rate": 3.058321479374111e-05, "loss": 0.9919, "step": 431 }, { "epoch": 0.49230769230769234, "grad_norm": 0.15855973958969116, "learning_rate": 3.065433854907539e-05, "loss": 0.9166, "step": 432 }, { "epoch": 0.49344729344729343, "grad_norm": 0.14117270708084106, "learning_rate": 3.0725462304409674e-05, "loss": 0.9156, "step": 433 }, { "epoch": 0.4945868945868946, "grad_norm": 0.14790265262126923, "learning_rate": 3.0796586059743955e-05, "loss": 0.942, "step": 434 }, { "epoch": 0.49572649572649574, "grad_norm": 0.11447043716907501, "learning_rate": 3.086770981507824e-05, "loss": 1.0346, "step": 435 }, { "epoch": 0.49686609686609684, "grad_norm": 0.12896165251731873, "learning_rate": 3.093883357041252e-05, "loss": 1.0571, "step": 436 }, { "epoch": 0.498005698005698, "grad_norm": 0.13744257390499115, "learning_rate": 3.10099573257468e-05, "loss": 1.0024, "step": 437 }, { "epoch": 0.49914529914529915, "grad_norm": 0.1497306078672409, "learning_rate": 3.108108108108108e-05, "loss": 0.8786, "step": 438 }, { "epoch": 0.5002849002849002, "grad_norm": 0.13461723923683167, "learning_rate": 3.115220483641537e-05, "loss": 0.9883, "step": 439 }, { "epoch": 0.5014245014245015, "grad_norm": 0.15304909646511078, "learning_rate": 3.1223328591749644e-05, "loss": 0.7547, "step": 440 }, { "epoch": 0.5025641025641026, "grad_norm": 0.13834773004055023, "learning_rate": 3.129445234708393e-05, "loss": 0.9412, "step": 441 }, { "epoch": 0.5037037037037037, "grad_norm": 0.12929099798202515, "learning_rate": 3.136557610241821e-05, "loss": 0.997, "step": 442 }, { "epoch": 0.5048433048433049, "grad_norm": 0.1265610009431839, "learning_rate": 3.143669985775249e-05, "loss": 1.0354, "step": 443 }, { "epoch": 0.505982905982906, "grad_norm": 0.13848094642162323, "learning_rate": 3.150782361308677e-05, "loss": 0.8859, "step": 444 }, { "epoch": 0.5071225071225072, "grad_norm": 0.13581381738185883, "learning_rate": 3.157894736842105e-05, "loss": 0.9829, "step": 445 }, { "epoch": 0.5082621082621083, "grad_norm": 0.12351993471384048, "learning_rate": 3.165007112375534e-05, "loss": 0.9417, "step": 446 }, { "epoch": 0.5094017094017094, "grad_norm": 0.16758589446544647, "learning_rate": 3.1721194879089615e-05, "loss": 0.965, "step": 447 }, { "epoch": 0.5105413105413106, "grad_norm": 0.14677603542804718, "learning_rate": 3.17923186344239e-05, "loss": 0.9037, "step": 448 }, { "epoch": 0.5116809116809117, "grad_norm": 0.14332057535648346, "learning_rate": 3.186344238975818e-05, "loss": 0.9081, "step": 449 }, { "epoch": 0.5128205128205128, "grad_norm": 0.14638587832450867, "learning_rate": 3.1934566145092466e-05, "loss": 1.0229, "step": 450 }, { "epoch": 0.513960113960114, "grad_norm": 0.1312592774629593, "learning_rate": 3.200568990042675e-05, "loss": 1.0872, "step": 451 }, { "epoch": 0.5150997150997151, "grad_norm": 0.1265982687473297, "learning_rate": 3.207681365576102e-05, "loss": 1.0864, "step": 452 }, { "epoch": 0.5162393162393163, "grad_norm": 0.10750409960746765, "learning_rate": 3.214793741109531e-05, "loss": 1.061, "step": 453 }, { "epoch": 0.5173789173789174, "grad_norm": 0.12825429439544678, "learning_rate": 3.2219061166429585e-05, "loss": 0.8854, "step": 454 }, { "epoch": 0.5185185185185185, "grad_norm": 0.13439372181892395, "learning_rate": 3.229018492176387e-05, "loss": 1.1182, "step": 455 }, { "epoch": 0.5196581196581197, "grad_norm": 0.1404609978199005, "learning_rate": 3.236130867709815e-05, "loss": 0.9855, "step": 456 }, { "epoch": 0.5207977207977208, "grad_norm": 0.1411384493112564, "learning_rate": 3.2432432432432436e-05, "loss": 0.9081, "step": 457 }, { "epoch": 0.5219373219373219, "grad_norm": 0.12411214411258698, "learning_rate": 3.250355618776672e-05, "loss": 0.9156, "step": 458 }, { "epoch": 0.5230769230769231, "grad_norm": 0.12314049154520035, "learning_rate": 3.2574679943101e-05, "loss": 1.1267, "step": 459 }, { "epoch": 0.5242165242165242, "grad_norm": 0.1416395902633667, "learning_rate": 3.264580369843528e-05, "loss": 0.9506, "step": 460 }, { "epoch": 0.5253561253561253, "grad_norm": 0.15624377131462097, "learning_rate": 3.2716927453769555e-05, "loss": 0.9561, "step": 461 }, { "epoch": 0.5264957264957265, "grad_norm": 0.11642137169837952, "learning_rate": 3.2788051209103844e-05, "loss": 1.0509, "step": 462 }, { "epoch": 0.5276353276353276, "grad_norm": 0.12744508683681488, "learning_rate": 3.2859174964438125e-05, "loss": 0.9713, "step": 463 }, { "epoch": 0.5287749287749288, "grad_norm": 0.1879698932170868, "learning_rate": 3.2930298719772407e-05, "loss": 0.7919, "step": 464 }, { "epoch": 0.5299145299145299, "grad_norm": 0.1330975741147995, "learning_rate": 3.300142247510669e-05, "loss": 0.9648, "step": 465 }, { "epoch": 0.531054131054131, "grad_norm": 0.14698782563209534, "learning_rate": 3.307254623044097e-05, "loss": 0.9859, "step": 466 }, { "epoch": 0.5321937321937322, "grad_norm": 0.11747878044843674, "learning_rate": 3.314366998577525e-05, "loss": 1.0034, "step": 467 }, { "epoch": 0.5333333333333333, "grad_norm": 0.12728990614414215, "learning_rate": 3.321479374110953e-05, "loss": 1.1303, "step": 468 }, { "epoch": 0.5344729344729344, "grad_norm": 0.10790850222110748, "learning_rate": 3.3285917496443814e-05, "loss": 1.0694, "step": 469 }, { "epoch": 0.5356125356125356, "grad_norm": 0.1333288848400116, "learning_rate": 3.3357041251778095e-05, "loss": 0.9974, "step": 470 }, { "epoch": 0.5367521367521367, "grad_norm": 0.10888312757015228, "learning_rate": 3.342816500711238e-05, "loss": 0.9974, "step": 471 }, { "epoch": 0.5378917378917379, "grad_norm": 0.12045443803071976, "learning_rate": 3.349928876244666e-05, "loss": 0.9731, "step": 472 }, { "epoch": 0.539031339031339, "grad_norm": 0.13777302205562592, "learning_rate": 3.357041251778094e-05, "loss": 0.9414, "step": 473 }, { "epoch": 0.5401709401709401, "grad_norm": 0.13722509145736694, "learning_rate": 3.364153627311522e-05, "loss": 1.0066, "step": 474 }, { "epoch": 0.5413105413105413, "grad_norm": 0.14611203968524933, "learning_rate": 3.37126600284495e-05, "loss": 0.9086, "step": 475 }, { "epoch": 0.5424501424501424, "grad_norm": 0.11133522540330887, "learning_rate": 3.3783783783783784e-05, "loss": 0.9846, "step": 476 }, { "epoch": 0.5435897435897435, "grad_norm": 0.14591069519519806, "learning_rate": 3.385490753911807e-05, "loss": 0.9833, "step": 477 }, { "epoch": 0.5447293447293448, "grad_norm": 0.1600605696439743, "learning_rate": 3.392603129445235e-05, "loss": 0.8907, "step": 478 }, { "epoch": 0.5458689458689459, "grad_norm": 0.1226716935634613, "learning_rate": 3.3997155049786636e-05, "loss": 0.9126, "step": 479 }, { "epoch": 0.5470085470085471, "grad_norm": 0.1120891347527504, "learning_rate": 3.406827880512091e-05, "loss": 0.9726, "step": 480 }, { "epoch": 0.5481481481481482, "grad_norm": 0.12946617603302002, "learning_rate": 3.413940256045519e-05, "loss": 0.9613, "step": 481 }, { "epoch": 0.5492877492877493, "grad_norm": 0.16106046736240387, "learning_rate": 3.421052631578947e-05, "loss": 0.9839, "step": 482 }, { "epoch": 0.5504273504273505, "grad_norm": 0.12446320801973343, "learning_rate": 3.4281650071123755e-05, "loss": 1.0735, "step": 483 }, { "epoch": 0.5515669515669516, "grad_norm": 0.14501799643039703, "learning_rate": 3.435277382645804e-05, "loss": 0.8862, "step": 484 }, { "epoch": 0.5527065527065527, "grad_norm": 0.11471837013959885, "learning_rate": 3.442389758179232e-05, "loss": 0.9862, "step": 485 }, { "epoch": 0.5538461538461539, "grad_norm": 0.1463870108127594, "learning_rate": 3.4495021337126606e-05, "loss": 0.9029, "step": 486 }, { "epoch": 0.554985754985755, "grad_norm": 0.16554170846939087, "learning_rate": 3.456614509246088e-05, "loss": 0.9825, "step": 487 }, { "epoch": 0.5561253561253561, "grad_norm": 0.11619739234447479, "learning_rate": 3.463726884779517e-05, "loss": 0.9034, "step": 488 }, { "epoch": 0.5572649572649573, "grad_norm": 0.11377275735139847, "learning_rate": 3.470839260312945e-05, "loss": 0.8842, "step": 489 }, { "epoch": 0.5584045584045584, "grad_norm": 0.12807880342006683, "learning_rate": 3.4779516358463725e-05, "loss": 0.8939, "step": 490 }, { "epoch": 0.5595441595441596, "grad_norm": 0.1383783370256424, "learning_rate": 3.485064011379801e-05, "loss": 0.8719, "step": 491 }, { "epoch": 0.5606837606837607, "grad_norm": 0.12619352340698242, "learning_rate": 3.492176386913229e-05, "loss": 1.0381, "step": 492 }, { "epoch": 0.5618233618233618, "grad_norm": 0.11499320715665817, "learning_rate": 3.4992887624466576e-05, "loss": 1.1203, "step": 493 }, { "epoch": 0.562962962962963, "grad_norm": 0.138152614235878, "learning_rate": 3.506401137980085e-05, "loss": 0.8666, "step": 494 }, { "epoch": 0.5641025641025641, "grad_norm": 0.10287363827228546, "learning_rate": 3.513513513513514e-05, "loss": 1.0991, "step": 495 }, { "epoch": 0.5652421652421652, "grad_norm": 0.12071411311626434, "learning_rate": 3.520625889046942e-05, "loss": 1.058, "step": 496 }, { "epoch": 0.5663817663817664, "grad_norm": 0.12843723595142365, "learning_rate": 3.52773826458037e-05, "loss": 0.9286, "step": 497 }, { "epoch": 0.5675213675213675, "grad_norm": 0.14968761801719666, "learning_rate": 3.5348506401137984e-05, "loss": 0.8306, "step": 498 }, { "epoch": 0.5686609686609687, "grad_norm": 0.1458226442337036, "learning_rate": 3.541963015647226e-05, "loss": 0.9631, "step": 499 }, { "epoch": 0.5698005698005698, "grad_norm": 0.1441773623228073, "learning_rate": 3.5490753911806547e-05, "loss": 0.9238, "step": 500 }, { "epoch": 0.5709401709401709, "grad_norm": 0.1313546597957611, "learning_rate": 3.556187766714083e-05, "loss": 1.0173, "step": 501 }, { "epoch": 0.5720797720797721, "grad_norm": 0.1251901239156723, "learning_rate": 3.563300142247511e-05, "loss": 0.9318, "step": 502 }, { "epoch": 0.5732193732193732, "grad_norm": 0.12387225031852722, "learning_rate": 3.570412517780939e-05, "loss": 0.9456, "step": 503 }, { "epoch": 0.5743589743589743, "grad_norm": 0.1564510017633438, "learning_rate": 3.577524893314367e-05, "loss": 1.017, "step": 504 }, { "epoch": 0.5754985754985755, "grad_norm": 0.14491894841194153, "learning_rate": 3.5846372688477954e-05, "loss": 0.931, "step": 505 }, { "epoch": 0.5766381766381766, "grad_norm": 0.1279730498790741, "learning_rate": 3.5917496443812235e-05, "loss": 0.9221, "step": 506 }, { "epoch": 0.5777777777777777, "grad_norm": 0.12600359320640564, "learning_rate": 3.598862019914652e-05, "loss": 0.9435, "step": 507 }, { "epoch": 0.5789173789173789, "grad_norm": 0.1218702420592308, "learning_rate": 3.60597439544808e-05, "loss": 0.9641, "step": 508 }, { "epoch": 0.58005698005698, "grad_norm": 0.1181662306189537, "learning_rate": 3.613086770981508e-05, "loss": 1.158, "step": 509 }, { "epoch": 0.5811965811965812, "grad_norm": 0.16515326499938965, "learning_rate": 3.620199146514936e-05, "loss": 0.8452, "step": 510 }, { "epoch": 0.5823361823361823, "grad_norm": 0.15059532225131989, "learning_rate": 3.627311522048364e-05, "loss": 0.9731, "step": 511 }, { "epoch": 0.5834757834757834, "grad_norm": 0.132746160030365, "learning_rate": 3.6344238975817924e-05, "loss": 0.9312, "step": 512 }, { "epoch": 0.5846153846153846, "grad_norm": 0.15459783375263214, "learning_rate": 3.6415362731152206e-05, "loss": 0.9408, "step": 513 }, { "epoch": 0.5857549857549857, "grad_norm": 0.19226768612861633, "learning_rate": 3.648648648648649e-05, "loss": 0.776, "step": 514 }, { "epoch": 0.5868945868945868, "grad_norm": 0.1287761926651001, "learning_rate": 3.6557610241820776e-05, "loss": 0.7126, "step": 515 }, { "epoch": 0.588034188034188, "grad_norm": 0.13876792788505554, "learning_rate": 3.662873399715505e-05, "loss": 1.0805, "step": 516 }, { "epoch": 0.5891737891737892, "grad_norm": 0.1499495655298233, "learning_rate": 3.669985775248933e-05, "loss": 1.0702, "step": 517 }, { "epoch": 0.5903133903133904, "grad_norm": 0.1824144423007965, "learning_rate": 3.677098150782361e-05, "loss": 0.8489, "step": 518 }, { "epoch": 0.5914529914529915, "grad_norm": 0.13175229728221893, "learning_rate": 3.6842105263157895e-05, "loss": 1.1439, "step": 519 }, { "epoch": 0.5925925925925926, "grad_norm": 0.12808462977409363, "learning_rate": 3.6913229018492176e-05, "loss": 1.1791, "step": 520 }, { "epoch": 0.5937321937321938, "grad_norm": 0.14545990526676178, "learning_rate": 3.698435277382646e-05, "loss": 0.9442, "step": 521 }, { "epoch": 0.5948717948717949, "grad_norm": 0.11629868298768997, "learning_rate": 3.7055476529160746e-05, "loss": 1.0356, "step": 522 }, { "epoch": 0.596011396011396, "grad_norm": 0.1502312570810318, "learning_rate": 3.712660028449502e-05, "loss": 0.8577, "step": 523 }, { "epoch": 0.5971509971509972, "grad_norm": 0.11484634131193161, "learning_rate": 3.719772403982931e-05, "loss": 0.9702, "step": 524 }, { "epoch": 0.5982905982905983, "grad_norm": 0.13677164912223816, "learning_rate": 3.7268847795163584e-05, "loss": 0.9952, "step": 525 }, { "epoch": 0.5994301994301995, "grad_norm": 0.14317670464515686, "learning_rate": 3.7339971550497865e-05, "loss": 0.8868, "step": 526 }, { "epoch": 0.6005698005698006, "grad_norm": 0.1273491531610489, "learning_rate": 3.741109530583215e-05, "loss": 0.9421, "step": 527 }, { "epoch": 0.6017094017094017, "grad_norm": 0.1210285946726799, "learning_rate": 3.748221906116643e-05, "loss": 1.1468, "step": 528 }, { "epoch": 0.6028490028490029, "grad_norm": 0.13037505745887756, "learning_rate": 3.7553342816500716e-05, "loss": 1.0806, "step": 529 }, { "epoch": 0.603988603988604, "grad_norm": 0.1362902969121933, "learning_rate": 3.762446657183499e-05, "loss": 0.858, "step": 530 }, { "epoch": 0.6051282051282051, "grad_norm": 0.13304390013217926, "learning_rate": 3.769559032716928e-05, "loss": 0.9598, "step": 531 }, { "epoch": 0.6062678062678063, "grad_norm": 0.12246489524841309, "learning_rate": 3.7766714082503554e-05, "loss": 1.0896, "step": 532 }, { "epoch": 0.6074074074074074, "grad_norm": 0.145652174949646, "learning_rate": 3.783783783783784e-05, "loss": 0.943, "step": 533 }, { "epoch": 0.6085470085470085, "grad_norm": 0.1362730711698532, "learning_rate": 3.7908961593172124e-05, "loss": 0.8954, "step": 534 }, { "epoch": 0.6096866096866097, "grad_norm": 0.14306199550628662, "learning_rate": 3.7980085348506405e-05, "loss": 1.0226, "step": 535 }, { "epoch": 0.6108262108262108, "grad_norm": 0.21024058759212494, "learning_rate": 3.805120910384069e-05, "loss": 1.0233, "step": 536 }, { "epoch": 0.611965811965812, "grad_norm": 0.12315906584262848, "learning_rate": 3.812233285917496e-05, "loss": 1.1056, "step": 537 }, { "epoch": 0.6131054131054131, "grad_norm": 0.14994341135025024, "learning_rate": 3.819345661450925e-05, "loss": 0.9163, "step": 538 }, { "epoch": 0.6142450142450142, "grad_norm": 0.14237017929553986, "learning_rate": 3.826458036984353e-05, "loss": 0.9609, "step": 539 }, { "epoch": 0.6153846153846154, "grad_norm": 0.14631733298301697, "learning_rate": 3.833570412517781e-05, "loss": 0.9478, "step": 540 }, { "epoch": 0.6165242165242165, "grad_norm": 0.1300070732831955, "learning_rate": 3.8406827880512094e-05, "loss": 1.0093, "step": 541 }, { "epoch": 0.6176638176638176, "grad_norm": 0.14567865431308746, "learning_rate": 3.8477951635846376e-05, "loss": 1.0256, "step": 542 }, { "epoch": 0.6188034188034188, "grad_norm": 0.12915971875190735, "learning_rate": 3.854907539118066e-05, "loss": 1.0564, "step": 543 }, { "epoch": 0.6199430199430199, "grad_norm": 0.15734048187732697, "learning_rate": 3.862019914651494e-05, "loss": 1.0071, "step": 544 }, { "epoch": 0.6210826210826211, "grad_norm": 0.1503109484910965, "learning_rate": 3.869132290184922e-05, "loss": 0.978, "step": 545 }, { "epoch": 0.6222222222222222, "grad_norm": 0.11910410970449448, "learning_rate": 3.87624466571835e-05, "loss": 0.9611, "step": 546 }, { "epoch": 0.6233618233618233, "grad_norm": 0.12171913683414459, "learning_rate": 3.883357041251778e-05, "loss": 1.1114, "step": 547 }, { "epoch": 0.6245014245014245, "grad_norm": 0.1300986111164093, "learning_rate": 3.8904694167852064e-05, "loss": 1.1386, "step": 548 }, { "epoch": 0.6256410256410256, "grad_norm": 0.11647982150316238, "learning_rate": 3.8975817923186346e-05, "loss": 1.0324, "step": 549 }, { "epoch": 0.6267806267806267, "grad_norm": 0.15904708206653595, "learning_rate": 3.904694167852063e-05, "loss": 0.9948, "step": 550 }, { "epoch": 0.627920227920228, "grad_norm": 0.14891204237937927, "learning_rate": 3.911806543385491e-05, "loss": 1.0068, "step": 551 }, { "epoch": 0.629059829059829, "grad_norm": 0.1580154150724411, "learning_rate": 3.918918918918919e-05, "loss": 0.8471, "step": 552 }, { "epoch": 0.6301994301994301, "grad_norm": 0.20000672340393066, "learning_rate": 3.926031294452347e-05, "loss": 0.8864, "step": 553 }, { "epoch": 0.6313390313390314, "grad_norm": 0.12312767654657364, "learning_rate": 3.933143669985775e-05, "loss": 0.9992, "step": 554 }, { "epoch": 0.6324786324786325, "grad_norm": 0.18159586191177368, "learning_rate": 3.9402560455192035e-05, "loss": 0.9419, "step": 555 }, { "epoch": 0.6336182336182337, "grad_norm": 0.12226369231939316, "learning_rate": 3.9473684210526316e-05, "loss": 0.9954, "step": 556 }, { "epoch": 0.6347578347578348, "grad_norm": 0.15473830699920654, "learning_rate": 3.95448079658606e-05, "loss": 0.9314, "step": 557 }, { "epoch": 0.6358974358974359, "grad_norm": 0.12818306684494019, "learning_rate": 3.961593172119488e-05, "loss": 0.964, "step": 558 }, { "epoch": 0.6370370370370371, "grad_norm": 0.14302092790603638, "learning_rate": 3.968705547652916e-05, "loss": 0.9609, "step": 559 }, { "epoch": 0.6381766381766382, "grad_norm": 0.12458717823028564, "learning_rate": 3.975817923186345e-05, "loss": 1.0974, "step": 560 }, { "epoch": 0.6393162393162393, "grad_norm": 0.14012236893177032, "learning_rate": 3.9829302987197724e-05, "loss": 0.9884, "step": 561 }, { "epoch": 0.6404558404558405, "grad_norm": 0.12315171211957932, "learning_rate": 3.990042674253201e-05, "loss": 0.8699, "step": 562 }, { "epoch": 0.6415954415954416, "grad_norm": 0.11660997569561005, "learning_rate": 3.997155049786629e-05, "loss": 0.9409, "step": 563 }, { "epoch": 0.6427350427350428, "grad_norm": 0.13937942683696747, "learning_rate": 4.004267425320057e-05, "loss": 0.9293, "step": 564 }, { "epoch": 0.6438746438746439, "grad_norm": 0.14188982546329498, "learning_rate": 4.0113798008534856e-05, "loss": 0.9361, "step": 565 }, { "epoch": 0.645014245014245, "grad_norm": 0.12332411855459213, "learning_rate": 4.018492176386913e-05, "loss": 1.0691, "step": 566 }, { "epoch": 0.6461538461538462, "grad_norm": 0.1458745300769806, "learning_rate": 4.025604551920342e-05, "loss": 0.9676, "step": 567 }, { "epoch": 0.6472934472934473, "grad_norm": 0.14251947402954102, "learning_rate": 4.0327169274537694e-05, "loss": 0.7995, "step": 568 }, { "epoch": 0.6484330484330484, "grad_norm": 0.12855298817157745, "learning_rate": 4.039829302987198e-05, "loss": 0.9423, "step": 569 }, { "epoch": 0.6495726495726496, "grad_norm": 0.14354261755943298, "learning_rate": 4.046941678520626e-05, "loss": 0.8593, "step": 570 }, { "epoch": 0.6507122507122507, "grad_norm": 0.14442981779575348, "learning_rate": 4.0540540540540545e-05, "loss": 0.9695, "step": 571 }, { "epoch": 0.6518518518518519, "grad_norm": 0.13497042655944824, "learning_rate": 4.061166429587483e-05, "loss": 1.0034, "step": 572 }, { "epoch": 0.652991452991453, "grad_norm": 0.1462252140045166, "learning_rate": 4.06827880512091e-05, "loss": 0.9364, "step": 573 }, { "epoch": 0.6541310541310541, "grad_norm": 0.1286773383617401, "learning_rate": 4.075391180654339e-05, "loss": 1.0852, "step": 574 }, { "epoch": 0.6552706552706553, "grad_norm": 0.13807645440101624, "learning_rate": 4.0825035561877664e-05, "loss": 0.942, "step": 575 }, { "epoch": 0.6564102564102564, "grad_norm": 0.13492679595947266, "learning_rate": 4.089615931721195e-05, "loss": 0.896, "step": 576 }, { "epoch": 0.6575498575498575, "grad_norm": 0.1306082159280777, "learning_rate": 4.0967283072546234e-05, "loss": 0.9974, "step": 577 }, { "epoch": 0.6586894586894587, "grad_norm": 0.11695270240306854, "learning_rate": 4.1038406827880516e-05, "loss": 0.9322, "step": 578 }, { "epoch": 0.6598290598290598, "grad_norm": 0.1401551365852356, "learning_rate": 4.11095305832148e-05, "loss": 0.8684, "step": 579 }, { "epoch": 0.6609686609686609, "grad_norm": 0.12197654694318771, "learning_rate": 4.118065433854908e-05, "loss": 0.9943, "step": 580 }, { "epoch": 0.6621082621082621, "grad_norm": 0.1340959370136261, "learning_rate": 4.125177809388336e-05, "loss": 1.0246, "step": 581 }, { "epoch": 0.6632478632478632, "grad_norm": 0.13753369450569153, "learning_rate": 4.132290184921764e-05, "loss": 1.0169, "step": 582 }, { "epoch": 0.6643874643874644, "grad_norm": 0.12810128927230835, "learning_rate": 4.139402560455192e-05, "loss": 1.0053, "step": 583 }, { "epoch": 0.6655270655270655, "grad_norm": 0.13264891505241394, "learning_rate": 4.1465149359886204e-05, "loss": 1.0632, "step": 584 }, { "epoch": 0.6666666666666666, "grad_norm": 0.12274113297462463, "learning_rate": 4.1536273115220486e-05, "loss": 1.0965, "step": 585 }, { "epoch": 0.6678062678062678, "grad_norm": 0.1305791437625885, "learning_rate": 4.160739687055477e-05, "loss": 1.0392, "step": 586 }, { "epoch": 0.6689458689458689, "grad_norm": 0.14232182502746582, "learning_rate": 4.167852062588905e-05, "loss": 0.9417, "step": 587 }, { "epoch": 0.67008547008547, "grad_norm": 0.11436719447374344, "learning_rate": 4.174964438122333e-05, "loss": 1.0434, "step": 588 }, { "epoch": 0.6712250712250712, "grad_norm": 0.14634445309638977, "learning_rate": 4.182076813655761e-05, "loss": 0.8567, "step": 589 }, { "epoch": 0.6723646723646723, "grad_norm": 0.1758972406387329, "learning_rate": 4.189189189189189e-05, "loss": 0.9273, "step": 590 }, { "epoch": 0.6735042735042736, "grad_norm": 0.12599514424800873, "learning_rate": 4.1963015647226175e-05, "loss": 1.0205, "step": 591 }, { "epoch": 0.6746438746438747, "grad_norm": 0.1331692785024643, "learning_rate": 4.2034139402560456e-05, "loss": 0.9961, "step": 592 }, { "epoch": 0.6757834757834758, "grad_norm": 0.1400482952594757, "learning_rate": 4.210526315789474e-05, "loss": 1.0854, "step": 593 }, { "epoch": 0.676923076923077, "grad_norm": 0.1251649260520935, "learning_rate": 4.217638691322902e-05, "loss": 1.0257, "step": 594 }, { "epoch": 0.6780626780626781, "grad_norm": 0.12451403588056564, "learning_rate": 4.22475106685633e-05, "loss": 1.0146, "step": 595 }, { "epoch": 0.6792022792022792, "grad_norm": 0.14323706924915314, "learning_rate": 4.231863442389758e-05, "loss": 0.964, "step": 596 }, { "epoch": 0.6803418803418804, "grad_norm": 0.17317193746566772, "learning_rate": 4.2389758179231864e-05, "loss": 0.8161, "step": 597 }, { "epoch": 0.6814814814814815, "grad_norm": 0.13207165896892548, "learning_rate": 4.246088193456615e-05, "loss": 0.994, "step": 598 }, { "epoch": 0.6826210826210827, "grad_norm": 0.13879315555095673, "learning_rate": 4.253200568990043e-05, "loss": 0.9619, "step": 599 }, { "epoch": 0.6837606837606838, "grad_norm": 0.13193094730377197, "learning_rate": 4.2603129445234715e-05, "loss": 1.0706, "step": 600 }, { "epoch": 0.6849002849002849, "grad_norm": 0.1504574865102768, "learning_rate": 4.267425320056899e-05, "loss": 1.0568, "step": 601 }, { "epoch": 0.6860398860398861, "grad_norm": 0.14906713366508484, "learning_rate": 4.274537695590327e-05, "loss": 1.0318, "step": 602 }, { "epoch": 0.6871794871794872, "grad_norm": 0.14570730924606323, "learning_rate": 4.281650071123756e-05, "loss": 0.8339, "step": 603 }, { "epoch": 0.6883190883190883, "grad_norm": 0.1498977392911911, "learning_rate": 4.2887624466571834e-05, "loss": 0.9577, "step": 604 }, { "epoch": 0.6894586894586895, "grad_norm": 0.18780989944934845, "learning_rate": 4.295874822190612e-05, "loss": 0.86, "step": 605 }, { "epoch": 0.6905982905982906, "grad_norm": 0.15209412574768066, "learning_rate": 4.30298719772404e-05, "loss": 0.9174, "step": 606 }, { "epoch": 0.6917378917378917, "grad_norm": 0.14696481823921204, "learning_rate": 4.3100995732574685e-05, "loss": 0.8255, "step": 607 }, { "epoch": 0.6928774928774929, "grad_norm": 0.11013055592775345, "learning_rate": 4.317211948790896e-05, "loss": 1.0111, "step": 608 }, { "epoch": 0.694017094017094, "grad_norm": 0.15130603313446045, "learning_rate": 4.324324324324325e-05, "loss": 0.8892, "step": 609 }, { "epoch": 0.6951566951566952, "grad_norm": 0.17274664342403412, "learning_rate": 4.331436699857753e-05, "loss": 0.9937, "step": 610 }, { "epoch": 0.6962962962962963, "grad_norm": 0.1555788516998291, "learning_rate": 4.3385490753911804e-05, "loss": 0.9402, "step": 611 }, { "epoch": 0.6974358974358974, "grad_norm": 0.1470005214214325, "learning_rate": 4.345661450924609e-05, "loss": 1.0018, "step": 612 }, { "epoch": 0.6985754985754986, "grad_norm": 0.13923917710781097, "learning_rate": 4.352773826458037e-05, "loss": 0.9596, "step": 613 }, { "epoch": 0.6997150997150997, "grad_norm": 0.16846036911010742, "learning_rate": 4.3598862019914656e-05, "loss": 0.9879, "step": 614 }, { "epoch": 0.7008547008547008, "grad_norm": 0.12688805162906647, "learning_rate": 4.366998577524894e-05, "loss": 0.9347, "step": 615 }, { "epoch": 0.701994301994302, "grad_norm": 0.12665636837482452, "learning_rate": 4.374110953058322e-05, "loss": 1.0011, "step": 616 }, { "epoch": 0.7031339031339031, "grad_norm": 0.14885172247886658, "learning_rate": 4.38122332859175e-05, "loss": 0.9189, "step": 617 }, { "epoch": 0.7042735042735043, "grad_norm": 0.13615143299102783, "learning_rate": 4.388335704125178e-05, "loss": 1.0923, "step": 618 }, { "epoch": 0.7054131054131054, "grad_norm": 0.1464335322380066, "learning_rate": 4.395448079658606e-05, "loss": 1.0206, "step": 619 }, { "epoch": 0.7065527065527065, "grad_norm": 0.11906492710113525, "learning_rate": 4.402560455192034e-05, "loss": 1.0424, "step": 620 }, { "epoch": 0.7076923076923077, "grad_norm": 0.16673964262008667, "learning_rate": 4.4096728307254626e-05, "loss": 0.9126, "step": 621 }, { "epoch": 0.7088319088319088, "grad_norm": 0.14345242083072662, "learning_rate": 4.416785206258891e-05, "loss": 0.964, "step": 622 }, { "epoch": 0.7099715099715099, "grad_norm": 0.14964129030704498, "learning_rate": 4.423897581792319e-05, "loss": 0.9402, "step": 623 }, { "epoch": 0.7111111111111111, "grad_norm": 0.10696502774953842, "learning_rate": 4.431009957325747e-05, "loss": 0.9724, "step": 624 }, { "epoch": 0.7122507122507122, "grad_norm": 0.13990813493728638, "learning_rate": 4.438122332859175e-05, "loss": 1.0039, "step": 625 }, { "epoch": 0.7133903133903133, "grad_norm": 0.1518116444349289, "learning_rate": 4.4452347083926033e-05, "loss": 0.8281, "step": 626 }, { "epoch": 0.7145299145299145, "grad_norm": 0.1417427510023117, "learning_rate": 4.4523470839260315e-05, "loss": 1.0623, "step": 627 }, { "epoch": 0.7156695156695156, "grad_norm": 0.13966801762580872, "learning_rate": 4.4594594594594596e-05, "loss": 0.9622, "step": 628 }, { "epoch": 0.7168091168091169, "grad_norm": 0.14032839238643646, "learning_rate": 4.466571834992888e-05, "loss": 1.0334, "step": 629 }, { "epoch": 0.717948717948718, "grad_norm": 0.16751551628112793, "learning_rate": 4.473684210526316e-05, "loss": 0.8946, "step": 630 }, { "epoch": 0.719088319088319, "grad_norm": 0.13247185945510864, "learning_rate": 4.480796586059744e-05, "loss": 0.9699, "step": 631 }, { "epoch": 0.7202279202279203, "grad_norm": 0.10882976651191711, "learning_rate": 4.487908961593172e-05, "loss": 1.0404, "step": 632 }, { "epoch": 0.7213675213675214, "grad_norm": 0.13417808711528778, "learning_rate": 4.4950213371266004e-05, "loss": 0.8785, "step": 633 }, { "epoch": 0.7225071225071225, "grad_norm": 0.11986510455608368, "learning_rate": 4.5021337126600285e-05, "loss": 0.9987, "step": 634 }, { "epoch": 0.7236467236467237, "grad_norm": 0.13292481005191803, "learning_rate": 4.509246088193457e-05, "loss": 1.0561, "step": 635 }, { "epoch": 0.7247863247863248, "grad_norm": 0.14520932734012604, "learning_rate": 4.5163584637268855e-05, "loss": 1.0046, "step": 636 }, { "epoch": 0.725925925925926, "grad_norm": 0.13444307446479797, "learning_rate": 4.523470839260313e-05, "loss": 1.0692, "step": 637 }, { "epoch": 0.7270655270655271, "grad_norm": 0.1184447854757309, "learning_rate": 4.530583214793742e-05, "loss": 0.9649, "step": 638 }, { "epoch": 0.7282051282051282, "grad_norm": 0.13229194283485413, "learning_rate": 4.537695590327169e-05, "loss": 1.0202, "step": 639 }, { "epoch": 0.7293447293447294, "grad_norm": 0.13605216145515442, "learning_rate": 4.5448079658605974e-05, "loss": 0.9658, "step": 640 }, { "epoch": 0.7304843304843305, "grad_norm": 0.12405628710985184, "learning_rate": 4.551920341394026e-05, "loss": 1.0462, "step": 641 }, { "epoch": 0.7316239316239316, "grad_norm": 0.13379064202308655, "learning_rate": 4.559032716927454e-05, "loss": 0.9271, "step": 642 }, { "epoch": 0.7327635327635328, "grad_norm": 0.11972762644290924, "learning_rate": 4.5661450924608825e-05, "loss": 1.016, "step": 643 }, { "epoch": 0.7339031339031339, "grad_norm": 0.14391586184501648, "learning_rate": 4.57325746799431e-05, "loss": 0.9722, "step": 644 }, { "epoch": 0.7350427350427351, "grad_norm": 0.15161892771720886, "learning_rate": 4.580369843527739e-05, "loss": 0.9162, "step": 645 }, { "epoch": 0.7361823361823362, "grad_norm": 0.12622232735157013, "learning_rate": 4.587482219061166e-05, "loss": 1.0141, "step": 646 }, { "epoch": 0.7373219373219373, "grad_norm": 0.12338536232709885, "learning_rate": 4.594594594594595e-05, "loss": 1.0526, "step": 647 }, { "epoch": 0.7384615384615385, "grad_norm": 0.1498706340789795, "learning_rate": 4.601706970128023e-05, "loss": 0.9171, "step": 648 }, { "epoch": 0.7396011396011396, "grad_norm": 0.1674114316701889, "learning_rate": 4.608819345661451e-05, "loss": 0.8037, "step": 649 }, { "epoch": 0.7407407407407407, "grad_norm": 0.16352355480194092, "learning_rate": 4.6159317211948796e-05, "loss": 0.7724, "step": 650 }, { "epoch": 0.7418803418803419, "grad_norm": 0.1498660147190094, "learning_rate": 4.623044096728307e-05, "loss": 0.947, "step": 651 }, { "epoch": 0.743019943019943, "grad_norm": 0.11934972554445267, "learning_rate": 4.630156472261736e-05, "loss": 1.0658, "step": 652 }, { "epoch": 0.7441595441595441, "grad_norm": 0.12216529995203018, "learning_rate": 4.637268847795164e-05, "loss": 1.0102, "step": 653 }, { "epoch": 0.7452991452991453, "grad_norm": 0.14126114547252655, "learning_rate": 4.644381223328592e-05, "loss": 1.0649, "step": 654 }, { "epoch": 0.7464387464387464, "grad_norm": 0.1974281221628189, "learning_rate": 4.65149359886202e-05, "loss": 0.8068, "step": 655 }, { "epoch": 0.7475783475783476, "grad_norm": 0.18848006427288055, "learning_rate": 4.6586059743954485e-05, "loss": 0.8306, "step": 656 }, { "epoch": 0.7487179487179487, "grad_norm": 0.14742237329483032, "learning_rate": 4.6657183499288766e-05, "loss": 0.8745, "step": 657 }, { "epoch": 0.7498575498575498, "grad_norm": 0.12730880081653595, "learning_rate": 4.672830725462304e-05, "loss": 1.0347, "step": 658 }, { "epoch": 0.750997150997151, "grad_norm": 0.16038407385349274, "learning_rate": 4.679943100995733e-05, "loss": 1.0451, "step": 659 }, { "epoch": 0.7521367521367521, "grad_norm": 0.13468502461910248, "learning_rate": 4.687055476529161e-05, "loss": 0.9703, "step": 660 }, { "epoch": 0.7532763532763532, "grad_norm": 0.12653854489326477, "learning_rate": 4.694167852062589e-05, "loss": 1.0373, "step": 661 }, { "epoch": 0.7544159544159544, "grad_norm": 0.1421450674533844, "learning_rate": 4.7012802275960173e-05, "loss": 0.8956, "step": 662 }, { "epoch": 0.7555555555555555, "grad_norm": 0.13512149453163147, "learning_rate": 4.7083926031294455e-05, "loss": 0.9388, "step": 663 }, { "epoch": 0.7566951566951567, "grad_norm": 0.15117526054382324, "learning_rate": 4.7155049786628736e-05, "loss": 0.9357, "step": 664 }, { "epoch": 0.7578347578347578, "grad_norm": 0.15843990445137024, "learning_rate": 4.722617354196302e-05, "loss": 0.8295, "step": 665 }, { "epoch": 0.7589743589743589, "grad_norm": 0.1446264386177063, "learning_rate": 4.72972972972973e-05, "loss": 1.0052, "step": 666 }, { "epoch": 0.7601139601139602, "grad_norm": 0.1448657065629959, "learning_rate": 4.736842105263158e-05, "loss": 0.8908, "step": 667 }, { "epoch": 0.7612535612535613, "grad_norm": 0.13176831603050232, "learning_rate": 4.743954480796586e-05, "loss": 1.0582, "step": 668 }, { "epoch": 0.7623931623931623, "grad_norm": 0.137461856007576, "learning_rate": 4.7510668563300144e-05, "loss": 0.9941, "step": 669 }, { "epoch": 0.7635327635327636, "grad_norm": 0.14670772850513458, "learning_rate": 4.7581792318634425e-05, "loss": 0.9313, "step": 670 }, { "epoch": 0.7646723646723647, "grad_norm": 0.15902718901634216, "learning_rate": 4.765291607396871e-05, "loss": 0.8262, "step": 671 }, { "epoch": 0.7658119658119659, "grad_norm": 0.12516726553440094, "learning_rate": 4.772403982930299e-05, "loss": 0.9315, "step": 672 }, { "epoch": 0.766951566951567, "grad_norm": 0.12681032717227936, "learning_rate": 4.779516358463727e-05, "loss": 0.9177, "step": 673 }, { "epoch": 0.7680911680911681, "grad_norm": 0.119715616106987, "learning_rate": 4.786628733997156e-05, "loss": 1.1384, "step": 674 }, { "epoch": 0.7692307692307693, "grad_norm": 0.1507876068353653, "learning_rate": 4.793741109530583e-05, "loss": 0.857, "step": 675 }, { "epoch": 0.7703703703703704, "grad_norm": 0.12471084296703339, "learning_rate": 4.8008534850640114e-05, "loss": 1.0191, "step": 676 }, { "epoch": 0.7715099715099715, "grad_norm": 0.15394318103790283, "learning_rate": 4.8079658605974396e-05, "loss": 0.8937, "step": 677 }, { "epoch": 0.7726495726495727, "grad_norm": 0.14798788726329803, "learning_rate": 4.815078236130868e-05, "loss": 1.0149, "step": 678 }, { "epoch": 0.7737891737891738, "grad_norm": 0.12096770107746124, "learning_rate": 4.822190611664296e-05, "loss": 0.9544, "step": 679 }, { "epoch": 0.7749287749287749, "grad_norm": 0.1332738697528839, "learning_rate": 4.829302987197724e-05, "loss": 0.8915, "step": 680 }, { "epoch": 0.7760683760683761, "grad_norm": 0.13680146634578705, "learning_rate": 4.836415362731153e-05, "loss": 1.0085, "step": 681 }, { "epoch": 0.7772079772079772, "grad_norm": 0.1211962029337883, "learning_rate": 4.84352773826458e-05, "loss": 0.9987, "step": 682 }, { "epoch": 0.7783475783475784, "grad_norm": 0.17926925420761108, "learning_rate": 4.850640113798009e-05, "loss": 0.9473, "step": 683 }, { "epoch": 0.7794871794871795, "grad_norm": 0.1416856199502945, "learning_rate": 4.8577524893314366e-05, "loss": 0.906, "step": 684 }, { "epoch": 0.7806267806267806, "grad_norm": 0.15555289387702942, "learning_rate": 4.8648648648648654e-05, "loss": 1.0034, "step": 685 }, { "epoch": 0.7817663817663818, "grad_norm": 0.16560415923595428, "learning_rate": 4.8719772403982936e-05, "loss": 0.889, "step": 686 }, { "epoch": 0.7829059829059829, "grad_norm": 0.13473562896251678, "learning_rate": 4.879089615931721e-05, "loss": 1.0351, "step": 687 }, { "epoch": 0.784045584045584, "grad_norm": 0.19042819738388062, "learning_rate": 4.88620199146515e-05, "loss": 0.8398, "step": 688 }, { "epoch": 0.7851851851851852, "grad_norm": 0.16302379965782166, "learning_rate": 4.8933143669985773e-05, "loss": 0.8693, "step": 689 }, { "epoch": 0.7863247863247863, "grad_norm": 0.13653165102005005, "learning_rate": 4.900426742532006e-05, "loss": 0.9177, "step": 690 }, { "epoch": 0.7874643874643875, "grad_norm": 0.1384401172399521, "learning_rate": 4.907539118065434e-05, "loss": 0.8881, "step": 691 }, { "epoch": 0.7886039886039886, "grad_norm": 0.18803095817565918, "learning_rate": 4.9146514935988625e-05, "loss": 0.9258, "step": 692 }, { "epoch": 0.7897435897435897, "grad_norm": 0.1591644138097763, "learning_rate": 4.9217638691322906e-05, "loss": 0.9849, "step": 693 }, { "epoch": 0.7908831908831909, "grad_norm": 0.14878025650978088, "learning_rate": 4.928876244665719e-05, "loss": 0.8069, "step": 694 }, { "epoch": 0.792022792022792, "grad_norm": 0.12474358081817627, "learning_rate": 4.935988620199147e-05, "loss": 1.0147, "step": 695 }, { "epoch": 0.7931623931623931, "grad_norm": 0.13352371752262115, "learning_rate": 4.9431009957325744e-05, "loss": 1.0799, "step": 696 }, { "epoch": 0.7943019943019943, "grad_norm": 0.13235998153686523, "learning_rate": 4.950213371266003e-05, "loss": 1.0102, "step": 697 }, { "epoch": 0.7954415954415954, "grad_norm": 0.17550498247146606, "learning_rate": 4.9573257467994314e-05, "loss": 0.9779, "step": 698 }, { "epoch": 0.7965811965811965, "grad_norm": 0.13955263793468475, "learning_rate": 4.9644381223328595e-05, "loss": 0.9837, "step": 699 }, { "epoch": 0.7977207977207977, "grad_norm": 0.1194535493850708, "learning_rate": 4.9715504978662876e-05, "loss": 1.099, "step": 700 }, { "epoch": 0.7988603988603988, "grad_norm": 0.13582880795001984, "learning_rate": 4.978662873399716e-05, "loss": 1.0856, "step": 701 }, { "epoch": 0.8, "grad_norm": 0.18101084232330322, "learning_rate": 4.985775248933144e-05, "loss": 0.8222, "step": 702 }, { "epoch": 0.8011396011396011, "grad_norm": 0.6233718991279602, "learning_rate": 4.992887624466572e-05, "loss": 1.0047, "step": 703 }, { "epoch": 0.8022792022792022, "grad_norm": 0.1351984590291977, "learning_rate": 5e-05, "loss": 0.8762, "step": 704 }, { "epoch": 0.8034188034188035, "grad_norm": 0.12868115305900574, "learning_rate": 4.999999956584063e-05, "loss": 0.9618, "step": 705 }, { "epoch": 0.8045584045584045, "grad_norm": 0.14354445040225983, "learning_rate": 4.999999826336251e-05, "loss": 0.9572, "step": 706 }, { "epoch": 0.8056980056980056, "grad_norm": 0.12966684997081757, "learning_rate": 4.999999609256572e-05, "loss": 0.9598, "step": 707 }, { "epoch": 0.8068376068376069, "grad_norm": 0.13199159502983093, "learning_rate": 4.99999930534503e-05, "loss": 1.0009, "step": 708 }, { "epoch": 0.807977207977208, "grad_norm": 0.18091583251953125, "learning_rate": 4.999998914601638e-05, "loss": 0.8653, "step": 709 }, { "epoch": 0.8091168091168092, "grad_norm": 0.13866648077964783, "learning_rate": 4.999998437026408e-05, "loss": 0.8447, "step": 710 }, { "epoch": 0.8102564102564103, "grad_norm": 0.13208186626434326, "learning_rate": 4.9999978726193576e-05, "loss": 0.9805, "step": 711 }, { "epoch": 0.8113960113960114, "grad_norm": 0.15436086058616638, "learning_rate": 4.999997221380506e-05, "loss": 0.8638, "step": 712 }, { "epoch": 0.8125356125356126, "grad_norm": 0.1434612274169922, "learning_rate": 4.999996483309876e-05, "loss": 0.9613, "step": 713 }, { "epoch": 0.8136752136752137, "grad_norm": 0.13428765535354614, "learning_rate": 4.999995658407493e-05, "loss": 0.9649, "step": 714 }, { "epoch": 0.8148148148148148, "grad_norm": 0.14199678599834442, "learning_rate": 4.9999947466733854e-05, "loss": 1.0946, "step": 715 }, { "epoch": 0.815954415954416, "grad_norm": 0.15982435643672943, "learning_rate": 4.999993748107586e-05, "loss": 0.9126, "step": 716 }, { "epoch": 0.8170940170940171, "grad_norm": 0.14311882853507996, "learning_rate": 4.999992662710128e-05, "loss": 0.8634, "step": 717 }, { "epoch": 0.8182336182336183, "grad_norm": 0.16027617454528809, "learning_rate": 4.999991490481051e-05, "loss": 0.8648, "step": 718 }, { "epoch": 0.8193732193732194, "grad_norm": 0.1404966562986374, "learning_rate": 4.9999902314203925e-05, "loss": 0.9685, "step": 719 }, { "epoch": 0.8205128205128205, "grad_norm": 0.16552694141864777, "learning_rate": 4.9999888855282e-05, "loss": 0.9336, "step": 720 }, { "epoch": 0.8216524216524217, "grad_norm": 0.15026943385601044, "learning_rate": 4.999987452804518e-05, "loss": 0.923, "step": 721 }, { "epoch": 0.8227920227920228, "grad_norm": 0.16246750950813293, "learning_rate": 4.999985933249397e-05, "loss": 0.8521, "step": 722 }, { "epoch": 0.8239316239316239, "grad_norm": 0.1408422887325287, "learning_rate": 4.999984326862889e-05, "loss": 1.1033, "step": 723 }, { "epoch": 0.8250712250712251, "grad_norm": 0.16698269546031952, "learning_rate": 4.9999826336450506e-05, "loss": 0.7922, "step": 724 }, { "epoch": 0.8262108262108262, "grad_norm": 0.15270265936851501, "learning_rate": 4.99998085359594e-05, "loss": 0.94, "step": 725 }, { "epoch": 0.8273504273504273, "grad_norm": 0.1391606628894806, "learning_rate": 4.99997898671562e-05, "loss": 0.938, "step": 726 }, { "epoch": 0.8284900284900285, "grad_norm": 0.13594835996627808, "learning_rate": 4.999977033004155e-05, "loss": 0.9313, "step": 727 }, { "epoch": 0.8296296296296296, "grad_norm": 0.1513712853193283, "learning_rate": 4.9999749924616126e-05, "loss": 0.9079, "step": 728 }, { "epoch": 0.8307692307692308, "grad_norm": 0.15203820168972015, "learning_rate": 4.999972865088063e-05, "loss": 1.0377, "step": 729 }, { "epoch": 0.8319088319088319, "grad_norm": 0.13688744604587555, "learning_rate": 4.999970650883581e-05, "loss": 0.914, "step": 730 }, { "epoch": 0.833048433048433, "grad_norm": 0.1284022033214569, "learning_rate": 4.999968349848244e-05, "loss": 0.9223, "step": 731 }, { "epoch": 0.8341880341880342, "grad_norm": 0.14970466494560242, "learning_rate": 4.99996596198213e-05, "loss": 0.8377, "step": 732 }, { "epoch": 0.8353276353276353, "grad_norm": 0.1248951181769371, "learning_rate": 4.999963487285325e-05, "loss": 1.0449, "step": 733 }, { "epoch": 0.8364672364672364, "grad_norm": 0.1329878717660904, "learning_rate": 4.9999609257579114e-05, "loss": 1.1384, "step": 734 }, { "epoch": 0.8376068376068376, "grad_norm": 0.14601173996925354, "learning_rate": 4.9999582773999814e-05, "loss": 0.8948, "step": 735 }, { "epoch": 0.8387464387464387, "grad_norm": 0.11806821823120117, "learning_rate": 4.999955542211625e-05, "loss": 1.0158, "step": 736 }, { "epoch": 0.8398860398860399, "grad_norm": 0.15790943801403046, "learning_rate": 4.999952720192938e-05, "loss": 0.9098, "step": 737 }, { "epoch": 0.841025641025641, "grad_norm": 0.125859335064888, "learning_rate": 4.999949811344018e-05, "loss": 0.9797, "step": 738 }, { "epoch": 0.8421652421652421, "grad_norm": 0.12844663858413696, "learning_rate": 4.999946815664966e-05, "loss": 0.9786, "step": 739 }, { "epoch": 0.8433048433048433, "grad_norm": 0.14161966741085052, "learning_rate": 4.999943733155885e-05, "loss": 0.9303, "step": 740 }, { "epoch": 0.8444444444444444, "grad_norm": 0.16080684959888458, "learning_rate": 4.999940563816885e-05, "loss": 0.8871, "step": 741 }, { "epoch": 0.8455840455840455, "grad_norm": 0.16026948392391205, "learning_rate": 4.999937307648074e-05, "loss": 0.81, "step": 742 }, { "epoch": 0.8467236467236468, "grad_norm": 0.14790008962154388, "learning_rate": 4.999933964649565e-05, "loss": 0.8704, "step": 743 }, { "epoch": 0.8478632478632478, "grad_norm": 0.18087877333164215, "learning_rate": 4.9999305348214744e-05, "loss": 0.8639, "step": 744 }, { "epoch": 0.8490028490028491, "grad_norm": 0.14416368305683136, "learning_rate": 4.999927018163922e-05, "loss": 0.9448, "step": 745 }, { "epoch": 0.8501424501424502, "grad_norm": 0.35472309589385986, "learning_rate": 4.999923414677029e-05, "loss": 0.9182, "step": 746 }, { "epoch": 0.8512820512820513, "grad_norm": 0.14872398972511292, "learning_rate": 4.9999197243609216e-05, "loss": 0.9221, "step": 747 }, { "epoch": 0.8524216524216525, "grad_norm": 0.13234694302082062, "learning_rate": 4.999915947215727e-05, "loss": 0.9724, "step": 748 }, { "epoch": 0.8535612535612536, "grad_norm": 0.1492946445941925, "learning_rate": 4.999912083241577e-05, "loss": 0.9239, "step": 749 }, { "epoch": 0.8547008547008547, "grad_norm": 0.15056505799293518, "learning_rate": 4.999908132438605e-05, "loss": 0.7714, "step": 750 }, { "epoch": 0.8558404558404559, "grad_norm": 0.14489904046058655, "learning_rate": 4.999904094806949e-05, "loss": 0.9061, "step": 751 }, { "epoch": 0.856980056980057, "grad_norm": 0.1357349306344986, "learning_rate": 4.9998999703467494e-05, "loss": 0.9917, "step": 752 }, { "epoch": 0.8581196581196581, "grad_norm": 0.13452361524105072, "learning_rate": 4.999895759058149e-05, "loss": 1.1254, "step": 753 }, { "epoch": 0.8592592592592593, "grad_norm": 0.14595651626586914, "learning_rate": 4.9998914609412936e-05, "loss": 0.8541, "step": 754 }, { "epoch": 0.8603988603988604, "grad_norm": 0.12456920742988586, "learning_rate": 4.999887075996333e-05, "loss": 0.9705, "step": 755 }, { "epoch": 0.8615384615384616, "grad_norm": 0.16170814633369446, "learning_rate": 4.9998826042234205e-05, "loss": 1.0038, "step": 756 }, { "epoch": 0.8626780626780627, "grad_norm": 0.13845042884349823, "learning_rate": 4.99987804562271e-05, "loss": 0.8685, "step": 757 }, { "epoch": 0.8638176638176638, "grad_norm": 0.13230374455451965, "learning_rate": 4.99987340019436e-05, "loss": 0.9547, "step": 758 }, { "epoch": 0.864957264957265, "grad_norm": 0.14354543387889862, "learning_rate": 4.999868667938533e-05, "loss": 0.8999, "step": 759 }, { "epoch": 0.8660968660968661, "grad_norm": 0.11111228168010712, "learning_rate": 4.9998638488553916e-05, "loss": 0.9653, "step": 760 }, { "epoch": 0.8672364672364672, "grad_norm": 0.13235290348529816, "learning_rate": 4.999858942945104e-05, "loss": 0.8978, "step": 761 }, { "epoch": 0.8683760683760684, "grad_norm": 0.13394343852996826, "learning_rate": 4.999853950207841e-05, "loss": 0.7864, "step": 762 }, { "epoch": 0.8695156695156695, "grad_norm": 0.13626234233379364, "learning_rate": 4.9998488706437756e-05, "loss": 0.9025, "step": 763 }, { "epoch": 0.8706552706552707, "grad_norm": 0.13922813534736633, "learning_rate": 4.9998437042530846e-05, "loss": 0.9491, "step": 764 }, { "epoch": 0.8717948717948718, "grad_norm": 0.13128039240837097, "learning_rate": 4.999838451035947e-05, "loss": 0.9999, "step": 765 }, { "epoch": 0.8729344729344729, "grad_norm": 0.1395827680826187, "learning_rate": 4.999833110992545e-05, "loss": 0.9253, "step": 766 }, { "epoch": 0.8740740740740741, "grad_norm": 0.13548655807971954, "learning_rate": 4.999827684123065e-05, "loss": 0.9225, "step": 767 }, { "epoch": 0.8752136752136752, "grad_norm": 0.13563495874404907, "learning_rate": 4.999822170427695e-05, "loss": 0.9403, "step": 768 }, { "epoch": 0.8763532763532763, "grad_norm": 0.13124828040599823, "learning_rate": 4.999816569906626e-05, "loss": 0.9772, "step": 769 }, { "epoch": 0.8774928774928775, "grad_norm": 0.1623319834470749, "learning_rate": 4.9998108825600534e-05, "loss": 0.9449, "step": 770 }, { "epoch": 0.8786324786324786, "grad_norm": 0.11730766296386719, "learning_rate": 4.9998051083881745e-05, "loss": 0.957, "step": 771 }, { "epoch": 0.8797720797720797, "grad_norm": 0.1369873583316803, "learning_rate": 4.9997992473911894e-05, "loss": 0.8963, "step": 772 }, { "epoch": 0.8809116809116809, "grad_norm": 0.16026443243026733, "learning_rate": 4.999793299569302e-05, "loss": 0.9455, "step": 773 }, { "epoch": 0.882051282051282, "grad_norm": 0.15574093163013458, "learning_rate": 4.999787264922719e-05, "loss": 0.8671, "step": 774 }, { "epoch": 0.8831908831908832, "grad_norm": 0.144658163189888, "learning_rate": 4.999781143451649e-05, "loss": 0.8767, "step": 775 }, { "epoch": 0.8843304843304843, "grad_norm": 0.140802800655365, "learning_rate": 4.999774935156307e-05, "loss": 0.9449, "step": 776 }, { "epoch": 0.8854700854700854, "grad_norm": 0.1336839646100998, "learning_rate": 4.999768640036906e-05, "loss": 0.895, "step": 777 }, { "epoch": 0.8866096866096866, "grad_norm": 0.1726561337709427, "learning_rate": 4.9997622580936657e-05, "loss": 0.858, "step": 778 }, { "epoch": 0.8877492877492877, "grad_norm": 0.15918029844760895, "learning_rate": 4.9997557893268084e-05, "loss": 0.8667, "step": 779 }, { "epoch": 0.8888888888888888, "grad_norm": 0.14224639534950256, "learning_rate": 4.999749233736558e-05, "loss": 0.9014, "step": 780 }, { "epoch": 0.89002849002849, "grad_norm": 0.18035347759723663, "learning_rate": 4.999742591323142e-05, "loss": 0.7412, "step": 781 }, { "epoch": 0.8911680911680911, "grad_norm": 0.14404892921447754, "learning_rate": 4.9997358620867915e-05, "loss": 0.9408, "step": 782 }, { "epoch": 0.8923076923076924, "grad_norm": 0.14456340670585632, "learning_rate": 4.999729046027741e-05, "loss": 0.8669, "step": 783 }, { "epoch": 0.8934472934472935, "grad_norm": 0.1646738201379776, "learning_rate": 4.999722143146226e-05, "loss": 0.8579, "step": 784 }, { "epoch": 0.8945868945868946, "grad_norm": 0.140689417719841, "learning_rate": 4.999715153442486e-05, "loss": 0.9288, "step": 785 }, { "epoch": 0.8957264957264958, "grad_norm": 0.14522705972194672, "learning_rate": 4.9997080769167656e-05, "loss": 0.8597, "step": 786 }, { "epoch": 0.8968660968660969, "grad_norm": 0.13029351830482483, "learning_rate": 4.9997009135693086e-05, "loss": 1.0401, "step": 787 }, { "epoch": 0.898005698005698, "grad_norm": 0.1511763632297516, "learning_rate": 4.9996936634003654e-05, "loss": 0.8057, "step": 788 }, { "epoch": 0.8991452991452992, "grad_norm": 0.1934126317501068, "learning_rate": 4.9996863264101865e-05, "loss": 0.7158, "step": 789 }, { "epoch": 0.9002849002849003, "grad_norm": 0.12993942201137543, "learning_rate": 4.9996789025990276e-05, "loss": 0.8783, "step": 790 }, { "epoch": 0.9014245014245015, "grad_norm": 0.13194969296455383, "learning_rate": 4.999671391967146e-05, "loss": 0.992, "step": 791 }, { "epoch": 0.9025641025641026, "grad_norm": 0.13282102346420288, "learning_rate": 4.999663794514803e-05, "loss": 0.8413, "step": 792 }, { "epoch": 0.9037037037037037, "grad_norm": 0.14066408574581146, "learning_rate": 4.999656110242262e-05, "loss": 0.9583, "step": 793 }, { "epoch": 0.9048433048433049, "grad_norm": 0.1765558272600174, "learning_rate": 4.999648339149791e-05, "loss": 0.8005, "step": 794 }, { "epoch": 0.905982905982906, "grad_norm": 0.12721358239650726, "learning_rate": 4.9996404812376586e-05, "loss": 0.9387, "step": 795 }, { "epoch": 0.9071225071225071, "grad_norm": 0.15083791315555573, "learning_rate": 4.999632536506138e-05, "loss": 0.8425, "step": 796 }, { "epoch": 0.9082621082621083, "grad_norm": 0.1267702728509903, "learning_rate": 4.999624504955506e-05, "loss": 1.0046, "step": 797 }, { "epoch": 0.9094017094017094, "grad_norm": 0.14876332879066467, "learning_rate": 4.999616386586041e-05, "loss": 0.876, "step": 798 }, { "epoch": 0.9105413105413105, "grad_norm": 0.14986543357372284, "learning_rate": 4.9996081813980246e-05, "loss": 0.9184, "step": 799 }, { "epoch": 0.9116809116809117, "grad_norm": 0.1401684284210205, "learning_rate": 4.9995998893917415e-05, "loss": 0.8891, "step": 800 }, { "epoch": 0.9128205128205128, "grad_norm": 0.13596585392951965, "learning_rate": 4.9995915105674815e-05, "loss": 1.0471, "step": 801 }, { "epoch": 0.913960113960114, "grad_norm": 0.17869295179843903, "learning_rate": 4.999583044925534e-05, "loss": 0.8007, "step": 802 }, { "epoch": 0.9150997150997151, "grad_norm": 0.12465494126081467, "learning_rate": 4.999574492466193e-05, "loss": 0.9676, "step": 803 }, { "epoch": 0.9162393162393162, "grad_norm": 0.1428622603416443, "learning_rate": 4.999565853189756e-05, "loss": 0.9755, "step": 804 }, { "epoch": 0.9173789173789174, "grad_norm": 0.15474775433540344, "learning_rate": 4.999557127096524e-05, "loss": 0.9727, "step": 805 }, { "epoch": 0.9185185185185185, "grad_norm": 0.12727268040180206, "learning_rate": 4.999548314186798e-05, "loss": 0.8311, "step": 806 }, { "epoch": 0.9196581196581196, "grad_norm": 0.15607213973999023, "learning_rate": 4.999539414460886e-05, "loss": 0.7983, "step": 807 }, { "epoch": 0.9207977207977208, "grad_norm": 0.1445525586605072, "learning_rate": 4.999530427919096e-05, "loss": 0.9655, "step": 808 }, { "epoch": 0.9219373219373219, "grad_norm": 0.14794215559959412, "learning_rate": 4.999521354561741e-05, "loss": 0.8569, "step": 809 }, { "epoch": 0.9230769230769231, "grad_norm": 0.1320628821849823, "learning_rate": 4.999512194389136e-05, "loss": 0.8479, "step": 810 }, { "epoch": 0.9242165242165242, "grad_norm": 0.17432069778442383, "learning_rate": 4.999502947401597e-05, "loss": 0.9397, "step": 811 }, { "epoch": 0.9253561253561253, "grad_norm": 0.14456430077552795, "learning_rate": 4.9994936135994484e-05, "loss": 0.9931, "step": 812 }, { "epoch": 0.9264957264957265, "grad_norm": 0.12306395173072815, "learning_rate": 4.999484192983013e-05, "loss": 0.9482, "step": 813 }, { "epoch": 0.9276353276353276, "grad_norm": 0.14281296730041504, "learning_rate": 4.999474685552618e-05, "loss": 0.9634, "step": 814 }, { "epoch": 0.9287749287749287, "grad_norm": 0.13512107729911804, "learning_rate": 4.999465091308593e-05, "loss": 0.9638, "step": 815 }, { "epoch": 0.9299145299145299, "grad_norm": 0.12762989103794098, "learning_rate": 4.999455410251272e-05, "loss": 0.9667, "step": 816 }, { "epoch": 0.931054131054131, "grad_norm": 0.15025532245635986, "learning_rate": 4.9994456423809916e-05, "loss": 0.9261, "step": 817 }, { "epoch": 0.9321937321937321, "grad_norm": 0.1446276307106018, "learning_rate": 4.99943578769809e-05, "loss": 0.8259, "step": 818 }, { "epoch": 0.9333333333333333, "grad_norm": 0.13290417194366455, "learning_rate": 4.99942584620291e-05, "loss": 0.9101, "step": 819 }, { "epoch": 0.9344729344729344, "grad_norm": 0.19974055886268616, "learning_rate": 4.999415817895797e-05, "loss": 0.7009, "step": 820 }, { "epoch": 0.9356125356125357, "grad_norm": 0.16023169457912445, "learning_rate": 4.999405702777099e-05, "loss": 0.7912, "step": 821 }, { "epoch": 0.9367521367521368, "grad_norm": 0.12607376277446747, "learning_rate": 4.9993955008471684e-05, "loss": 1.0653, "step": 822 }, { "epoch": 0.9378917378917379, "grad_norm": 0.14359991252422333, "learning_rate": 4.999385212106358e-05, "loss": 0.9966, "step": 823 }, { "epoch": 0.9390313390313391, "grad_norm": 0.11714358627796173, "learning_rate": 4.999374836555026e-05, "loss": 0.9409, "step": 824 }, { "epoch": 0.9401709401709402, "grad_norm": 0.15341834723949432, "learning_rate": 4.999364374193533e-05, "loss": 1.004, "step": 825 }, { "epoch": 0.9413105413105413, "grad_norm": 0.12442399561405182, "learning_rate": 4.999353825022241e-05, "loss": 0.9952, "step": 826 }, { "epoch": 0.9424501424501425, "grad_norm": 0.12721072137355804, "learning_rate": 4.999343189041518e-05, "loss": 0.8845, "step": 827 }, { "epoch": 0.9435897435897436, "grad_norm": 0.15940479934215546, "learning_rate": 4.999332466251733e-05, "loss": 0.833, "step": 828 }, { "epoch": 0.9447293447293448, "grad_norm": 0.1454949527978897, "learning_rate": 4.999321656653258e-05, "loss": 0.965, "step": 829 }, { "epoch": 0.9458689458689459, "grad_norm": 0.16330890357494354, "learning_rate": 4.999310760246469e-05, "loss": 0.8289, "step": 830 }, { "epoch": 0.947008547008547, "grad_norm": 0.17716649174690247, "learning_rate": 4.999299777031744e-05, "loss": 0.8465, "step": 831 }, { "epoch": 0.9481481481481482, "grad_norm": 0.12264199554920197, "learning_rate": 4.999288707009464e-05, "loss": 1.0592, "step": 832 }, { "epoch": 0.9492877492877493, "grad_norm": 0.1604478657245636, "learning_rate": 4.999277550180015e-05, "loss": 0.9338, "step": 833 }, { "epoch": 0.9504273504273504, "grad_norm": 0.14750933647155762, "learning_rate": 4.9992663065437834e-05, "loss": 0.8592, "step": 834 }, { "epoch": 0.9515669515669516, "grad_norm": 0.15502817928791046, "learning_rate": 4.9992549761011595e-05, "loss": 0.8186, "step": 835 }, { "epoch": 0.9527065527065527, "grad_norm": 0.1494527906179428, "learning_rate": 4.9992435588525385e-05, "loss": 0.9742, "step": 836 }, { "epoch": 0.9538461538461539, "grad_norm": 0.1565469205379486, "learning_rate": 4.9992320547983155e-05, "loss": 0.9885, "step": 837 }, { "epoch": 0.954985754985755, "grad_norm": 0.13517823815345764, "learning_rate": 4.999220463938889e-05, "loss": 0.9043, "step": 838 }, { "epoch": 0.9561253561253561, "grad_norm": 0.14888018369674683, "learning_rate": 4.999208786274664e-05, "loss": 1.0079, "step": 839 }, { "epoch": 0.9572649572649573, "grad_norm": 0.13934040069580078, "learning_rate": 4.999197021806045e-05, "loss": 1.0461, "step": 840 }, { "epoch": 0.9584045584045584, "grad_norm": 0.15188275277614594, "learning_rate": 4.9991851705334395e-05, "loss": 0.8832, "step": 841 }, { "epoch": 0.9595441595441595, "grad_norm": 0.1298612654209137, "learning_rate": 4.9991732324572616e-05, "loss": 1.027, "step": 842 }, { "epoch": 0.9606837606837607, "grad_norm": 0.14358702301979065, "learning_rate": 4.999161207577924e-05, "loss": 1.0147, "step": 843 }, { "epoch": 0.9618233618233618, "grad_norm": 0.15109893679618835, "learning_rate": 4.999149095895845e-05, "loss": 0.9003, "step": 844 }, { "epoch": 0.9629629629629629, "grad_norm": 0.13257522881031036, "learning_rate": 4.999136897411445e-05, "loss": 0.8249, "step": 845 }, { "epoch": 0.9641025641025641, "grad_norm": 0.14299403131008148, "learning_rate": 4.999124612125148e-05, "loss": 0.9263, "step": 846 }, { "epoch": 0.9652421652421652, "grad_norm": 0.1702422946691513, "learning_rate": 4.9991122400373816e-05, "loss": 0.901, "step": 847 }, { "epoch": 0.9663817663817664, "grad_norm": 0.13109320402145386, "learning_rate": 4.999099781148573e-05, "loss": 1.0097, "step": 848 }, { "epoch": 0.9675213675213675, "grad_norm": 0.1618308424949646, "learning_rate": 4.9990872354591566e-05, "loss": 0.8078, "step": 849 }, { "epoch": 0.9686609686609686, "grad_norm": 0.13402412831783295, "learning_rate": 4.999074602969569e-05, "loss": 0.9279, "step": 850 }, { "epoch": 0.9698005698005698, "grad_norm": 0.16500049829483032, "learning_rate": 4.9990618836802474e-05, "loss": 0.8209, "step": 851 }, { "epoch": 0.9709401709401709, "grad_norm": 0.13208149373531342, "learning_rate": 4.999049077591634e-05, "loss": 0.9877, "step": 852 }, { "epoch": 0.972079772079772, "grad_norm": 0.12604838609695435, "learning_rate": 4.999036184704174e-05, "loss": 0.943, "step": 853 }, { "epoch": 0.9732193732193732, "grad_norm": 0.13807249069213867, "learning_rate": 4.999023205018315e-05, "loss": 0.9512, "step": 854 }, { "epoch": 0.9743589743589743, "grad_norm": 0.15906868875026703, "learning_rate": 4.999010138534507e-05, "loss": 0.8642, "step": 855 }, { "epoch": 0.9754985754985755, "grad_norm": 0.1484721601009369, "learning_rate": 4.998996985253206e-05, "loss": 0.9906, "step": 856 }, { "epoch": 0.9766381766381766, "grad_norm": 0.15350721776485443, "learning_rate": 4.998983745174866e-05, "loss": 0.859, "step": 857 }, { "epoch": 0.9777777777777777, "grad_norm": 0.1252312809228897, "learning_rate": 4.9989704182999494e-05, "loss": 1.1386, "step": 858 }, { "epoch": 0.978917378917379, "grad_norm": 0.12633676826953888, "learning_rate": 4.998957004628917e-05, "loss": 1.0245, "step": 859 }, { "epoch": 0.98005698005698, "grad_norm": 0.15019890666007996, "learning_rate": 4.998943504162236e-05, "loss": 1.1532, "step": 860 }, { "epoch": 0.9811965811965812, "grad_norm": 0.13429071009159088, "learning_rate": 4.998929916900376e-05, "loss": 0.8846, "step": 861 }, { "epoch": 0.9823361823361824, "grad_norm": 0.15395046770572662, "learning_rate": 4.998916242843806e-05, "loss": 0.9478, "step": 862 }, { "epoch": 0.9834757834757835, "grad_norm": 0.1540457010269165, "learning_rate": 4.998902481993004e-05, "loss": 0.9995, "step": 863 }, { "epoch": 0.9846153846153847, "grad_norm": 0.12777718901634216, "learning_rate": 4.9988886343484464e-05, "loss": 1.1051, "step": 864 }, { "epoch": 0.9857549857549858, "grad_norm": 0.1231025978922844, "learning_rate": 4.998874699910615e-05, "loss": 1.0039, "step": 865 }, { "epoch": 0.9868945868945869, "grad_norm": 0.16673798859119415, "learning_rate": 4.9988606786799934e-05, "loss": 0.7986, "step": 866 }, { "epoch": 0.9880341880341881, "grad_norm": 0.12741558253765106, "learning_rate": 4.998846570657069e-05, "loss": 0.8754, "step": 867 }, { "epoch": 0.9891737891737892, "grad_norm": 0.13659051060676575, "learning_rate": 4.99883237584233e-05, "loss": 0.8626, "step": 868 }, { "epoch": 0.9903133903133903, "grad_norm": 0.1726822704076767, "learning_rate": 4.998818094236272e-05, "loss": 0.7511, "step": 869 }, { "epoch": 0.9914529914529915, "grad_norm": 0.15878382325172424, "learning_rate": 4.998803725839388e-05, "loss": 0.8088, "step": 870 }, { "epoch": 0.9925925925925926, "grad_norm": 0.1454508900642395, "learning_rate": 4.9987892706521804e-05, "loss": 0.9454, "step": 871 }, { "epoch": 0.9937321937321937, "grad_norm": 0.16745467483997345, "learning_rate": 4.9987747286751496e-05, "loss": 0.7823, "step": 872 }, { "epoch": 0.9948717948717949, "grad_norm": 0.12460538744926453, "learning_rate": 4.998760099908801e-05, "loss": 1.0157, "step": 873 }, { "epoch": 0.996011396011396, "grad_norm": 0.13874247670173645, "learning_rate": 4.998745384353642e-05, "loss": 0.8699, "step": 874 }, { "epoch": 0.9971509971509972, "grad_norm": 0.12949658930301666, "learning_rate": 4.998730582010184e-05, "loss": 1.0591, "step": 875 }, { "epoch": 0.9982905982905983, "grad_norm": 0.11852507293224335, "learning_rate": 4.998715692878942e-05, "loss": 1.0606, "step": 876 }, { "epoch": 0.9994301994301994, "grad_norm": 0.14709822833538055, "learning_rate": 4.998700716960432e-05, "loss": 0.9475, "step": 877 }, { "epoch": 1.0, "grad_norm": 0.29294446110725403, "learning_rate": 4.998685654255175e-05, "loss": 0.7418, "step": 878 }, { "epoch": 1.0011396011396012, "grad_norm": 0.13779255747795105, "learning_rate": 4.998670504763694e-05, "loss": 0.9504, "step": 879 }, { "epoch": 1.0022792022792022, "grad_norm": 0.15006691217422485, "learning_rate": 4.998655268486514e-05, "loss": 0.8422, "step": 880 }, { "epoch": 1.0034188034188034, "grad_norm": 0.13469147682189941, "learning_rate": 4.998639945424166e-05, "loss": 0.974, "step": 881 }, { "epoch": 1.0045584045584046, "grad_norm": 0.1585700362920761, "learning_rate": 4.998624535577182e-05, "loss": 0.7951, "step": 882 }, { "epoch": 1.0056980056980056, "grad_norm": 0.14132528007030487, "learning_rate": 4.998609038946096e-05, "loss": 1.0296, "step": 883 }, { "epoch": 1.0068376068376068, "grad_norm": 0.15219303965568542, "learning_rate": 4.998593455531446e-05, "loss": 0.8658, "step": 884 }, { "epoch": 1.007977207977208, "grad_norm": 0.13966584205627441, "learning_rate": 4.998577785333775e-05, "loss": 0.8944, "step": 885 }, { "epoch": 1.009116809116809, "grad_norm": 0.16139642894268036, "learning_rate": 4.998562028353626e-05, "loss": 0.8068, "step": 886 }, { "epoch": 1.0102564102564102, "grad_norm": 0.12903019785881042, "learning_rate": 4.998546184591547e-05, "loss": 0.8911, "step": 887 }, { "epoch": 1.0113960113960114, "grad_norm": 0.13643798232078552, "learning_rate": 4.9985302540480884e-05, "loss": 0.9591, "step": 888 }, { "epoch": 1.0125356125356126, "grad_norm": 0.17136724293231964, "learning_rate": 4.9985142367238024e-05, "loss": 0.893, "step": 889 }, { "epoch": 1.0136752136752136, "grad_norm": 0.11987712234258652, "learning_rate": 4.998498132619247e-05, "loss": 0.9551, "step": 890 }, { "epoch": 1.0148148148148148, "grad_norm": 0.1422736942768097, "learning_rate": 4.998481941734979e-05, "loss": 0.7688, "step": 891 }, { "epoch": 1.015954415954416, "grad_norm": 0.1568872332572937, "learning_rate": 4.998465664071563e-05, "loss": 0.8926, "step": 892 }, { "epoch": 1.017094017094017, "grad_norm": 0.15462550520896912, "learning_rate": 4.998449299629564e-05, "loss": 0.7888, "step": 893 }, { "epoch": 1.0182336182336182, "grad_norm": 0.15768513083457947, "learning_rate": 4.99843284840955e-05, "loss": 0.8228, "step": 894 }, { "epoch": 1.0193732193732195, "grad_norm": 0.1471792310476303, "learning_rate": 4.998416310412093e-05, "loss": 1.0406, "step": 895 }, { "epoch": 1.0205128205128204, "grad_norm": 0.17310656607151031, "learning_rate": 4.9983996856377656e-05, "loss": 0.7129, "step": 896 }, { "epoch": 1.0216524216524216, "grad_norm": 0.159246027469635, "learning_rate": 4.9983829740871476e-05, "loss": 0.7875, "step": 897 }, { "epoch": 1.0227920227920229, "grad_norm": 0.14984700083732605, "learning_rate": 4.998366175760818e-05, "loss": 0.8313, "step": 898 }, { "epoch": 1.0239316239316238, "grad_norm": 0.12798403203487396, "learning_rate": 4.99834929065936e-05, "loss": 0.959, "step": 899 }, { "epoch": 1.025071225071225, "grad_norm": 0.14013813436031342, "learning_rate": 4.998332318783361e-05, "loss": 0.8967, "step": 900 }, { "epoch": 1.0262108262108263, "grad_norm": 0.11586838960647583, "learning_rate": 4.99831526013341e-05, "loss": 0.9817, "step": 901 }, { "epoch": 1.0273504273504273, "grad_norm": 0.1460069864988327, "learning_rate": 4.998298114710101e-05, "loss": 0.8565, "step": 902 }, { "epoch": 1.0284900284900285, "grad_norm": 0.19012194871902466, "learning_rate": 4.9982808825140256e-05, "loss": 0.8737, "step": 903 }, { "epoch": 1.0296296296296297, "grad_norm": 0.14134202897548676, "learning_rate": 4.998263563545787e-05, "loss": 0.8607, "step": 904 }, { "epoch": 1.0307692307692307, "grad_norm": 0.13328659534454346, "learning_rate": 4.9982461578059835e-05, "loss": 0.8979, "step": 905 }, { "epoch": 1.0319088319088319, "grad_norm": 0.13218213617801666, "learning_rate": 4.998228665295221e-05, "loss": 0.8001, "step": 906 }, { "epoch": 1.033048433048433, "grad_norm": 0.15181614458560944, "learning_rate": 4.998211086014107e-05, "loss": 1.0009, "step": 907 }, { "epoch": 1.0341880341880343, "grad_norm": 0.12449385225772858, "learning_rate": 4.9981934199632516e-05, "loss": 0.9788, "step": 908 }, { "epoch": 1.0353276353276353, "grad_norm": 0.13458961248397827, "learning_rate": 4.9981756671432686e-05, "loss": 0.9386, "step": 909 }, { "epoch": 1.0364672364672365, "grad_norm": 0.13624460995197296, "learning_rate": 4.998157827554775e-05, "loss": 0.8266, "step": 910 }, { "epoch": 1.0376068376068377, "grad_norm": 0.16573679447174072, "learning_rate": 4.9981399011983896e-05, "loss": 0.7657, "step": 911 }, { "epoch": 1.0387464387464387, "grad_norm": 0.12045188993215561, "learning_rate": 4.9981218880747363e-05, "loss": 1.0223, "step": 912 }, { "epoch": 1.03988603988604, "grad_norm": 0.13298587501049042, "learning_rate": 4.99810378818444e-05, "loss": 0.9801, "step": 913 }, { "epoch": 1.041025641025641, "grad_norm": 0.13947336375713348, "learning_rate": 4.998085601528129e-05, "loss": 0.8855, "step": 914 }, { "epoch": 1.042165242165242, "grad_norm": 0.15804146230220795, "learning_rate": 4.998067328106434e-05, "loss": 0.8577, "step": 915 }, { "epoch": 1.0433048433048433, "grad_norm": 0.1795351207256317, "learning_rate": 4.9980489679199925e-05, "loss": 0.8085, "step": 916 }, { "epoch": 1.0444444444444445, "grad_norm": 0.14076490700244904, "learning_rate": 4.99803052096944e-05, "loss": 0.9511, "step": 917 }, { "epoch": 1.0455840455840455, "grad_norm": 0.138662189245224, "learning_rate": 4.998011987255419e-05, "loss": 0.9781, "step": 918 }, { "epoch": 1.0467236467236467, "grad_norm": 0.1314990371465683, "learning_rate": 4.997993366778571e-05, "loss": 0.9505, "step": 919 }, { "epoch": 1.047863247863248, "grad_norm": 0.1316601186990738, "learning_rate": 4.9979746595395435e-05, "loss": 1.0062, "step": 920 }, { "epoch": 1.049002849002849, "grad_norm": 0.1458171010017395, "learning_rate": 4.997955865538988e-05, "loss": 0.9072, "step": 921 }, { "epoch": 1.0501424501424501, "grad_norm": 0.11316300928592682, "learning_rate": 4.997936984777555e-05, "loss": 0.8339, "step": 922 }, { "epoch": 1.0512820512820513, "grad_norm": 0.14511379599571228, "learning_rate": 4.997918017255901e-05, "loss": 0.9258, "step": 923 }, { "epoch": 1.0524216524216525, "grad_norm": 0.13927248120307922, "learning_rate": 4.997898962974686e-05, "loss": 0.8553, "step": 924 }, { "epoch": 1.0535612535612535, "grad_norm": 0.1859503984451294, "learning_rate": 4.99787982193457e-05, "loss": 0.7203, "step": 925 }, { "epoch": 1.0547008547008547, "grad_norm": 0.11909198015928268, "learning_rate": 4.997860594136219e-05, "loss": 1.0216, "step": 926 }, { "epoch": 1.055840455840456, "grad_norm": 0.15278655290603638, "learning_rate": 4.9978412795803e-05, "loss": 0.7951, "step": 927 }, { "epoch": 1.056980056980057, "grad_norm": 0.15038850903511047, "learning_rate": 4.997821878267484e-05, "loss": 0.8974, "step": 928 }, { "epoch": 1.0581196581196581, "grad_norm": 0.13177435100078583, "learning_rate": 4.997802390198446e-05, "loss": 0.8941, "step": 929 }, { "epoch": 1.0592592592592593, "grad_norm": 0.1317480504512787, "learning_rate": 4.997782815373861e-05, "loss": 0.9568, "step": 930 }, { "epoch": 1.0603988603988603, "grad_norm": 0.12318077683448792, "learning_rate": 4.9977631537944105e-05, "loss": 1.0919, "step": 931 }, { "epoch": 1.0615384615384615, "grad_norm": 0.13171391189098358, "learning_rate": 4.997743405460777e-05, "loss": 1.0009, "step": 932 }, { "epoch": 1.0626780626780628, "grad_norm": 0.20070981979370117, "learning_rate": 4.9977235703736456e-05, "loss": 0.6937, "step": 933 }, { "epoch": 1.0638176638176637, "grad_norm": 0.180873841047287, "learning_rate": 4.9977036485337056e-05, "loss": 0.6956, "step": 934 }, { "epoch": 1.064957264957265, "grad_norm": 0.13297812640666962, "learning_rate": 4.997683639941651e-05, "loss": 0.8955, "step": 935 }, { "epoch": 1.0660968660968662, "grad_norm": 0.12415697425603867, "learning_rate": 4.9976635445981726e-05, "loss": 0.9765, "step": 936 }, { "epoch": 1.0672364672364671, "grad_norm": 0.11958509683609009, "learning_rate": 4.997643362503972e-05, "loss": 1.03, "step": 937 }, { "epoch": 1.0683760683760684, "grad_norm": 0.15875710546970367, "learning_rate": 4.9976230936597486e-05, "loss": 0.8815, "step": 938 }, { "epoch": 1.0695156695156696, "grad_norm": 0.15593189001083374, "learning_rate": 4.9976027380662066e-05, "loss": 0.7542, "step": 939 }, { "epoch": 1.0706552706552706, "grad_norm": 0.15344645082950592, "learning_rate": 4.997582295724053e-05, "loss": 0.8464, "step": 940 }, { "epoch": 1.0717948717948718, "grad_norm": 0.14731286466121674, "learning_rate": 4.997561766633998e-05, "loss": 1.0861, "step": 941 }, { "epoch": 1.072934472934473, "grad_norm": 0.16866162419319153, "learning_rate": 4.997541150796755e-05, "loss": 0.7826, "step": 942 }, { "epoch": 1.074074074074074, "grad_norm": 0.1267542541027069, "learning_rate": 4.997520448213039e-05, "loss": 1.0525, "step": 943 }, { "epoch": 1.0752136752136752, "grad_norm": 0.14880475401878357, "learning_rate": 4.99749965888357e-05, "loss": 0.953, "step": 944 }, { "epoch": 1.0763532763532764, "grad_norm": 0.1406000852584839, "learning_rate": 4.9974787828090694e-05, "loss": 0.9503, "step": 945 }, { "epoch": 1.0774928774928776, "grad_norm": 0.1542246788740158, "learning_rate": 4.997457819990262e-05, "loss": 1.0167, "step": 946 }, { "epoch": 1.0786324786324786, "grad_norm": 0.15848727524280548, "learning_rate": 4.9974367704278775e-05, "loss": 0.9338, "step": 947 }, { "epoch": 1.0797720797720798, "grad_norm": 0.14062422513961792, "learning_rate": 4.997415634122645e-05, "loss": 0.8293, "step": 948 }, { "epoch": 1.080911680911681, "grad_norm": 0.1257828176021576, "learning_rate": 4.997394411075301e-05, "loss": 0.9943, "step": 949 }, { "epoch": 1.082051282051282, "grad_norm": 0.17399384081363678, "learning_rate": 4.99737310128658e-05, "loss": 0.8379, "step": 950 }, { "epoch": 1.0831908831908832, "grad_norm": 0.14257638156414032, "learning_rate": 4.9973517047572235e-05, "loss": 0.9981, "step": 951 }, { "epoch": 1.0843304843304844, "grad_norm": 0.1620057374238968, "learning_rate": 4.997330221487975e-05, "loss": 0.8397, "step": 952 }, { "epoch": 1.0854700854700854, "grad_norm": 0.13672028481960297, "learning_rate": 4.99730865147958e-05, "loss": 0.9169, "step": 953 }, { "epoch": 1.0866096866096866, "grad_norm": 0.1425710767507553, "learning_rate": 4.997286994732788e-05, "loss": 0.9351, "step": 954 }, { "epoch": 1.0877492877492878, "grad_norm": 0.1277409791946411, "learning_rate": 4.9972652512483514e-05, "loss": 0.8922, "step": 955 }, { "epoch": 1.0888888888888888, "grad_norm": 0.12657009065151215, "learning_rate": 4.9972434210270246e-05, "loss": 0.9153, "step": 956 }, { "epoch": 1.09002849002849, "grad_norm": 0.13573668897151947, "learning_rate": 4.997221504069567e-05, "loss": 0.9554, "step": 957 }, { "epoch": 1.0911680911680912, "grad_norm": 0.1584109365940094, "learning_rate": 4.9971995003767394e-05, "loss": 0.9039, "step": 958 }, { "epoch": 1.0923076923076924, "grad_norm": 0.14247845113277435, "learning_rate": 4.997177409949305e-05, "loss": 1.0108, "step": 959 }, { "epoch": 1.0934472934472934, "grad_norm": 0.13582134246826172, "learning_rate": 4.997155232788032e-05, "loss": 0.8967, "step": 960 }, { "epoch": 1.0945868945868946, "grad_norm": 0.1466704159975052, "learning_rate": 4.9971329688936904e-05, "loss": 0.9519, "step": 961 }, { "epoch": 1.0957264957264958, "grad_norm": 0.15067556500434875, "learning_rate": 4.997110618267053e-05, "loss": 0.9023, "step": 962 }, { "epoch": 1.0968660968660968, "grad_norm": 0.15583336353302002, "learning_rate": 4.997088180908898e-05, "loss": 0.9209, "step": 963 }, { "epoch": 1.098005698005698, "grad_norm": 0.15505431592464447, "learning_rate": 4.9970656568200034e-05, "loss": 1.0007, "step": 964 }, { "epoch": 1.0991452991452992, "grad_norm": 0.11177270114421844, "learning_rate": 4.997043046001151e-05, "loss": 1.1052, "step": 965 }, { "epoch": 1.1002849002849002, "grad_norm": 0.14778798818588257, "learning_rate": 4.997020348453127e-05, "loss": 0.8492, "step": 966 }, { "epoch": 1.1014245014245014, "grad_norm": 0.1263127624988556, "learning_rate": 4.99699756417672e-05, "loss": 1.0462, "step": 967 }, { "epoch": 1.1025641025641026, "grad_norm": 0.17105932533740997, "learning_rate": 4.99697469317272e-05, "loss": 0.8847, "step": 968 }, { "epoch": 1.1037037037037036, "grad_norm": 0.12604227662086487, "learning_rate": 4.996951735441923e-05, "loss": 1.0198, "step": 969 }, { "epoch": 1.1048433048433048, "grad_norm": 0.15668855607509613, "learning_rate": 4.996928690985124e-05, "loss": 0.9538, "step": 970 }, { "epoch": 1.105982905982906, "grad_norm": 0.18552467226982117, "learning_rate": 4.996905559803127e-05, "loss": 0.7007, "step": 971 }, { "epoch": 1.107122507122507, "grad_norm": 0.1310439556837082, "learning_rate": 4.996882341896732e-05, "loss": 1.0816, "step": 972 }, { "epoch": 1.1082621082621082, "grad_norm": 0.14595435559749603, "learning_rate": 4.9968590372667474e-05, "loss": 1.0132, "step": 973 }, { "epoch": 1.1094017094017095, "grad_norm": 0.1250859946012497, "learning_rate": 4.996835645913982e-05, "loss": 1.0407, "step": 974 }, { "epoch": 1.1105413105413104, "grad_norm": 0.15575435757637024, "learning_rate": 4.996812167839248e-05, "loss": 0.8504, "step": 975 }, { "epoch": 1.1116809116809117, "grad_norm": 0.13131991028785706, "learning_rate": 4.996788603043361e-05, "loss": 1.0466, "step": 976 }, { "epoch": 1.1128205128205129, "grad_norm": 0.13062109053134918, "learning_rate": 4.9967649515271406e-05, "loss": 0.8978, "step": 977 }, { "epoch": 1.1139601139601139, "grad_norm": 0.13753442466259003, "learning_rate": 4.996741213291406e-05, "loss": 1.0202, "step": 978 }, { "epoch": 1.115099715099715, "grad_norm": 0.1586059033870697, "learning_rate": 4.996717388336984e-05, "loss": 0.7171, "step": 979 }, { "epoch": 1.1162393162393163, "grad_norm": 0.1317741721868515, "learning_rate": 4.996693476664701e-05, "loss": 0.8723, "step": 980 }, { "epoch": 1.1173789173789175, "grad_norm": 0.13893039524555206, "learning_rate": 4.996669478275387e-05, "loss": 1.0717, "step": 981 }, { "epoch": 1.1185185185185185, "grad_norm": 0.1615033745765686, "learning_rate": 4.9966453931698764e-05, "loss": 0.8087, "step": 982 }, { "epoch": 1.1196581196581197, "grad_norm": 0.1383955478668213, "learning_rate": 4.996621221349006e-05, "loss": 0.9108, "step": 983 }, { "epoch": 1.1207977207977209, "grad_norm": 0.12407194823026657, "learning_rate": 4.9965969628136145e-05, "loss": 0.9014, "step": 984 }, { "epoch": 1.1219373219373219, "grad_norm": 0.1482664793729782, "learning_rate": 4.996572617564545e-05, "loss": 0.9412, "step": 985 }, { "epoch": 1.123076923076923, "grad_norm": 0.14120322465896606, "learning_rate": 4.996548185602642e-05, "loss": 0.9633, "step": 986 }, { "epoch": 1.1242165242165243, "grad_norm": 0.14487271010875702, "learning_rate": 4.996523666928756e-05, "loss": 0.8836, "step": 987 }, { "epoch": 1.1253561253561253, "grad_norm": 0.15308040380477905, "learning_rate": 4.996499061543737e-05, "loss": 0.8938, "step": 988 }, { "epoch": 1.1264957264957265, "grad_norm": 0.12895843386650085, "learning_rate": 4.9964743694484404e-05, "loss": 0.7956, "step": 989 }, { "epoch": 1.1276353276353277, "grad_norm": 0.14449110627174377, "learning_rate": 4.996449590643724e-05, "loss": 1.043, "step": 990 }, { "epoch": 1.1287749287749287, "grad_norm": 0.1205328032374382, "learning_rate": 4.996424725130448e-05, "loss": 1.0164, "step": 991 }, { "epoch": 1.12991452991453, "grad_norm": 0.14364676177501678, "learning_rate": 4.996399772909476e-05, "loss": 0.9615, "step": 992 }, { "epoch": 1.131054131054131, "grad_norm": 0.1430639773607254, "learning_rate": 4.996374733981674e-05, "loss": 0.7809, "step": 993 }, { "epoch": 1.1321937321937323, "grad_norm": 0.1377188116312027, "learning_rate": 4.9963496083479135e-05, "loss": 0.9358, "step": 994 }, { "epoch": 1.1333333333333333, "grad_norm": 0.14906203746795654, "learning_rate": 4.996324396009065e-05, "loss": 0.9767, "step": 995 }, { "epoch": 1.1344729344729345, "grad_norm": 0.13579241931438446, "learning_rate": 4.9962990969660064e-05, "loss": 1.0422, "step": 996 }, { "epoch": 1.1356125356125357, "grad_norm": 0.13844728469848633, "learning_rate": 4.9962737112196144e-05, "loss": 0.9115, "step": 997 }, { "epoch": 1.1367521367521367, "grad_norm": 0.14056533575057983, "learning_rate": 4.996248238770772e-05, "loss": 0.939, "step": 998 }, { "epoch": 1.137891737891738, "grad_norm": 0.13940833508968353, "learning_rate": 4.996222679620364e-05, "loss": 0.9133, "step": 999 }, { "epoch": 1.1390313390313391, "grad_norm": 0.13142770528793335, "learning_rate": 4.996197033769277e-05, "loss": 0.7939, "step": 1000 }, { "epoch": 1.1401709401709401, "grad_norm": 0.13854031264781952, "learning_rate": 4.996171301218403e-05, "loss": 0.8416, "step": 1001 }, { "epoch": 1.1413105413105413, "grad_norm": 0.15251633524894714, "learning_rate": 4.9961454819686346e-05, "loss": 0.8633, "step": 1002 }, { "epoch": 1.1424501424501425, "grad_norm": 0.12816135585308075, "learning_rate": 4.99611957602087e-05, "loss": 1.0117, "step": 1003 }, { "epoch": 1.1435897435897435, "grad_norm": 0.16152721643447876, "learning_rate": 4.996093583376008e-05, "loss": 0.8521, "step": 1004 }, { "epoch": 1.1447293447293447, "grad_norm": 0.14122483134269714, "learning_rate": 4.996067504034951e-05, "loss": 0.9478, "step": 1005 }, { "epoch": 1.145868945868946, "grad_norm": 0.1301722377538681, "learning_rate": 4.996041337998606e-05, "loss": 0.8674, "step": 1006 }, { "epoch": 1.147008547008547, "grad_norm": 0.16084377467632294, "learning_rate": 4.996015085267881e-05, "loss": 0.9332, "step": 1007 }, { "epoch": 1.1481481481481481, "grad_norm": 0.16826502978801727, "learning_rate": 4.995988745843687e-05, "loss": 0.873, "step": 1008 }, { "epoch": 1.1492877492877493, "grad_norm": 0.14940881729125977, "learning_rate": 4.995962319726941e-05, "loss": 0.9941, "step": 1009 }, { "epoch": 1.1504273504273503, "grad_norm": 0.14942386746406555, "learning_rate": 4.99593580691856e-05, "loss": 0.7758, "step": 1010 }, { "epoch": 1.1515669515669515, "grad_norm": 0.16000723838806152, "learning_rate": 4.995909207419464e-05, "loss": 0.7489, "step": 1011 }, { "epoch": 1.1527065527065528, "grad_norm": 0.13298875093460083, "learning_rate": 4.995882521230577e-05, "loss": 1.0353, "step": 1012 }, { "epoch": 1.1538461538461537, "grad_norm": 0.1300956755876541, "learning_rate": 4.9958557483528265e-05, "loss": 0.9054, "step": 1013 }, { "epoch": 1.154985754985755, "grad_norm": 0.15547607839107513, "learning_rate": 4.995828888787143e-05, "loss": 0.954, "step": 1014 }, { "epoch": 1.1561253561253562, "grad_norm": 0.14494642615318298, "learning_rate": 4.9958019425344585e-05, "loss": 0.8624, "step": 1015 }, { "epoch": 1.1572649572649572, "grad_norm": 0.14050385355949402, "learning_rate": 4.995774909595709e-05, "loss": 0.844, "step": 1016 }, { "epoch": 1.1584045584045584, "grad_norm": 0.13790078461170197, "learning_rate": 4.995747789971832e-05, "loss": 0.9498, "step": 1017 }, { "epoch": 1.1595441595441596, "grad_norm": 0.16380153596401215, "learning_rate": 4.995720583663773e-05, "loss": 0.87, "step": 1018 }, { "epoch": 1.1606837606837608, "grad_norm": 0.1295076161623001, "learning_rate": 4.995693290672474e-05, "loss": 1.0092, "step": 1019 }, { "epoch": 1.1618233618233618, "grad_norm": 0.15840724110603333, "learning_rate": 4.995665910998884e-05, "loss": 0.9389, "step": 1020 }, { "epoch": 1.162962962962963, "grad_norm": 0.14955906569957733, "learning_rate": 4.995638444643953e-05, "loss": 1.0145, "step": 1021 }, { "epoch": 1.1641025641025642, "grad_norm": 0.1661931276321411, "learning_rate": 4.995610891608636e-05, "loss": 0.8591, "step": 1022 }, { "epoch": 1.1652421652421652, "grad_norm": 0.13858042657375336, "learning_rate": 4.9955832518938904e-05, "loss": 0.9221, "step": 1023 }, { "epoch": 1.1663817663817664, "grad_norm": 0.124392569065094, "learning_rate": 4.995555525500676e-05, "loss": 1.011, "step": 1024 }, { "epoch": 1.1675213675213676, "grad_norm": 0.14031356573104858, "learning_rate": 4.995527712429955e-05, "loss": 1.1699, "step": 1025 }, { "epoch": 1.1686609686609686, "grad_norm": 0.13859114050865173, "learning_rate": 4.9954998126826934e-05, "loss": 0.9058, "step": 1026 }, { "epoch": 1.1698005698005698, "grad_norm": 0.12210742384195328, "learning_rate": 4.995471826259861e-05, "loss": 0.9804, "step": 1027 }, { "epoch": 1.170940170940171, "grad_norm": 0.1316547989845276, "learning_rate": 4.995443753162429e-05, "loss": 1.0209, "step": 1028 }, { "epoch": 1.172079772079772, "grad_norm": 0.1612570881843567, "learning_rate": 4.995415593391374e-05, "loss": 0.8194, "step": 1029 }, { "epoch": 1.1732193732193732, "grad_norm": 0.12363424897193909, "learning_rate": 4.995387346947672e-05, "loss": 0.9992, "step": 1030 }, { "epoch": 1.1743589743589744, "grad_norm": 0.12171828001737595, "learning_rate": 4.995359013832305e-05, "loss": 1.0623, "step": 1031 }, { "epoch": 1.1754985754985756, "grad_norm": 0.16410517692565918, "learning_rate": 4.995330594046258e-05, "loss": 0.7474, "step": 1032 }, { "epoch": 1.1766381766381766, "grad_norm": 0.1386864334344864, "learning_rate": 4.9953020875905165e-05, "loss": 0.8702, "step": 1033 }, { "epoch": 1.1777777777777778, "grad_norm": 0.1731072962284088, "learning_rate": 4.995273494466072e-05, "loss": 0.7257, "step": 1034 }, { "epoch": 1.178917378917379, "grad_norm": 0.1432584971189499, "learning_rate": 4.995244814673917e-05, "loss": 0.8772, "step": 1035 }, { "epoch": 1.18005698005698, "grad_norm": 0.1770780235528946, "learning_rate": 4.9952160482150476e-05, "loss": 0.6717, "step": 1036 }, { "epoch": 1.1811965811965812, "grad_norm": 0.1530081033706665, "learning_rate": 4.995187195090463e-05, "loss": 0.9842, "step": 1037 }, { "epoch": 1.1823361823361824, "grad_norm": 0.18601734936237335, "learning_rate": 4.9951582553011645e-05, "loss": 0.7432, "step": 1038 }, { "epoch": 1.1834757834757834, "grad_norm": 0.13339142501354218, "learning_rate": 4.995129228848159e-05, "loss": 0.9775, "step": 1039 }, { "epoch": 1.1846153846153846, "grad_norm": 0.16211362183094025, "learning_rate": 4.995100115732453e-05, "loss": 0.9281, "step": 1040 }, { "epoch": 1.1857549857549858, "grad_norm": 0.17683103680610657, "learning_rate": 4.995070915955059e-05, "loss": 0.8562, "step": 1041 }, { "epoch": 1.1868945868945868, "grad_norm": 0.15082947909832, "learning_rate": 4.9950416295169914e-05, "loss": 0.8704, "step": 1042 }, { "epoch": 1.188034188034188, "grad_norm": 0.12745791673660278, "learning_rate": 4.9950122564192656e-05, "loss": 0.9529, "step": 1043 }, { "epoch": 1.1891737891737892, "grad_norm": 0.16052363812923431, "learning_rate": 4.994982796662903e-05, "loss": 0.8525, "step": 1044 }, { "epoch": 1.1903133903133902, "grad_norm": 0.1446310132741928, "learning_rate": 4.994953250248926e-05, "loss": 0.8256, "step": 1045 }, { "epoch": 1.1914529914529914, "grad_norm": 0.16465376317501068, "learning_rate": 4.994923617178362e-05, "loss": 0.9058, "step": 1046 }, { "epoch": 1.1925925925925926, "grad_norm": 0.1354607343673706, "learning_rate": 4.99489389745224e-05, "loss": 0.8885, "step": 1047 }, { "epoch": 1.1937321937321936, "grad_norm": 0.14572255313396454, "learning_rate": 4.9948640910715914e-05, "loss": 0.9387, "step": 1048 }, { "epoch": 1.1948717948717948, "grad_norm": 0.1444302648305893, "learning_rate": 4.9948341980374523e-05, "loss": 0.9486, "step": 1049 }, { "epoch": 1.196011396011396, "grad_norm": 0.17131833732128143, "learning_rate": 4.9948042183508616e-05, "loss": 0.8682, "step": 1050 }, { "epoch": 1.197150997150997, "grad_norm": 0.14840547740459442, "learning_rate": 4.9947741520128586e-05, "loss": 0.7904, "step": 1051 }, { "epoch": 1.1982905982905983, "grad_norm": 0.13730883598327637, "learning_rate": 4.994743999024489e-05, "loss": 0.8302, "step": 1052 }, { "epoch": 1.1994301994301995, "grad_norm": 0.3049890995025635, "learning_rate": 4.9947137593867996e-05, "loss": 1.1651, "step": 1053 }, { "epoch": 1.2005698005698004, "grad_norm": 0.14114195108413696, "learning_rate": 4.994683433100841e-05, "loss": 0.8649, "step": 1054 }, { "epoch": 1.2017094017094017, "grad_norm": 0.14645038545131683, "learning_rate": 4.9946530201676657e-05, "loss": 0.8701, "step": 1055 }, { "epoch": 1.2028490028490029, "grad_norm": 0.17546778917312622, "learning_rate": 4.994622520588331e-05, "loss": 0.7481, "step": 1056 }, { "epoch": 1.203988603988604, "grad_norm": 0.16568420827388763, "learning_rate": 4.994591934363897e-05, "loss": 0.7344, "step": 1057 }, { "epoch": 1.205128205128205, "grad_norm": 0.11934441328048706, "learning_rate": 4.994561261495423e-05, "loss": 0.9762, "step": 1058 }, { "epoch": 1.2062678062678063, "grad_norm": 0.1472657024860382, "learning_rate": 4.994530501983978e-05, "loss": 0.8483, "step": 1059 }, { "epoch": 1.2074074074074075, "grad_norm": 0.14245931804180145, "learning_rate": 4.9944996558306276e-05, "loss": 1.101, "step": 1060 }, { "epoch": 1.2085470085470085, "grad_norm": 0.12662309408187866, "learning_rate": 4.994468723036445e-05, "loss": 1.0301, "step": 1061 }, { "epoch": 1.2096866096866097, "grad_norm": 0.16392038762569427, "learning_rate": 4.9944377036025036e-05, "loss": 0.8724, "step": 1062 }, { "epoch": 1.210826210826211, "grad_norm": 0.14167292416095734, "learning_rate": 4.994406597529881e-05, "loss": 1.0259, "step": 1063 }, { "epoch": 1.2119658119658119, "grad_norm": 0.14389429986476898, "learning_rate": 4.9943754048196576e-05, "loss": 0.9472, "step": 1064 }, { "epoch": 1.213105413105413, "grad_norm": 0.15287116169929504, "learning_rate": 4.9943441254729166e-05, "loss": 0.9496, "step": 1065 }, { "epoch": 1.2142450142450143, "grad_norm": 0.13677488267421722, "learning_rate": 4.9943127594907445e-05, "loss": 0.8896, "step": 1066 }, { "epoch": 1.2153846153846155, "grad_norm": 0.16617237031459808, "learning_rate": 4.9942813068742315e-05, "loss": 0.8672, "step": 1067 }, { "epoch": 1.2165242165242165, "grad_norm": 0.13447706401348114, "learning_rate": 4.9942497676244695e-05, "loss": 0.9431, "step": 1068 }, { "epoch": 1.2176638176638177, "grad_norm": 0.14393222332000732, "learning_rate": 4.994218141742554e-05, "loss": 0.9767, "step": 1069 }, { "epoch": 1.218803418803419, "grad_norm": 0.14204853773117065, "learning_rate": 4.994186429229583e-05, "loss": 1.0034, "step": 1070 }, { "epoch": 1.21994301994302, "grad_norm": 0.14693760871887207, "learning_rate": 4.994154630086657e-05, "loss": 0.8422, "step": 1071 }, { "epoch": 1.2210826210826211, "grad_norm": 0.1492367833852768, "learning_rate": 4.994122744314883e-05, "loss": 0.8092, "step": 1072 }, { "epoch": 1.2222222222222223, "grad_norm": 0.11978384852409363, "learning_rate": 4.9940907719153673e-05, "loss": 1.0559, "step": 1073 }, { "epoch": 1.2233618233618233, "grad_norm": 0.17098432779312134, "learning_rate": 4.99405871288922e-05, "loss": 0.8091, "step": 1074 }, { "epoch": 1.2245014245014245, "grad_norm": 0.12666909396648407, "learning_rate": 4.994026567237555e-05, "loss": 0.9601, "step": 1075 }, { "epoch": 1.2256410256410257, "grad_norm": 0.14846102893352509, "learning_rate": 4.993994334961489e-05, "loss": 0.8489, "step": 1076 }, { "epoch": 1.2267806267806267, "grad_norm": 0.12214870005846024, "learning_rate": 4.993962016062141e-05, "loss": 0.8022, "step": 1077 }, { "epoch": 1.227920227920228, "grad_norm": 0.13113483786582947, "learning_rate": 4.993929610540634e-05, "loss": 0.8764, "step": 1078 }, { "epoch": 1.2290598290598291, "grad_norm": 0.1480408012866974, "learning_rate": 4.993897118398093e-05, "loss": 0.7842, "step": 1079 }, { "epoch": 1.2301994301994301, "grad_norm": 0.1582619994878769, "learning_rate": 4.9938645396356463e-05, "loss": 0.9709, "step": 1080 }, { "epoch": 1.2313390313390313, "grad_norm": 0.15353186428546906, "learning_rate": 4.993831874254426e-05, "loss": 0.7948, "step": 1081 }, { "epoch": 1.2324786324786325, "grad_norm": 0.11932581663131714, "learning_rate": 4.993799122255568e-05, "loss": 0.9209, "step": 1082 }, { "epoch": 1.2336182336182335, "grad_norm": 0.14687809348106384, "learning_rate": 4.993766283640208e-05, "loss": 1.0655, "step": 1083 }, { "epoch": 1.2347578347578347, "grad_norm": 0.15862920880317688, "learning_rate": 4.993733358409486e-05, "loss": 0.9393, "step": 1084 }, { "epoch": 1.235897435897436, "grad_norm": 0.17040449380874634, "learning_rate": 4.9937003465645475e-05, "loss": 0.6897, "step": 1085 }, { "epoch": 1.237037037037037, "grad_norm": 0.16452538967132568, "learning_rate": 4.9936672481065385e-05, "loss": 1.1223, "step": 1086 }, { "epoch": 1.2381766381766381, "grad_norm": 0.1454714834690094, "learning_rate": 4.993634063036608e-05, "loss": 0.9143, "step": 1087 }, { "epoch": 1.2393162393162394, "grad_norm": 0.14211338758468628, "learning_rate": 4.993600791355909e-05, "loss": 0.9156, "step": 1088 }, { "epoch": 1.2404558404558403, "grad_norm": 0.11242610216140747, "learning_rate": 4.993567433065597e-05, "loss": 1.0544, "step": 1089 }, { "epoch": 1.2415954415954416, "grad_norm": 0.12282131612300873, "learning_rate": 4.993533988166831e-05, "loss": 1.0942, "step": 1090 }, { "epoch": 1.2427350427350428, "grad_norm": 0.1312336027622223, "learning_rate": 4.993500456660772e-05, "loss": 1.1559, "step": 1091 }, { "epoch": 1.243874643874644, "grad_norm": 0.16788773238658905, "learning_rate": 4.9934668385485854e-05, "loss": 0.8987, "step": 1092 }, { "epoch": 1.245014245014245, "grad_norm": 0.14625464379787445, "learning_rate": 4.9934331338314384e-05, "loss": 0.9375, "step": 1093 }, { "epoch": 1.2461538461538462, "grad_norm": 0.16500821709632874, "learning_rate": 4.9933993425105014e-05, "loss": 0.768, "step": 1094 }, { "epoch": 1.2472934472934474, "grad_norm": 0.14234770834445953, "learning_rate": 4.993365464586949e-05, "loss": 0.9454, "step": 1095 }, { "epoch": 1.2484330484330484, "grad_norm": 0.14750170707702637, "learning_rate": 4.993331500061956e-05, "loss": 0.9262, "step": 1096 }, { "epoch": 1.2495726495726496, "grad_norm": 0.1623031198978424, "learning_rate": 4.993297448936704e-05, "loss": 0.7957, "step": 1097 }, { "epoch": 1.2507122507122508, "grad_norm": 0.14526617527008057, "learning_rate": 4.993263311212375e-05, "loss": 0.8589, "step": 1098 }, { "epoch": 1.2518518518518518, "grad_norm": 0.1483505368232727, "learning_rate": 4.993229086890155e-05, "loss": 0.9393, "step": 1099 }, { "epoch": 1.252991452991453, "grad_norm": 0.1238211914896965, "learning_rate": 4.993194775971232e-05, "loss": 0.8771, "step": 1100 }, { "epoch": 1.2541310541310542, "grad_norm": 0.13783283531665802, "learning_rate": 4.9931603784567984e-05, "loss": 1.0631, "step": 1101 }, { "epoch": 1.2552706552706554, "grad_norm": 0.162641704082489, "learning_rate": 4.993125894348049e-05, "loss": 0.752, "step": 1102 }, { "epoch": 1.2564102564102564, "grad_norm": 0.14208804070949554, "learning_rate": 4.99309132364618e-05, "loss": 0.8899, "step": 1103 }, { "epoch": 1.2575498575498576, "grad_norm": 0.13560961186885834, "learning_rate": 4.9930566663523936e-05, "loss": 0.9386, "step": 1104 }, { "epoch": 1.2586894586894588, "grad_norm": 0.1455816924571991, "learning_rate": 4.993021922467894e-05, "loss": 0.9252, "step": 1105 }, { "epoch": 1.2598290598290598, "grad_norm": 0.1490432471036911, "learning_rate": 4.992987091993887e-05, "loss": 0.9, "step": 1106 }, { "epoch": 1.260968660968661, "grad_norm": 0.13461579382419586, "learning_rate": 4.992952174931582e-05, "loss": 0.9174, "step": 1107 }, { "epoch": 1.2621082621082622, "grad_norm": 0.15884242951869965, "learning_rate": 4.992917171282193e-05, "loss": 0.7802, "step": 1108 }, { "epoch": 1.2632478632478632, "grad_norm": 0.15744027495384216, "learning_rate": 4.9928820810469344e-05, "loss": 0.7845, "step": 1109 }, { "epoch": 1.2643874643874644, "grad_norm": 0.126771941781044, "learning_rate": 4.992846904227027e-05, "loss": 0.937, "step": 1110 }, { "epoch": 1.2655270655270656, "grad_norm": 0.14142169058322906, "learning_rate": 4.992811640823689e-05, "loss": 0.7667, "step": 1111 }, { "epoch": 1.2666666666666666, "grad_norm": 0.13442941009998322, "learning_rate": 4.992776290838149e-05, "loss": 1.137, "step": 1112 }, { "epoch": 1.2678062678062678, "grad_norm": 0.1702500879764557, "learning_rate": 4.992740854271633e-05, "loss": 0.6608, "step": 1113 }, { "epoch": 1.268945868945869, "grad_norm": 0.1416245847940445, "learning_rate": 4.9927053311253725e-05, "loss": 0.8593, "step": 1114 }, { "epoch": 1.27008547008547, "grad_norm": 0.17827555537223816, "learning_rate": 4.9926697214006e-05, "loss": 0.6907, "step": 1115 }, { "epoch": 1.2712250712250712, "grad_norm": 0.15787549316883087, "learning_rate": 4.992634025098554e-05, "loss": 0.9319, "step": 1116 }, { "epoch": 1.2723646723646724, "grad_norm": 0.13434097170829773, "learning_rate": 4.992598242220473e-05, "loss": 0.9069, "step": 1117 }, { "epoch": 1.2735042735042734, "grad_norm": 0.15382850170135498, "learning_rate": 4.992562372767601e-05, "loss": 0.9074, "step": 1118 }, { "epoch": 1.2746438746438746, "grad_norm": 0.1281144767999649, "learning_rate": 4.992526416741182e-05, "loss": 0.9972, "step": 1119 }, { "epoch": 1.2757834757834758, "grad_norm": 0.15494726598262787, "learning_rate": 4.992490374142467e-05, "loss": 0.869, "step": 1120 }, { "epoch": 1.2769230769230768, "grad_norm": 0.1506667584180832, "learning_rate": 4.9924542449727076e-05, "loss": 0.8423, "step": 1121 }, { "epoch": 1.278062678062678, "grad_norm": 0.1327880322933197, "learning_rate": 4.9924180292331566e-05, "loss": 0.9583, "step": 1122 }, { "epoch": 1.2792022792022792, "grad_norm": 0.14780306816101074, "learning_rate": 4.992381726925074e-05, "loss": 0.8907, "step": 1123 }, { "epoch": 1.2803418803418802, "grad_norm": 0.14822085201740265, "learning_rate": 4.99234533804972e-05, "loss": 0.8683, "step": 1124 }, { "epoch": 1.2814814814814814, "grad_norm": 0.15453283488750458, "learning_rate": 4.992308862608358e-05, "loss": 0.8023, "step": 1125 }, { "epoch": 1.2826210826210827, "grad_norm": 0.1378374844789505, "learning_rate": 4.992272300602255e-05, "loss": 0.9713, "step": 1126 }, { "epoch": 1.2837606837606836, "grad_norm": 0.1661481410264969, "learning_rate": 4.9922356520326825e-05, "loss": 0.746, "step": 1127 }, { "epoch": 1.2849002849002849, "grad_norm": 0.1455329954624176, "learning_rate": 4.992198916900911e-05, "loss": 0.8999, "step": 1128 }, { "epoch": 1.286039886039886, "grad_norm": 0.15151652693748474, "learning_rate": 4.9921620952082185e-05, "loss": 0.9821, "step": 1129 }, { "epoch": 1.287179487179487, "grad_norm": 0.14197927713394165, "learning_rate": 4.9921251869558825e-05, "loss": 0.9381, "step": 1130 }, { "epoch": 1.2883190883190883, "grad_norm": 0.1472100019454956, "learning_rate": 4.992088192145185e-05, "loss": 0.8847, "step": 1131 }, { "epoch": 1.2894586894586895, "grad_norm": 0.13291296362876892, "learning_rate": 4.9920511107774115e-05, "loss": 0.9695, "step": 1132 }, { "epoch": 1.2905982905982907, "grad_norm": 0.12145037949085236, "learning_rate": 4.99201394285385e-05, "loss": 0.8353, "step": 1133 }, { "epoch": 1.2917378917378917, "grad_norm": 0.14736376702785492, "learning_rate": 4.9919766883757914e-05, "loss": 0.8157, "step": 1134 }, { "epoch": 1.2928774928774929, "grad_norm": 0.13806748390197754, "learning_rate": 4.991939347344529e-05, "loss": 1.0154, "step": 1135 }, { "epoch": 1.294017094017094, "grad_norm": 0.13545425236225128, "learning_rate": 4.9919019197613606e-05, "loss": 0.9973, "step": 1136 }, { "epoch": 1.2951566951566953, "grad_norm": 0.14301511645317078, "learning_rate": 4.991864405627585e-05, "loss": 0.8317, "step": 1137 }, { "epoch": 1.2962962962962963, "grad_norm": 0.16001370549201965, "learning_rate": 4.9918268049445066e-05, "loss": 0.7977, "step": 1138 }, { "epoch": 1.2974358974358975, "grad_norm": 0.13568778336048126, "learning_rate": 4.991789117713431e-05, "loss": 1.0203, "step": 1139 }, { "epoch": 1.2985754985754987, "grad_norm": 0.13373130559921265, "learning_rate": 4.991751343935666e-05, "loss": 1.043, "step": 1140 }, { "epoch": 1.2997150997150997, "grad_norm": 0.1454683393239975, "learning_rate": 4.991713483612525e-05, "loss": 0.9341, "step": 1141 }, { "epoch": 1.300854700854701, "grad_norm": 0.1534794270992279, "learning_rate": 4.9916755367453226e-05, "loss": 0.7255, "step": 1142 }, { "epoch": 1.301994301994302, "grad_norm": 0.13185031712055206, "learning_rate": 4.9916375033353766e-05, "loss": 0.9479, "step": 1143 }, { "epoch": 1.303133903133903, "grad_norm": 0.12707069516181946, "learning_rate": 4.991599383384008e-05, "loss": 1.1089, "step": 1144 }, { "epoch": 1.3042735042735043, "grad_norm": 0.1685594916343689, "learning_rate": 4.991561176892541e-05, "loss": 0.8438, "step": 1145 }, { "epoch": 1.3054131054131055, "grad_norm": 0.13592924177646637, "learning_rate": 4.991522883862302e-05, "loss": 0.9217, "step": 1146 }, { "epoch": 1.3065527065527065, "grad_norm": 0.14373846352100372, "learning_rate": 4.9914845042946226e-05, "loss": 0.9532, "step": 1147 }, { "epoch": 1.3076923076923077, "grad_norm": 0.12595140933990479, "learning_rate": 4.991446038190833e-05, "loss": 0.9064, "step": 1148 }, { "epoch": 1.308831908831909, "grad_norm": 0.15080587565898895, "learning_rate": 4.9914074855522724e-05, "loss": 1.0201, "step": 1149 }, { "epoch": 1.30997150997151, "grad_norm": 0.15452298521995544, "learning_rate": 4.991368846380278e-05, "loss": 0.9003, "step": 1150 }, { "epoch": 1.3111111111111111, "grad_norm": 0.14135576784610748, "learning_rate": 4.9913301206761926e-05, "loss": 0.8229, "step": 1151 }, { "epoch": 1.3122507122507123, "grad_norm": 0.1526070386171341, "learning_rate": 4.9912913084413606e-05, "loss": 0.8279, "step": 1152 }, { "epoch": 1.3133903133903133, "grad_norm": 0.1349724680185318, "learning_rate": 4.9912524096771304e-05, "loss": 0.9782, "step": 1153 }, { "epoch": 1.3145299145299145, "grad_norm": 0.1356758326292038, "learning_rate": 4.9912134243848526e-05, "loss": 0.9848, "step": 1154 }, { "epoch": 1.3156695156695157, "grad_norm": 0.14452780783176422, "learning_rate": 4.9911743525658824e-05, "loss": 0.8013, "step": 1155 }, { "epoch": 1.3168091168091167, "grad_norm": 0.14168766140937805, "learning_rate": 4.9911351942215765e-05, "loss": 0.9603, "step": 1156 }, { "epoch": 1.317948717948718, "grad_norm": 0.13349944353103638, "learning_rate": 4.991095949353294e-05, "loss": 0.9107, "step": 1157 }, { "epoch": 1.3190883190883191, "grad_norm": 0.13506649434566498, "learning_rate": 4.9910566179623985e-05, "loss": 0.9539, "step": 1158 }, { "epoch": 1.3202279202279201, "grad_norm": 0.16338467597961426, "learning_rate": 4.991017200050257e-05, "loss": 0.8246, "step": 1159 }, { "epoch": 1.3213675213675213, "grad_norm": 0.14383643865585327, "learning_rate": 4.9909776956182374e-05, "loss": 1.0365, "step": 1160 }, { "epoch": 1.3225071225071225, "grad_norm": 0.16941548883914948, "learning_rate": 4.9909381046677114e-05, "loss": 0.7051, "step": 1161 }, { "epoch": 1.3236467236467235, "grad_norm": 0.1388421356678009, "learning_rate": 4.9908984272000564e-05, "loss": 1.0864, "step": 1162 }, { "epoch": 1.3247863247863247, "grad_norm": 0.15731565654277802, "learning_rate": 4.990858663216648e-05, "loss": 0.9025, "step": 1163 }, { "epoch": 1.325925925925926, "grad_norm": 0.13144905865192413, "learning_rate": 4.990818812718869e-05, "loss": 1.0185, "step": 1164 }, { "epoch": 1.327065527065527, "grad_norm": 0.1429448127746582, "learning_rate": 4.990778875708102e-05, "loss": 0.836, "step": 1165 }, { "epoch": 1.3282051282051281, "grad_norm": 0.1316772848367691, "learning_rate": 4.9907388521857354e-05, "loss": 0.9763, "step": 1166 }, { "epoch": 1.3293447293447294, "grad_norm": 1.0678691864013672, "learning_rate": 4.990698742153159e-05, "loss": 0.9243, "step": 1167 }, { "epoch": 1.3304843304843303, "grad_norm": 0.1553177535533905, "learning_rate": 4.990658545611766e-05, "loss": 0.8401, "step": 1168 }, { "epoch": 1.3316239316239316, "grad_norm": 0.15483981370925903, "learning_rate": 4.990618262562953e-05, "loss": 0.7864, "step": 1169 }, { "epoch": 1.3327635327635328, "grad_norm": 0.14380450546741486, "learning_rate": 4.990577893008117e-05, "loss": 0.9686, "step": 1170 }, { "epoch": 1.333903133903134, "grad_norm": 0.13951167464256287, "learning_rate": 4.990537436948662e-05, "loss": 0.9196, "step": 1171 }, { "epoch": 1.335042735042735, "grad_norm": 0.12414976209402084, "learning_rate": 4.9904968943859936e-05, "loss": 1.0487, "step": 1172 }, { "epoch": 1.3361823361823362, "grad_norm": 0.14760416746139526, "learning_rate": 4.9904562653215186e-05, "loss": 1.1654, "step": 1173 }, { "epoch": 1.3373219373219374, "grad_norm": 0.15358950197696686, "learning_rate": 4.990415549756649e-05, "loss": 1.0133, "step": 1174 }, { "epoch": 1.3384615384615386, "grad_norm": 0.14099790155887604, "learning_rate": 4.990374747692799e-05, "loss": 0.9254, "step": 1175 }, { "epoch": 1.3396011396011396, "grad_norm": 0.14278669655323029, "learning_rate": 4.990333859131384e-05, "loss": 0.9226, "step": 1176 }, { "epoch": 1.3407407407407408, "grad_norm": 0.1511949896812439, "learning_rate": 4.9902928840738276e-05, "loss": 0.9367, "step": 1177 }, { "epoch": 1.341880341880342, "grad_norm": 0.12369052320718765, "learning_rate": 4.990251822521549e-05, "loss": 1.0866, "step": 1178 }, { "epoch": 1.343019943019943, "grad_norm": 0.6881939172744751, "learning_rate": 4.990210674475978e-05, "loss": 0.8612, "step": 1179 }, { "epoch": 1.3441595441595442, "grad_norm": 0.1318873018026352, "learning_rate": 4.9901694399385414e-05, "loss": 0.9232, "step": 1180 }, { "epoch": 1.3452991452991454, "grad_norm": 0.160270556807518, "learning_rate": 4.990128118910672e-05, "loss": 0.8191, "step": 1181 }, { "epoch": 1.3464387464387464, "grad_norm": 0.15048420429229736, "learning_rate": 4.990086711393806e-05, "loss": 0.9407, "step": 1182 }, { "epoch": 1.3475783475783476, "grad_norm": 0.14538373053073883, "learning_rate": 4.99004521738938e-05, "loss": 1.0196, "step": 1183 }, { "epoch": 1.3487179487179488, "grad_norm": 0.14732596278190613, "learning_rate": 4.990003636898837e-05, "loss": 0.8623, "step": 1184 }, { "epoch": 1.3498575498575498, "grad_norm": 0.13414761424064636, "learning_rate": 4.9899619699236185e-05, "loss": 1.018, "step": 1185 }, { "epoch": 1.350997150997151, "grad_norm": 0.13759319484233856, "learning_rate": 4.989920216465175e-05, "loss": 0.9806, "step": 1186 }, { "epoch": 1.3521367521367522, "grad_norm": 0.17735135555267334, "learning_rate": 4.9898783765249535e-05, "loss": 0.762, "step": 1187 }, { "epoch": 1.3532763532763532, "grad_norm": 0.13321515917778015, "learning_rate": 4.9898364501044096e-05, "loss": 1.0177, "step": 1188 }, { "epoch": 1.3544159544159544, "grad_norm": 0.14134404063224792, "learning_rate": 4.989794437204999e-05, "loss": 0.9381, "step": 1189 }, { "epoch": 1.3555555555555556, "grad_norm": 0.14497323334217072, "learning_rate": 4.98975233782818e-05, "loss": 0.8058, "step": 1190 }, { "epoch": 1.3566951566951566, "grad_norm": 0.14402492344379425, "learning_rate": 4.9897101519754165e-05, "loss": 1.0357, "step": 1191 }, { "epoch": 1.3578347578347578, "grad_norm": 0.128183975815773, "learning_rate": 4.9896678796481715e-05, "loss": 0.9305, "step": 1192 }, { "epoch": 1.358974358974359, "grad_norm": 0.1382104754447937, "learning_rate": 4.989625520847915e-05, "loss": 0.8842, "step": 1193 }, { "epoch": 1.36011396011396, "grad_norm": 0.16739597916603088, "learning_rate": 4.9895830755761176e-05, "loss": 0.8996, "step": 1194 }, { "epoch": 1.3612535612535612, "grad_norm": 0.14791855216026306, "learning_rate": 4.9895405438342536e-05, "loss": 0.8785, "step": 1195 }, { "epoch": 1.3623931623931624, "grad_norm": 0.14144086837768555, "learning_rate": 4.9894979256238e-05, "loss": 0.8751, "step": 1196 }, { "epoch": 1.3635327635327634, "grad_norm": 0.14224928617477417, "learning_rate": 4.9894552209462375e-05, "loss": 0.7201, "step": 1197 }, { "epoch": 1.3646723646723646, "grad_norm": 0.16301606595516205, "learning_rate": 4.9894124298030494e-05, "loss": 0.7686, "step": 1198 }, { "epoch": 1.3658119658119658, "grad_norm": 0.16685374081134796, "learning_rate": 4.989369552195722e-05, "loss": 0.8542, "step": 1199 }, { "epoch": 1.3669515669515668, "grad_norm": 0.16126003861427307, "learning_rate": 4.989326588125743e-05, "loss": 0.7832, "step": 1200 }, { "epoch": 1.368091168091168, "grad_norm": 0.13587048649787903, "learning_rate": 4.989283537594607e-05, "loss": 0.9782, "step": 1201 }, { "epoch": 1.3692307692307693, "grad_norm": 0.12424058467149734, "learning_rate": 4.989240400603807e-05, "loss": 0.9735, "step": 1202 }, { "epoch": 1.3703703703703702, "grad_norm": 0.14319686591625214, "learning_rate": 4.989197177154844e-05, "loss": 1.0885, "step": 1203 }, { "epoch": 1.3715099715099714, "grad_norm": 0.15234094858169556, "learning_rate": 4.9891538672492165e-05, "loss": 0.8618, "step": 1204 }, { "epoch": 1.3726495726495727, "grad_norm": 0.12624700367450714, "learning_rate": 4.9891104708884305e-05, "loss": 1.1208, "step": 1205 }, { "epoch": 1.3737891737891739, "grad_norm": 0.15610456466674805, "learning_rate": 4.9890669880739934e-05, "loss": 0.9128, "step": 1206 }, { "epoch": 1.3749287749287749, "grad_norm": 0.1296873688697815, "learning_rate": 4.989023418807414e-05, "loss": 0.8707, "step": 1207 }, { "epoch": 1.376068376068376, "grad_norm": 0.15303196012973785, "learning_rate": 4.988979763090207e-05, "loss": 0.8929, "step": 1208 }, { "epoch": 1.3772079772079773, "grad_norm": 0.1516156941652298, "learning_rate": 4.988936020923887e-05, "loss": 0.7139, "step": 1209 }, { "epoch": 1.3783475783475785, "grad_norm": 0.14863616228103638, "learning_rate": 4.988892192309975e-05, "loss": 0.8932, "step": 1210 }, { "epoch": 1.3794871794871795, "grad_norm": 0.14707180857658386, "learning_rate": 4.9888482772499936e-05, "loss": 0.9953, "step": 1211 }, { "epoch": 1.3806267806267807, "grad_norm": 0.1570790857076645, "learning_rate": 4.988804275745466e-05, "loss": 0.8269, "step": 1212 }, { "epoch": 1.381766381766382, "grad_norm": 0.17625752091407776, "learning_rate": 4.988760187797922e-05, "loss": 0.772, "step": 1213 }, { "epoch": 1.3829059829059829, "grad_norm": 0.14745593070983887, "learning_rate": 4.988716013408893e-05, "loss": 0.7933, "step": 1214 }, { "epoch": 1.384045584045584, "grad_norm": 0.1269306093454361, "learning_rate": 4.988671752579912e-05, "loss": 0.9926, "step": 1215 }, { "epoch": 1.3851851851851853, "grad_norm": 0.14263474941253662, "learning_rate": 4.988627405312518e-05, "loss": 0.8458, "step": 1216 }, { "epoch": 1.3863247863247863, "grad_norm": 0.14795072376728058, "learning_rate": 4.98858297160825e-05, "loss": 0.8447, "step": 1217 }, { "epoch": 1.3874643874643875, "grad_norm": 0.16355492174625397, "learning_rate": 4.988538451468652e-05, "loss": 0.9283, "step": 1218 }, { "epoch": 1.3886039886039887, "grad_norm": 0.13762181997299194, "learning_rate": 4.98849384489527e-05, "loss": 0.7726, "step": 1219 }, { "epoch": 1.3897435897435897, "grad_norm": 0.12672296166419983, "learning_rate": 4.9884491518896526e-05, "loss": 0.9858, "step": 1220 }, { "epoch": 1.390883190883191, "grad_norm": 0.1451258808374405, "learning_rate": 4.9884043724533534e-05, "loss": 0.8785, "step": 1221 }, { "epoch": 1.392022792022792, "grad_norm": 0.1677914410829544, "learning_rate": 4.988359506587928e-05, "loss": 0.9269, "step": 1222 }, { "epoch": 1.393162393162393, "grad_norm": 0.1454748809337616, "learning_rate": 4.988314554294933e-05, "loss": 0.7502, "step": 1223 }, { "epoch": 1.3943019943019943, "grad_norm": 0.1370631903409958, "learning_rate": 4.988269515575931e-05, "loss": 0.8542, "step": 1224 }, { "epoch": 1.3954415954415955, "grad_norm": 0.1208294928073883, "learning_rate": 4.9882243904324866e-05, "loss": 1.0782, "step": 1225 }, { "epoch": 1.3965811965811965, "grad_norm": 0.1262115091085434, "learning_rate": 4.988179178866165e-05, "loss": 0.9658, "step": 1226 }, { "epoch": 1.3977207977207977, "grad_norm": 0.13408705592155457, "learning_rate": 4.9881338808785395e-05, "loss": 0.912, "step": 1227 }, { "epoch": 1.398860398860399, "grad_norm": 0.1178419291973114, "learning_rate": 4.988088496471181e-05, "loss": 0.9557, "step": 1228 }, { "epoch": 1.4, "grad_norm": 0.14413946866989136, "learning_rate": 4.988043025645667e-05, "loss": 0.9482, "step": 1229 }, { "epoch": 1.4011396011396011, "grad_norm": 0.21009854972362518, "learning_rate": 4.987997468403576e-05, "loss": 0.7569, "step": 1230 }, { "epoch": 1.4022792022792023, "grad_norm": 0.13314151763916016, "learning_rate": 4.9879518247464916e-05, "loss": 0.9052, "step": 1231 }, { "epoch": 1.4034188034188033, "grad_norm": 0.14656740427017212, "learning_rate": 4.987906094675999e-05, "loss": 0.8088, "step": 1232 }, { "epoch": 1.4045584045584045, "grad_norm": 0.1369347870349884, "learning_rate": 4.9878602781936845e-05, "loss": 1.0339, "step": 1233 }, { "epoch": 1.4056980056980057, "grad_norm": 0.1365630179643631, "learning_rate": 4.987814375301142e-05, "loss": 1.0111, "step": 1234 }, { "epoch": 1.4068376068376067, "grad_norm": 0.16005423665046692, "learning_rate": 4.9877683859999645e-05, "loss": 0.9137, "step": 1235 }, { "epoch": 1.407977207977208, "grad_norm": 0.14936578273773193, "learning_rate": 4.987722310291749e-05, "loss": 0.8635, "step": 1236 }, { "epoch": 1.4091168091168091, "grad_norm": 0.1410677284002304, "learning_rate": 4.987676148178097e-05, "loss": 0.7862, "step": 1237 }, { "epoch": 1.4102564102564101, "grad_norm": 0.12266705930233002, "learning_rate": 4.987629899660611e-05, "loss": 0.9824, "step": 1238 }, { "epoch": 1.4113960113960113, "grad_norm": 0.12187635898590088, "learning_rate": 4.9875835647408976e-05, "loss": 0.9681, "step": 1239 }, { "epoch": 1.4125356125356126, "grad_norm": 0.16175293922424316, "learning_rate": 4.9875371434205664e-05, "loss": 0.8193, "step": 1240 }, { "epoch": 1.4136752136752135, "grad_norm": 0.14722539484500885, "learning_rate": 4.9874906357012294e-05, "loss": 0.8322, "step": 1241 }, { "epoch": 1.4148148148148147, "grad_norm": 0.1457187533378601, "learning_rate": 4.987444041584501e-05, "loss": 0.8523, "step": 1242 }, { "epoch": 1.415954415954416, "grad_norm": 0.13287276029586792, "learning_rate": 4.987397361072002e-05, "loss": 0.8201, "step": 1243 }, { "epoch": 1.4170940170940172, "grad_norm": 0.15389710664749146, "learning_rate": 4.987350594165352e-05, "loss": 0.8384, "step": 1244 }, { "epoch": 1.4182336182336182, "grad_norm": 0.1264120191335678, "learning_rate": 4.987303740866175e-05, "loss": 0.9186, "step": 1245 }, { "epoch": 1.4193732193732194, "grad_norm": 0.14353694021701813, "learning_rate": 4.987256801176099e-05, "loss": 0.8757, "step": 1246 }, { "epoch": 1.4205128205128206, "grad_norm": 0.12593340873718262, "learning_rate": 4.9872097750967544e-05, "loss": 1.0519, "step": 1247 }, { "epoch": 1.4216524216524218, "grad_norm": 0.1270427107810974, "learning_rate": 4.987162662629775e-05, "loss": 0.8363, "step": 1248 }, { "epoch": 1.4227920227920228, "grad_norm": 0.1486106663942337, "learning_rate": 4.987115463776796e-05, "loss": 0.8655, "step": 1249 }, { "epoch": 1.423931623931624, "grad_norm": 0.15621215105056763, "learning_rate": 4.987068178539458e-05, "loss": 0.8717, "step": 1250 }, { "epoch": 1.4250712250712252, "grad_norm": 0.1431739181280136, "learning_rate": 4.987020806919402e-05, "loss": 0.8792, "step": 1251 }, { "epoch": 1.4262108262108262, "grad_norm": 0.14031408727169037, "learning_rate": 4.986973348918274e-05, "loss": 0.9742, "step": 1252 }, { "epoch": 1.4273504273504274, "grad_norm": 0.1250138133764267, "learning_rate": 4.986925804537723e-05, "loss": 1.0239, "step": 1253 }, { "epoch": 1.4284900284900286, "grad_norm": 0.1533067524433136, "learning_rate": 4.986878173779399e-05, "loss": 1.0121, "step": 1254 }, { "epoch": 1.4296296296296296, "grad_norm": 0.12430374324321747, "learning_rate": 4.986830456644957e-05, "loss": 0.9049, "step": 1255 }, { "epoch": 1.4307692307692308, "grad_norm": 0.12849588692188263, "learning_rate": 4.9867826531360554e-05, "loss": 0.8443, "step": 1256 }, { "epoch": 1.431908831908832, "grad_norm": 0.12638366222381592, "learning_rate": 4.986734763254353e-05, "loss": 1.0444, "step": 1257 }, { "epoch": 1.433048433048433, "grad_norm": 0.13886703550815582, "learning_rate": 4.9866867870015145e-05, "loss": 0.8611, "step": 1258 }, { "epoch": 1.4341880341880342, "grad_norm": 0.1197642832994461, "learning_rate": 4.9866387243792046e-05, "loss": 1.0655, "step": 1259 }, { "epoch": 1.4353276353276354, "grad_norm": 0.1469334363937378, "learning_rate": 4.9865905753890934e-05, "loss": 0.8455, "step": 1260 }, { "epoch": 1.4364672364672364, "grad_norm": 0.14509762823581696, "learning_rate": 4.9865423400328535e-05, "loss": 0.8797, "step": 1261 }, { "epoch": 1.4376068376068376, "grad_norm": 0.1637079119682312, "learning_rate": 4.9864940183121605e-05, "loss": 0.8771, "step": 1262 }, { "epoch": 1.4387464387464388, "grad_norm": 0.13422858715057373, "learning_rate": 4.986445610228692e-05, "loss": 0.8328, "step": 1263 }, { "epoch": 1.4398860398860398, "grad_norm": 0.14663170278072357, "learning_rate": 4.9863971157841306e-05, "loss": 1.0229, "step": 1264 }, { "epoch": 1.441025641025641, "grad_norm": 0.17912691831588745, "learning_rate": 4.986348534980159e-05, "loss": 1.0183, "step": 1265 }, { "epoch": 1.4421652421652422, "grad_norm": 0.15999111533164978, "learning_rate": 4.986299867818465e-05, "loss": 0.8752, "step": 1266 }, { "epoch": 1.4433048433048432, "grad_norm": 0.1540723741054535, "learning_rate": 4.9862511143007394e-05, "loss": 0.8717, "step": 1267 }, { "epoch": 1.4444444444444444, "grad_norm": 0.15807846188545227, "learning_rate": 4.986202274428675e-05, "loss": 0.9551, "step": 1268 }, { "epoch": 1.4455840455840456, "grad_norm": 0.15185344219207764, "learning_rate": 4.9861533482039695e-05, "loss": 0.7945, "step": 1269 }, { "epoch": 1.4467236467236466, "grad_norm": 0.13983812928199768, "learning_rate": 4.986104335628321e-05, "loss": 0.9907, "step": 1270 }, { "epoch": 1.4478632478632478, "grad_norm": 0.1468629688024521, "learning_rate": 4.986055236703432e-05, "loss": 1.0011, "step": 1271 }, { "epoch": 1.449002849002849, "grad_norm": 0.11962605267763138, "learning_rate": 4.986006051431008e-05, "loss": 1.0616, "step": 1272 }, { "epoch": 1.45014245014245, "grad_norm": 0.12591226398944855, "learning_rate": 4.985956779812757e-05, "loss": 1.0343, "step": 1273 }, { "epoch": 1.4512820512820512, "grad_norm": 0.16561780869960785, "learning_rate": 4.9859074218503906e-05, "loss": 0.8572, "step": 1274 }, { "epoch": 1.4524216524216524, "grad_norm": 0.1418294459581375, "learning_rate": 4.985857977545624e-05, "loss": 0.9274, "step": 1275 }, { "epoch": 1.4535612535612534, "grad_norm": 0.13808898627758026, "learning_rate": 4.985808446900173e-05, "loss": 0.9046, "step": 1276 }, { "epoch": 1.4547008547008546, "grad_norm": 0.12016861885786057, "learning_rate": 4.9857588299157586e-05, "loss": 1.0069, "step": 1277 }, { "epoch": 1.4558404558404558, "grad_norm": 0.1498369723558426, "learning_rate": 4.985709126594103e-05, "loss": 0.9394, "step": 1278 }, { "epoch": 1.456980056980057, "grad_norm": 0.13604852557182312, "learning_rate": 4.9856593369369364e-05, "loss": 0.9096, "step": 1279 }, { "epoch": 1.458119658119658, "grad_norm": 0.15665759146213531, "learning_rate": 4.985609460945984e-05, "loss": 0.9244, "step": 1280 }, { "epoch": 1.4592592592592593, "grad_norm": 0.1460144817829132, "learning_rate": 4.985559498622979e-05, "loss": 0.8224, "step": 1281 }, { "epoch": 1.4603988603988605, "grad_norm": 0.14238189160823822, "learning_rate": 4.9855094499696584e-05, "loss": 0.9917, "step": 1282 }, { "epoch": 1.4615384615384617, "grad_norm": 0.15050332248210907, "learning_rate": 4.985459314987759e-05, "loss": 0.8227, "step": 1283 }, { "epoch": 1.4626780626780627, "grad_norm": 0.1324281394481659, "learning_rate": 4.9854090936790224e-05, "loss": 0.8604, "step": 1284 }, { "epoch": 1.4638176638176639, "grad_norm": 0.14524638652801514, "learning_rate": 4.985358786045193e-05, "loss": 0.8155, "step": 1285 }, { "epoch": 1.464957264957265, "grad_norm": 0.13763754069805145, "learning_rate": 4.9853083920880186e-05, "loss": 1.0466, "step": 1286 }, { "epoch": 1.466096866096866, "grad_norm": 0.1332344263792038, "learning_rate": 4.985257911809249e-05, "loss": 0.88, "step": 1287 }, { "epoch": 1.4672364672364673, "grad_norm": 0.13248127698898315, "learning_rate": 4.985207345210637e-05, "loss": 0.9332, "step": 1288 }, { "epoch": 1.4683760683760685, "grad_norm": 0.14803822338581085, "learning_rate": 4.98515669229394e-05, "loss": 0.9199, "step": 1289 }, { "epoch": 1.4695156695156695, "grad_norm": 0.15037009119987488, "learning_rate": 4.985105953060917e-05, "loss": 0.8443, "step": 1290 }, { "epoch": 1.4706552706552707, "grad_norm": 0.13404841721057892, "learning_rate": 4.985055127513331e-05, "loss": 0.8507, "step": 1291 }, { "epoch": 1.471794871794872, "grad_norm": 0.14653468132019043, "learning_rate": 4.9850042156529446e-05, "loss": 0.9502, "step": 1292 }, { "epoch": 1.4729344729344729, "grad_norm": 0.12113948911428452, "learning_rate": 4.984953217481529e-05, "loss": 1.1143, "step": 1293 }, { "epoch": 1.474074074074074, "grad_norm": 0.15463142096996307, "learning_rate": 4.984902133000855e-05, "loss": 0.8268, "step": 1294 }, { "epoch": 1.4752136752136753, "grad_norm": 0.17607726156711578, "learning_rate": 4.984850962212696e-05, "loss": 0.752, "step": 1295 }, { "epoch": 1.4763532763532763, "grad_norm": 0.1366874724626541, "learning_rate": 4.9847997051188294e-05, "loss": 0.928, "step": 1296 }, { "epoch": 1.4774928774928775, "grad_norm": 0.13072553277015686, "learning_rate": 4.9847483617210364e-05, "loss": 1.0024, "step": 1297 }, { "epoch": 1.4786324786324787, "grad_norm": 0.18104462325572968, "learning_rate": 4.984696932021099e-05, "loss": 0.9272, "step": 1298 }, { "epoch": 1.4797720797720797, "grad_norm": 0.13473987579345703, "learning_rate": 4.984645416020805e-05, "loss": 0.866, "step": 1299 }, { "epoch": 1.480911680911681, "grad_norm": 0.12323030084371567, "learning_rate": 4.984593813721942e-05, "loss": 0.9955, "step": 1300 }, { "epoch": 1.4820512820512821, "grad_norm": 0.12297698855400085, "learning_rate": 4.984542125126303e-05, "loss": 0.9549, "step": 1301 }, { "epoch": 1.483190883190883, "grad_norm": 0.13392789661884308, "learning_rate": 4.9844903502356846e-05, "loss": 0.8971, "step": 1302 }, { "epoch": 1.4843304843304843, "grad_norm": 0.13748367130756378, "learning_rate": 4.9844384890518836e-05, "loss": 0.9381, "step": 1303 }, { "epoch": 1.4854700854700855, "grad_norm": 0.13961276412010193, "learning_rate": 4.984386541576702e-05, "loss": 0.9804, "step": 1304 }, { "epoch": 1.4866096866096865, "grad_norm": 0.13649769127368927, "learning_rate": 4.984334507811943e-05, "loss": 0.8591, "step": 1305 }, { "epoch": 1.4877492877492877, "grad_norm": 0.16382646560668945, "learning_rate": 4.9842823877594146e-05, "loss": 0.8231, "step": 1306 }, { "epoch": 1.488888888888889, "grad_norm": 0.13929148018360138, "learning_rate": 4.984230181420928e-05, "loss": 0.889, "step": 1307 }, { "epoch": 1.49002849002849, "grad_norm": 0.13436809182167053, "learning_rate": 4.9841778887982946e-05, "loss": 0.8864, "step": 1308 }, { "epoch": 1.4911680911680911, "grad_norm": 0.14599432051181793, "learning_rate": 4.9841255098933316e-05, "loss": 0.9793, "step": 1309 }, { "epoch": 1.4923076923076923, "grad_norm": 0.13600577414035797, "learning_rate": 4.9840730447078586e-05, "loss": 0.8645, "step": 1310 }, { "epoch": 1.4934472934472933, "grad_norm": 0.17638111114501953, "learning_rate": 4.984020493243697e-05, "loss": 0.8114, "step": 1311 }, { "epoch": 1.4945868945868945, "grad_norm": 0.155809223651886, "learning_rate": 4.983967855502674e-05, "loss": 0.9038, "step": 1312 }, { "epoch": 1.4957264957264957, "grad_norm": 0.15889999270439148, "learning_rate": 4.983915131486615e-05, "loss": 0.7738, "step": 1313 }, { "epoch": 1.4968660968660967, "grad_norm": 0.14001519978046417, "learning_rate": 4.9838623211973536e-05, "loss": 1.0211, "step": 1314 }, { "epoch": 1.498005698005698, "grad_norm": 0.16320933401584625, "learning_rate": 4.983809424636723e-05, "loss": 0.7662, "step": 1315 }, { "epoch": 1.4991452991452991, "grad_norm": 0.1509057879447937, "learning_rate": 4.983756441806559e-05, "loss": 0.8538, "step": 1316 }, { "epoch": 1.5002849002849001, "grad_norm": 0.12491398304700851, "learning_rate": 4.983703372708706e-05, "loss": 0.926, "step": 1317 }, { "epoch": 1.5014245014245016, "grad_norm": 0.13828684389591217, "learning_rate": 4.983650217345003e-05, "loss": 0.9604, "step": 1318 }, { "epoch": 1.5025641025641026, "grad_norm": 0.13418659567832947, "learning_rate": 4.983596975717299e-05, "loss": 0.812, "step": 1319 }, { "epoch": 1.5037037037037035, "grad_norm": 0.12549825012683868, "learning_rate": 4.983543647827441e-05, "loss": 0.8869, "step": 1320 }, { "epoch": 1.504843304843305, "grad_norm": 0.16205163300037384, "learning_rate": 4.9834902336772824e-05, "loss": 0.8127, "step": 1321 }, { "epoch": 1.505982905982906, "grad_norm": 0.15544158220291138, "learning_rate": 4.983436733268679e-05, "loss": 0.8604, "step": 1322 }, { "epoch": 1.5071225071225072, "grad_norm": 0.14789056777954102, "learning_rate": 4.983383146603488e-05, "loss": 0.9052, "step": 1323 }, { "epoch": 1.5082621082621084, "grad_norm": 0.15124930441379547, "learning_rate": 4.983329473683571e-05, "loss": 0.789, "step": 1324 }, { "epoch": 1.5094017094017094, "grad_norm": 0.15126442909240723, "learning_rate": 4.983275714510792e-05, "loss": 0.8036, "step": 1325 }, { "epoch": 1.5105413105413106, "grad_norm": 0.13765566051006317, "learning_rate": 4.983221869087019e-05, "loss": 0.9164, "step": 1326 }, { "epoch": 1.5116809116809118, "grad_norm": 0.12307071685791016, "learning_rate": 4.983167937414122e-05, "loss": 0.9907, "step": 1327 }, { "epoch": 1.5128205128205128, "grad_norm": 0.11977549642324448, "learning_rate": 4.9831139194939725e-05, "loss": 0.9954, "step": 1328 }, { "epoch": 1.513960113960114, "grad_norm": 0.14224258065223694, "learning_rate": 4.983059815328448e-05, "loss": 0.7576, "step": 1329 }, { "epoch": 1.5150997150997152, "grad_norm": 0.1308591663837433, "learning_rate": 4.983005624919428e-05, "loss": 0.7993, "step": 1330 }, { "epoch": 1.5162393162393162, "grad_norm": 0.13867028057575226, "learning_rate": 4.982951348268795e-05, "loss": 0.9724, "step": 1331 }, { "epoch": 1.5173789173789174, "grad_norm": 0.1423736959695816, "learning_rate": 4.982896985378434e-05, "loss": 0.9126, "step": 1332 }, { "epoch": 1.5185185185185186, "grad_norm": 0.13342273235321045, "learning_rate": 4.982842536250231e-05, "loss": 0.8875, "step": 1333 }, { "epoch": 1.5196581196581196, "grad_norm": 0.1460614651441574, "learning_rate": 4.9827880008860794e-05, "loss": 0.7867, "step": 1334 }, { "epoch": 1.5207977207977208, "grad_norm": 0.17134015262126923, "learning_rate": 4.9827333792878736e-05, "loss": 0.7043, "step": 1335 }, { "epoch": 1.521937321937322, "grad_norm": 0.1339685320854187, "learning_rate": 4.982678671457509e-05, "loss": 0.9456, "step": 1336 }, { "epoch": 1.523076923076923, "grad_norm": 0.14963936805725098, "learning_rate": 4.982623877396888e-05, "loss": 0.8165, "step": 1337 }, { "epoch": 1.5242165242165242, "grad_norm": 0.15206843614578247, "learning_rate": 4.9825689971079116e-05, "loss": 1.0145, "step": 1338 }, { "epoch": 1.5253561253561254, "grad_norm": 0.1587289720773697, "learning_rate": 4.982514030592487e-05, "loss": 0.7367, "step": 1339 }, { "epoch": 1.5264957264957264, "grad_norm": 0.13782131671905518, "learning_rate": 4.9824589778525235e-05, "loss": 1.0926, "step": 1340 }, { "epoch": 1.5276353276353276, "grad_norm": 0.13518695533275604, "learning_rate": 4.982403838889932e-05, "loss": 0.9578, "step": 1341 }, { "epoch": 1.5287749287749288, "grad_norm": 0.12508663535118103, "learning_rate": 4.9823486137066285e-05, "loss": 0.8944, "step": 1342 }, { "epoch": 1.5299145299145298, "grad_norm": 0.17138533294200897, "learning_rate": 4.9822933023045324e-05, "loss": 0.8398, "step": 1343 }, { "epoch": 1.531054131054131, "grad_norm": 0.14755333960056305, "learning_rate": 4.982237904685563e-05, "loss": 0.9247, "step": 1344 }, { "epoch": 1.5321937321937322, "grad_norm": 0.1292998194694519, "learning_rate": 4.9821824208516456e-05, "loss": 0.9922, "step": 1345 }, { "epoch": 1.5333333333333332, "grad_norm": 0.14086347818374634, "learning_rate": 4.982126850804706e-05, "loss": 0.8572, "step": 1346 }, { "epoch": 1.5344729344729344, "grad_norm": 0.1637052595615387, "learning_rate": 4.982071194546675e-05, "loss": 0.8899, "step": 1347 }, { "epoch": 1.5356125356125356, "grad_norm": 0.1730019450187683, "learning_rate": 4.9820154520794855e-05, "loss": 0.8176, "step": 1348 }, { "epoch": 1.5367521367521366, "grad_norm": 0.15437714755535126, "learning_rate": 4.981959623405074e-05, "loss": 0.8728, "step": 1349 }, { "epoch": 1.537891737891738, "grad_norm": 0.14854766428470612, "learning_rate": 4.981903708525379e-05, "loss": 0.9019, "step": 1350 }, { "epoch": 1.539031339031339, "grad_norm": 0.12123582512140274, "learning_rate": 4.981847707442343e-05, "loss": 0.9272, "step": 1351 }, { "epoch": 1.54017094017094, "grad_norm": 0.13503971695899963, "learning_rate": 4.9817916201579117e-05, "loss": 0.9228, "step": 1352 }, { "epoch": 1.5413105413105415, "grad_norm": 0.1280747652053833, "learning_rate": 4.9817354466740324e-05, "loss": 0.9124, "step": 1353 }, { "epoch": 1.5424501424501424, "grad_norm": 0.14042603969573975, "learning_rate": 4.981679186992656e-05, "loss": 0.7556, "step": 1354 }, { "epoch": 1.5435897435897434, "grad_norm": 0.1518666297197342, "learning_rate": 4.9816228411157364e-05, "loss": 0.9327, "step": 1355 }, { "epoch": 1.5447293447293449, "grad_norm": 0.1396237313747406, "learning_rate": 4.981566409045232e-05, "loss": 1.094, "step": 1356 }, { "epoch": 1.5458689458689459, "grad_norm": 0.11857928335666656, "learning_rate": 4.9815098907831005e-05, "loss": 0.8035, "step": 1357 }, { "epoch": 1.547008547008547, "grad_norm": 0.1362091302871704, "learning_rate": 4.981453286331307e-05, "loss": 0.9102, "step": 1358 }, { "epoch": 1.5481481481481483, "grad_norm": 0.17575840651988983, "learning_rate": 4.981396595691817e-05, "loss": 0.8394, "step": 1359 }, { "epoch": 1.5492877492877493, "grad_norm": 0.18164268136024475, "learning_rate": 4.981339818866599e-05, "loss": 0.7272, "step": 1360 }, { "epoch": 1.5504273504273505, "grad_norm": 0.13330239057540894, "learning_rate": 4.981282955857626e-05, "loss": 0.9009, "step": 1361 }, { "epoch": 1.5515669515669517, "grad_norm": 0.16982725262641907, "learning_rate": 4.9812260066668725e-05, "loss": 0.6897, "step": 1362 }, { "epoch": 1.5527065527065527, "grad_norm": 0.1205325797200203, "learning_rate": 4.981168971296316e-05, "loss": 0.9547, "step": 1363 }, { "epoch": 1.5538461538461539, "grad_norm": 0.1427002251148224, "learning_rate": 4.9811118497479374e-05, "loss": 0.8734, "step": 1364 }, { "epoch": 1.554985754985755, "grad_norm": 0.13793444633483887, "learning_rate": 4.9810546420237215e-05, "loss": 1.1243, "step": 1365 }, { "epoch": 1.556125356125356, "grad_norm": 0.13552522659301758, "learning_rate": 4.9809973481256553e-05, "loss": 0.9583, "step": 1366 }, { "epoch": 1.5572649572649573, "grad_norm": 0.16145463287830353, "learning_rate": 4.9809399680557277e-05, "loss": 0.8743, "step": 1367 }, { "epoch": 1.5584045584045585, "grad_norm": 0.1432645171880722, "learning_rate": 4.9808825018159334e-05, "loss": 0.954, "step": 1368 }, { "epoch": 1.5595441595441595, "grad_norm": 0.17179745435714722, "learning_rate": 4.980824949408267e-05, "loss": 0.7664, "step": 1369 }, { "epoch": 1.5606837606837607, "grad_norm": 0.15518955886363983, "learning_rate": 4.980767310834728e-05, "loss": 0.9258, "step": 1370 }, { "epoch": 1.561823361823362, "grad_norm": 0.1771356165409088, "learning_rate": 4.980709586097318e-05, "loss": 0.8497, "step": 1371 }, { "epoch": 1.5629629629629629, "grad_norm": 0.1640431433916092, "learning_rate": 4.980651775198041e-05, "loss": 0.7934, "step": 1372 }, { "epoch": 1.564102564102564, "grad_norm": 0.1259385496377945, "learning_rate": 4.9805938781389074e-05, "loss": 1.1292, "step": 1373 }, { "epoch": 1.5652421652421653, "grad_norm": 0.13015595078468323, "learning_rate": 4.980535894921926e-05, "loss": 0.9764, "step": 1374 }, { "epoch": 1.5663817663817663, "grad_norm": 0.15139620006084442, "learning_rate": 4.980477825549112e-05, "loss": 0.8681, "step": 1375 }, { "epoch": 1.5675213675213675, "grad_norm": 0.13207580149173737, "learning_rate": 4.980419670022481e-05, "loss": 0.8417, "step": 1376 }, { "epoch": 1.5686609686609687, "grad_norm": 0.1268857717514038, "learning_rate": 4.9803614283440545e-05, "loss": 1.0332, "step": 1377 }, { "epoch": 1.5698005698005697, "grad_norm": 0.13949313759803772, "learning_rate": 4.980303100515854e-05, "loss": 0.8612, "step": 1378 }, { "epoch": 1.570940170940171, "grad_norm": 0.150383859872818, "learning_rate": 4.980244686539906e-05, "loss": 0.8026, "step": 1379 }, { "epoch": 1.5720797720797721, "grad_norm": 0.14769776165485382, "learning_rate": 4.9801861864182405e-05, "loss": 0.9293, "step": 1380 }, { "epoch": 1.573219373219373, "grad_norm": 0.15573029220104218, "learning_rate": 4.9801276001528876e-05, "loss": 1.0067, "step": 1381 }, { "epoch": 1.5743589743589743, "grad_norm": 0.13544835150241852, "learning_rate": 4.9800689277458824e-05, "loss": 0.9825, "step": 1382 }, { "epoch": 1.5754985754985755, "grad_norm": 0.14520074427127838, "learning_rate": 4.980010169199263e-05, "loss": 0.8529, "step": 1383 }, { "epoch": 1.5766381766381765, "grad_norm": 0.14635954797267914, "learning_rate": 4.97995132451507e-05, "loss": 0.895, "step": 1384 }, { "epoch": 1.5777777777777777, "grad_norm": 0.14890193939208984, "learning_rate": 4.979892393695348e-05, "loss": 0.8566, "step": 1385 }, { "epoch": 1.578917378917379, "grad_norm": 0.13760687410831451, "learning_rate": 4.979833376742144e-05, "loss": 0.9997, "step": 1386 }, { "epoch": 1.58005698005698, "grad_norm": 0.13461118936538696, "learning_rate": 4.979774273657507e-05, "loss": 0.8333, "step": 1387 }, { "epoch": 1.5811965811965814, "grad_norm": 0.13629597425460815, "learning_rate": 4.9797150844434895e-05, "loss": 0.8785, "step": 1388 }, { "epoch": 1.5823361823361823, "grad_norm": 0.1374688744544983, "learning_rate": 4.9796558091021485e-05, "loss": 0.9874, "step": 1389 }, { "epoch": 1.5834757834757833, "grad_norm": 0.13347190618515015, "learning_rate": 4.979596447635542e-05, "loss": 0.9136, "step": 1390 }, { "epoch": 1.5846153846153848, "grad_norm": 0.12285137176513672, "learning_rate": 4.979537000045732e-05, "loss": 0.8398, "step": 1391 }, { "epoch": 1.5857549857549857, "grad_norm": 0.1611711084842682, "learning_rate": 4.979477466334783e-05, "loss": 0.78, "step": 1392 }, { "epoch": 1.5868945868945867, "grad_norm": 0.16434116661548615, "learning_rate": 4.9794178465047636e-05, "loss": 0.7421, "step": 1393 }, { "epoch": 1.5880341880341882, "grad_norm": 0.1435929536819458, "learning_rate": 4.979358140557743e-05, "loss": 0.9948, "step": 1394 }, { "epoch": 1.5891737891737892, "grad_norm": 0.16544382274150848, "learning_rate": 4.979298348495797e-05, "loss": 0.7738, "step": 1395 }, { "epoch": 1.5903133903133904, "grad_norm": 0.14104124903678894, "learning_rate": 4.979238470321e-05, "loss": 1.0428, "step": 1396 }, { "epoch": 1.5914529914529916, "grad_norm": 0.1359635293483734, "learning_rate": 4.979178506035435e-05, "loss": 0.9505, "step": 1397 }, { "epoch": 1.5925925925925926, "grad_norm": 0.16423510015010834, "learning_rate": 4.9791184556411804e-05, "loss": 0.7678, "step": 1398 }, { "epoch": 1.5937321937321938, "grad_norm": 0.1553465873003006, "learning_rate": 4.979058319140325e-05, "loss": 0.7677, "step": 1399 }, { "epoch": 1.594871794871795, "grad_norm": 0.13681966066360474, "learning_rate": 4.9789980965349574e-05, "loss": 0.884, "step": 1400 }, { "epoch": 1.596011396011396, "grad_norm": 0.14007219672203064, "learning_rate": 4.9789377878271683e-05, "loss": 0.826, "step": 1401 }, { "epoch": 1.5971509971509972, "grad_norm": 0.13870969414710999, "learning_rate": 4.978877393019052e-05, "loss": 0.8817, "step": 1402 }, { "epoch": 1.5982905982905984, "grad_norm": 0.12623338401317596, "learning_rate": 4.978816912112708e-05, "loss": 0.9319, "step": 1403 }, { "epoch": 1.5994301994301994, "grad_norm": 0.14531861245632172, "learning_rate": 4.978756345110236e-05, "loss": 0.8492, "step": 1404 }, { "epoch": 1.6005698005698006, "grad_norm": 0.17729686200618744, "learning_rate": 4.978695692013738e-05, "loss": 0.7516, "step": 1405 }, { "epoch": 1.6017094017094018, "grad_norm": 0.15838603675365448, "learning_rate": 4.978634952825324e-05, "loss": 0.7702, "step": 1406 }, { "epoch": 1.6028490028490028, "grad_norm": 0.1507592350244522, "learning_rate": 4.978574127547101e-05, "loss": 0.9203, "step": 1407 }, { "epoch": 1.603988603988604, "grad_norm": 0.14368009567260742, "learning_rate": 4.978513216181182e-05, "loss": 0.8303, "step": 1408 }, { "epoch": 1.6051282051282052, "grad_norm": 0.16003654897212982, "learning_rate": 4.978452218729683e-05, "loss": 0.8414, "step": 1409 }, { "epoch": 1.6062678062678062, "grad_norm": 0.17049500346183777, "learning_rate": 4.978391135194724e-05, "loss": 0.8198, "step": 1410 }, { "epoch": 1.6074074074074074, "grad_norm": 0.1413111835718155, "learning_rate": 4.978329965578424e-05, "loss": 0.9623, "step": 1411 }, { "epoch": 1.6085470085470086, "grad_norm": 0.13809160888195038, "learning_rate": 4.978268709882909e-05, "loss": 0.9943, "step": 1412 }, { "epoch": 1.6096866096866096, "grad_norm": 0.14002510905265808, "learning_rate": 4.978207368110307e-05, "loss": 0.9165, "step": 1413 }, { "epoch": 1.6108262108262108, "grad_norm": 0.14344514906406403, "learning_rate": 4.978145940262748e-05, "loss": 0.8642, "step": 1414 }, { "epoch": 1.611965811965812, "grad_norm": 0.13317260146141052, "learning_rate": 4.978084426342365e-05, "loss": 0.9819, "step": 1415 }, { "epoch": 1.613105413105413, "grad_norm": 0.1387137621641159, "learning_rate": 4.978022826351296e-05, "loss": 0.8717, "step": 1416 }, { "epoch": 1.6142450142450142, "grad_norm": 0.12956343591213226, "learning_rate": 4.9779611402916785e-05, "loss": 0.9787, "step": 1417 }, { "epoch": 1.6153846153846154, "grad_norm": 0.1359727382659912, "learning_rate": 4.9778993681656574e-05, "loss": 0.8508, "step": 1418 }, { "epoch": 1.6165242165242164, "grad_norm": 0.1506776362657547, "learning_rate": 4.977837509975376e-05, "loss": 0.8008, "step": 1419 }, { "epoch": 1.6176638176638176, "grad_norm": 0.14830438792705536, "learning_rate": 4.977775565722984e-05, "loss": 0.8358, "step": 1420 }, { "epoch": 1.6188034188034188, "grad_norm": 0.14233633875846863, "learning_rate": 4.977713535410633e-05, "loss": 0.8087, "step": 1421 }, { "epoch": 1.6199430199430198, "grad_norm": 0.13242223858833313, "learning_rate": 4.9776514190404765e-05, "loss": 1.0834, "step": 1422 }, { "epoch": 1.6210826210826212, "grad_norm": 0.1396029144525528, "learning_rate": 4.977589216614673e-05, "loss": 0.925, "step": 1423 }, { "epoch": 1.6222222222222222, "grad_norm": 0.16417278349399567, "learning_rate": 4.977526928135383e-05, "loss": 0.8083, "step": 1424 }, { "epoch": 1.6233618233618232, "grad_norm": 0.13418695330619812, "learning_rate": 4.977464553604769e-05, "loss": 0.9235, "step": 1425 }, { "epoch": 1.6245014245014247, "grad_norm": 0.13519521057605743, "learning_rate": 4.977402093024999e-05, "loss": 0.7752, "step": 1426 }, { "epoch": 1.6256410256410256, "grad_norm": 0.134214386343956, "learning_rate": 4.977339546398241e-05, "loss": 0.9945, "step": 1427 }, { "epoch": 1.6267806267806266, "grad_norm": 0.1455167531967163, "learning_rate": 4.977276913726667e-05, "loss": 0.847, "step": 1428 }, { "epoch": 1.627920227920228, "grad_norm": 0.13003487884998322, "learning_rate": 4.977214195012454e-05, "loss": 0.8431, "step": 1429 }, { "epoch": 1.629059829059829, "grad_norm": 0.15458545088768005, "learning_rate": 4.9771513902577795e-05, "loss": 0.9582, "step": 1430 }, { "epoch": 1.63019943019943, "grad_norm": 0.12355898320674896, "learning_rate": 4.977088499464825e-05, "loss": 0.9157, "step": 1431 }, { "epoch": 1.6313390313390315, "grad_norm": 0.128941148519516, "learning_rate": 4.977025522635775e-05, "loss": 0.9398, "step": 1432 }, { "epoch": 1.6324786324786325, "grad_norm": 0.16159027814865112, "learning_rate": 4.9769624597728163e-05, "loss": 0.8, "step": 1433 }, { "epoch": 1.6336182336182337, "grad_norm": 0.15231887996196747, "learning_rate": 4.9768993108781404e-05, "loss": 0.8523, "step": 1434 }, { "epoch": 1.6347578347578349, "grad_norm": 0.148428276181221, "learning_rate": 4.97683607595394e-05, "loss": 0.913, "step": 1435 }, { "epoch": 1.6358974358974359, "grad_norm": 0.15839561820030212, "learning_rate": 4.97677275500241e-05, "loss": 0.8127, "step": 1436 }, { "epoch": 1.637037037037037, "grad_norm": 0.1219085156917572, "learning_rate": 4.976709348025752e-05, "loss": 0.9864, "step": 1437 }, { "epoch": 1.6381766381766383, "grad_norm": 0.1593482494354248, "learning_rate": 4.9766458550261677e-05, "loss": 0.8611, "step": 1438 }, { "epoch": 1.6393162393162393, "grad_norm": 0.1455330103635788, "learning_rate": 4.9765822760058626e-05, "loss": 0.9057, "step": 1439 }, { "epoch": 1.6404558404558405, "grad_norm": 0.13011865317821503, "learning_rate": 4.976518610967043e-05, "loss": 0.9973, "step": 1440 }, { "epoch": 1.6415954415954417, "grad_norm": 0.14249815046787262, "learning_rate": 4.976454859911922e-05, "loss": 1.0116, "step": 1441 }, { "epoch": 1.6427350427350427, "grad_norm": 0.1431078016757965, "learning_rate": 4.9763910228427134e-05, "loss": 0.8409, "step": 1442 }, { "epoch": 1.6438746438746439, "grad_norm": 0.1402430385351181, "learning_rate": 4.9763270997616353e-05, "loss": 1.044, "step": 1443 }, { "epoch": 1.645014245014245, "grad_norm": 0.12596355378627777, "learning_rate": 4.976263090670906e-05, "loss": 0.9395, "step": 1444 }, { "epoch": 1.646153846153846, "grad_norm": 0.13879600167274475, "learning_rate": 4.976198995572751e-05, "loss": 0.9572, "step": 1445 }, { "epoch": 1.6472934472934473, "grad_norm": 0.14126229286193848, "learning_rate": 4.9761348144693934e-05, "loss": 0.9924, "step": 1446 }, { "epoch": 1.6484330484330485, "grad_norm": 0.15346400439739227, "learning_rate": 4.9760705473630656e-05, "loss": 0.9316, "step": 1447 }, { "epoch": 1.6495726495726495, "grad_norm": 0.1339617669582367, "learning_rate": 4.9760061942559986e-05, "loss": 1.0002, "step": 1448 }, { "epoch": 1.6507122507122507, "grad_norm": 0.1472388356924057, "learning_rate": 4.975941755150427e-05, "loss": 0.8531, "step": 1449 }, { "epoch": 1.651851851851852, "grad_norm": 0.13572220504283905, "learning_rate": 4.97587723004859e-05, "loss": 0.7862, "step": 1450 }, { "epoch": 1.652991452991453, "grad_norm": 0.138021320104599, "learning_rate": 4.975812618952728e-05, "loss": 0.9538, "step": 1451 }, { "epoch": 1.654131054131054, "grad_norm": 0.15782691538333893, "learning_rate": 4.9757479218650844e-05, "loss": 0.9394, "step": 1452 }, { "epoch": 1.6552706552706553, "grad_norm": 0.1449262648820877, "learning_rate": 4.9756831387879076e-05, "loss": 1.0067, "step": 1453 }, { "epoch": 1.6564102564102563, "grad_norm": 0.16112785041332245, "learning_rate": 4.975618269723447e-05, "loss": 0.8507, "step": 1454 }, { "epoch": 1.6575498575498575, "grad_norm": 0.14953143894672394, "learning_rate": 4.9755533146739566e-05, "loss": 0.7642, "step": 1455 }, { "epoch": 1.6586894586894587, "grad_norm": 0.14783072471618652, "learning_rate": 4.9754882736416907e-05, "loss": 0.8976, "step": 1456 }, { "epoch": 1.6598290598290597, "grad_norm": 0.13446880877017975, "learning_rate": 4.97542314662891e-05, "loss": 0.9203, "step": 1457 }, { "epoch": 1.660968660968661, "grad_norm": 0.13042917847633362, "learning_rate": 4.9753579336378766e-05, "loss": 0.9719, "step": 1458 }, { "epoch": 1.6621082621082621, "grad_norm": 0.1293899416923523, "learning_rate": 4.9752926346708536e-05, "loss": 0.9435, "step": 1459 }, { "epoch": 1.6632478632478631, "grad_norm": 0.12211312353610992, "learning_rate": 4.9752272497301125e-05, "loss": 0.9838, "step": 1460 }, { "epoch": 1.6643874643874645, "grad_norm": 0.1375221461057663, "learning_rate": 4.97516177881792e-05, "loss": 1.0317, "step": 1461 }, { "epoch": 1.6655270655270655, "grad_norm": 0.1523962765932083, "learning_rate": 4.975096221936553e-05, "loss": 0.9513, "step": 1462 }, { "epoch": 1.6666666666666665, "grad_norm": 0.14095567166805267, "learning_rate": 4.975030579088288e-05, "loss": 0.9058, "step": 1463 }, { "epoch": 1.667806267806268, "grad_norm": 0.17985379695892334, "learning_rate": 4.9749648502754034e-05, "loss": 0.693, "step": 1464 }, { "epoch": 1.668945868945869, "grad_norm": 0.15648557245731354, "learning_rate": 4.974899035500184e-05, "loss": 0.8612, "step": 1465 }, { "epoch": 1.67008547008547, "grad_norm": 0.1567339301109314, "learning_rate": 4.974833134764916e-05, "loss": 0.8893, "step": 1466 }, { "epoch": 1.6712250712250714, "grad_norm": 0.13487927615642548, "learning_rate": 4.9747671480718864e-05, "loss": 0.8726, "step": 1467 }, { "epoch": 1.6723646723646723, "grad_norm": 0.15049050748348236, "learning_rate": 4.9747010754233885e-05, "loss": 1.0609, "step": 1468 }, { "epoch": 1.6735042735042736, "grad_norm": 0.1330130249261856, "learning_rate": 4.974634916821717e-05, "loss": 0.8958, "step": 1469 }, { "epoch": 1.6746438746438748, "grad_norm": 0.13046075403690338, "learning_rate": 4.9745686722691684e-05, "loss": 1.0877, "step": 1470 }, { "epoch": 1.6757834757834758, "grad_norm": 0.1364555060863495, "learning_rate": 4.974502341768046e-05, "loss": 0.8551, "step": 1471 }, { "epoch": 1.676923076923077, "grad_norm": 0.1636875867843628, "learning_rate": 4.974435925320652e-05, "loss": 0.9152, "step": 1472 }, { "epoch": 1.6780626780626782, "grad_norm": 0.1247587502002716, "learning_rate": 4.9743694229292937e-05, "loss": 0.8902, "step": 1473 }, { "epoch": 1.6792022792022792, "grad_norm": 0.13861720263957977, "learning_rate": 4.9743028345962806e-05, "loss": 0.8972, "step": 1474 }, { "epoch": 1.6803418803418804, "grad_norm": 0.16217702627182007, "learning_rate": 4.974236160323926e-05, "loss": 0.7423, "step": 1475 }, { "epoch": 1.6814814814814816, "grad_norm": 0.1363646239042282, "learning_rate": 4.974169400114545e-05, "loss": 0.8792, "step": 1476 }, { "epoch": 1.6826210826210826, "grad_norm": 0.12162560224533081, "learning_rate": 4.9741025539704564e-05, "loss": 1.0527, "step": 1477 }, { "epoch": 1.6837606837606838, "grad_norm": 0.14252440631389618, "learning_rate": 4.974035621893983e-05, "loss": 0.7489, "step": 1478 }, { "epoch": 1.684900284900285, "grad_norm": 0.1340060830116272, "learning_rate": 4.973968603887449e-05, "loss": 0.9702, "step": 1479 }, { "epoch": 1.686039886039886, "grad_norm": 0.12684713304042816, "learning_rate": 4.9739014999531815e-05, "loss": 0.8407, "step": 1480 }, { "epoch": 1.6871794871794872, "grad_norm": 0.17233425378799438, "learning_rate": 4.973834310093511e-05, "loss": 0.7636, "step": 1481 }, { "epoch": 1.6883190883190884, "grad_norm": 0.14882443845272064, "learning_rate": 4.9737670343107725e-05, "loss": 0.809, "step": 1482 }, { "epoch": 1.6894586894586894, "grad_norm": 0.1377968043088913, "learning_rate": 4.973699672607303e-05, "loss": 0.8908, "step": 1483 }, { "epoch": 1.6905982905982906, "grad_norm": 0.1356973797082901, "learning_rate": 4.97363222498544e-05, "loss": 0.9429, "step": 1484 }, { "epoch": 1.6917378917378918, "grad_norm": 0.14733992516994476, "learning_rate": 4.9735646914475274e-05, "loss": 0.9215, "step": 1485 }, { "epoch": 1.6928774928774928, "grad_norm": 0.16658036410808563, "learning_rate": 4.973497071995911e-05, "loss": 0.8268, "step": 1486 }, { "epoch": 1.694017094017094, "grad_norm": 0.1438906341791153, "learning_rate": 4.973429366632939e-05, "loss": 0.7811, "step": 1487 }, { "epoch": 1.6951566951566952, "grad_norm": 0.1501467376947403, "learning_rate": 4.973361575360963e-05, "loss": 0.8765, "step": 1488 }, { "epoch": 1.6962962962962962, "grad_norm": 0.15371835231781006, "learning_rate": 4.973293698182337e-05, "loss": 0.8061, "step": 1489 }, { "epoch": 1.6974358974358974, "grad_norm": 0.14565691351890564, "learning_rate": 4.9732257350994204e-05, "loss": 0.9119, "step": 1490 }, { "epoch": 1.6985754985754986, "grad_norm": 0.14003288745880127, "learning_rate": 4.973157686114572e-05, "loss": 0.9315, "step": 1491 }, { "epoch": 1.6997150997150996, "grad_norm": 0.1632835865020752, "learning_rate": 4.973089551230157e-05, "loss": 0.8413, "step": 1492 }, { "epoch": 1.7008547008547008, "grad_norm": 0.13976827263832092, "learning_rate": 4.97302133044854e-05, "loss": 0.7823, "step": 1493 }, { "epoch": 1.701994301994302, "grad_norm": 0.13804538547992706, "learning_rate": 4.9729530237720916e-05, "loss": 0.8166, "step": 1494 }, { "epoch": 1.703133903133903, "grad_norm": 0.14704765379428864, "learning_rate": 4.9728846312031843e-05, "loss": 0.7624, "step": 1495 }, { "epoch": 1.7042735042735044, "grad_norm": 0.13788385689258575, "learning_rate": 4.972816152744193e-05, "loss": 0.9321, "step": 1496 }, { "epoch": 1.7054131054131054, "grad_norm": 0.16037115454673767, "learning_rate": 4.972747588397496e-05, "loss": 0.7737, "step": 1497 }, { "epoch": 1.7065527065527064, "grad_norm": 0.15963195264339447, "learning_rate": 4.972678938165476e-05, "loss": 0.7614, "step": 1498 }, { "epoch": 1.7076923076923078, "grad_norm": 0.14792628586292267, "learning_rate": 4.972610202050516e-05, "loss": 0.8507, "step": 1499 }, { "epoch": 1.7088319088319088, "grad_norm": 0.1442280262708664, "learning_rate": 4.972541380055004e-05, "loss": 0.9073, "step": 1500 }, { "epoch": 1.7099715099715098, "grad_norm": 0.1593361496925354, "learning_rate": 4.97247247218133e-05, "loss": 0.7846, "step": 1501 }, { "epoch": 1.7111111111111112, "grad_norm": 0.13493263721466064, "learning_rate": 4.9724034784318885e-05, "loss": 0.7777, "step": 1502 }, { "epoch": 1.7122507122507122, "grad_norm": 0.14213791489601135, "learning_rate": 4.9723343988090757e-05, "loss": 0.9026, "step": 1503 }, { "epoch": 1.7133903133903132, "grad_norm": 0.1394391804933548, "learning_rate": 4.9722652333152884e-05, "loss": 1.0557, "step": 1504 }, { "epoch": 1.7145299145299147, "grad_norm": 0.15132486820220947, "learning_rate": 4.972195981952932e-05, "loss": 0.8624, "step": 1505 }, { "epoch": 1.7156695156695156, "grad_norm": 0.14266149699687958, "learning_rate": 4.9721266447244104e-05, "loss": 0.9068, "step": 1506 }, { "epoch": 1.7168091168091169, "grad_norm": 0.12187226116657257, "learning_rate": 4.9720572216321315e-05, "loss": 1.0773, "step": 1507 }, { "epoch": 1.717948717948718, "grad_norm": 0.11867115646600723, "learning_rate": 4.971987712678508e-05, "loss": 0.9849, "step": 1508 }, { "epoch": 1.719088319088319, "grad_norm": 0.11881640553474426, "learning_rate": 4.971918117865953e-05, "loss": 0.9703, "step": 1509 }, { "epoch": 1.7202279202279203, "grad_norm": 0.14487901329994202, "learning_rate": 4.971848437196884e-05, "loss": 0.9653, "step": 1510 }, { "epoch": 1.7213675213675215, "grad_norm": 0.1553168147802353, "learning_rate": 4.971778670673721e-05, "loss": 0.835, "step": 1511 }, { "epoch": 1.7225071225071225, "grad_norm": 0.14130182564258575, "learning_rate": 4.971708818298887e-05, "loss": 0.9759, "step": 1512 }, { "epoch": 1.7236467236467237, "grad_norm": 0.15721654891967773, "learning_rate": 4.971638880074809e-05, "loss": 0.9303, "step": 1513 }, { "epoch": 1.7247863247863249, "grad_norm": 0.13313810527324677, "learning_rate": 4.971568856003915e-05, "loss": 0.8846, "step": 1514 }, { "epoch": 1.7259259259259259, "grad_norm": 0.14392173290252686, "learning_rate": 4.971498746088639e-05, "loss": 0.9086, "step": 1515 }, { "epoch": 1.727065527065527, "grad_norm": 0.13961420953273773, "learning_rate": 4.971428550331414e-05, "loss": 0.8449, "step": 1516 }, { "epoch": 1.7282051282051283, "grad_norm": 0.12486325949430466, "learning_rate": 4.971358268734679e-05, "loss": 1.0756, "step": 1517 }, { "epoch": 1.7293447293447293, "grad_norm": 0.136132150888443, "learning_rate": 4.971287901300875e-05, "loss": 0.9214, "step": 1518 }, { "epoch": 1.7304843304843305, "grad_norm": 0.12653079628944397, "learning_rate": 4.971217448032447e-05, "loss": 0.9272, "step": 1519 }, { "epoch": 1.7316239316239317, "grad_norm": 0.1390490084886551, "learning_rate": 4.97114690893184e-05, "loss": 0.9833, "step": 1520 }, { "epoch": 1.7327635327635327, "grad_norm": 0.1536482274532318, "learning_rate": 4.971076284001506e-05, "loss": 0.783, "step": 1521 }, { "epoch": 1.7339031339031339, "grad_norm": 0.1895057111978531, "learning_rate": 4.9710055732438965e-05, "loss": 0.9372, "step": 1522 }, { "epoch": 1.735042735042735, "grad_norm": 0.13091160356998444, "learning_rate": 4.970934776661469e-05, "loss": 0.7417, "step": 1523 }, { "epoch": 1.736182336182336, "grad_norm": 0.1424616128206253, "learning_rate": 4.970863894256681e-05, "loss": 0.9495, "step": 1524 }, { "epoch": 1.7373219373219373, "grad_norm": 0.14544494450092316, "learning_rate": 4.9707929260319955e-05, "loss": 1.0189, "step": 1525 }, { "epoch": 1.7384615384615385, "grad_norm": 0.1325811892747879, "learning_rate": 4.9707218719898763e-05, "loss": 0.8846, "step": 1526 }, { "epoch": 1.7396011396011395, "grad_norm": 0.121336929500103, "learning_rate": 4.970650732132792e-05, "loss": 0.986, "step": 1527 }, { "epoch": 1.7407407407407407, "grad_norm": 0.15782444179058075, "learning_rate": 4.970579506463214e-05, "loss": 0.8415, "step": 1528 }, { "epoch": 1.741880341880342, "grad_norm": 0.14497943222522736, "learning_rate": 4.970508194983615e-05, "loss": 0.7838, "step": 1529 }, { "epoch": 1.743019943019943, "grad_norm": 0.14629590511322021, "learning_rate": 4.9704367976964726e-05, "loss": 0.9586, "step": 1530 }, { "epoch": 1.744159544159544, "grad_norm": 0.14639532566070557, "learning_rate": 4.970365314604267e-05, "loss": 0.7824, "step": 1531 }, { "epoch": 1.7452991452991453, "grad_norm": 0.1332532912492752, "learning_rate": 4.970293745709481e-05, "loss": 1.0232, "step": 1532 }, { "epoch": 1.7464387464387463, "grad_norm": 0.15766581892967224, "learning_rate": 4.970222091014598e-05, "loss": 0.9202, "step": 1533 }, { "epoch": 1.7475783475783477, "grad_norm": 0.13730032742023468, "learning_rate": 4.97015035052211e-05, "loss": 1.0742, "step": 1534 }, { "epoch": 1.7487179487179487, "grad_norm": 0.14459902048110962, "learning_rate": 4.9700785242345073e-05, "loss": 0.9288, "step": 1535 }, { "epoch": 1.7498575498575497, "grad_norm": 0.16486337780952454, "learning_rate": 4.970006612154285e-05, "loss": 0.7792, "step": 1536 }, { "epoch": 1.7509971509971511, "grad_norm": 0.14227153360843658, "learning_rate": 4.96993461428394e-05, "loss": 0.9193, "step": 1537 }, { "epoch": 1.7521367521367521, "grad_norm": 0.13985908031463623, "learning_rate": 4.969862530625974e-05, "loss": 0.8893, "step": 1538 }, { "epoch": 1.7532763532763531, "grad_norm": 0.1515476256608963, "learning_rate": 4.969790361182889e-05, "loss": 1.0429, "step": 1539 }, { "epoch": 1.7544159544159545, "grad_norm": 0.14032472670078278, "learning_rate": 4.969718105957194e-05, "loss": 0.9065, "step": 1540 }, { "epoch": 1.7555555555555555, "grad_norm": 0.1442209780216217, "learning_rate": 4.969645764951397e-05, "loss": 0.8309, "step": 1541 }, { "epoch": 1.7566951566951567, "grad_norm": 0.18346865475177765, "learning_rate": 4.969573338168011e-05, "loss": 0.8036, "step": 1542 }, { "epoch": 1.757834757834758, "grad_norm": 0.1414286494255066, "learning_rate": 4.969500825609551e-05, "loss": 0.8717, "step": 1543 }, { "epoch": 1.758974358974359, "grad_norm": 0.14327985048294067, "learning_rate": 4.9694282272785375e-05, "loss": 0.969, "step": 1544 }, { "epoch": 1.7601139601139602, "grad_norm": 0.15827243030071259, "learning_rate": 4.9693555431774894e-05, "loss": 0.7073, "step": 1545 }, { "epoch": 1.7612535612535614, "grad_norm": 0.1817478984594345, "learning_rate": 4.969282773308933e-05, "loss": 0.6668, "step": 1546 }, { "epoch": 1.7623931623931623, "grad_norm": 0.13845016062259674, "learning_rate": 4.969209917675396e-05, "loss": 0.8722, "step": 1547 }, { "epoch": 1.7635327635327636, "grad_norm": 0.1262572705745697, "learning_rate": 4.969136976279407e-05, "loss": 0.9971, "step": 1548 }, { "epoch": 1.7646723646723648, "grad_norm": 0.14393088221549988, "learning_rate": 4.969063949123502e-05, "loss": 0.8779, "step": 1549 }, { "epoch": 1.7658119658119658, "grad_norm": 0.15604454278945923, "learning_rate": 4.9689908362102155e-05, "loss": 0.8217, "step": 1550 }, { "epoch": 1.766951566951567, "grad_norm": 0.135702446103096, "learning_rate": 4.968917637542087e-05, "loss": 0.9464, "step": 1551 }, { "epoch": 1.7680911680911682, "grad_norm": 0.12812857329845428, "learning_rate": 4.96884435312166e-05, "loss": 1.0318, "step": 1552 }, { "epoch": 1.7692307692307692, "grad_norm": 0.12961755692958832, "learning_rate": 4.9687709829514795e-05, "loss": 0.9367, "step": 1553 }, { "epoch": 1.7703703703703704, "grad_norm": 0.1435171514749527, "learning_rate": 4.968697527034093e-05, "loss": 0.7982, "step": 1554 }, { "epoch": 1.7715099715099716, "grad_norm": 0.12760575115680695, "learning_rate": 4.968623985372053e-05, "loss": 0.9388, "step": 1555 }, { "epoch": 1.7726495726495726, "grad_norm": 0.1674080789089203, "learning_rate": 4.9685503579679126e-05, "loss": 0.8086, "step": 1556 }, { "epoch": 1.7737891737891738, "grad_norm": 0.13285134732723236, "learning_rate": 4.9684766448242296e-05, "loss": 0.9046, "step": 1557 }, { "epoch": 1.774928774928775, "grad_norm": 0.1310773491859436, "learning_rate": 4.968402845943565e-05, "loss": 0.9464, "step": 1558 }, { "epoch": 1.776068376068376, "grad_norm": 0.1309703141450882, "learning_rate": 4.968328961328481e-05, "loss": 0.9149, "step": 1559 }, { "epoch": 1.7772079772079772, "grad_norm": 0.1680506020784378, "learning_rate": 4.968254990981545e-05, "loss": 0.8001, "step": 1560 }, { "epoch": 1.7783475783475784, "grad_norm": 0.15925845503807068, "learning_rate": 4.9681809349053245e-05, "loss": 0.7968, "step": 1561 }, { "epoch": 1.7794871794871794, "grad_norm": 0.14886683225631714, "learning_rate": 4.968106793102393e-05, "loss": 0.9504, "step": 1562 }, { "epoch": 1.7806267806267806, "grad_norm": 0.12897604703903198, "learning_rate": 4.968032565575326e-05, "loss": 0.9228, "step": 1563 }, { "epoch": 1.7817663817663818, "grad_norm": 0.1292092353105545, "learning_rate": 4.9679582523267e-05, "loss": 0.8266, "step": 1564 }, { "epoch": 1.7829059829059828, "grad_norm": 0.1368887573480606, "learning_rate": 4.967883853359097e-05, "loss": 0.8988, "step": 1565 }, { "epoch": 1.784045584045584, "grad_norm": 0.15507234632968903, "learning_rate": 4.967809368675101e-05, "loss": 0.8321, "step": 1566 }, { "epoch": 1.7851851851851852, "grad_norm": 0.15650178492069244, "learning_rate": 4.9677347982772994e-05, "loss": 0.8363, "step": 1567 }, { "epoch": 1.7863247863247862, "grad_norm": 0.16102257370948792, "learning_rate": 4.9676601421682824e-05, "loss": 0.7468, "step": 1568 }, { "epoch": 1.7874643874643876, "grad_norm": 0.1269504427909851, "learning_rate": 4.9675854003506416e-05, "loss": 0.8805, "step": 1569 }, { "epoch": 1.7886039886039886, "grad_norm": 0.13563477993011475, "learning_rate": 4.967510572826974e-05, "loss": 1.0542, "step": 1570 }, { "epoch": 1.7897435897435896, "grad_norm": 0.14412344992160797, "learning_rate": 4.9674356595998795e-05, "loss": 0.8856, "step": 1571 }, { "epoch": 1.790883190883191, "grad_norm": 0.1500939428806305, "learning_rate": 4.967360660671958e-05, "loss": 0.8538, "step": 1572 }, { "epoch": 1.792022792022792, "grad_norm": 0.14257724583148956, "learning_rate": 4.9672855760458164e-05, "loss": 0.8154, "step": 1573 }, { "epoch": 1.793162393162393, "grad_norm": 0.15269680321216583, "learning_rate": 4.967210405724061e-05, "loss": 0.9729, "step": 1574 }, { "epoch": 1.7943019943019944, "grad_norm": 0.14685091376304626, "learning_rate": 4.967135149709304e-05, "loss": 0.7296, "step": 1575 }, { "epoch": 1.7954415954415954, "grad_norm": 0.13171981275081635, "learning_rate": 4.9670598080041584e-05, "loss": 0.7851, "step": 1576 }, { "epoch": 1.7965811965811964, "grad_norm": 0.13896337151527405, "learning_rate": 4.966984380611241e-05, "loss": 0.9284, "step": 1577 }, { "epoch": 1.7977207977207978, "grad_norm": 0.15201911330223083, "learning_rate": 4.966908867533172e-05, "loss": 0.8057, "step": 1578 }, { "epoch": 1.7988603988603988, "grad_norm": 0.1413690596818924, "learning_rate": 4.9668332687725736e-05, "loss": 0.9971, "step": 1579 }, { "epoch": 1.8, "grad_norm": 0.13832518458366394, "learning_rate": 4.9667575843320724e-05, "loss": 0.9877, "step": 1580 }, { "epoch": 1.8011396011396013, "grad_norm": 0.12974989414215088, "learning_rate": 4.966681814214297e-05, "loss": 0.8919, "step": 1581 }, { "epoch": 1.8022792022792022, "grad_norm": 0.1519487351179123, "learning_rate": 4.966605958421878e-05, "loss": 0.8808, "step": 1582 }, { "epoch": 1.8034188034188035, "grad_norm": 0.1690819263458252, "learning_rate": 4.966530016957451e-05, "loss": 0.8406, "step": 1583 }, { "epoch": 1.8045584045584047, "grad_norm": 0.12503588199615479, "learning_rate": 4.966453989823654e-05, "loss": 1.0044, "step": 1584 }, { "epoch": 1.8056980056980056, "grad_norm": 0.14780542254447937, "learning_rate": 4.966377877023127e-05, "loss": 0.9115, "step": 1585 }, { "epoch": 1.8068376068376069, "grad_norm": 0.16675999760627747, "learning_rate": 4.966301678558514e-05, "loss": 0.7534, "step": 1586 }, { "epoch": 1.807977207977208, "grad_norm": 0.1519392728805542, "learning_rate": 4.966225394432461e-05, "loss": 0.7184, "step": 1587 }, { "epoch": 1.809116809116809, "grad_norm": 0.12484191358089447, "learning_rate": 4.966149024647617e-05, "loss": 0.9699, "step": 1588 }, { "epoch": 1.8102564102564103, "grad_norm": 0.132111594080925, "learning_rate": 4.966072569206637e-05, "loss": 0.9271, "step": 1589 }, { "epoch": 1.8113960113960115, "grad_norm": 0.12683501839637756, "learning_rate": 4.9659960281121744e-05, "loss": 0.9114, "step": 1590 }, { "epoch": 1.8125356125356125, "grad_norm": 0.10913930833339691, "learning_rate": 4.965919401366887e-05, "loss": 0.9092, "step": 1591 }, { "epoch": 1.8136752136752137, "grad_norm": 0.18790248036384583, "learning_rate": 4.9658426889734395e-05, "loss": 0.693, "step": 1592 }, { "epoch": 1.8148148148148149, "grad_norm": 0.14741839468479156, "learning_rate": 4.9657658909344935e-05, "loss": 0.7415, "step": 1593 }, { "epoch": 1.8159544159544159, "grad_norm": 0.12909571826457977, "learning_rate": 4.965689007252717e-05, "loss": 0.9465, "step": 1594 }, { "epoch": 1.817094017094017, "grad_norm": 0.12723930180072784, "learning_rate": 4.965612037930781e-05, "loss": 0.9923, "step": 1595 }, { "epoch": 1.8182336182336183, "grad_norm": 0.12213914096355438, "learning_rate": 4.965534982971358e-05, "loss": 1.0347, "step": 1596 }, { "epoch": 1.8193732193732193, "grad_norm": 0.14634861052036285, "learning_rate": 4.9654578423771245e-05, "loss": 0.9588, "step": 1597 }, { "epoch": 1.8205128205128205, "grad_norm": 0.13677772879600525, "learning_rate": 4.965380616150761e-05, "loss": 0.8066, "step": 1598 }, { "epoch": 1.8216524216524217, "grad_norm": 0.14883291721343994, "learning_rate": 4.965303304294949e-05, "loss": 0.6922, "step": 1599 }, { "epoch": 1.8227920227920227, "grad_norm": 0.1348157674074173, "learning_rate": 4.965225906812373e-05, "loss": 0.8031, "step": 1600 }, { "epoch": 1.823931623931624, "grad_norm": 0.13602852821350098, "learning_rate": 4.965148423705722e-05, "loss": 0.81, "step": 1601 }, { "epoch": 1.825071225071225, "grad_norm": 0.14438122510910034, "learning_rate": 4.965070854977687e-05, "loss": 0.8611, "step": 1602 }, { "epoch": 1.826210826210826, "grad_norm": 0.14058946073055267, "learning_rate": 4.964993200630962e-05, "loss": 1.0343, "step": 1603 }, { "epoch": 1.8273504273504273, "grad_norm": 0.1758558750152588, "learning_rate": 4.964915460668245e-05, "loss": 0.7498, "step": 1604 }, { "epoch": 1.8284900284900285, "grad_norm": 0.12693573534488678, "learning_rate": 4.964837635092235e-05, "loss": 0.7828, "step": 1605 }, { "epoch": 1.8296296296296295, "grad_norm": 0.1466381549835205, "learning_rate": 4.964759723905636e-05, "loss": 0.7194, "step": 1606 }, { "epoch": 1.830769230769231, "grad_norm": 0.12980800867080688, "learning_rate": 4.9646817271111537e-05, "loss": 0.9558, "step": 1607 }, { "epoch": 1.831908831908832, "grad_norm": 0.14446121454238892, "learning_rate": 4.9646036447114965e-05, "loss": 0.8896, "step": 1608 }, { "epoch": 1.833048433048433, "grad_norm": 0.16010549664497375, "learning_rate": 4.964525476709378e-05, "loss": 1.0108, "step": 1609 }, { "epoch": 1.8341880341880343, "grad_norm": 0.12889686226844788, "learning_rate": 4.9644472231075113e-05, "loss": 1.0092, "step": 1610 }, { "epoch": 1.8353276353276353, "grad_norm": 0.12488395720720291, "learning_rate": 4.964368883908615e-05, "loss": 0.9701, "step": 1611 }, { "epoch": 1.8364672364672363, "grad_norm": 0.13619735836982727, "learning_rate": 4.9642904591154116e-05, "loss": 0.9283, "step": 1612 }, { "epoch": 1.8376068376068377, "grad_norm": 0.1417708545923233, "learning_rate": 4.964211948730623e-05, "loss": 0.9195, "step": 1613 }, { "epoch": 1.8387464387464387, "grad_norm": 0.14256437122821808, "learning_rate": 4.964133352756977e-05, "loss": 1.0297, "step": 1614 }, { "epoch": 1.83988603988604, "grad_norm": 0.15539376437664032, "learning_rate": 4.9640546711972025e-05, "loss": 0.8273, "step": 1615 }, { "epoch": 1.8410256410256411, "grad_norm": 0.1441424936056137, "learning_rate": 4.963975904054034e-05, "loss": 0.8429, "step": 1616 }, { "epoch": 1.8421652421652421, "grad_norm": 0.142746239900589, "learning_rate": 4.963897051330206e-05, "loss": 0.7521, "step": 1617 }, { "epoch": 1.8433048433048433, "grad_norm": 0.1612187772989273, "learning_rate": 4.963818113028458e-05, "loss": 0.8598, "step": 1618 }, { "epoch": 1.8444444444444446, "grad_norm": 0.12856446206569672, "learning_rate": 4.963739089151531e-05, "loss": 1.0078, "step": 1619 }, { "epoch": 1.8455840455840455, "grad_norm": 0.1801602989435196, "learning_rate": 4.9636599797021705e-05, "loss": 0.7987, "step": 1620 }, { "epoch": 1.8467236467236468, "grad_norm": 0.14602583646774292, "learning_rate": 4.963580784683124e-05, "loss": 0.8743, "step": 1621 }, { "epoch": 1.847863247863248, "grad_norm": 0.12880298495292664, "learning_rate": 4.963501504097141e-05, "loss": 1.0759, "step": 1622 }, { "epoch": 1.849002849002849, "grad_norm": 0.1910523623228073, "learning_rate": 4.963422137946977e-05, "loss": 0.7669, "step": 1623 }, { "epoch": 1.8501424501424502, "grad_norm": 0.14027027785778046, "learning_rate": 4.963342686235387e-05, "loss": 0.7888, "step": 1624 }, { "epoch": 1.8512820512820514, "grad_norm": 0.1644585132598877, "learning_rate": 4.9632631489651323e-05, "loss": 1.0038, "step": 1625 }, { "epoch": 1.8524216524216524, "grad_norm": 0.14322388172149658, "learning_rate": 4.9631835261389736e-05, "loss": 0.8351, "step": 1626 }, { "epoch": 1.8535612535612536, "grad_norm": 0.1453019082546234, "learning_rate": 4.963103817759678e-05, "loss": 0.78, "step": 1627 }, { "epoch": 1.8547008547008548, "grad_norm": 0.1180596649646759, "learning_rate": 4.963024023830012e-05, "loss": 0.992, "step": 1628 }, { "epoch": 1.8558404558404558, "grad_norm": 0.15540020167827606, "learning_rate": 4.962944144352749e-05, "loss": 0.8806, "step": 1629 }, { "epoch": 1.856980056980057, "grad_norm": 0.15270529687404633, "learning_rate": 4.962864179330663e-05, "loss": 0.6976, "step": 1630 }, { "epoch": 1.8581196581196582, "grad_norm": 0.1299174427986145, "learning_rate": 4.962784128766531e-05, "loss": 1.0593, "step": 1631 }, { "epoch": 1.8592592592592592, "grad_norm": 0.11974635720252991, "learning_rate": 4.9627039926631335e-05, "loss": 0.8733, "step": 1632 }, { "epoch": 1.8603988603988604, "grad_norm": 0.12925906479358673, "learning_rate": 4.962623771023254e-05, "loss": 0.9378, "step": 1633 }, { "epoch": 1.8615384615384616, "grad_norm": 0.11761047691106796, "learning_rate": 4.962543463849679e-05, "loss": 0.9612, "step": 1634 }, { "epoch": 1.8626780626780626, "grad_norm": 0.14362961053848267, "learning_rate": 4.962463071145197e-05, "loss": 0.924, "step": 1635 }, { "epoch": 1.8638176638176638, "grad_norm": 0.11878333240747452, "learning_rate": 4.9623825929126004e-05, "loss": 1.0689, "step": 1636 }, { "epoch": 1.864957264957265, "grad_norm": 0.1377599686384201, "learning_rate": 4.9623020291546854e-05, "loss": 0.8006, "step": 1637 }, { "epoch": 1.866096866096866, "grad_norm": 0.15574805438518524, "learning_rate": 4.962221379874249e-05, "loss": 0.938, "step": 1638 }, { "epoch": 1.8672364672364672, "grad_norm": 0.1478899121284485, "learning_rate": 4.9621406450740935e-05, "loss": 0.984, "step": 1639 }, { "epoch": 1.8683760683760684, "grad_norm": 0.15283836424350739, "learning_rate": 4.962059824757022e-05, "loss": 0.7748, "step": 1640 }, { "epoch": 1.8695156695156694, "grad_norm": 0.18042221665382385, "learning_rate": 4.9619789189258425e-05, "loss": 0.8924, "step": 1641 }, { "epoch": 1.8706552706552708, "grad_norm": 0.15604403614997864, "learning_rate": 4.9618979275833646e-05, "loss": 0.8502, "step": 1642 }, { "epoch": 1.8717948717948718, "grad_norm": 0.13915975391864777, "learning_rate": 4.961816850732401e-05, "loss": 0.9339, "step": 1643 }, { "epoch": 1.8729344729344728, "grad_norm": 0.12358283251523972, "learning_rate": 4.961735688375769e-05, "loss": 0.8315, "step": 1644 }, { "epoch": 1.8740740740740742, "grad_norm": 0.16553658246994019, "learning_rate": 4.961654440516286e-05, "loss": 0.7598, "step": 1645 }, { "epoch": 1.8752136752136752, "grad_norm": 0.1350737363100052, "learning_rate": 4.961573107156775e-05, "loss": 0.9617, "step": 1646 }, { "epoch": 1.8763532763532762, "grad_norm": 0.13087335228919983, "learning_rate": 4.9614916883000604e-05, "loss": 1.0107, "step": 1647 }, { "epoch": 1.8774928774928776, "grad_norm": 0.14951574802398682, "learning_rate": 4.9614101839489705e-05, "loss": 0.8785, "step": 1648 }, { "epoch": 1.8786324786324786, "grad_norm": 0.15194663405418396, "learning_rate": 4.9613285941063356e-05, "loss": 0.8196, "step": 1649 }, { "epoch": 1.8797720797720796, "grad_norm": 0.13245616853237152, "learning_rate": 4.961246918774991e-05, "loss": 0.9034, "step": 1650 }, { "epoch": 1.880911680911681, "grad_norm": 0.132239431142807, "learning_rate": 4.9611651579577724e-05, "loss": 1.1342, "step": 1651 }, { "epoch": 1.882051282051282, "grad_norm": 0.1372898519039154, "learning_rate": 4.9610833116575186e-05, "loss": 0.8907, "step": 1652 }, { "epoch": 1.8831908831908832, "grad_norm": 0.1402471363544464, "learning_rate": 4.961001379877074e-05, "loss": 0.9022, "step": 1653 }, { "epoch": 1.8843304843304844, "grad_norm": 0.16995464265346527, "learning_rate": 4.960919362619284e-05, "loss": 0.8191, "step": 1654 }, { "epoch": 1.8854700854700854, "grad_norm": 0.13323241472244263, "learning_rate": 4.9608372598869954e-05, "loss": 0.808, "step": 1655 }, { "epoch": 1.8866096866096866, "grad_norm": 0.14184105396270752, "learning_rate": 4.960755071683063e-05, "loss": 1.0832, "step": 1656 }, { "epoch": 1.8877492877492879, "grad_norm": 0.12509635090827942, "learning_rate": 4.9606727980103396e-05, "loss": 0.9918, "step": 1657 }, { "epoch": 1.8888888888888888, "grad_norm": 0.1542125791311264, "learning_rate": 4.9605904388716826e-05, "loss": 0.8293, "step": 1658 }, { "epoch": 1.89002849002849, "grad_norm": 0.15746447443962097, "learning_rate": 4.960507994269953e-05, "loss": 0.9081, "step": 1659 }, { "epoch": 1.8911680911680913, "grad_norm": 0.12933428585529327, "learning_rate": 4.960425464208015e-05, "loss": 0.8952, "step": 1660 }, { "epoch": 1.8923076923076922, "grad_norm": 0.1908831000328064, "learning_rate": 4.9603428486887335e-05, "loss": 0.8618, "step": 1661 }, { "epoch": 1.8934472934472935, "grad_norm": 0.14690420031547546, "learning_rate": 4.96026014771498e-05, "loss": 0.8608, "step": 1662 }, { "epoch": 1.8945868945868947, "grad_norm": 0.14620721340179443, "learning_rate": 4.960177361289625e-05, "loss": 0.9887, "step": 1663 }, { "epoch": 1.8957264957264957, "grad_norm": 0.13965198397636414, "learning_rate": 4.960094489415545e-05, "loss": 0.9064, "step": 1664 }, { "epoch": 1.8968660968660969, "grad_norm": 0.13315075635910034, "learning_rate": 4.960011532095619e-05, "loss": 0.8957, "step": 1665 }, { "epoch": 1.898005698005698, "grad_norm": 0.15281794965267181, "learning_rate": 4.9599284893327256e-05, "loss": 0.9342, "step": 1666 }, { "epoch": 1.899145299145299, "grad_norm": 0.1320372372865677, "learning_rate": 4.9598453611297526e-05, "loss": 0.8346, "step": 1667 }, { "epoch": 1.9002849002849003, "grad_norm": 0.1560548096895218, "learning_rate": 4.9597621474895844e-05, "loss": 0.9415, "step": 1668 }, { "epoch": 1.9014245014245015, "grad_norm": 0.13182689249515533, "learning_rate": 4.959678848415113e-05, "loss": 0.8651, "step": 1669 }, { "epoch": 1.9025641025641025, "grad_norm": 0.1351374387741089, "learning_rate": 4.959595463909231e-05, "loss": 1.0177, "step": 1670 }, { "epoch": 1.9037037037037037, "grad_norm": 0.12456336617469788, "learning_rate": 4.959511993974835e-05, "loss": 0.905, "step": 1671 }, { "epoch": 1.9048433048433049, "grad_norm": 0.15296873450279236, "learning_rate": 4.959428438614823e-05, "loss": 0.8319, "step": 1672 }, { "epoch": 1.9059829059829059, "grad_norm": 0.14874428510665894, "learning_rate": 4.959344797832098e-05, "loss": 0.8952, "step": 1673 }, { "epoch": 1.907122507122507, "grad_norm": 0.1253022849559784, "learning_rate": 4.959261071629565e-05, "loss": 0.8939, "step": 1674 }, { "epoch": 1.9082621082621083, "grad_norm": 0.11710775643587112, "learning_rate": 4.959177260010133e-05, "loss": 0.8893, "step": 1675 }, { "epoch": 1.9094017094017093, "grad_norm": 0.14235085248947144, "learning_rate": 4.95909336297671e-05, "loss": 0.8521, "step": 1676 }, { "epoch": 1.9105413105413105, "grad_norm": 0.13734152913093567, "learning_rate": 4.959009380532214e-05, "loss": 0.8582, "step": 1677 }, { "epoch": 1.9116809116809117, "grad_norm": 0.12282492220401764, "learning_rate": 4.958925312679559e-05, "loss": 0.9459, "step": 1678 }, { "epoch": 1.9128205128205127, "grad_norm": 0.1440899670124054, "learning_rate": 4.9588411594216656e-05, "loss": 0.9223, "step": 1679 }, { "epoch": 1.9139601139601141, "grad_norm": 0.18352478742599487, "learning_rate": 4.958756920761457e-05, "loss": 0.7239, "step": 1680 }, { "epoch": 1.915099715099715, "grad_norm": 0.16605500876903534, "learning_rate": 4.958672596701859e-05, "loss": 0.71, "step": 1681 }, { "epoch": 1.916239316239316, "grad_norm": 0.16090312600135803, "learning_rate": 4.958588187245801e-05, "loss": 0.8402, "step": 1682 }, { "epoch": 1.9173789173789175, "grad_norm": 0.12660066783428192, "learning_rate": 4.958503692396214e-05, "loss": 0.9584, "step": 1683 }, { "epoch": 1.9185185185185185, "grad_norm": 0.13559545576572418, "learning_rate": 4.958419112156032e-05, "loss": 0.864, "step": 1684 }, { "epoch": 1.9196581196581195, "grad_norm": 0.1398681104183197, "learning_rate": 4.958334446528193e-05, "loss": 0.9285, "step": 1685 }, { "epoch": 1.920797720797721, "grad_norm": 0.1475391387939453, "learning_rate": 4.95824969551564e-05, "loss": 0.8058, "step": 1686 }, { "epoch": 1.921937321937322, "grad_norm": 0.12115340679883957, "learning_rate": 4.958164859121314e-05, "loss": 0.8492, "step": 1687 }, { "epoch": 1.9230769230769231, "grad_norm": 0.13039755821228027, "learning_rate": 4.958079937348162e-05, "loss": 1.043, "step": 1688 }, { "epoch": 1.9242165242165243, "grad_norm": 0.13124294579029083, "learning_rate": 4.9579949301991346e-05, "loss": 0.8335, "step": 1689 }, { "epoch": 1.9253561253561253, "grad_norm": 0.13981278240680695, "learning_rate": 4.957909837677184e-05, "loss": 0.8197, "step": 1690 }, { "epoch": 1.9264957264957265, "grad_norm": 0.1402364820241928, "learning_rate": 4.957824659785265e-05, "loss": 0.8195, "step": 1691 }, { "epoch": 1.9276353276353277, "grad_norm": 0.19058097898960114, "learning_rate": 4.957739396526337e-05, "loss": 0.7932, "step": 1692 }, { "epoch": 1.9287749287749287, "grad_norm": 0.15419264137744904, "learning_rate": 4.9576540479033606e-05, "loss": 0.7841, "step": 1693 }, { "epoch": 1.92991452991453, "grad_norm": 0.1365847885608673, "learning_rate": 4.9575686139193e-05, "loss": 1.0691, "step": 1694 }, { "epoch": 1.9310541310541312, "grad_norm": 0.1296461820602417, "learning_rate": 4.9574830945771246e-05, "loss": 0.9853, "step": 1695 }, { "epoch": 1.9321937321937321, "grad_norm": 0.15075461566448212, "learning_rate": 4.9573974898798016e-05, "loss": 0.7522, "step": 1696 }, { "epoch": 1.9333333333333333, "grad_norm": 0.16045859456062317, "learning_rate": 4.9573117998303065e-05, "loss": 0.7122, "step": 1697 }, { "epoch": 1.9344729344729346, "grad_norm": 0.1375320851802826, "learning_rate": 4.9572260244316154e-05, "loss": 0.9096, "step": 1698 }, { "epoch": 1.9356125356125355, "grad_norm": 0.13000398874282837, "learning_rate": 4.9571401636867065e-05, "loss": 0.8438, "step": 1699 }, { "epoch": 1.9367521367521368, "grad_norm": 0.12630487978458405, "learning_rate": 4.957054217598563e-05, "loss": 0.9253, "step": 1700 }, { "epoch": 1.937891737891738, "grad_norm": 0.13161636888980865, "learning_rate": 4.9569681861701697e-05, "loss": 1.0273, "step": 1701 }, { "epoch": 1.939031339031339, "grad_norm": 0.14148838818073273, "learning_rate": 4.9568820694045136e-05, "loss": 0.8333, "step": 1702 }, { "epoch": 1.9401709401709402, "grad_norm": 0.1517331898212433, "learning_rate": 4.956795867304588e-05, "loss": 0.8759, "step": 1703 }, { "epoch": 1.9413105413105414, "grad_norm": 0.19359228014945984, "learning_rate": 4.9567095798733856e-05, "loss": 0.6335, "step": 1704 }, { "epoch": 1.9424501424501424, "grad_norm": 0.12753388285636902, "learning_rate": 4.9566232071139025e-05, "loss": 0.9788, "step": 1705 }, { "epoch": 1.9435897435897436, "grad_norm": 0.14034512639045715, "learning_rate": 4.95653674902914e-05, "loss": 0.78, "step": 1706 }, { "epoch": 1.9447293447293448, "grad_norm": 0.12014275044202805, "learning_rate": 4.9564502056221014e-05, "loss": 0.9388, "step": 1707 }, { "epoch": 1.9458689458689458, "grad_norm": 0.12698069214820862, "learning_rate": 4.956363576895791e-05, "loss": 1.0367, "step": 1708 }, { "epoch": 1.947008547008547, "grad_norm": 0.14705222845077515, "learning_rate": 4.956276862853219e-05, "loss": 0.8282, "step": 1709 }, { "epoch": 1.9481481481481482, "grad_norm": 0.16959571838378906, "learning_rate": 4.956190063497397e-05, "loss": 0.7765, "step": 1710 }, { "epoch": 1.9492877492877492, "grad_norm": 0.13943663239479065, "learning_rate": 4.956103178831339e-05, "loss": 0.8588, "step": 1711 }, { "epoch": 1.9504273504273504, "grad_norm": 0.13844700157642365, "learning_rate": 4.956016208858064e-05, "loss": 0.8311, "step": 1712 }, { "epoch": 1.9515669515669516, "grad_norm": 0.14396969974040985, "learning_rate": 4.955929153580592e-05, "loss": 0.7609, "step": 1713 }, { "epoch": 1.9527065527065526, "grad_norm": 0.1601596176624298, "learning_rate": 4.9558420130019453e-05, "loss": 0.7877, "step": 1714 }, { "epoch": 1.953846153846154, "grad_norm": 0.12831942737102509, "learning_rate": 4.955754787125153e-05, "loss": 1.1118, "step": 1715 }, { "epoch": 1.954985754985755, "grad_norm": 0.15544627606868744, "learning_rate": 4.955667475953244e-05, "loss": 0.9634, "step": 1716 }, { "epoch": 1.956125356125356, "grad_norm": 0.13641200959682465, "learning_rate": 4.95558007948925e-05, "loss": 0.8102, "step": 1717 }, { "epoch": 1.9572649572649574, "grad_norm": 0.13922782242298126, "learning_rate": 4.955492597736207e-05, "loss": 1.0348, "step": 1718 }, { "epoch": 1.9584045584045584, "grad_norm": 0.13506250083446503, "learning_rate": 4.9554050306971534e-05, "loss": 0.8512, "step": 1719 }, { "epoch": 1.9595441595441594, "grad_norm": 0.13824886083602905, "learning_rate": 4.9553173783751303e-05, "loss": 0.8884, "step": 1720 }, { "epoch": 1.9606837606837608, "grad_norm": 0.14373087882995605, "learning_rate": 4.9552296407731824e-05, "loss": 0.9104, "step": 1721 }, { "epoch": 1.9618233618233618, "grad_norm": 0.1664755493402481, "learning_rate": 4.9551418178943574e-05, "loss": 0.7218, "step": 1722 }, { "epoch": 1.9629629629629628, "grad_norm": 0.14031977951526642, "learning_rate": 4.955053909741706e-05, "loss": 0.7915, "step": 1723 }, { "epoch": 1.9641025641025642, "grad_norm": 0.17467737197875977, "learning_rate": 4.954965916318281e-05, "loss": 0.7986, "step": 1724 }, { "epoch": 1.9652421652421652, "grad_norm": 0.145338237285614, "learning_rate": 4.954877837627138e-05, "loss": 0.8819, "step": 1725 }, { "epoch": 1.9663817663817664, "grad_norm": 0.1481735110282898, "learning_rate": 4.954789673671336e-05, "loss": 0.7493, "step": 1726 }, { "epoch": 1.9675213675213676, "grad_norm": 0.1547917127609253, "learning_rate": 4.954701424453939e-05, "loss": 0.8234, "step": 1727 }, { "epoch": 1.9686609686609686, "grad_norm": 0.13837537169456482, "learning_rate": 4.9546130899780107e-05, "loss": 0.9667, "step": 1728 }, { "epoch": 1.9698005698005698, "grad_norm": 0.13087573647499084, "learning_rate": 4.9545246702466196e-05, "loss": 0.9677, "step": 1729 }, { "epoch": 1.970940170940171, "grad_norm": 0.14065606892108917, "learning_rate": 4.954436165262837e-05, "loss": 0.9104, "step": 1730 }, { "epoch": 1.972079772079772, "grad_norm": 0.1263023018836975, "learning_rate": 4.954347575029736e-05, "loss": 0.9036, "step": 1731 }, { "epoch": 1.9732193732193732, "grad_norm": 0.12526488304138184, "learning_rate": 4.9542588995503955e-05, "loss": 0.8904, "step": 1732 }, { "epoch": 1.9743589743589745, "grad_norm": 0.1285375952720642, "learning_rate": 4.954170138827893e-05, "loss": 1.0683, "step": 1733 }, { "epoch": 1.9754985754985754, "grad_norm": 0.14103996753692627, "learning_rate": 4.9540812928653135e-05, "loss": 0.9428, "step": 1734 }, { "epoch": 1.9766381766381766, "grad_norm": 0.15722481906414032, "learning_rate": 4.9539923616657414e-05, "loss": 0.8144, "step": 1735 }, { "epoch": 1.9777777777777779, "grad_norm": 0.13167962431907654, "learning_rate": 4.953903345232266e-05, "loss": 0.9413, "step": 1736 }, { "epoch": 1.9789173789173788, "grad_norm": 0.13495561480522156, "learning_rate": 4.953814243567979e-05, "loss": 1.0195, "step": 1737 }, { "epoch": 1.98005698005698, "grad_norm": 0.14366374909877777, "learning_rate": 4.9537250566759754e-05, "loss": 0.9204, "step": 1738 }, { "epoch": 1.9811965811965813, "grad_norm": 0.15605323016643524, "learning_rate": 4.953635784559353e-05, "loss": 0.817, "step": 1739 }, { "epoch": 1.9823361823361823, "grad_norm": 0.12272720783948898, "learning_rate": 4.9535464272212125e-05, "loss": 0.8901, "step": 1740 }, { "epoch": 1.9834757834757835, "grad_norm": 0.1490478664636612, "learning_rate": 4.953456984664657e-05, "loss": 0.8106, "step": 1741 }, { "epoch": 1.9846153846153847, "grad_norm": 0.16608838737010956, "learning_rate": 4.953367456892793e-05, "loss": 0.7818, "step": 1742 }, { "epoch": 1.9857549857549857, "grad_norm": 0.1329687088727951, "learning_rate": 4.953277843908731e-05, "loss": 0.9119, "step": 1743 }, { "epoch": 1.9868945868945869, "grad_norm": 0.12370755523443222, "learning_rate": 4.9531881457155825e-05, "loss": 1.0252, "step": 1744 }, { "epoch": 1.988034188034188, "grad_norm": 0.14160947501659393, "learning_rate": 4.953098362316464e-05, "loss": 0.8685, "step": 1745 }, { "epoch": 1.989173789173789, "grad_norm": 0.13524875044822693, "learning_rate": 4.953008493714493e-05, "loss": 1.0185, "step": 1746 }, { "epoch": 1.9903133903133903, "grad_norm": 0.1598372608423233, "learning_rate": 4.9529185399127914e-05, "loss": 0.7148, "step": 1747 }, { "epoch": 1.9914529914529915, "grad_norm": 0.1401708722114563, "learning_rate": 4.9528285009144836e-05, "loss": 0.8118, "step": 1748 }, { "epoch": 1.9925925925925925, "grad_norm": 0.15324155986309052, "learning_rate": 4.9527383767226956e-05, "loss": 0.7589, "step": 1749 }, { "epoch": 1.9937321937321937, "grad_norm": 0.16206003725528717, "learning_rate": 4.95264816734056e-05, "loss": 0.65, "step": 1750 }, { "epoch": 1.994871794871795, "grad_norm": 0.14443135261535645, "learning_rate": 4.952557872771208e-05, "loss": 0.8939, "step": 1751 }, { "epoch": 1.9960113960113959, "grad_norm": 0.15447203814983368, "learning_rate": 4.952467493017776e-05, "loss": 0.7274, "step": 1752 }, { "epoch": 1.9971509971509973, "grad_norm": 0.1378166675567627, "learning_rate": 4.9523770280834044e-05, "loss": 0.9257, "step": 1753 }, { "epoch": 1.9982905982905983, "grad_norm": 0.1470404863357544, "learning_rate": 4.9522864779712356e-05, "loss": 0.9223, "step": 1754 }, { "epoch": 1.9994301994301993, "grad_norm": 0.14608190953731537, "learning_rate": 4.952195842684412e-05, "loss": 0.9686, "step": 1755 }, { "epoch": 2.0, "grad_norm": 0.23963040113449097, "learning_rate": 4.952105122226083e-05, "loss": 0.9708, "step": 1756 }, { "epoch": 2.001139601139601, "grad_norm": 0.13557755947113037, "learning_rate": 4.952014316599401e-05, "loss": 0.7819, "step": 1757 }, { "epoch": 2.0022792022792024, "grad_norm": 0.13543866574764252, "learning_rate": 4.951923425807518e-05, "loss": 0.8381, "step": 1758 }, { "epoch": 2.0034188034188034, "grad_norm": 0.1405077427625656, "learning_rate": 4.951832449853592e-05, "loss": 0.8735, "step": 1759 }, { "epoch": 2.0045584045584044, "grad_norm": 0.15981937944889069, "learning_rate": 4.951741388740782e-05, "loss": 0.7474, "step": 1760 }, { "epoch": 2.005698005698006, "grad_norm": 0.13468600809574127, "learning_rate": 4.951650242472252e-05, "loss": 0.9753, "step": 1761 }, { "epoch": 2.006837606837607, "grad_norm": 0.14661459624767303, "learning_rate": 4.951559011051166e-05, "loss": 0.8771, "step": 1762 }, { "epoch": 2.007977207977208, "grad_norm": 0.15834756195545197, "learning_rate": 4.9514676944806946e-05, "loss": 0.9468, "step": 1763 }, { "epoch": 2.0091168091168092, "grad_norm": 0.12414155900478363, "learning_rate": 4.951376292764008e-05, "loss": 1.0267, "step": 1764 }, { "epoch": 2.01025641025641, "grad_norm": 0.18526801466941833, "learning_rate": 4.9512848059042814e-05, "loss": 0.6723, "step": 1765 }, { "epoch": 2.011396011396011, "grad_norm": 0.16756656765937805, "learning_rate": 4.951193233904693e-05, "loss": 0.8291, "step": 1766 }, { "epoch": 2.0125356125356126, "grad_norm": 0.16463187336921692, "learning_rate": 4.9511015767684224e-05, "loss": 0.7793, "step": 1767 }, { "epoch": 2.0136752136752136, "grad_norm": 0.1506982147693634, "learning_rate": 4.951009834498654e-05, "loss": 0.8529, "step": 1768 }, { "epoch": 2.0148148148148146, "grad_norm": 0.12954819202423096, "learning_rate": 4.9509180070985734e-05, "loss": 0.881, "step": 1769 }, { "epoch": 2.015954415954416, "grad_norm": 0.15093229711055756, "learning_rate": 4.95082609457137e-05, "loss": 0.9121, "step": 1770 }, { "epoch": 2.017094017094017, "grad_norm": 0.135585755109787, "learning_rate": 4.950734096920237e-05, "loss": 0.9688, "step": 1771 }, { "epoch": 2.018233618233618, "grad_norm": 0.11625152826309204, "learning_rate": 4.9506420141483686e-05, "loss": 1.1109, "step": 1772 }, { "epoch": 2.0193732193732195, "grad_norm": 0.1574491560459137, "learning_rate": 4.950549846258965e-05, "loss": 0.7961, "step": 1773 }, { "epoch": 2.0205128205128204, "grad_norm": 0.1278599053621292, "learning_rate": 4.950457593255225e-05, "loss": 0.8749, "step": 1774 }, { "epoch": 2.021652421652422, "grad_norm": 0.14023862779140472, "learning_rate": 4.950365255140355e-05, "loss": 1.0798, "step": 1775 }, { "epoch": 2.022792022792023, "grad_norm": 0.17963793873786926, "learning_rate": 4.950272831917561e-05, "loss": 0.9051, "step": 1776 }, { "epoch": 2.023931623931624, "grad_norm": 0.13962332904338837, "learning_rate": 4.950180323590052e-05, "loss": 0.9353, "step": 1777 }, { "epoch": 2.0250712250712253, "grad_norm": 0.1510384976863861, "learning_rate": 4.950087730161044e-05, "loss": 0.7274, "step": 1778 }, { "epoch": 2.0262108262108263, "grad_norm": 0.15448513627052307, "learning_rate": 4.94999505163375e-05, "loss": 0.7941, "step": 1779 }, { "epoch": 2.0273504273504273, "grad_norm": 0.14918600022792816, "learning_rate": 4.949902288011391e-05, "loss": 0.7575, "step": 1780 }, { "epoch": 2.0284900284900287, "grad_norm": 0.13491478562355042, "learning_rate": 4.9498094392971886e-05, "loss": 1.0261, "step": 1781 }, { "epoch": 2.0296296296296297, "grad_norm": 0.15890218317508698, "learning_rate": 4.949716505494367e-05, "loss": 0.8314, "step": 1782 }, { "epoch": 2.0307692307692307, "grad_norm": 0.15294985473155975, "learning_rate": 4.9496234866061544e-05, "loss": 1.0495, "step": 1783 }, { "epoch": 2.031908831908832, "grad_norm": 0.13908836245536804, "learning_rate": 4.949530382635782e-05, "loss": 0.8791, "step": 1784 }, { "epoch": 2.033048433048433, "grad_norm": 0.15374527871608734, "learning_rate": 4.949437193586482e-05, "loss": 0.8537, "step": 1785 }, { "epoch": 2.034188034188034, "grad_norm": 0.14067526161670685, "learning_rate": 4.949343919461493e-05, "loss": 0.8979, "step": 1786 }, { "epoch": 2.0353276353276355, "grad_norm": 0.12269620597362518, "learning_rate": 4.9492505602640545e-05, "loss": 0.9489, "step": 1787 }, { "epoch": 2.0364672364672365, "grad_norm": 0.14497345685958862, "learning_rate": 4.9491571159974084e-05, "loss": 0.8824, "step": 1788 }, { "epoch": 2.0376068376068375, "grad_norm": 0.14417411386966705, "learning_rate": 4.949063586664799e-05, "loss": 1.0015, "step": 1789 }, { "epoch": 2.038746438746439, "grad_norm": 0.13724790513515472, "learning_rate": 4.9489699722694784e-05, "loss": 0.787, "step": 1790 }, { "epoch": 2.03988603988604, "grad_norm": 0.14156948029994965, "learning_rate": 4.948876272814695e-05, "loss": 0.8961, "step": 1791 }, { "epoch": 2.041025641025641, "grad_norm": 0.15392468869686127, "learning_rate": 4.9487824883037034e-05, "loss": 0.7454, "step": 1792 }, { "epoch": 2.0421652421652423, "grad_norm": 0.1689389944076538, "learning_rate": 4.948688618739763e-05, "loss": 0.7534, "step": 1793 }, { "epoch": 2.0433048433048433, "grad_norm": 0.13176964223384857, "learning_rate": 4.948594664126133e-05, "loss": 0.8851, "step": 1794 }, { "epoch": 2.0444444444444443, "grad_norm": 0.13229134678840637, "learning_rate": 4.948500624466076e-05, "loss": 0.9153, "step": 1795 }, { "epoch": 2.0455840455840457, "grad_norm": 0.14103198051452637, "learning_rate": 4.948406499762859e-05, "loss": 0.7965, "step": 1796 }, { "epoch": 2.0467236467236467, "grad_norm": 0.13603278994560242, "learning_rate": 4.948312290019751e-05, "loss": 0.8652, "step": 1797 }, { "epoch": 2.0478632478632477, "grad_norm": 0.1380765587091446, "learning_rate": 4.9482179952400256e-05, "loss": 0.8834, "step": 1798 }, { "epoch": 2.049002849002849, "grad_norm": 0.139401376247406, "learning_rate": 4.948123615426955e-05, "loss": 0.9797, "step": 1799 }, { "epoch": 2.05014245014245, "grad_norm": 0.14086508750915527, "learning_rate": 4.948029150583819e-05, "loss": 0.8631, "step": 1800 }, { "epoch": 2.051282051282051, "grad_norm": 0.13710635900497437, "learning_rate": 4.9479346007138995e-05, "loss": 0.9418, "step": 1801 }, { "epoch": 2.0524216524216525, "grad_norm": 0.18530669808387756, "learning_rate": 4.947839965820479e-05, "loss": 0.5549, "step": 1802 }, { "epoch": 2.0535612535612535, "grad_norm": 0.12745784223079681, "learning_rate": 4.947745245906845e-05, "loss": 0.8951, "step": 1803 }, { "epoch": 2.0547008547008545, "grad_norm": 0.12986508011817932, "learning_rate": 4.947650440976287e-05, "loss": 0.9997, "step": 1804 }, { "epoch": 2.055840455840456, "grad_norm": 0.15432773530483246, "learning_rate": 4.947555551032098e-05, "loss": 0.8726, "step": 1805 }, { "epoch": 2.056980056980057, "grad_norm": 0.1242598220705986, "learning_rate": 4.9474605760775736e-05, "loss": 0.7066, "step": 1806 }, { "epoch": 2.058119658119658, "grad_norm": 0.15464939177036285, "learning_rate": 4.947365516116014e-05, "loss": 0.8179, "step": 1807 }, { "epoch": 2.0592592592592593, "grad_norm": 0.15885429084300995, "learning_rate": 4.947270371150719e-05, "loss": 0.7955, "step": 1808 }, { "epoch": 2.0603988603988603, "grad_norm": 0.12283238023519516, "learning_rate": 4.9471751411849945e-05, "loss": 0.9151, "step": 1809 }, { "epoch": 2.0615384615384613, "grad_norm": 0.15085271000862122, "learning_rate": 4.947079826222147e-05, "loss": 0.887, "step": 1810 }, { "epoch": 2.0626780626780628, "grad_norm": 0.14796563982963562, "learning_rate": 4.9469844262654873e-05, "loss": 0.9092, "step": 1811 }, { "epoch": 2.0638176638176637, "grad_norm": 0.1264326274394989, "learning_rate": 4.946888941318331e-05, "loss": 0.9539, "step": 1812 }, { "epoch": 2.064957264957265, "grad_norm": 0.12568983435630798, "learning_rate": 4.946793371383991e-05, "loss": 0.9531, "step": 1813 }, { "epoch": 2.066096866096866, "grad_norm": 0.14655908942222595, "learning_rate": 4.946697716465789e-05, "loss": 0.9022, "step": 1814 }, { "epoch": 2.067236467236467, "grad_norm": 0.1334122270345688, "learning_rate": 4.946601976567047e-05, "loss": 1.0485, "step": 1815 }, { "epoch": 2.0683760683760686, "grad_norm": 0.14673428237438202, "learning_rate": 4.94650615169109e-05, "loss": 0.9064, "step": 1816 }, { "epoch": 2.0695156695156696, "grad_norm": 0.14907774329185486, "learning_rate": 4.946410241841246e-05, "loss": 0.935, "step": 1817 }, { "epoch": 2.0706552706552706, "grad_norm": 0.1773405373096466, "learning_rate": 4.946314247020848e-05, "loss": 0.7485, "step": 1818 }, { "epoch": 2.071794871794872, "grad_norm": 0.14234867691993713, "learning_rate": 4.946218167233228e-05, "loss": 1.0076, "step": 1819 }, { "epoch": 2.072934472934473, "grad_norm": 0.15424209833145142, "learning_rate": 4.946122002481723e-05, "loss": 0.8979, "step": 1820 }, { "epoch": 2.074074074074074, "grad_norm": 0.1511053889989853, "learning_rate": 4.946025752769675e-05, "loss": 0.7494, "step": 1821 }, { "epoch": 2.0752136752136754, "grad_norm": 0.1601935625076294, "learning_rate": 4.9459294181004266e-05, "loss": 0.8382, "step": 1822 }, { "epoch": 2.0763532763532764, "grad_norm": 0.14639092981815338, "learning_rate": 4.945832998477322e-05, "loss": 0.9111, "step": 1823 }, { "epoch": 2.0774928774928774, "grad_norm": 0.1675504744052887, "learning_rate": 4.9457364939037124e-05, "loss": 0.7916, "step": 1824 }, { "epoch": 2.078632478632479, "grad_norm": 0.12729400396347046, "learning_rate": 4.9456399043829476e-05, "loss": 0.97, "step": 1825 }, { "epoch": 2.07977207977208, "grad_norm": 0.1588684469461441, "learning_rate": 4.9455432299183836e-05, "loss": 0.8322, "step": 1826 }, { "epoch": 2.0809116809116808, "grad_norm": 0.1341092735528946, "learning_rate": 4.945446470513379e-05, "loss": 0.9179, "step": 1827 }, { "epoch": 2.082051282051282, "grad_norm": 0.16290104389190674, "learning_rate": 4.945349626171292e-05, "loss": 0.6944, "step": 1828 }, { "epoch": 2.083190883190883, "grad_norm": 0.14869116246700287, "learning_rate": 4.945252696895488e-05, "loss": 0.8973, "step": 1829 }, { "epoch": 2.084330484330484, "grad_norm": 0.13451936841011047, "learning_rate": 4.945155682689334e-05, "loss": 0.8897, "step": 1830 }, { "epoch": 2.0854700854700856, "grad_norm": 0.1379305124282837, "learning_rate": 4.945058583556198e-05, "loss": 0.7723, "step": 1831 }, { "epoch": 2.0866096866096866, "grad_norm": 0.14934973418712616, "learning_rate": 4.944961399499455e-05, "loss": 0.7914, "step": 1832 }, { "epoch": 2.0877492877492876, "grad_norm": 0.14998772740364075, "learning_rate": 4.944864130522478e-05, "loss": 0.7924, "step": 1833 }, { "epoch": 2.088888888888889, "grad_norm": 0.16711056232452393, "learning_rate": 4.944766776628646e-05, "loss": 0.7783, "step": 1834 }, { "epoch": 2.09002849002849, "grad_norm": 0.139933779835701, "learning_rate": 4.9446693378213414e-05, "loss": 0.8445, "step": 1835 }, { "epoch": 2.091168091168091, "grad_norm": 0.12723082304000854, "learning_rate": 4.944571814103948e-05, "loss": 0.8902, "step": 1836 }, { "epoch": 2.0923076923076924, "grad_norm": 0.12036453187465668, "learning_rate": 4.944474205479852e-05, "loss": 0.9436, "step": 1837 }, { "epoch": 2.0934472934472934, "grad_norm": 0.17035163938999176, "learning_rate": 4.9443765119524447e-05, "loss": 0.7101, "step": 1838 }, { "epoch": 2.0945868945868944, "grad_norm": 0.14487646520137787, "learning_rate": 4.944278733525119e-05, "loss": 0.8694, "step": 1839 }, { "epoch": 2.095726495726496, "grad_norm": 0.16732631623744965, "learning_rate": 4.9441808702012714e-05, "loss": 0.8103, "step": 1840 }, { "epoch": 2.096866096866097, "grad_norm": 0.13234704732894897, "learning_rate": 4.944082921984301e-05, "loss": 0.978, "step": 1841 }, { "epoch": 2.098005698005698, "grad_norm": 0.16951711475849152, "learning_rate": 4.9439848888776085e-05, "loss": 0.8166, "step": 1842 }, { "epoch": 2.0991452991452992, "grad_norm": 0.1672232747077942, "learning_rate": 4.943886770884601e-05, "loss": 0.8148, "step": 1843 }, { "epoch": 2.1002849002849002, "grad_norm": 0.14344024658203125, "learning_rate": 4.943788568008685e-05, "loss": 0.8167, "step": 1844 }, { "epoch": 2.101424501424501, "grad_norm": 0.11736375838518143, "learning_rate": 4.943690280253271e-05, "loss": 0.9979, "step": 1845 }, { "epoch": 2.1025641025641026, "grad_norm": 0.1611638218164444, "learning_rate": 4.9435919076217736e-05, "loss": 0.966, "step": 1846 }, { "epoch": 2.1037037037037036, "grad_norm": 0.17947030067443848, "learning_rate": 4.943493450117609e-05, "loss": 0.6304, "step": 1847 }, { "epoch": 2.104843304843305, "grad_norm": 0.13923931121826172, "learning_rate": 4.9433949077441975e-05, "loss": 0.7906, "step": 1848 }, { "epoch": 2.105982905982906, "grad_norm": 0.1410183310508728, "learning_rate": 4.943296280504962e-05, "loss": 0.9463, "step": 1849 }, { "epoch": 2.107122507122507, "grad_norm": 0.1561197191476822, "learning_rate": 4.943197568403327e-05, "loss": 0.7344, "step": 1850 }, { "epoch": 2.1082621082621085, "grad_norm": 0.14598383009433746, "learning_rate": 4.943098771442722e-05, "loss": 0.9061, "step": 1851 }, { "epoch": 2.1094017094017095, "grad_norm": 0.17551873624324799, "learning_rate": 4.942999889626578e-05, "loss": 0.7412, "step": 1852 }, { "epoch": 2.1105413105413104, "grad_norm": 0.14245261251926422, "learning_rate": 4.9429009229583295e-05, "loss": 0.9604, "step": 1853 }, { "epoch": 2.111680911680912, "grad_norm": 0.13244985044002533, "learning_rate": 4.9428018714414135e-05, "loss": 0.9125, "step": 1854 }, { "epoch": 2.112820512820513, "grad_norm": 0.1514144390821457, "learning_rate": 4.942702735079272e-05, "loss": 0.8519, "step": 1855 }, { "epoch": 2.113960113960114, "grad_norm": 0.14663314819335938, "learning_rate": 4.9426035138753456e-05, "loss": 0.9008, "step": 1856 }, { "epoch": 2.1150997150997153, "grad_norm": 0.16270847618579865, "learning_rate": 4.942504207833083e-05, "loss": 0.777, "step": 1857 }, { "epoch": 2.1162393162393163, "grad_norm": 0.13018783926963806, "learning_rate": 4.9424048169559324e-05, "loss": 0.963, "step": 1858 }, { "epoch": 2.1173789173789173, "grad_norm": 0.1473851352930069, "learning_rate": 4.942305341247345e-05, "loss": 0.7979, "step": 1859 }, { "epoch": 2.1185185185185187, "grad_norm": 0.14038139581680298, "learning_rate": 4.9422057807107776e-05, "loss": 0.8289, "step": 1860 }, { "epoch": 2.1196581196581197, "grad_norm": 0.1464264988899231, "learning_rate": 4.942106135349687e-05, "loss": 0.8947, "step": 1861 }, { "epoch": 2.1207977207977207, "grad_norm": 0.1594810038805008, "learning_rate": 4.942006405167534e-05, "loss": 0.8643, "step": 1862 }, { "epoch": 2.121937321937322, "grad_norm": 0.13936947286128998, "learning_rate": 4.941906590167784e-05, "loss": 0.8891, "step": 1863 }, { "epoch": 2.123076923076923, "grad_norm": 0.14470365643501282, "learning_rate": 4.941806690353902e-05, "loss": 1.0302, "step": 1864 }, { "epoch": 2.124216524216524, "grad_norm": 0.14046810567378998, "learning_rate": 4.941706705729359e-05, "loss": 0.7488, "step": 1865 }, { "epoch": 2.1253561253561255, "grad_norm": 0.15851527452468872, "learning_rate": 4.9416066362976274e-05, "loss": 0.8979, "step": 1866 }, { "epoch": 2.1264957264957265, "grad_norm": 0.1632106900215149, "learning_rate": 4.9415064820621825e-05, "loss": 0.8245, "step": 1867 }, { "epoch": 2.1276353276353275, "grad_norm": 0.14673244953155518, "learning_rate": 4.941406243026504e-05, "loss": 0.7836, "step": 1868 }, { "epoch": 2.128774928774929, "grad_norm": 0.15931051969528198, "learning_rate": 4.9413059191940726e-05, "loss": 0.9142, "step": 1869 }, { "epoch": 2.12991452991453, "grad_norm": 0.16022063791751862, "learning_rate": 4.941205510568373e-05, "loss": 0.77, "step": 1870 }, { "epoch": 2.131054131054131, "grad_norm": 0.15233448147773743, "learning_rate": 4.9411050171528924e-05, "loss": 0.8241, "step": 1871 }, { "epoch": 2.1321937321937323, "grad_norm": 0.178251251578331, "learning_rate": 4.9410044389511214e-05, "loss": 0.8191, "step": 1872 }, { "epoch": 2.1333333333333333, "grad_norm": 0.1551600694656372, "learning_rate": 4.940903775966553e-05, "loss": 0.8217, "step": 1873 }, { "epoch": 2.1344729344729343, "grad_norm": 0.14596572518348694, "learning_rate": 4.940803028202685e-05, "loss": 0.8558, "step": 1874 }, { "epoch": 2.1356125356125357, "grad_norm": 0.14789175987243652, "learning_rate": 4.9407021956630155e-05, "loss": 0.8399, "step": 1875 }, { "epoch": 2.1367521367521367, "grad_norm": 0.15408600866794586, "learning_rate": 4.9406012783510464e-05, "loss": 0.8219, "step": 1876 }, { "epoch": 2.1378917378917377, "grad_norm": 0.17303259670734406, "learning_rate": 4.940500276270283e-05, "loss": 0.8489, "step": 1877 }, { "epoch": 2.139031339031339, "grad_norm": 0.1342841535806656, "learning_rate": 4.940399189424234e-05, "loss": 0.9228, "step": 1878 }, { "epoch": 2.14017094017094, "grad_norm": 0.1540709286928177, "learning_rate": 4.9402980178164104e-05, "loss": 0.9755, "step": 1879 }, { "epoch": 2.141310541310541, "grad_norm": 0.1552581638097763, "learning_rate": 4.9401967614503256e-05, "loss": 0.8959, "step": 1880 }, { "epoch": 2.1424501424501425, "grad_norm": 0.18478767573833466, "learning_rate": 4.940095420329496e-05, "loss": 0.9137, "step": 1881 }, { "epoch": 2.1435897435897435, "grad_norm": 0.2722806930541992, "learning_rate": 4.939993994457442e-05, "loss": 0.9832, "step": 1882 }, { "epoch": 2.1447293447293445, "grad_norm": 0.12776219844818115, "learning_rate": 4.939892483837688e-05, "loss": 0.9475, "step": 1883 }, { "epoch": 2.145868945868946, "grad_norm": 0.13251222670078278, "learning_rate": 4.9397908884737574e-05, "loss": 0.8796, "step": 1884 }, { "epoch": 2.147008547008547, "grad_norm": 0.13473746180534363, "learning_rate": 4.93968920836918e-05, "loss": 1.0007, "step": 1885 }, { "epoch": 2.148148148148148, "grad_norm": 0.13948100805282593, "learning_rate": 4.939587443527487e-05, "loss": 0.8929, "step": 1886 }, { "epoch": 2.1492877492877493, "grad_norm": 0.13815557956695557, "learning_rate": 4.9394855939522136e-05, "loss": 0.9355, "step": 1887 }, { "epoch": 2.1504273504273503, "grad_norm": 0.19502736628055573, "learning_rate": 4.939383659646896e-05, "loss": 0.6197, "step": 1888 }, { "epoch": 2.1515669515669518, "grad_norm": 0.12203574180603027, "learning_rate": 4.939281640615076e-05, "loss": 0.9107, "step": 1889 }, { "epoch": 2.1527065527065528, "grad_norm": 0.18634234368801117, "learning_rate": 4.939179536860297e-05, "loss": 0.6688, "step": 1890 }, { "epoch": 2.1538461538461537, "grad_norm": 0.1471792757511139, "learning_rate": 4.9390773483861047e-05, "loss": 0.8829, "step": 1891 }, { "epoch": 2.154985754985755, "grad_norm": 0.12695087492465973, "learning_rate": 4.938975075196048e-05, "loss": 0.8376, "step": 1892 }, { "epoch": 2.156125356125356, "grad_norm": 0.12165027111768723, "learning_rate": 4.938872717293681e-05, "loss": 0.9702, "step": 1893 }, { "epoch": 2.157264957264957, "grad_norm": 0.14418058097362518, "learning_rate": 4.938770274682557e-05, "loss": 1.0939, "step": 1894 }, { "epoch": 2.1584045584045586, "grad_norm": 0.15830570459365845, "learning_rate": 4.9386677473662345e-05, "loss": 0.7235, "step": 1895 }, { "epoch": 2.1595441595441596, "grad_norm": 0.14789094030857086, "learning_rate": 4.938565135348275e-05, "loss": 0.8126, "step": 1896 }, { "epoch": 2.1606837606837606, "grad_norm": 0.11831694841384888, "learning_rate": 4.938462438632242e-05, "loss": 0.9538, "step": 1897 }, { "epoch": 2.161823361823362, "grad_norm": 0.13033078610897064, "learning_rate": 4.9383596572217026e-05, "loss": 0.8521, "step": 1898 }, { "epoch": 2.162962962962963, "grad_norm": 0.1421518474817276, "learning_rate": 4.938256791120227e-05, "loss": 0.9645, "step": 1899 }, { "epoch": 2.164102564102564, "grad_norm": 0.1662822961807251, "learning_rate": 4.938153840331388e-05, "loss": 0.8572, "step": 1900 }, { "epoch": 2.1652421652421654, "grad_norm": 0.15469342470169067, "learning_rate": 4.9380508048587614e-05, "loss": 0.7598, "step": 1901 }, { "epoch": 2.1663817663817664, "grad_norm": 0.13138481974601746, "learning_rate": 4.9379476847059244e-05, "loss": 0.936, "step": 1902 }, { "epoch": 2.1675213675213674, "grad_norm": 0.12979283928871155, "learning_rate": 4.9378444798764614e-05, "loss": 0.7665, "step": 1903 }, { "epoch": 2.168660968660969, "grad_norm": 0.12802119553089142, "learning_rate": 4.937741190373955e-05, "loss": 0.9889, "step": 1904 }, { "epoch": 2.16980056980057, "grad_norm": 0.14188210666179657, "learning_rate": 4.937637816201993e-05, "loss": 0.9063, "step": 1905 }, { "epoch": 2.1709401709401708, "grad_norm": 0.1336824595928192, "learning_rate": 4.9375343573641665e-05, "loss": 0.8905, "step": 1906 }, { "epoch": 2.172079772079772, "grad_norm": 0.16564136743545532, "learning_rate": 4.937430813864069e-05, "loss": 0.9138, "step": 1907 }, { "epoch": 2.173219373219373, "grad_norm": 0.15150366723537445, "learning_rate": 4.937327185705295e-05, "loss": 0.856, "step": 1908 }, { "epoch": 2.174358974358974, "grad_norm": 0.13871482014656067, "learning_rate": 4.9372234728914455e-05, "loss": 0.8175, "step": 1909 }, { "epoch": 2.1754985754985756, "grad_norm": 0.16934534907341003, "learning_rate": 4.937119675426123e-05, "loss": 0.6717, "step": 1910 }, { "epoch": 2.1766381766381766, "grad_norm": 0.13458140194416046, "learning_rate": 4.9370157933129326e-05, "loss": 0.8625, "step": 1911 }, { "epoch": 2.1777777777777776, "grad_norm": 0.17928913235664368, "learning_rate": 4.936911826555481e-05, "loss": 0.8025, "step": 1912 }, { "epoch": 2.178917378917379, "grad_norm": 0.17198750376701355, "learning_rate": 4.93680777515738e-05, "loss": 0.7214, "step": 1913 }, { "epoch": 2.18005698005698, "grad_norm": 0.1414993852376938, "learning_rate": 4.936703639122244e-05, "loss": 0.9848, "step": 1914 }, { "epoch": 2.181196581196581, "grad_norm": 0.15657451748847961, "learning_rate": 4.936599418453689e-05, "loss": 0.8098, "step": 1915 }, { "epoch": 2.1823361823361824, "grad_norm": 0.1495695561170578, "learning_rate": 4.936495113155337e-05, "loss": 0.8881, "step": 1916 }, { "epoch": 2.1834757834757834, "grad_norm": 0.1330852061510086, "learning_rate": 4.936390723230808e-05, "loss": 1.0, "step": 1917 }, { "epoch": 2.184615384615385, "grad_norm": 0.1315755397081375, "learning_rate": 4.93628624868373e-05, "loss": 0.9905, "step": 1918 }, { "epoch": 2.185754985754986, "grad_norm": 0.17536915838718414, "learning_rate": 4.936181689517731e-05, "loss": 0.8024, "step": 1919 }, { "epoch": 2.186894586894587, "grad_norm": 0.13161343336105347, "learning_rate": 4.9360770457364406e-05, "loss": 0.958, "step": 1920 }, { "epoch": 2.1880341880341883, "grad_norm": 0.175228089094162, "learning_rate": 4.935972317343497e-05, "loss": 0.7871, "step": 1921 }, { "epoch": 2.1891737891737892, "grad_norm": 0.1431926190853119, "learning_rate": 4.935867504342535e-05, "loss": 1.0687, "step": 1922 }, { "epoch": 2.1903133903133902, "grad_norm": 0.14835286140441895, "learning_rate": 4.935762606737196e-05, "loss": 0.8706, "step": 1923 }, { "epoch": 2.1914529914529917, "grad_norm": 0.13589155673980713, "learning_rate": 4.935657624531123e-05, "loss": 1.0305, "step": 1924 }, { "epoch": 2.1925925925925926, "grad_norm": 0.16879761219024658, "learning_rate": 4.935552557727963e-05, "loss": 0.6608, "step": 1925 }, { "epoch": 2.1937321937321936, "grad_norm": 0.16142266988754272, "learning_rate": 4.935447406331365e-05, "loss": 0.6553, "step": 1926 }, { "epoch": 2.194871794871795, "grad_norm": 0.1474546641111374, "learning_rate": 4.935342170344981e-05, "loss": 0.8198, "step": 1927 }, { "epoch": 2.196011396011396, "grad_norm": 0.15299195051193237, "learning_rate": 4.935236849772467e-05, "loss": 0.8801, "step": 1928 }, { "epoch": 2.197150997150997, "grad_norm": 0.167414590716362, "learning_rate": 4.935131444617479e-05, "loss": 0.8002, "step": 1929 }, { "epoch": 2.1982905982905985, "grad_norm": 0.1790858507156372, "learning_rate": 4.9350259548836796e-05, "loss": 0.6513, "step": 1930 }, { "epoch": 2.1994301994301995, "grad_norm": 0.144561305642128, "learning_rate": 4.934920380574732e-05, "loss": 0.9454, "step": 1931 }, { "epoch": 2.2005698005698004, "grad_norm": 0.15095508098602295, "learning_rate": 4.934814721694304e-05, "loss": 0.8685, "step": 1932 }, { "epoch": 2.201709401709402, "grad_norm": 0.12623736262321472, "learning_rate": 4.9347089782460655e-05, "loss": 0.9214, "step": 1933 }, { "epoch": 2.202849002849003, "grad_norm": 0.16936859488487244, "learning_rate": 4.9346031502336884e-05, "loss": 0.7279, "step": 1934 }, { "epoch": 2.203988603988604, "grad_norm": 0.1386210322380066, "learning_rate": 4.9344972376608486e-05, "loss": 0.8518, "step": 1935 }, { "epoch": 2.2051282051282053, "grad_norm": 0.1472872793674469, "learning_rate": 4.9343912405312245e-05, "loss": 0.9135, "step": 1936 }, { "epoch": 2.2062678062678063, "grad_norm": 0.12712737917900085, "learning_rate": 4.934285158848499e-05, "loss": 1.0231, "step": 1937 }, { "epoch": 2.2074074074074073, "grad_norm": 0.17093539237976074, "learning_rate": 4.934178992616354e-05, "loss": 0.7652, "step": 1938 }, { "epoch": 2.2085470085470087, "grad_norm": 0.13362081348896027, "learning_rate": 4.9340727418384796e-05, "loss": 0.9531, "step": 1939 }, { "epoch": 2.2096866096866097, "grad_norm": 0.13828808069229126, "learning_rate": 4.9339664065185656e-05, "loss": 0.901, "step": 1940 }, { "epoch": 2.2108262108262107, "grad_norm": 0.1424104869365692, "learning_rate": 4.933859986660304e-05, "loss": 0.9433, "step": 1941 }, { "epoch": 2.211965811965812, "grad_norm": 0.15829861164093018, "learning_rate": 4.9337534822673926e-05, "loss": 0.7774, "step": 1942 }, { "epoch": 2.213105413105413, "grad_norm": 0.13689813017845154, "learning_rate": 4.933646893343529e-05, "loss": 0.9929, "step": 1943 }, { "epoch": 2.214245014245014, "grad_norm": 0.13118106126785278, "learning_rate": 4.933540219892417e-05, "loss": 0.906, "step": 1944 }, { "epoch": 2.2153846153846155, "grad_norm": 0.12806855142116547, "learning_rate": 4.93343346191776e-05, "loss": 0.8726, "step": 1945 }, { "epoch": 2.2165242165242165, "grad_norm": 0.1649603396654129, "learning_rate": 4.933326619423268e-05, "loss": 0.8003, "step": 1946 }, { "epoch": 2.2176638176638175, "grad_norm": 0.1277129203081131, "learning_rate": 4.933219692412651e-05, "loss": 0.875, "step": 1947 }, { "epoch": 2.218803418803419, "grad_norm": 0.1342049241065979, "learning_rate": 4.9331126808896214e-05, "loss": 0.9455, "step": 1948 }, { "epoch": 2.21994301994302, "grad_norm": 0.2466350495815277, "learning_rate": 4.933005584857898e-05, "loss": 0.8602, "step": 1949 }, { "epoch": 2.221082621082621, "grad_norm": 0.14591872692108154, "learning_rate": 4.9328984043212e-05, "loss": 0.9328, "step": 1950 }, { "epoch": 2.2222222222222223, "grad_norm": 0.1452866792678833, "learning_rate": 4.9327911392832504e-05, "loss": 0.8267, "step": 1951 }, { "epoch": 2.2233618233618233, "grad_norm": 0.14866749942302704, "learning_rate": 4.9326837897477735e-05, "loss": 0.8183, "step": 1952 }, { "epoch": 2.2245014245014243, "grad_norm": 0.1356418877840042, "learning_rate": 4.932576355718499e-05, "loss": 0.8998, "step": 1953 }, { "epoch": 2.2256410256410257, "grad_norm": 0.1587461680173874, "learning_rate": 4.932468837199158e-05, "loss": 0.9083, "step": 1954 }, { "epoch": 2.2267806267806267, "grad_norm": 0.12066824734210968, "learning_rate": 4.9323612341934844e-05, "loss": 0.9598, "step": 1955 }, { "epoch": 2.2279202279202277, "grad_norm": 0.15439583361148834, "learning_rate": 4.932253546705217e-05, "loss": 0.8972, "step": 1956 }, { "epoch": 2.229059829059829, "grad_norm": 0.12736615538597107, "learning_rate": 4.932145774738095e-05, "loss": 0.9804, "step": 1957 }, { "epoch": 2.23019943019943, "grad_norm": 0.13726183772087097, "learning_rate": 4.9320379182958614e-05, "loss": 0.7944, "step": 1958 }, { "epoch": 2.231339031339031, "grad_norm": 0.12549252808094025, "learning_rate": 4.931929977382263e-05, "loss": 0.9977, "step": 1959 }, { "epoch": 2.2324786324786325, "grad_norm": 0.1366228461265564, "learning_rate": 4.931821952001049e-05, "loss": 0.9187, "step": 1960 }, { "epoch": 2.2336182336182335, "grad_norm": 0.18183672428131104, "learning_rate": 4.9317138421559705e-05, "loss": 0.7452, "step": 1961 }, { "epoch": 2.234757834757835, "grad_norm": 0.1402265876531601, "learning_rate": 4.931605647850783e-05, "loss": 0.8307, "step": 1962 }, { "epoch": 2.235897435897436, "grad_norm": 0.15328846871852875, "learning_rate": 4.931497369089245e-05, "loss": 0.829, "step": 1963 }, { "epoch": 2.237037037037037, "grad_norm": 0.14840663969516754, "learning_rate": 4.931389005875117e-05, "loss": 0.8886, "step": 1964 }, { "epoch": 2.2381766381766384, "grad_norm": 0.15602287650108337, "learning_rate": 4.931280558212161e-05, "loss": 0.703, "step": 1965 }, { "epoch": 2.2393162393162394, "grad_norm": 0.13868959248065948, "learning_rate": 4.931172026104146e-05, "loss": 0.8957, "step": 1966 }, { "epoch": 2.2404558404558403, "grad_norm": 0.12626934051513672, "learning_rate": 4.9310634095548415e-05, "loss": 0.9255, "step": 1967 }, { "epoch": 2.2415954415954418, "grad_norm": 0.15209171175956726, "learning_rate": 4.930954708568018e-05, "loss": 0.8254, "step": 1968 }, { "epoch": 2.2427350427350428, "grad_norm": 0.15199127793312073, "learning_rate": 4.930845923147453e-05, "loss": 0.8763, "step": 1969 }, { "epoch": 2.2438746438746437, "grad_norm": 0.16555799543857574, "learning_rate": 4.9307370532969244e-05, "loss": 0.7342, "step": 1970 }, { "epoch": 2.245014245014245, "grad_norm": 0.12918449938297272, "learning_rate": 4.930628099020213e-05, "loss": 0.8549, "step": 1971 }, { "epoch": 2.246153846153846, "grad_norm": 0.13859300315380096, "learning_rate": 4.930519060321104e-05, "loss": 0.807, "step": 1972 }, { "epoch": 2.247293447293447, "grad_norm": 0.14305748045444489, "learning_rate": 4.930409937203384e-05, "loss": 0.8955, "step": 1973 }, { "epoch": 2.2484330484330486, "grad_norm": 0.15069742500782013, "learning_rate": 4.930300729670843e-05, "loss": 1.0442, "step": 1974 }, { "epoch": 2.2495726495726496, "grad_norm": 0.1632305234670639, "learning_rate": 4.930191437727274e-05, "loss": 0.7508, "step": 1975 }, { "epoch": 2.2507122507122506, "grad_norm": 0.13689802587032318, "learning_rate": 4.9300820613764745e-05, "loss": 0.9544, "step": 1976 }, { "epoch": 2.251851851851852, "grad_norm": 0.123629629611969, "learning_rate": 4.929972600622241e-05, "loss": 1.0424, "step": 1977 }, { "epoch": 2.252991452991453, "grad_norm": 0.1522972732782364, "learning_rate": 4.9298630554683765e-05, "loss": 0.8906, "step": 1978 }, { "epoch": 2.254131054131054, "grad_norm": 0.1493622213602066, "learning_rate": 4.9297534259186864e-05, "loss": 0.9317, "step": 1979 }, { "epoch": 2.2552706552706554, "grad_norm": 0.15533123910427094, "learning_rate": 4.929643711976979e-05, "loss": 0.9778, "step": 1980 }, { "epoch": 2.2564102564102564, "grad_norm": 0.16214512288570404, "learning_rate": 4.9295339136470624e-05, "loss": 0.909, "step": 1981 }, { "epoch": 2.2575498575498574, "grad_norm": 0.1467672884464264, "learning_rate": 4.9294240309327524e-05, "loss": 0.8108, "step": 1982 }, { "epoch": 2.258689458689459, "grad_norm": 0.14121359586715698, "learning_rate": 4.9293140638378646e-05, "loss": 0.78, "step": 1983 }, { "epoch": 2.25982905982906, "grad_norm": 0.15930603444576263, "learning_rate": 4.929204012366219e-05, "loss": 0.8192, "step": 1984 }, { "epoch": 2.260968660968661, "grad_norm": 0.15121567249298096, "learning_rate": 4.929093876521638e-05, "loss": 0.9356, "step": 1985 }, { "epoch": 2.262108262108262, "grad_norm": 0.1301395744085312, "learning_rate": 4.928983656307946e-05, "loss": 1.0268, "step": 1986 }, { "epoch": 2.263247863247863, "grad_norm": 0.1378585547208786, "learning_rate": 4.928873351728972e-05, "loss": 0.8623, "step": 1987 }, { "epoch": 2.2643874643874646, "grad_norm": 0.1481010466814041, "learning_rate": 4.928762962788547e-05, "loss": 0.7744, "step": 1988 }, { "epoch": 2.2655270655270656, "grad_norm": 0.12296169251203537, "learning_rate": 4.928652489490505e-05, "loss": 0.9628, "step": 1989 }, { "epoch": 2.2666666666666666, "grad_norm": 0.17064514756202698, "learning_rate": 4.9285419318386836e-05, "loss": 0.8046, "step": 1990 }, { "epoch": 2.267806267806268, "grad_norm": 0.16034133732318878, "learning_rate": 4.928431289836923e-05, "loss": 0.932, "step": 1991 }, { "epoch": 2.268945868945869, "grad_norm": 0.14264528453350067, "learning_rate": 4.928320563489065e-05, "loss": 0.8834, "step": 1992 }, { "epoch": 2.27008547008547, "grad_norm": 0.13359516859054565, "learning_rate": 4.928209752798956e-05, "loss": 0.9662, "step": 1993 }, { "epoch": 2.2712250712250714, "grad_norm": 0.1782594472169876, "learning_rate": 4.9280988577704434e-05, "loss": 0.786, "step": 1994 }, { "epoch": 2.2723646723646724, "grad_norm": 0.1732977330684662, "learning_rate": 4.927987878407382e-05, "loss": 0.7353, "step": 1995 }, { "epoch": 2.2735042735042734, "grad_norm": 0.15431442856788635, "learning_rate": 4.9278768147136236e-05, "loss": 0.7514, "step": 1996 }, { "epoch": 2.274643874643875, "grad_norm": 0.13067857921123505, "learning_rate": 4.9277656666930274e-05, "loss": 0.9739, "step": 1997 }, { "epoch": 2.275783475783476, "grad_norm": 0.14285075664520264, "learning_rate": 4.927654434349452e-05, "loss": 0.9396, "step": 1998 }, { "epoch": 2.276923076923077, "grad_norm": 0.14833152294158936, "learning_rate": 4.927543117686763e-05, "loss": 0.939, "step": 1999 }, { "epoch": 2.2780626780626783, "grad_norm": 0.15306028723716736, "learning_rate": 4.927431716708826e-05, "loss": 0.8387, "step": 2000 }, { "epoch": 2.2792022792022792, "grad_norm": 0.15124543011188507, "learning_rate": 4.927320231419509e-05, "loss": 0.9343, "step": 2001 }, { "epoch": 2.2803418803418802, "grad_norm": 0.13925035297870636, "learning_rate": 4.927208661822686e-05, "loss": 1.0102, "step": 2002 }, { "epoch": 2.2814814814814817, "grad_norm": 0.16237829625606537, "learning_rate": 4.9270970079222315e-05, "loss": 0.8297, "step": 2003 }, { "epoch": 2.2826210826210827, "grad_norm": 0.1363183706998825, "learning_rate": 4.9269852697220225e-05, "loss": 0.965, "step": 2004 }, { "epoch": 2.2837606837606836, "grad_norm": 0.13237380981445312, "learning_rate": 4.9268734472259416e-05, "loss": 0.8495, "step": 2005 }, { "epoch": 2.284900284900285, "grad_norm": 0.13987985253334045, "learning_rate": 4.9267615404378716e-05, "loss": 1.0381, "step": 2006 }, { "epoch": 2.286039886039886, "grad_norm": 0.13296979665756226, "learning_rate": 4.9266495493616995e-05, "loss": 0.8118, "step": 2007 }, { "epoch": 2.287179487179487, "grad_norm": 0.1340661346912384, "learning_rate": 4.926537474001316e-05, "loss": 0.8634, "step": 2008 }, { "epoch": 2.2883190883190885, "grad_norm": 0.14148005843162537, "learning_rate": 4.926425314360612e-05, "loss": 0.8617, "step": 2009 }, { "epoch": 2.2894586894586895, "grad_norm": 0.13206639885902405, "learning_rate": 4.9263130704434844e-05, "loss": 0.8118, "step": 2010 }, { "epoch": 2.2905982905982905, "grad_norm": 0.15646213293075562, "learning_rate": 4.926200742253832e-05, "loss": 1.026, "step": 2011 }, { "epoch": 2.291737891737892, "grad_norm": 0.16251415014266968, "learning_rate": 4.926088329795556e-05, "loss": 0.829, "step": 2012 }, { "epoch": 2.292877492877493, "grad_norm": 0.16284556686878204, "learning_rate": 4.925975833072559e-05, "loss": 0.6832, "step": 2013 }, { "epoch": 2.294017094017094, "grad_norm": 0.15893757343292236, "learning_rate": 4.9258632520887516e-05, "loss": 0.7808, "step": 2014 }, { "epoch": 2.2951566951566953, "grad_norm": 0.19513992965221405, "learning_rate": 4.925750586848041e-05, "loss": 0.5751, "step": 2015 }, { "epoch": 2.2962962962962963, "grad_norm": 0.141801655292511, "learning_rate": 4.9256378373543425e-05, "loss": 0.9394, "step": 2016 }, { "epoch": 2.2974358974358973, "grad_norm": 0.14876669645309448, "learning_rate": 4.9255250036115715e-05, "loss": 0.8557, "step": 2017 }, { "epoch": 2.2985754985754987, "grad_norm": 0.16340695321559906, "learning_rate": 4.925412085623646e-05, "loss": 0.7437, "step": 2018 }, { "epoch": 2.2997150997150997, "grad_norm": 0.12336662411689758, "learning_rate": 4.92529908339449e-05, "loss": 0.9023, "step": 2019 }, { "epoch": 2.3008547008547007, "grad_norm": 0.1331944316625595, "learning_rate": 4.925185996928027e-05, "loss": 0.9216, "step": 2020 }, { "epoch": 2.301994301994302, "grad_norm": 0.14265064895153046, "learning_rate": 4.9250728262281844e-05, "loss": 0.9598, "step": 2021 }, { "epoch": 2.303133903133903, "grad_norm": 0.16327020525932312, "learning_rate": 4.9249595712988936e-05, "loss": 0.7156, "step": 2022 }, { "epoch": 2.304273504273504, "grad_norm": 0.13805516064167023, "learning_rate": 4.924846232144088e-05, "loss": 0.9657, "step": 2023 }, { "epoch": 2.3054131054131055, "grad_norm": 0.15694867074489594, "learning_rate": 4.924732808767705e-05, "loss": 0.7611, "step": 2024 }, { "epoch": 2.3065527065527065, "grad_norm": 0.12021307647228241, "learning_rate": 4.924619301173684e-05, "loss": 0.8801, "step": 2025 }, { "epoch": 2.3076923076923075, "grad_norm": 0.14766241610050201, "learning_rate": 4.9245057093659655e-05, "loss": 0.8036, "step": 2026 }, { "epoch": 2.308831908831909, "grad_norm": 0.13158078491687775, "learning_rate": 4.9243920333484975e-05, "loss": 0.8843, "step": 2027 }, { "epoch": 2.30997150997151, "grad_norm": 0.1540067344903946, "learning_rate": 4.924278273125227e-05, "loss": 0.9109, "step": 2028 }, { "epoch": 2.311111111111111, "grad_norm": 0.14239220321178436, "learning_rate": 4.924164428700104e-05, "loss": 0.7884, "step": 2029 }, { "epoch": 2.3122507122507123, "grad_norm": 0.15281394124031067, "learning_rate": 4.924050500077085e-05, "loss": 0.925, "step": 2030 }, { "epoch": 2.3133903133903133, "grad_norm": 0.13662955164909363, "learning_rate": 4.923936487260127e-05, "loss": 0.9603, "step": 2031 }, { "epoch": 2.3145299145299143, "grad_norm": 0.1438586264848709, "learning_rate": 4.923822390253187e-05, "loss": 0.8715, "step": 2032 }, { "epoch": 2.3156695156695157, "grad_norm": 0.15727093815803528, "learning_rate": 4.9237082090602304e-05, "loss": 0.9976, "step": 2033 }, { "epoch": 2.3168091168091167, "grad_norm": 0.12305113673210144, "learning_rate": 4.923593943685223e-05, "loss": 1.054, "step": 2034 }, { "epoch": 2.3179487179487177, "grad_norm": 0.16374419629573822, "learning_rate": 4.923479594132133e-05, "loss": 0.7952, "step": 2035 }, { "epoch": 2.319088319088319, "grad_norm": 0.1974456012248993, "learning_rate": 4.923365160404932e-05, "loss": 0.6462, "step": 2036 }, { "epoch": 2.32022792022792, "grad_norm": 0.2112473100423813, "learning_rate": 4.923250642507594e-05, "loss": 0.7523, "step": 2037 }, { "epoch": 2.3213675213675216, "grad_norm": 0.14270855486392975, "learning_rate": 4.923136040444098e-05, "loss": 0.7284, "step": 2038 }, { "epoch": 2.3225071225071225, "grad_norm": 0.14322462677955627, "learning_rate": 4.9230213542184235e-05, "loss": 0.775, "step": 2039 }, { "epoch": 2.3236467236467235, "grad_norm": 0.13296982645988464, "learning_rate": 4.922906583834554e-05, "loss": 0.8894, "step": 2040 }, { "epoch": 2.324786324786325, "grad_norm": 0.138052836060524, "learning_rate": 4.922791729296476e-05, "loss": 0.906, "step": 2041 }, { "epoch": 2.325925925925926, "grad_norm": 0.1894429624080658, "learning_rate": 4.9226767906081787e-05, "loss": 0.6677, "step": 2042 }, { "epoch": 2.327065527065527, "grad_norm": 0.14458133280277252, "learning_rate": 4.922561767773654e-05, "loss": 0.9201, "step": 2043 }, { "epoch": 2.3282051282051284, "grad_norm": 0.14025312662124634, "learning_rate": 4.922446660796896e-05, "loss": 0.8861, "step": 2044 }, { "epoch": 2.3293447293447294, "grad_norm": 0.1329633891582489, "learning_rate": 4.9223314696819045e-05, "loss": 0.8718, "step": 2045 }, { "epoch": 2.3304843304843303, "grad_norm": 0.14083260297775269, "learning_rate": 4.9222161944326795e-05, "loss": 0.9516, "step": 2046 }, { "epoch": 2.331623931623932, "grad_norm": 0.12882499396800995, "learning_rate": 4.922100835053225e-05, "loss": 1.0386, "step": 2047 }, { "epoch": 2.3327635327635328, "grad_norm": 0.15318839251995087, "learning_rate": 4.921985391547548e-05, "loss": 1.0038, "step": 2048 }, { "epoch": 2.3339031339031338, "grad_norm": 0.13524477183818817, "learning_rate": 4.921869863919657e-05, "loss": 0.9484, "step": 2049 }, { "epoch": 2.335042735042735, "grad_norm": 0.1811407059431076, "learning_rate": 4.921754252173566e-05, "loss": 0.9209, "step": 2050 }, { "epoch": 2.336182336182336, "grad_norm": 0.14366723597049713, "learning_rate": 4.9216385563132905e-05, "loss": 0.8652, "step": 2051 }, { "epoch": 2.337321937321937, "grad_norm": 0.16410066187381744, "learning_rate": 4.921522776342848e-05, "loss": 0.9666, "step": 2052 }, { "epoch": 2.3384615384615386, "grad_norm": 0.1788884997367859, "learning_rate": 4.92140691226626e-05, "loss": 0.6524, "step": 2053 }, { "epoch": 2.3396011396011396, "grad_norm": 0.14363914728164673, "learning_rate": 4.921290964087552e-05, "loss": 0.739, "step": 2054 }, { "epoch": 2.3407407407407406, "grad_norm": 0.15925370156764984, "learning_rate": 4.9211749318107495e-05, "loss": 0.7783, "step": 2055 }, { "epoch": 2.341880341880342, "grad_norm": 0.1428164839744568, "learning_rate": 4.9210588154398826e-05, "loss": 0.9463, "step": 2056 }, { "epoch": 2.343019943019943, "grad_norm": 0.1471196860074997, "learning_rate": 4.920942614978986e-05, "loss": 0.8499, "step": 2057 }, { "epoch": 2.344159544159544, "grad_norm": 0.13637736439704895, "learning_rate": 4.920826330432095e-05, "loss": 0.9344, "step": 2058 }, { "epoch": 2.3452991452991454, "grad_norm": 0.12916813790798187, "learning_rate": 4.920709961803247e-05, "loss": 0.9978, "step": 2059 }, { "epoch": 2.3464387464387464, "grad_norm": 0.17525401711463928, "learning_rate": 4.920593509096486e-05, "loss": 0.8386, "step": 2060 }, { "epoch": 2.347578347578348, "grad_norm": 0.1472932994365692, "learning_rate": 4.920476972315856e-05, "loss": 0.8604, "step": 2061 }, { "epoch": 2.348717948717949, "grad_norm": 0.15404731035232544, "learning_rate": 4.920360351465404e-05, "loss": 0.7025, "step": 2062 }, { "epoch": 2.34985754985755, "grad_norm": 0.14780236780643463, "learning_rate": 4.920243646549181e-05, "loss": 0.6789, "step": 2063 }, { "epoch": 2.3509971509971512, "grad_norm": 0.1359528750181198, "learning_rate": 4.920126857571241e-05, "loss": 0.9251, "step": 2064 }, { "epoch": 2.352136752136752, "grad_norm": 0.14521725475788116, "learning_rate": 4.920009984535639e-05, "loss": 0.9406, "step": 2065 }, { "epoch": 2.353276353276353, "grad_norm": 0.16266874969005585, "learning_rate": 4.919893027446435e-05, "loss": 0.8079, "step": 2066 }, { "epoch": 2.3544159544159546, "grad_norm": 0.16694174706935883, "learning_rate": 4.919775986307692e-05, "loss": 0.8636, "step": 2067 }, { "epoch": 2.3555555555555556, "grad_norm": 0.1435796022415161, "learning_rate": 4.919658861123475e-05, "loss": 0.9513, "step": 2068 }, { "epoch": 2.3566951566951566, "grad_norm": 0.17194302380084991, "learning_rate": 4.919541651897851e-05, "loss": 0.7693, "step": 2069 }, { "epoch": 2.357834757834758, "grad_norm": 0.1311817616224289, "learning_rate": 4.9194243586348924e-05, "loss": 0.9718, "step": 2070 }, { "epoch": 2.358974358974359, "grad_norm": 0.15451529622077942, "learning_rate": 4.9193069813386715e-05, "loss": 0.9573, "step": 2071 }, { "epoch": 2.36011396011396, "grad_norm": 0.1511376053094864, "learning_rate": 4.919189520013267e-05, "loss": 0.8786, "step": 2072 }, { "epoch": 2.3612535612535615, "grad_norm": 0.1302129626274109, "learning_rate": 4.919071974662757e-05, "loss": 0.9859, "step": 2073 }, { "epoch": 2.3623931623931624, "grad_norm": 0.15494883060455322, "learning_rate": 4.918954345291225e-05, "loss": 0.8693, "step": 2074 }, { "epoch": 2.3635327635327634, "grad_norm": 0.1515437215566635, "learning_rate": 4.918836631902756e-05, "loss": 0.8416, "step": 2075 }, { "epoch": 2.364672364672365, "grad_norm": 0.14044468104839325, "learning_rate": 4.918718834501439e-05, "loss": 0.9393, "step": 2076 }, { "epoch": 2.365811965811966, "grad_norm": 0.18216492235660553, "learning_rate": 4.9186009530913656e-05, "loss": 0.8229, "step": 2077 }, { "epoch": 2.366951566951567, "grad_norm": 0.14449098706245422, "learning_rate": 4.91848298767663e-05, "loss": 0.912, "step": 2078 }, { "epoch": 2.3680911680911683, "grad_norm": 0.15002913773059845, "learning_rate": 4.9183649382613295e-05, "loss": 0.8626, "step": 2079 }, { "epoch": 2.3692307692307693, "grad_norm": 0.13505762815475464, "learning_rate": 4.918246804849564e-05, "loss": 0.9481, "step": 2080 }, { "epoch": 2.3703703703703702, "grad_norm": 0.16325612366199493, "learning_rate": 4.9181285874454374e-05, "loss": 0.9719, "step": 2081 }, { "epoch": 2.3715099715099717, "grad_norm": 0.14531755447387695, "learning_rate": 4.918010286053054e-05, "loss": 0.8315, "step": 2082 }, { "epoch": 2.3726495726495727, "grad_norm": 0.14217323064804077, "learning_rate": 4.917891900676524e-05, "loss": 0.8154, "step": 2083 }, { "epoch": 2.3737891737891736, "grad_norm": 0.13195064663887024, "learning_rate": 4.9177734313199595e-05, "loss": 0.791, "step": 2084 }, { "epoch": 2.374928774928775, "grad_norm": 0.14778481423854828, "learning_rate": 4.917654877987475e-05, "loss": 0.8761, "step": 2085 }, { "epoch": 2.376068376068376, "grad_norm": 0.12823796272277832, "learning_rate": 4.917536240683188e-05, "loss": 0.9051, "step": 2086 }, { "epoch": 2.377207977207977, "grad_norm": 0.14800260961055756, "learning_rate": 4.917417519411219e-05, "loss": 0.8613, "step": 2087 }, { "epoch": 2.3783475783475785, "grad_norm": 0.1318310797214508, "learning_rate": 4.917298714175691e-05, "loss": 0.9958, "step": 2088 }, { "epoch": 2.3794871794871795, "grad_norm": 0.13034144043922424, "learning_rate": 4.917179824980732e-05, "loss": 0.8367, "step": 2089 }, { "epoch": 2.3806267806267805, "grad_norm": 0.1599917709827423, "learning_rate": 4.9170608518304693e-05, "loss": 0.7495, "step": 2090 }, { "epoch": 2.381766381766382, "grad_norm": 0.12858852744102478, "learning_rate": 4.916941794729037e-05, "loss": 0.9891, "step": 2091 }, { "epoch": 2.382905982905983, "grad_norm": 0.15404509007930756, "learning_rate": 4.916822653680569e-05, "loss": 0.8811, "step": 2092 }, { "epoch": 2.384045584045584, "grad_norm": 0.14026761054992676, "learning_rate": 4.916703428689205e-05, "loss": 0.8023, "step": 2093 }, { "epoch": 2.3851851851851853, "grad_norm": 0.151481032371521, "learning_rate": 4.916584119759085e-05, "loss": 0.79, "step": 2094 }, { "epoch": 2.3863247863247863, "grad_norm": 0.1745295226573944, "learning_rate": 4.916464726894352e-05, "loss": 0.7594, "step": 2095 }, { "epoch": 2.3874643874643873, "grad_norm": 0.15312033891677856, "learning_rate": 4.9163452500991535e-05, "loss": 0.9303, "step": 2096 }, { "epoch": 2.3886039886039887, "grad_norm": 0.1434355080127716, "learning_rate": 4.91622568937764e-05, "loss": 1.0141, "step": 2097 }, { "epoch": 2.3897435897435897, "grad_norm": 0.16610686480998993, "learning_rate": 4.916106044733964e-05, "loss": 0.7276, "step": 2098 }, { "epoch": 2.3908831908831907, "grad_norm": 0.13088960945606232, "learning_rate": 4.9159863161722803e-05, "loss": 0.8771, "step": 2099 }, { "epoch": 2.392022792022792, "grad_norm": 0.13837777078151703, "learning_rate": 4.915866503696748e-05, "loss": 0.9145, "step": 2100 }, { "epoch": 2.393162393162393, "grad_norm": 0.15753956139087677, "learning_rate": 4.915746607311528e-05, "loss": 0.7316, "step": 2101 }, { "epoch": 2.394301994301994, "grad_norm": 0.1448155790567398, "learning_rate": 4.915626627020785e-05, "loss": 0.8271, "step": 2102 }, { "epoch": 2.3954415954415955, "grad_norm": 0.13404794037342072, "learning_rate": 4.915506562828687e-05, "loss": 0.8835, "step": 2103 }, { "epoch": 2.3965811965811965, "grad_norm": 0.13098442554473877, "learning_rate": 4.915386414739403e-05, "loss": 0.7098, "step": 2104 }, { "epoch": 2.3977207977207975, "grad_norm": 0.14145700633525848, "learning_rate": 4.915266182757106e-05, "loss": 1.0053, "step": 2105 }, { "epoch": 2.398860398860399, "grad_norm": 0.15703187882900238, "learning_rate": 4.9151458668859726e-05, "loss": 0.7455, "step": 2106 }, { "epoch": 2.4, "grad_norm": 0.3330056965351105, "learning_rate": 4.9150254671301824e-05, "loss": 0.8628, "step": 2107 }, { "epoch": 2.401139601139601, "grad_norm": 0.16580024361610413, "learning_rate": 4.914904983493916e-05, "loss": 0.8511, "step": 2108 }, { "epoch": 2.4022792022792023, "grad_norm": 0.13251586258411407, "learning_rate": 4.914784415981358e-05, "loss": 1.0675, "step": 2109 }, { "epoch": 2.4034188034188033, "grad_norm": 0.15590563416481018, "learning_rate": 4.914663764596697e-05, "loss": 0.8158, "step": 2110 }, { "epoch": 2.4045584045584047, "grad_norm": 0.1492435485124588, "learning_rate": 4.914543029344123e-05, "loss": 0.8681, "step": 2111 }, { "epoch": 2.4056980056980057, "grad_norm": 0.14389283955097198, "learning_rate": 4.9144222102278296e-05, "loss": 0.7858, "step": 2112 }, { "epoch": 2.4068376068376067, "grad_norm": 0.1349346786737442, "learning_rate": 4.914301307252013e-05, "loss": 0.9187, "step": 2113 }, { "epoch": 2.407977207977208, "grad_norm": 0.16255079209804535, "learning_rate": 4.914180320420873e-05, "loss": 0.7976, "step": 2114 }, { "epoch": 2.409116809116809, "grad_norm": 0.16387365758419037, "learning_rate": 4.9140592497386115e-05, "loss": 0.8124, "step": 2115 }, { "epoch": 2.41025641025641, "grad_norm": 0.12129881978034973, "learning_rate": 4.9139380952094324e-05, "loss": 1.0386, "step": 2116 }, { "epoch": 2.4113960113960116, "grad_norm": 0.19868333637714386, "learning_rate": 4.913816856837546e-05, "loss": 0.7537, "step": 2117 }, { "epoch": 2.4125356125356126, "grad_norm": 0.16071578860282898, "learning_rate": 4.913695534627163e-05, "loss": 0.8187, "step": 2118 }, { "epoch": 2.4136752136752135, "grad_norm": 0.149382084608078, "learning_rate": 4.913574128582494e-05, "loss": 0.9916, "step": 2119 }, { "epoch": 2.414814814814815, "grad_norm": 0.15173287689685822, "learning_rate": 4.9134526387077604e-05, "loss": 0.8567, "step": 2120 }, { "epoch": 2.415954415954416, "grad_norm": 0.14803457260131836, "learning_rate": 4.9133310650071787e-05, "loss": 0.771, "step": 2121 }, { "epoch": 2.417094017094017, "grad_norm": 0.14639389514923096, "learning_rate": 4.913209407484972e-05, "loss": 0.8071, "step": 2122 }, { "epoch": 2.4182336182336184, "grad_norm": 0.1930467039346695, "learning_rate": 4.913087666145367e-05, "loss": 1.0161, "step": 2123 }, { "epoch": 2.4193732193732194, "grad_norm": 0.1407659947872162, "learning_rate": 4.912965840992591e-05, "loss": 0.8314, "step": 2124 }, { "epoch": 2.4205128205128204, "grad_norm": 0.13035765290260315, "learning_rate": 4.912843932030876e-05, "loss": 0.9131, "step": 2125 }, { "epoch": 2.421652421652422, "grad_norm": 0.13344445824623108, "learning_rate": 4.9127219392644554e-05, "loss": 0.851, "step": 2126 }, { "epoch": 2.4227920227920228, "grad_norm": 0.16210369765758514, "learning_rate": 4.912599862697567e-05, "loss": 0.9003, "step": 2127 }, { "epoch": 2.4239316239316238, "grad_norm": 0.14636437594890594, "learning_rate": 4.9124777023344504e-05, "loss": 0.8381, "step": 2128 }, { "epoch": 2.425071225071225, "grad_norm": 0.15211112797260284, "learning_rate": 4.912355458179349e-05, "loss": 0.8346, "step": 2129 }, { "epoch": 2.426210826210826, "grad_norm": 0.11880381405353546, "learning_rate": 4.912233130236509e-05, "loss": 0.9763, "step": 2130 }, { "epoch": 2.427350427350427, "grad_norm": 0.14103466272354126, "learning_rate": 4.912110718510178e-05, "loss": 1.0533, "step": 2131 }, { "epoch": 2.4284900284900286, "grad_norm": 0.16132673621177673, "learning_rate": 4.9119882230046086e-05, "loss": 0.8718, "step": 2132 }, { "epoch": 2.4296296296296296, "grad_norm": 0.14463678002357483, "learning_rate": 4.911865643724056e-05, "loss": 1.023, "step": 2133 }, { "epoch": 2.430769230769231, "grad_norm": 0.1396966278553009, "learning_rate": 4.911742980672776e-05, "loss": 0.9135, "step": 2134 }, { "epoch": 2.431908831908832, "grad_norm": 0.14493419229984283, "learning_rate": 4.91162023385503e-05, "loss": 0.8479, "step": 2135 }, { "epoch": 2.433048433048433, "grad_norm": 0.1389792412519455, "learning_rate": 4.911497403275082e-05, "loss": 0.7689, "step": 2136 }, { "epoch": 2.4341880341880344, "grad_norm": 0.14532049000263214, "learning_rate": 4.911374488937197e-05, "loss": 0.9327, "step": 2137 }, { "epoch": 2.4353276353276354, "grad_norm": 0.15216736495494843, "learning_rate": 4.9112514908456456e-05, "loss": 0.832, "step": 2138 }, { "epoch": 2.4364672364672364, "grad_norm": 0.15010429918766022, "learning_rate": 4.911128409004698e-05, "loss": 0.9334, "step": 2139 }, { "epoch": 2.437606837606838, "grad_norm": 0.15640141069889069, "learning_rate": 4.91100524341863e-05, "loss": 0.8189, "step": 2140 }, { "epoch": 2.438746438746439, "grad_norm": 0.166352778673172, "learning_rate": 4.9108819940917203e-05, "loss": 0.6352, "step": 2141 }, { "epoch": 2.43988603988604, "grad_norm": 0.19830261170864105, "learning_rate": 4.910758661028249e-05, "loss": 0.8124, "step": 2142 }, { "epoch": 2.4410256410256412, "grad_norm": 0.16307613253593445, "learning_rate": 4.9106352442324986e-05, "loss": 0.6899, "step": 2143 }, { "epoch": 2.4421652421652422, "grad_norm": 0.13211458921432495, "learning_rate": 4.910511743708758e-05, "loss": 0.9798, "step": 2144 }, { "epoch": 2.443304843304843, "grad_norm": 0.1344984769821167, "learning_rate": 4.9103881594613155e-05, "loss": 1.0457, "step": 2145 }, { "epoch": 2.4444444444444446, "grad_norm": 0.1644609272480011, "learning_rate": 4.9102644914944634e-05, "loss": 0.7837, "step": 2146 }, { "epoch": 2.4455840455840456, "grad_norm": 0.14739760756492615, "learning_rate": 4.910140739812498e-05, "loss": 0.8853, "step": 2147 }, { "epoch": 2.4467236467236466, "grad_norm": 0.1313575953245163, "learning_rate": 4.910016904419715e-05, "loss": 0.9716, "step": 2148 }, { "epoch": 2.447863247863248, "grad_norm": 0.16711562871932983, "learning_rate": 4.909892985320419e-05, "loss": 0.7586, "step": 2149 }, { "epoch": 2.449002849002849, "grad_norm": 0.14246819913387299, "learning_rate": 4.909768982518912e-05, "loss": 0.9917, "step": 2150 }, { "epoch": 2.45014245014245, "grad_norm": 0.15236404538154602, "learning_rate": 4.909644896019501e-05, "loss": 0.8881, "step": 2151 }, { "epoch": 2.4512820512820515, "grad_norm": 0.15246206521987915, "learning_rate": 4.909520725826496e-05, "loss": 0.8779, "step": 2152 }, { "epoch": 2.4524216524216524, "grad_norm": 0.14849071204662323, "learning_rate": 4.909396471944211e-05, "loss": 0.9273, "step": 2153 }, { "epoch": 2.4535612535612534, "grad_norm": 0.14919808506965637, "learning_rate": 4.90927213437696e-05, "loss": 0.9098, "step": 2154 }, { "epoch": 2.454700854700855, "grad_norm": 0.17571128904819489, "learning_rate": 4.909147713129062e-05, "loss": 0.8339, "step": 2155 }, { "epoch": 2.455840455840456, "grad_norm": 0.14000199735164642, "learning_rate": 4.9090232082048396e-05, "loss": 0.866, "step": 2156 }, { "epoch": 2.456980056980057, "grad_norm": 0.13070739805698395, "learning_rate": 4.9088986196086154e-05, "loss": 0.9645, "step": 2157 }, { "epoch": 2.4581196581196583, "grad_norm": 0.12980005145072937, "learning_rate": 4.908773947344718e-05, "loss": 0.9703, "step": 2158 }, { "epoch": 2.4592592592592593, "grad_norm": 0.179650217294693, "learning_rate": 4.908649191417477e-05, "loss": 0.7172, "step": 2159 }, { "epoch": 2.4603988603988602, "grad_norm": 0.12496597319841385, "learning_rate": 4.908524351831226e-05, "loss": 1.0081, "step": 2160 }, { "epoch": 2.4615384615384617, "grad_norm": 0.14479053020477295, "learning_rate": 4.908399428590301e-05, "loss": 0.7927, "step": 2161 }, { "epoch": 2.4626780626780627, "grad_norm": 0.16434632241725922, "learning_rate": 4.9082744216990404e-05, "loss": 0.7877, "step": 2162 }, { "epoch": 2.4638176638176637, "grad_norm": 0.13760940730571747, "learning_rate": 4.908149331161787e-05, "loss": 0.9409, "step": 2163 }, { "epoch": 2.464957264957265, "grad_norm": 0.12635131180286407, "learning_rate": 4.908024156982884e-05, "loss": 0.8546, "step": 2164 }, { "epoch": 2.466096866096866, "grad_norm": 0.14990006387233734, "learning_rate": 4.907898899166681e-05, "loss": 0.8018, "step": 2165 }, { "epoch": 2.467236467236467, "grad_norm": 0.13738976418972015, "learning_rate": 4.9077735577175266e-05, "loss": 0.9287, "step": 2166 }, { "epoch": 2.4683760683760685, "grad_norm": 0.16194100677967072, "learning_rate": 4.9076481326397754e-05, "loss": 0.7774, "step": 2167 }, { "epoch": 2.4695156695156695, "grad_norm": 0.1397888958454132, "learning_rate": 4.907522623937784e-05, "loss": 0.9735, "step": 2168 }, { "epoch": 2.4706552706552705, "grad_norm": 0.12333477288484573, "learning_rate": 4.90739703161591e-05, "loss": 1.0672, "step": 2169 }, { "epoch": 2.471794871794872, "grad_norm": 0.16172461211681366, "learning_rate": 4.907271355678518e-05, "loss": 0.7231, "step": 2170 }, { "epoch": 2.472934472934473, "grad_norm": 0.16499406099319458, "learning_rate": 4.907145596129971e-05, "loss": 0.7705, "step": 2171 }, { "epoch": 2.474074074074074, "grad_norm": 0.13408449292182922, "learning_rate": 4.9070197529746374e-05, "loss": 1.0096, "step": 2172 }, { "epoch": 2.4752136752136753, "grad_norm": 0.18295085430145264, "learning_rate": 4.906893826216889e-05, "loss": 0.5515, "step": 2173 }, { "epoch": 2.4763532763532763, "grad_norm": 0.16711701452732086, "learning_rate": 4.906767815861099e-05, "loss": 0.7263, "step": 2174 }, { "epoch": 2.4774928774928773, "grad_norm": 0.13981565833091736, "learning_rate": 4.906641721911645e-05, "loss": 0.8751, "step": 2175 }, { "epoch": 2.4786324786324787, "grad_norm": 0.14861001074314117, "learning_rate": 4.906515544372904e-05, "loss": 0.8041, "step": 2176 }, { "epoch": 2.4797720797720797, "grad_norm": 0.183335542678833, "learning_rate": 4.906389283249261e-05, "loss": 0.6565, "step": 2177 }, { "epoch": 2.4809116809116807, "grad_norm": 0.12697993218898773, "learning_rate": 4.906262938545101e-05, "loss": 0.9796, "step": 2178 }, { "epoch": 2.482051282051282, "grad_norm": 0.13962382078170776, "learning_rate": 4.9061365102648116e-05, "loss": 0.9329, "step": 2179 }, { "epoch": 2.483190883190883, "grad_norm": 0.17473967373371124, "learning_rate": 4.906009998412784e-05, "loss": 0.6088, "step": 2180 }, { "epoch": 2.484330484330484, "grad_norm": 0.13965274393558502, "learning_rate": 4.905883402993413e-05, "loss": 0.862, "step": 2181 }, { "epoch": 2.4854700854700855, "grad_norm": 0.13113312423229218, "learning_rate": 4.905756724011095e-05, "loss": 0.8951, "step": 2182 }, { "epoch": 2.4866096866096865, "grad_norm": 0.1369440108537674, "learning_rate": 4.90562996147023e-05, "loss": 0.8166, "step": 2183 }, { "epoch": 2.487749287749288, "grad_norm": 0.1592363715171814, "learning_rate": 4.905503115375221e-05, "loss": 0.7751, "step": 2184 }, { "epoch": 2.488888888888889, "grad_norm": 0.14454996585845947, "learning_rate": 4.9053761857304734e-05, "loss": 0.8808, "step": 2185 }, { "epoch": 2.49002849002849, "grad_norm": 0.14796991646289825, "learning_rate": 4.905249172540397e-05, "loss": 0.8883, "step": 2186 }, { "epoch": 2.4911680911680913, "grad_norm": 0.1680203229188919, "learning_rate": 4.905122075809402e-05, "loss": 0.8488, "step": 2187 }, { "epoch": 2.4923076923076923, "grad_norm": 0.13565847277641296, "learning_rate": 4.904994895541902e-05, "loss": 0.8863, "step": 2188 }, { "epoch": 2.4934472934472933, "grad_norm": 0.14112970232963562, "learning_rate": 4.904867631742317e-05, "loss": 0.8267, "step": 2189 }, { "epoch": 2.4945868945868948, "grad_norm": 0.15535341203212738, "learning_rate": 4.904740284415065e-05, "loss": 0.7497, "step": 2190 }, { "epoch": 2.4957264957264957, "grad_norm": 0.16363096237182617, "learning_rate": 4.90461285356457e-05, "loss": 0.7626, "step": 2191 }, { "epoch": 2.4968660968660967, "grad_norm": 0.12676620483398438, "learning_rate": 4.9044853391952584e-05, "loss": 0.9664, "step": 2192 }, { "epoch": 2.498005698005698, "grad_norm": 0.17134632170200348, "learning_rate": 4.904357741311558e-05, "loss": 0.8354, "step": 2193 }, { "epoch": 2.499145299145299, "grad_norm": 0.15235522389411926, "learning_rate": 4.9042300599179014e-05, "loss": 0.7875, "step": 2194 }, { "epoch": 2.5002849002849, "grad_norm": 0.17035681009292603, "learning_rate": 4.904102295018723e-05, "loss": 0.8493, "step": 2195 }, { "epoch": 2.5014245014245016, "grad_norm": 0.17633633315563202, "learning_rate": 4.903974446618461e-05, "loss": 0.6833, "step": 2196 }, { "epoch": 2.5025641025641026, "grad_norm": 0.17860007286071777, "learning_rate": 4.903846514721555e-05, "loss": 0.7551, "step": 2197 }, { "epoch": 2.5037037037037035, "grad_norm": 0.15313498675823212, "learning_rate": 4.903718499332449e-05, "loss": 0.8472, "step": 2198 }, { "epoch": 2.504843304843305, "grad_norm": 0.15097908675670624, "learning_rate": 4.90359040045559e-05, "loss": 0.7295, "step": 2199 }, { "epoch": 2.505982905982906, "grad_norm": 0.14608563482761383, "learning_rate": 4.903462218095426e-05, "loss": 0.9297, "step": 2200 }, { "epoch": 2.5071225071225074, "grad_norm": 0.1348266899585724, "learning_rate": 4.903333952256409e-05, "loss": 0.9039, "step": 2201 }, { "epoch": 2.5082621082621084, "grad_norm": 0.12180972844362259, "learning_rate": 4.903205602942996e-05, "loss": 0.9295, "step": 2202 }, { "epoch": 2.5094017094017094, "grad_norm": 0.160002663731575, "learning_rate": 4.903077170159642e-05, "loss": 0.7332, "step": 2203 }, { "epoch": 2.510541310541311, "grad_norm": 0.1406853049993515, "learning_rate": 4.90294865391081e-05, "loss": 0.7723, "step": 2204 }, { "epoch": 2.511680911680912, "grad_norm": 0.1374700516462326, "learning_rate": 4.902820054200964e-05, "loss": 0.9844, "step": 2205 }, { "epoch": 2.5128205128205128, "grad_norm": 0.13007374107837677, "learning_rate": 4.902691371034569e-05, "loss": 0.9031, "step": 2206 }, { "epoch": 2.513960113960114, "grad_norm": 0.17269563674926758, "learning_rate": 4.9025626044160955e-05, "loss": 0.9266, "step": 2207 }, { "epoch": 2.515099715099715, "grad_norm": 0.13753359019756317, "learning_rate": 4.902433754350015e-05, "loss": 0.9533, "step": 2208 }, { "epoch": 2.516239316239316, "grad_norm": 0.1347282975912094, "learning_rate": 4.902304820840804e-05, "loss": 0.9467, "step": 2209 }, { "epoch": 2.5173789173789176, "grad_norm": 0.14514537155628204, "learning_rate": 4.902175803892941e-05, "loss": 0.8664, "step": 2210 }, { "epoch": 2.5185185185185186, "grad_norm": 0.14554670453071594, "learning_rate": 4.9020467035109054e-05, "loss": 0.8018, "step": 2211 }, { "epoch": 2.5196581196581196, "grad_norm": 0.1244606077671051, "learning_rate": 4.9019175196991815e-05, "loss": 1.0697, "step": 2212 }, { "epoch": 2.520797720797721, "grad_norm": 0.16238799691200256, "learning_rate": 4.901788252462258e-05, "loss": 0.7914, "step": 2213 }, { "epoch": 2.521937321937322, "grad_norm": 0.1343729943037033, "learning_rate": 4.901658901804623e-05, "loss": 0.8722, "step": 2214 }, { "epoch": 2.523076923076923, "grad_norm": 0.12723635137081146, "learning_rate": 4.90152946773077e-05, "loss": 0.8355, "step": 2215 }, { "epoch": 2.5242165242165244, "grad_norm": 0.14465561509132385, "learning_rate": 4.901399950245194e-05, "loss": 0.8795, "step": 2216 }, { "epoch": 2.5253561253561254, "grad_norm": 0.14480602741241455, "learning_rate": 4.9012703493523944e-05, "loss": 0.9399, "step": 2217 }, { "epoch": 2.5264957264957264, "grad_norm": 0.16589534282684326, "learning_rate": 4.9011406650568716e-05, "loss": 0.6992, "step": 2218 }, { "epoch": 2.527635327635328, "grad_norm": 0.1654810607433319, "learning_rate": 4.901010897363131e-05, "loss": 0.8627, "step": 2219 }, { "epoch": 2.528774928774929, "grad_norm": 0.12974773347377777, "learning_rate": 4.9008810462756784e-05, "loss": 0.8778, "step": 2220 }, { "epoch": 2.52991452991453, "grad_norm": 0.1594112366437912, "learning_rate": 4.9007511117990244e-05, "loss": 0.7756, "step": 2221 }, { "epoch": 2.5310541310541312, "grad_norm": 0.14981664717197418, "learning_rate": 4.900621093937683e-05, "loss": 0.9099, "step": 2222 }, { "epoch": 2.5321937321937322, "grad_norm": 0.12964390218257904, "learning_rate": 4.900490992696169e-05, "loss": 0.9585, "step": 2223 }, { "epoch": 2.533333333333333, "grad_norm": 0.16134761273860931, "learning_rate": 4.900360808079001e-05, "loss": 0.8194, "step": 2224 }, { "epoch": 2.5344729344729346, "grad_norm": 0.14627186954021454, "learning_rate": 4.900230540090701e-05, "loss": 0.999, "step": 2225 }, { "epoch": 2.5356125356125356, "grad_norm": 0.1490875780582428, "learning_rate": 4.9001001887357946e-05, "loss": 0.878, "step": 2226 }, { "epoch": 2.5367521367521366, "grad_norm": 0.1640450656414032, "learning_rate": 4.8999697540188075e-05, "loss": 0.7422, "step": 2227 }, { "epoch": 2.537891737891738, "grad_norm": 0.16059789061546326, "learning_rate": 4.8998392359442714e-05, "loss": 0.8303, "step": 2228 }, { "epoch": 2.539031339031339, "grad_norm": 0.14292532205581665, "learning_rate": 4.899708634516719e-05, "loss": 0.8758, "step": 2229 }, { "epoch": 2.54017094017094, "grad_norm": 0.15925559401512146, "learning_rate": 4.899577949740687e-05, "loss": 0.7378, "step": 2230 }, { "epoch": 2.5413105413105415, "grad_norm": 0.1324724704027176, "learning_rate": 4.899447181620713e-05, "loss": 0.8752, "step": 2231 }, { "epoch": 2.5424501424501424, "grad_norm": 0.12038611620664597, "learning_rate": 4.89931633016134e-05, "loss": 1.085, "step": 2232 }, { "epoch": 2.5435897435897434, "grad_norm": 0.16954825818538666, "learning_rate": 4.899185395367113e-05, "loss": 0.8512, "step": 2233 }, { "epoch": 2.544729344729345, "grad_norm": 0.13975279033184052, "learning_rate": 4.899054377242579e-05, "loss": 0.8634, "step": 2234 }, { "epoch": 2.545868945868946, "grad_norm": 0.15176747739315033, "learning_rate": 4.8989232757922895e-05, "loss": 0.6961, "step": 2235 }, { "epoch": 2.547008547008547, "grad_norm": 0.16036196053028107, "learning_rate": 4.898792091020798e-05, "loss": 0.6815, "step": 2236 }, { "epoch": 2.5481481481481483, "grad_norm": 0.14126650989055634, "learning_rate": 4.89866082293266e-05, "loss": 0.7721, "step": 2237 }, { "epoch": 2.5492877492877493, "grad_norm": 0.14795511960983276, "learning_rate": 4.898529471532435e-05, "loss": 0.756, "step": 2238 }, { "epoch": 2.5504273504273502, "grad_norm": 0.1589314192533493, "learning_rate": 4.898398036824686e-05, "loss": 0.8141, "step": 2239 }, { "epoch": 2.5515669515669517, "grad_norm": 0.12089326977729797, "learning_rate": 4.898266518813977e-05, "loss": 1.0512, "step": 2240 }, { "epoch": 2.5527065527065527, "grad_norm": 0.15464527904987335, "learning_rate": 4.8981349175048776e-05, "loss": 0.8214, "step": 2241 }, { "epoch": 2.5538461538461537, "grad_norm": 0.13419292867183685, "learning_rate": 4.898003232901956e-05, "loss": 0.8224, "step": 2242 }, { "epoch": 2.554985754985755, "grad_norm": 0.14059868454933167, "learning_rate": 4.897871465009789e-05, "loss": 0.7849, "step": 2243 }, { "epoch": 2.556125356125356, "grad_norm": 0.14682996273040771, "learning_rate": 4.897739613832951e-05, "loss": 0.9846, "step": 2244 }, { "epoch": 2.557264957264957, "grad_norm": 0.15590307116508484, "learning_rate": 4.897607679376023e-05, "loss": 0.9279, "step": 2245 }, { "epoch": 2.5584045584045585, "grad_norm": 0.16817530989646912, "learning_rate": 4.897475661643586e-05, "loss": 0.7257, "step": 2246 }, { "epoch": 2.5595441595441595, "grad_norm": 0.12491889297962189, "learning_rate": 4.897343560640227e-05, "loss": 0.8916, "step": 2247 }, { "epoch": 2.5606837606837605, "grad_norm": 0.14887359738349915, "learning_rate": 4.897211376370533e-05, "loss": 0.8814, "step": 2248 }, { "epoch": 2.561823361823362, "grad_norm": 0.17256565392017365, "learning_rate": 4.8970791088390955e-05, "loss": 0.7636, "step": 2249 }, { "epoch": 2.562962962962963, "grad_norm": 0.12653836607933044, "learning_rate": 4.896946758050509e-05, "loss": 1.0161, "step": 2250 }, { "epoch": 2.564102564102564, "grad_norm": 0.142549067735672, "learning_rate": 4.89681432400937e-05, "loss": 0.9215, "step": 2251 }, { "epoch": 2.5652421652421653, "grad_norm": 0.14559660851955414, "learning_rate": 4.896681806720278e-05, "loss": 0.8567, "step": 2252 }, { "epoch": 2.5663817663817663, "grad_norm": 0.14624235033988953, "learning_rate": 4.896549206187836e-05, "loss": 0.8393, "step": 2253 }, { "epoch": 2.5675213675213673, "grad_norm": 0.14064151048660278, "learning_rate": 4.896416522416649e-05, "loss": 0.8965, "step": 2254 }, { "epoch": 2.5686609686609687, "grad_norm": 0.13081389665603638, "learning_rate": 4.8962837554113266e-05, "loss": 0.8549, "step": 2255 }, { "epoch": 2.5698005698005697, "grad_norm": 0.11787986010313034, "learning_rate": 4.89615090517648e-05, "loss": 0.922, "step": 2256 }, { "epoch": 2.5709401709401707, "grad_norm": 0.10941874980926514, "learning_rate": 4.896017971716722e-05, "loss": 0.881, "step": 2257 }, { "epoch": 2.572079772079772, "grad_norm": 0.16250310838222504, "learning_rate": 4.8958849550366715e-05, "loss": 0.8528, "step": 2258 }, { "epoch": 2.573219373219373, "grad_norm": 0.15323151648044586, "learning_rate": 4.895751855140947e-05, "loss": 0.9175, "step": 2259 }, { "epoch": 2.574358974358974, "grad_norm": 0.16868767142295837, "learning_rate": 4.895618672034173e-05, "loss": 0.7577, "step": 2260 }, { "epoch": 2.5754985754985755, "grad_norm": 0.14792990684509277, "learning_rate": 4.8954854057209746e-05, "loss": 0.9471, "step": 2261 }, { "epoch": 2.5766381766381765, "grad_norm": 0.1616770476102829, "learning_rate": 4.89535205620598e-05, "loss": 0.7875, "step": 2262 }, { "epoch": 2.5777777777777775, "grad_norm": 0.14625534415245056, "learning_rate": 4.895218623493821e-05, "loss": 0.8225, "step": 2263 }, { "epoch": 2.578917378917379, "grad_norm": 0.1452033966779709, "learning_rate": 4.895085107589133e-05, "loss": 0.9115, "step": 2264 }, { "epoch": 2.58005698005698, "grad_norm": 0.14346660673618317, "learning_rate": 4.8949515084965527e-05, "loss": 0.8379, "step": 2265 }, { "epoch": 2.5811965811965814, "grad_norm": 0.17461304366588593, "learning_rate": 4.89481782622072e-05, "loss": 0.7311, "step": 2266 }, { "epoch": 2.5823361823361823, "grad_norm": 0.13005805015563965, "learning_rate": 4.894684060766278e-05, "loss": 1.0409, "step": 2267 }, { "epoch": 2.5834757834757833, "grad_norm": 0.14587563276290894, "learning_rate": 4.894550212137874e-05, "loss": 1.0037, "step": 2268 }, { "epoch": 2.5846153846153848, "grad_norm": 0.13456064462661743, "learning_rate": 4.8944162803401556e-05, "loss": 0.8347, "step": 2269 }, { "epoch": 2.5857549857549857, "grad_norm": 0.13639184832572937, "learning_rate": 4.8942822653777754e-05, "loss": 0.9492, "step": 2270 }, { "epoch": 2.5868945868945867, "grad_norm": 0.1436249017715454, "learning_rate": 4.894148167255388e-05, "loss": 0.8732, "step": 2271 }, { "epoch": 2.588034188034188, "grad_norm": 0.1270371973514557, "learning_rate": 4.89401398597765e-05, "loss": 0.8511, "step": 2272 }, { "epoch": 2.589173789173789, "grad_norm": 0.14852432906627655, "learning_rate": 4.8938797215492234e-05, "loss": 0.8033, "step": 2273 }, { "epoch": 2.5903133903133906, "grad_norm": 0.1301039159297943, "learning_rate": 4.893745373974771e-05, "loss": 0.976, "step": 2274 }, { "epoch": 2.5914529914529916, "grad_norm": 0.11183959245681763, "learning_rate": 4.8936109432589584e-05, "loss": 0.9511, "step": 2275 }, { "epoch": 2.5925925925925926, "grad_norm": 0.12877728044986725, "learning_rate": 4.8934764294064556e-05, "loss": 1.0287, "step": 2276 }, { "epoch": 2.593732193732194, "grad_norm": 0.13311651349067688, "learning_rate": 4.893341832421934e-05, "loss": 1.0287, "step": 2277 }, { "epoch": 2.594871794871795, "grad_norm": 0.1323351114988327, "learning_rate": 4.893207152310069e-05, "loss": 0.9303, "step": 2278 }, { "epoch": 2.596011396011396, "grad_norm": 0.14124305546283722, "learning_rate": 4.8930723890755384e-05, "loss": 0.8712, "step": 2279 }, { "epoch": 2.5971509971509974, "grad_norm": 0.1442643404006958, "learning_rate": 4.892937542723023e-05, "loss": 0.9544, "step": 2280 }, { "epoch": 2.5982905982905984, "grad_norm": 0.15250055491924286, "learning_rate": 4.8928026132572055e-05, "loss": 0.9108, "step": 2281 }, { "epoch": 2.5994301994301994, "grad_norm": 0.17575474083423615, "learning_rate": 4.892667600682773e-05, "loss": 0.7062, "step": 2282 }, { "epoch": 2.600569800569801, "grad_norm": 0.17873618006706238, "learning_rate": 4.8925325050044156e-05, "loss": 0.6816, "step": 2283 }, { "epoch": 2.601709401709402, "grad_norm": 0.12445148080587387, "learning_rate": 4.892397326226824e-05, "loss": 0.95, "step": 2284 }, { "epoch": 2.602849002849003, "grad_norm": 0.1586519330739975, "learning_rate": 4.892262064354695e-05, "loss": 0.7746, "step": 2285 }, { "epoch": 2.603988603988604, "grad_norm": 0.1753976196050644, "learning_rate": 4.8921267193927257e-05, "loss": 0.676, "step": 2286 }, { "epoch": 2.605128205128205, "grad_norm": 0.12897807359695435, "learning_rate": 4.8919912913456165e-05, "loss": 0.8996, "step": 2287 }, { "epoch": 2.606267806267806, "grad_norm": 0.12569855153560638, "learning_rate": 4.891855780218072e-05, "loss": 1.0, "step": 2288 }, { "epoch": 2.6074074074074076, "grad_norm": 0.13972389698028564, "learning_rate": 4.891720186014799e-05, "loss": 0.8355, "step": 2289 }, { "epoch": 2.6085470085470086, "grad_norm": 0.17812113463878632, "learning_rate": 4.891584508740505e-05, "loss": 0.767, "step": 2290 }, { "epoch": 2.6096866096866096, "grad_norm": 0.17294801771640778, "learning_rate": 4.891448748399906e-05, "loss": 0.6727, "step": 2291 }, { "epoch": 2.610826210826211, "grad_norm": 0.15305830538272858, "learning_rate": 4.891312904997715e-05, "loss": 0.8055, "step": 2292 }, { "epoch": 2.611965811965812, "grad_norm": 0.14757372438907623, "learning_rate": 4.891176978538651e-05, "loss": 0.9009, "step": 2293 }, { "epoch": 2.613105413105413, "grad_norm": 0.1358184814453125, "learning_rate": 4.891040969027434e-05, "loss": 0.9646, "step": 2294 }, { "epoch": 2.6142450142450144, "grad_norm": 0.15509755909442902, "learning_rate": 4.890904876468789e-05, "loss": 0.885, "step": 2295 }, { "epoch": 2.6153846153846154, "grad_norm": 0.16125355660915375, "learning_rate": 4.890768700867443e-05, "loss": 0.8014, "step": 2296 }, { "epoch": 2.6165242165242164, "grad_norm": 0.13600467145442963, "learning_rate": 4.8906324422281244e-05, "loss": 0.9259, "step": 2297 }, { "epoch": 2.617663817663818, "grad_norm": 0.14281436800956726, "learning_rate": 4.890496100555567e-05, "loss": 0.9063, "step": 2298 }, { "epoch": 2.618803418803419, "grad_norm": 0.20354962348937988, "learning_rate": 4.890359675854507e-05, "loss": 0.6094, "step": 2299 }, { "epoch": 2.61994301994302, "grad_norm": 0.1543664038181305, "learning_rate": 4.8902231681296815e-05, "loss": 0.8462, "step": 2300 }, { "epoch": 2.6210826210826212, "grad_norm": 0.13549914956092834, "learning_rate": 4.890086577385832e-05, "loss": 0.9421, "step": 2301 }, { "epoch": 2.6222222222222222, "grad_norm": 0.14352764189243317, "learning_rate": 4.889949903627704e-05, "loss": 0.9357, "step": 2302 }, { "epoch": 2.623361823361823, "grad_norm": 0.13328053057193756, "learning_rate": 4.889813146860042e-05, "loss": 0.9461, "step": 2303 }, { "epoch": 2.6245014245014247, "grad_norm": 0.1606045663356781, "learning_rate": 4.8896763070875985e-05, "loss": 0.8631, "step": 2304 }, { "epoch": 2.6256410256410256, "grad_norm": 0.1382226049900055, "learning_rate": 4.8895393843151246e-05, "loss": 0.9991, "step": 2305 }, { "epoch": 2.6267806267806266, "grad_norm": 0.14316731691360474, "learning_rate": 4.889402378547376e-05, "loss": 0.7862, "step": 2306 }, { "epoch": 2.627920227920228, "grad_norm": 0.14806653559207916, "learning_rate": 4.889265289789113e-05, "loss": 0.8859, "step": 2307 }, { "epoch": 2.629059829059829, "grad_norm": 0.14570772647857666, "learning_rate": 4.889128118045096e-05, "loss": 0.9522, "step": 2308 }, { "epoch": 2.63019943019943, "grad_norm": 0.1557767391204834, "learning_rate": 4.8889908633200885e-05, "loss": 0.8091, "step": 2309 }, { "epoch": 2.6313390313390315, "grad_norm": 0.13151291012763977, "learning_rate": 4.888853525618859e-05, "loss": 0.935, "step": 2310 }, { "epoch": 2.6324786324786325, "grad_norm": 0.133224755525589, "learning_rate": 4.888716104946178e-05, "loss": 0.9327, "step": 2311 }, { "epoch": 2.6336182336182334, "grad_norm": 0.1351376473903656, "learning_rate": 4.888578601306817e-05, "loss": 0.9608, "step": 2312 }, { "epoch": 2.634757834757835, "grad_norm": 0.13731828331947327, "learning_rate": 4.888441014705553e-05, "loss": 0.9103, "step": 2313 }, { "epoch": 2.635897435897436, "grad_norm": 0.1555176079273224, "learning_rate": 4.888303345147163e-05, "loss": 0.7599, "step": 2314 }, { "epoch": 2.637037037037037, "grad_norm": 0.14736001193523407, "learning_rate": 4.888165592636431e-05, "loss": 0.8739, "step": 2315 }, { "epoch": 2.6381766381766383, "grad_norm": 0.1545141190290451, "learning_rate": 4.88802775717814e-05, "loss": 0.7775, "step": 2316 }, { "epoch": 2.6393162393162393, "grad_norm": 0.1417769193649292, "learning_rate": 4.887889838777078e-05, "loss": 0.7407, "step": 2317 }, { "epoch": 2.6404558404558403, "grad_norm": 0.13049474358558655, "learning_rate": 4.887751837438036e-05, "loss": 0.8846, "step": 2318 }, { "epoch": 2.6415954415954417, "grad_norm": 0.13132892549037933, "learning_rate": 4.887613753165805e-05, "loss": 0.8925, "step": 2319 }, { "epoch": 2.6427350427350427, "grad_norm": 0.16289007663726807, "learning_rate": 4.887475585965183e-05, "loss": 0.8932, "step": 2320 }, { "epoch": 2.6438746438746437, "grad_norm": 0.1573866456747055, "learning_rate": 4.887337335840969e-05, "loss": 0.9487, "step": 2321 }, { "epoch": 2.645014245014245, "grad_norm": 0.1315973550081253, "learning_rate": 4.887199002797963e-05, "loss": 0.981, "step": 2322 }, { "epoch": 2.646153846153846, "grad_norm": 0.14866186678409576, "learning_rate": 4.8870605868409714e-05, "loss": 0.7045, "step": 2323 }, { "epoch": 2.647293447293447, "grad_norm": 0.16348348557949066, "learning_rate": 4.8869220879748014e-05, "loss": 0.7777, "step": 2324 }, { "epoch": 2.6484330484330485, "grad_norm": 0.13641276955604553, "learning_rate": 4.8867835062042625e-05, "loss": 0.8961, "step": 2325 }, { "epoch": 2.6495726495726495, "grad_norm": 0.1564176380634308, "learning_rate": 4.886644841534169e-05, "loss": 0.9032, "step": 2326 }, { "epoch": 2.6507122507122505, "grad_norm": 0.15130561590194702, "learning_rate": 4.8865060939693376e-05, "loss": 0.7571, "step": 2327 }, { "epoch": 2.651851851851852, "grad_norm": 0.1388384997844696, "learning_rate": 4.8863672635145854e-05, "loss": 0.9121, "step": 2328 }, { "epoch": 2.652991452991453, "grad_norm": 0.12795355916023254, "learning_rate": 4.886228350174736e-05, "loss": 0.9148, "step": 2329 }, { "epoch": 2.654131054131054, "grad_norm": 0.14797872304916382, "learning_rate": 4.886089353954615e-05, "loss": 0.7561, "step": 2330 }, { "epoch": 2.6552706552706553, "grad_norm": 0.15494829416275024, "learning_rate": 4.885950274859048e-05, "loss": 0.7004, "step": 2331 }, { "epoch": 2.6564102564102563, "grad_norm": 0.12286999076604843, "learning_rate": 4.885811112892866e-05, "loss": 0.8983, "step": 2332 }, { "epoch": 2.6575498575498573, "grad_norm": 0.15718288719654083, "learning_rate": 4.8856718680609044e-05, "loss": 0.7651, "step": 2333 }, { "epoch": 2.6586894586894587, "grad_norm": 0.1449444442987442, "learning_rate": 4.885532540367997e-05, "loss": 0.9286, "step": 2334 }, { "epoch": 2.6598290598290597, "grad_norm": 0.1783261001110077, "learning_rate": 4.8853931298189846e-05, "loss": 0.7188, "step": 2335 }, { "epoch": 2.6609686609686607, "grad_norm": 0.14908367395401, "learning_rate": 4.885253636418709e-05, "loss": 0.8957, "step": 2336 }, { "epoch": 2.662108262108262, "grad_norm": 0.15540620684623718, "learning_rate": 4.885114060172016e-05, "loss": 0.9731, "step": 2337 }, { "epoch": 2.663247863247863, "grad_norm": 0.17368145287036896, "learning_rate": 4.884974401083752e-05, "loss": 0.8083, "step": 2338 }, { "epoch": 2.6643874643874645, "grad_norm": 0.1426796168088913, "learning_rate": 4.884834659158768e-05, "loss": 0.8908, "step": 2339 }, { "epoch": 2.6655270655270655, "grad_norm": 0.1870046705007553, "learning_rate": 4.884694834401918e-05, "loss": 0.8589, "step": 2340 }, { "epoch": 2.6666666666666665, "grad_norm": 0.18150825798511505, "learning_rate": 4.884554926818058e-05, "loss": 0.6171, "step": 2341 }, { "epoch": 2.667806267806268, "grad_norm": 0.15253104269504547, "learning_rate": 4.8844149364120487e-05, "loss": 0.7087, "step": 2342 }, { "epoch": 2.668945868945869, "grad_norm": 0.14413723349571228, "learning_rate": 4.884274863188751e-05, "loss": 0.9708, "step": 2343 }, { "epoch": 2.67008547008547, "grad_norm": 0.1447208970785141, "learning_rate": 4.884134707153031e-05, "loss": 1.0512, "step": 2344 }, { "epoch": 2.6712250712250714, "grad_norm": 0.11699206382036209, "learning_rate": 4.883994468309756e-05, "loss": 0.9276, "step": 2345 }, { "epoch": 2.6723646723646723, "grad_norm": 0.13071955740451813, "learning_rate": 4.883854146663797e-05, "loss": 0.8017, "step": 2346 }, { "epoch": 2.6735042735042738, "grad_norm": 0.1524588018655777, "learning_rate": 4.8837137422200274e-05, "loss": 0.851, "step": 2347 }, { "epoch": 2.6746438746438748, "grad_norm": 0.15947096049785614, "learning_rate": 4.8835732549833255e-05, "loss": 0.6179, "step": 2348 }, { "epoch": 2.6757834757834758, "grad_norm": 0.13846081495285034, "learning_rate": 4.8834326849585686e-05, "loss": 0.7371, "step": 2349 }, { "epoch": 2.676923076923077, "grad_norm": 0.12069350481033325, "learning_rate": 4.8832920321506405e-05, "loss": 1.0292, "step": 2350 }, { "epoch": 2.678062678062678, "grad_norm": 0.14449162781238556, "learning_rate": 4.883151296564425e-05, "loss": 0.9641, "step": 2351 }, { "epoch": 2.679202279202279, "grad_norm": 0.12880225479602814, "learning_rate": 4.883010478204813e-05, "loss": 1.0, "step": 2352 }, { "epoch": 2.6803418803418806, "grad_norm": 0.18075476586818695, "learning_rate": 4.882869577076693e-05, "loss": 0.6857, "step": 2353 }, { "epoch": 2.6814814814814816, "grad_norm": 0.15963158011436462, "learning_rate": 4.88272859318496e-05, "loss": 0.807, "step": 2354 }, { "epoch": 2.6826210826210826, "grad_norm": 0.16257156431674957, "learning_rate": 4.88258752653451e-05, "loss": 0.7622, "step": 2355 }, { "epoch": 2.683760683760684, "grad_norm": 0.13691702485084534, "learning_rate": 4.8824463771302434e-05, "loss": 0.9084, "step": 2356 }, { "epoch": 2.684900284900285, "grad_norm": 0.17475008964538574, "learning_rate": 4.8823051449770615e-05, "loss": 0.7889, "step": 2357 }, { "epoch": 2.686039886039886, "grad_norm": 0.13000094890594482, "learning_rate": 4.882163830079872e-05, "loss": 0.8508, "step": 2358 }, { "epoch": 2.6871794871794874, "grad_norm": 0.16077207028865814, "learning_rate": 4.88202243244358e-05, "loss": 0.757, "step": 2359 }, { "epoch": 2.6883190883190884, "grad_norm": 0.1422513872385025, "learning_rate": 4.881880952073099e-05, "loss": 0.9493, "step": 2360 }, { "epoch": 2.6894586894586894, "grad_norm": 0.14238572120666504, "learning_rate": 4.881739388973343e-05, "loss": 0.9475, "step": 2361 }, { "epoch": 2.690598290598291, "grad_norm": 0.12992393970489502, "learning_rate": 4.881597743149228e-05, "loss": 1.0139, "step": 2362 }, { "epoch": 2.691737891737892, "grad_norm": 0.13528192043304443, "learning_rate": 4.881456014605674e-05, "loss": 0.8092, "step": 2363 }, { "epoch": 2.692877492877493, "grad_norm": 0.16100099682807922, "learning_rate": 4.881314203347603e-05, "loss": 0.7825, "step": 2364 }, { "epoch": 2.694017094017094, "grad_norm": 0.14202088117599487, "learning_rate": 4.8811723093799414e-05, "loss": 0.7644, "step": 2365 }, { "epoch": 2.695156695156695, "grad_norm": 0.15252763032913208, "learning_rate": 4.881030332707617e-05, "loss": 0.8202, "step": 2366 }, { "epoch": 2.696296296296296, "grad_norm": 0.1721193492412567, "learning_rate": 4.880888273335562e-05, "loss": 0.7333, "step": 2367 }, { "epoch": 2.6974358974358976, "grad_norm": 0.15455088019371033, "learning_rate": 4.8807461312687095e-05, "loss": 0.8263, "step": 2368 }, { "epoch": 2.6985754985754986, "grad_norm": 0.15107551217079163, "learning_rate": 4.880603906511996e-05, "loss": 0.7473, "step": 2369 }, { "epoch": 2.6997150997150996, "grad_norm": 0.13747701048851013, "learning_rate": 4.880461599070363e-05, "loss": 0.8929, "step": 2370 }, { "epoch": 2.700854700854701, "grad_norm": 0.13977332413196564, "learning_rate": 4.8803192089487525e-05, "loss": 0.7478, "step": 2371 }, { "epoch": 2.701994301994302, "grad_norm": 0.15916401147842407, "learning_rate": 4.8801767361521096e-05, "loss": 0.838, "step": 2372 }, { "epoch": 2.703133903133903, "grad_norm": 0.15147262811660767, "learning_rate": 4.880034180685383e-05, "loss": 0.7628, "step": 2373 }, { "epoch": 2.7042735042735044, "grad_norm": 0.12499698996543884, "learning_rate": 4.8798915425535243e-05, "loss": 0.9577, "step": 2374 }, { "epoch": 2.7054131054131054, "grad_norm": 0.14667262136936188, "learning_rate": 4.8797488217614875e-05, "loss": 0.9405, "step": 2375 }, { "epoch": 2.7065527065527064, "grad_norm": 0.16255013644695282, "learning_rate": 4.87960601831423e-05, "loss": 0.8123, "step": 2376 }, { "epoch": 2.707692307692308, "grad_norm": 0.14595149457454681, "learning_rate": 4.879463132216712e-05, "loss": 0.9989, "step": 2377 }, { "epoch": 2.708831908831909, "grad_norm": 0.14499174058437347, "learning_rate": 4.879320163473895e-05, "loss": 0.8095, "step": 2378 }, { "epoch": 2.70997150997151, "grad_norm": 0.15754401683807373, "learning_rate": 4.879177112090746e-05, "loss": 0.9526, "step": 2379 }, { "epoch": 2.7111111111111112, "grad_norm": 0.17268238961696625, "learning_rate": 4.879033978072233e-05, "loss": 0.8242, "step": 2380 }, { "epoch": 2.7122507122507122, "grad_norm": 0.13873246312141418, "learning_rate": 4.878890761423327e-05, "loss": 0.9453, "step": 2381 }, { "epoch": 2.7133903133903132, "grad_norm": 0.1353233903646469, "learning_rate": 4.878747462149004e-05, "loss": 0.8778, "step": 2382 }, { "epoch": 2.7145299145299147, "grad_norm": 0.1560286432504654, "learning_rate": 4.878604080254239e-05, "loss": 0.8435, "step": 2383 }, { "epoch": 2.7156695156695156, "grad_norm": 0.15349486470222473, "learning_rate": 4.878460615744014e-05, "loss": 0.9696, "step": 2384 }, { "epoch": 2.7168091168091166, "grad_norm": 0.14637236297130585, "learning_rate": 4.87831706862331e-05, "loss": 0.771, "step": 2385 }, { "epoch": 2.717948717948718, "grad_norm": 0.1703229695558548, "learning_rate": 4.878173438897115e-05, "loss": 0.7469, "step": 2386 }, { "epoch": 2.719088319088319, "grad_norm": 0.14243018627166748, "learning_rate": 4.878029726570416e-05, "loss": 0.9683, "step": 2387 }, { "epoch": 2.72022792022792, "grad_norm": 0.17341654002666473, "learning_rate": 4.877885931648205e-05, "loss": 0.5926, "step": 2388 }, { "epoch": 2.7213675213675215, "grad_norm": 0.14507317543029785, "learning_rate": 4.877742054135476e-05, "loss": 0.8954, "step": 2389 }, { "epoch": 2.7225071225071225, "grad_norm": 0.13696393370628357, "learning_rate": 4.877598094037227e-05, "loss": 0.9731, "step": 2390 }, { "epoch": 2.7236467236467234, "grad_norm": 0.13453227281570435, "learning_rate": 4.877454051358458e-05, "loss": 1.0183, "step": 2391 }, { "epoch": 2.724786324786325, "grad_norm": 0.1507004052400589, "learning_rate": 4.877309926104172e-05, "loss": 0.8363, "step": 2392 }, { "epoch": 2.725925925925926, "grad_norm": 0.15319862961769104, "learning_rate": 4.877165718279374e-05, "loss": 0.7825, "step": 2393 }, { "epoch": 2.727065527065527, "grad_norm": 0.14467768371105194, "learning_rate": 4.8770214278890735e-05, "loss": 0.8576, "step": 2394 }, { "epoch": 2.7282051282051283, "grad_norm": 0.15961861610412598, "learning_rate": 4.8768770549382816e-05, "loss": 0.7549, "step": 2395 }, { "epoch": 2.7293447293447293, "grad_norm": 0.16250202059745789, "learning_rate": 4.876732599432014e-05, "loss": 0.7362, "step": 2396 }, { "epoch": 2.7304843304843303, "grad_norm": 0.12247171252965927, "learning_rate": 4.876588061375286e-05, "loss": 0.9879, "step": 2397 }, { "epoch": 2.7316239316239317, "grad_norm": 0.16196970641613007, "learning_rate": 4.87644344077312e-05, "loss": 0.7945, "step": 2398 }, { "epoch": 2.7327635327635327, "grad_norm": 0.17949151992797852, "learning_rate": 4.876298737630538e-05, "loss": 0.6306, "step": 2399 }, { "epoch": 2.7339031339031337, "grad_norm": 0.1515873372554779, "learning_rate": 4.8761539519525656e-05, "loss": 0.9867, "step": 2400 }, { "epoch": 2.735042735042735, "grad_norm": 0.17852823436260223, "learning_rate": 4.876009083744232e-05, "loss": 0.7344, "step": 2401 }, { "epoch": 2.736182336182336, "grad_norm": 0.1448134183883667, "learning_rate": 4.875864133010569e-05, "loss": 0.853, "step": 2402 }, { "epoch": 2.737321937321937, "grad_norm": 0.16220110654830933, "learning_rate": 4.875719099756612e-05, "loss": 0.6932, "step": 2403 }, { "epoch": 2.7384615384615385, "grad_norm": 0.14161759614944458, "learning_rate": 4.875573983987396e-05, "loss": 0.9736, "step": 2404 }, { "epoch": 2.7396011396011395, "grad_norm": 0.14874771237373352, "learning_rate": 4.8754287857079625e-05, "loss": 0.7235, "step": 2405 }, { "epoch": 2.7407407407407405, "grad_norm": 0.16446420550346375, "learning_rate": 4.875283504923356e-05, "loss": 0.6952, "step": 2406 }, { "epoch": 2.741880341880342, "grad_norm": 0.14252431690692902, "learning_rate": 4.875138141638621e-05, "loss": 0.8992, "step": 2407 }, { "epoch": 2.743019943019943, "grad_norm": 0.1775975525379181, "learning_rate": 4.874992695858806e-05, "loss": 0.7867, "step": 2408 }, { "epoch": 2.744159544159544, "grad_norm": 0.1353948414325714, "learning_rate": 4.874847167588964e-05, "loss": 0.9669, "step": 2409 }, { "epoch": 2.7452991452991453, "grad_norm": 0.13299725949764252, "learning_rate": 4.874701556834149e-05, "loss": 0.9093, "step": 2410 }, { "epoch": 2.7464387464387463, "grad_norm": 0.15264327824115753, "learning_rate": 4.874555863599418e-05, "loss": 0.8345, "step": 2411 }, { "epoch": 2.7475783475783477, "grad_norm": 0.14281661808490753, "learning_rate": 4.8744100878898326e-05, "loss": 0.8936, "step": 2412 }, { "epoch": 2.7487179487179487, "grad_norm": 0.18340249359607697, "learning_rate": 4.874264229710454e-05, "loss": 0.629, "step": 2413 }, { "epoch": 2.7498575498575497, "grad_norm": 0.15715591609477997, "learning_rate": 4.8741182890663503e-05, "loss": 0.7674, "step": 2414 }, { "epoch": 2.750997150997151, "grad_norm": 0.1819486767053604, "learning_rate": 4.8739722659625895e-05, "loss": 0.6718, "step": 2415 }, { "epoch": 2.752136752136752, "grad_norm": 0.1454288214445114, "learning_rate": 4.873826160404244e-05, "loss": 0.8572, "step": 2416 }, { "epoch": 2.753276353276353, "grad_norm": 0.1301049143075943, "learning_rate": 4.873679972396387e-05, "loss": 0.9876, "step": 2417 }, { "epoch": 2.7544159544159545, "grad_norm": 0.15237055718898773, "learning_rate": 4.8735337019440973e-05, "loss": 0.7469, "step": 2418 }, { "epoch": 2.7555555555555555, "grad_norm": 0.12584395706653595, "learning_rate": 4.873387349052455e-05, "loss": 0.9856, "step": 2419 }, { "epoch": 2.756695156695157, "grad_norm": 0.16024748980998993, "learning_rate": 4.8732409137265435e-05, "loss": 0.9009, "step": 2420 }, { "epoch": 2.757834757834758, "grad_norm": 0.13635265827178955, "learning_rate": 4.873094395971448e-05, "loss": 0.943, "step": 2421 }, { "epoch": 2.758974358974359, "grad_norm": 0.13709954917430878, "learning_rate": 4.872947795792258e-05, "loss": 0.8623, "step": 2422 }, { "epoch": 2.7601139601139604, "grad_norm": 0.11856869608163834, "learning_rate": 4.872801113194066e-05, "loss": 0.9851, "step": 2423 }, { "epoch": 2.7612535612535614, "grad_norm": 0.14636924862861633, "learning_rate": 4.8726543481819655e-05, "loss": 0.6853, "step": 2424 }, { "epoch": 2.7623931623931623, "grad_norm": 0.15100698173046112, "learning_rate": 4.8725075007610554e-05, "loss": 0.9399, "step": 2425 }, { "epoch": 2.763532763532764, "grad_norm": 0.14547982811927795, "learning_rate": 4.8723605709364346e-05, "loss": 0.9165, "step": 2426 }, { "epoch": 2.7646723646723648, "grad_norm": 0.14002835750579834, "learning_rate": 4.8722135587132076e-05, "loss": 0.7849, "step": 2427 }, { "epoch": 2.7658119658119658, "grad_norm": 0.15732434391975403, "learning_rate": 4.87206646409648e-05, "loss": 0.7952, "step": 2428 }, { "epoch": 2.766951566951567, "grad_norm": 0.1421229988336563, "learning_rate": 4.871919287091361e-05, "loss": 0.8011, "step": 2429 }, { "epoch": 2.768091168091168, "grad_norm": 0.16403017938137054, "learning_rate": 4.8717720277029624e-05, "loss": 0.7583, "step": 2430 }, { "epoch": 2.769230769230769, "grad_norm": 0.1436501145362854, "learning_rate": 4.8716246859363984e-05, "loss": 0.7588, "step": 2431 }, { "epoch": 2.7703703703703706, "grad_norm": 0.18933072686195374, "learning_rate": 4.8714772617967876e-05, "loss": 0.6442, "step": 2432 }, { "epoch": 2.7715099715099716, "grad_norm": 0.14586688578128815, "learning_rate": 4.8713297552892494e-05, "loss": 0.8444, "step": 2433 }, { "epoch": 2.7726495726495726, "grad_norm": 0.14224807918071747, "learning_rate": 4.871182166418908e-05, "loss": 0.953, "step": 2434 }, { "epoch": 2.773789173789174, "grad_norm": 0.1479322910308838, "learning_rate": 4.8710344951908895e-05, "loss": 0.8418, "step": 2435 }, { "epoch": 2.774928774928775, "grad_norm": 0.1636160910129547, "learning_rate": 4.870886741610322e-05, "loss": 0.6788, "step": 2436 }, { "epoch": 2.776068376068376, "grad_norm": 0.12679874897003174, "learning_rate": 4.8707389056823385e-05, "loss": 0.8821, "step": 2437 }, { "epoch": 2.7772079772079774, "grad_norm": 0.14184336364269257, "learning_rate": 4.8705909874120724e-05, "loss": 0.8595, "step": 2438 }, { "epoch": 2.7783475783475784, "grad_norm": 0.15984278917312622, "learning_rate": 4.870442986804663e-05, "loss": 0.7673, "step": 2439 }, { "epoch": 2.7794871794871794, "grad_norm": 0.14385563135147095, "learning_rate": 4.87029490386525e-05, "loss": 0.9158, "step": 2440 }, { "epoch": 2.780626780626781, "grad_norm": 0.11938353627920151, "learning_rate": 4.870146738598976e-05, "loss": 1.0238, "step": 2441 }, { "epoch": 2.781766381766382, "grad_norm": 0.1400216519832611, "learning_rate": 4.869998491010988e-05, "loss": 0.7232, "step": 2442 }, { "epoch": 2.782905982905983, "grad_norm": 0.13375689089298248, "learning_rate": 4.869850161106435e-05, "loss": 0.837, "step": 2443 }, { "epoch": 2.784045584045584, "grad_norm": 0.14787495136260986, "learning_rate": 4.869701748890469e-05, "loss": 0.7918, "step": 2444 }, { "epoch": 2.785185185185185, "grad_norm": 0.1406019628047943, "learning_rate": 4.8695532543682445e-05, "loss": 0.7858, "step": 2445 }, { "epoch": 2.786324786324786, "grad_norm": 0.14257314801216125, "learning_rate": 4.869404677544919e-05, "loss": 0.9071, "step": 2446 }, { "epoch": 2.7874643874643876, "grad_norm": 0.22361870110034943, "learning_rate": 4.869256018425653e-05, "loss": 0.5754, "step": 2447 }, { "epoch": 2.7886039886039886, "grad_norm": 0.1851426213979721, "learning_rate": 4.8691072770156104e-05, "loss": 0.6529, "step": 2448 }, { "epoch": 2.7897435897435896, "grad_norm": 0.15292732417583466, "learning_rate": 4.868958453319957e-05, "loss": 0.7769, "step": 2449 }, { "epoch": 2.790883190883191, "grad_norm": 0.1640138179063797, "learning_rate": 4.8688095473438615e-05, "loss": 0.816, "step": 2450 }, { "epoch": 2.792022792022792, "grad_norm": 0.15426386892795563, "learning_rate": 4.868660559092496e-05, "loss": 0.843, "step": 2451 }, { "epoch": 2.793162393162393, "grad_norm": 0.13922430574893951, "learning_rate": 4.868511488571036e-05, "loss": 0.9024, "step": 2452 }, { "epoch": 2.7943019943019944, "grad_norm": 0.1422007977962494, "learning_rate": 4.8683623357846584e-05, "loss": 0.8869, "step": 2453 }, { "epoch": 2.7954415954415954, "grad_norm": 0.135456845164299, "learning_rate": 4.868213100738544e-05, "loss": 0.9832, "step": 2454 }, { "epoch": 2.7965811965811964, "grad_norm": 0.15272760391235352, "learning_rate": 4.8680637834378753e-05, "loss": 0.7435, "step": 2455 }, { "epoch": 2.797720797720798, "grad_norm": 0.12942877411842346, "learning_rate": 4.86791438388784e-05, "loss": 0.9887, "step": 2456 }, { "epoch": 2.798860398860399, "grad_norm": 0.13875113427639008, "learning_rate": 4.867764902093626e-05, "loss": 0.7868, "step": 2457 }, { "epoch": 2.8, "grad_norm": 0.15177272260189056, "learning_rate": 4.8676153380604265e-05, "loss": 0.8213, "step": 2458 }, { "epoch": 2.8011396011396013, "grad_norm": 0.1446572095155716, "learning_rate": 4.867465691793434e-05, "loss": 0.9024, "step": 2459 }, { "epoch": 2.8022792022792022, "grad_norm": 0.15868481993675232, "learning_rate": 4.867315963297848e-05, "loss": 0.9277, "step": 2460 }, { "epoch": 2.8034188034188032, "grad_norm": 0.15583594143390656, "learning_rate": 4.867166152578868e-05, "loss": 0.8648, "step": 2461 }, { "epoch": 2.8045584045584047, "grad_norm": 0.14443641901016235, "learning_rate": 4.867016259641698e-05, "loss": 0.8398, "step": 2462 }, { "epoch": 2.8056980056980056, "grad_norm": 0.1363164782524109, "learning_rate": 4.8668662844915446e-05, "loss": 1.1114, "step": 2463 }, { "epoch": 2.8068376068376066, "grad_norm": 0.15923745930194855, "learning_rate": 4.866716227133616e-05, "loss": 0.7173, "step": 2464 }, { "epoch": 2.807977207977208, "grad_norm": 0.16313041746616364, "learning_rate": 4.866566087573124e-05, "loss": 0.8958, "step": 2465 }, { "epoch": 2.809116809116809, "grad_norm": 0.14752578735351562, "learning_rate": 4.866415865815284e-05, "loss": 0.8703, "step": 2466 }, { "epoch": 2.81025641025641, "grad_norm": 0.1389155387878418, "learning_rate": 4.866265561865313e-05, "loss": 0.9084, "step": 2467 }, { "epoch": 2.8113960113960115, "grad_norm": 0.15107576549053192, "learning_rate": 4.866115175728432e-05, "loss": 0.8091, "step": 2468 }, { "epoch": 2.8125356125356125, "grad_norm": 0.16162735223770142, "learning_rate": 4.865964707409864e-05, "loss": 0.8045, "step": 2469 }, { "epoch": 2.8136752136752134, "grad_norm": 0.15423087775707245, "learning_rate": 4.865814156914835e-05, "loss": 0.8556, "step": 2470 }, { "epoch": 2.814814814814815, "grad_norm": 0.13887278735637665, "learning_rate": 4.8656635242485746e-05, "loss": 0.8468, "step": 2471 }, { "epoch": 2.815954415954416, "grad_norm": 0.14610813558101654, "learning_rate": 4.8655128094163146e-05, "loss": 0.8819, "step": 2472 }, { "epoch": 2.817094017094017, "grad_norm": 0.12121319770812988, "learning_rate": 4.86536201242329e-05, "loss": 0.8597, "step": 2473 }, { "epoch": 2.8182336182336183, "grad_norm": 0.15945690870285034, "learning_rate": 4.865211133274737e-05, "loss": 0.884, "step": 2474 }, { "epoch": 2.8193732193732193, "grad_norm": 0.14454999566078186, "learning_rate": 4.865060171975897e-05, "loss": 0.8596, "step": 2475 }, { "epoch": 2.8205128205128203, "grad_norm": 0.1749100536108017, "learning_rate": 4.864909128532014e-05, "loss": 0.8071, "step": 2476 }, { "epoch": 2.8216524216524217, "grad_norm": 0.12628749012947083, "learning_rate": 4.864758002948333e-05, "loss": 1.068, "step": 2477 }, { "epoch": 2.8227920227920227, "grad_norm": 0.15003350377082825, "learning_rate": 4.8646067952301034e-05, "loss": 0.8227, "step": 2478 }, { "epoch": 2.8239316239316237, "grad_norm": 0.12011383473873138, "learning_rate": 4.864455505382577e-05, "loss": 0.9069, "step": 2479 }, { "epoch": 2.825071225071225, "grad_norm": 0.14381088316440582, "learning_rate": 4.8643041334110087e-05, "loss": 0.947, "step": 2480 }, { "epoch": 2.826210826210826, "grad_norm": 0.2045334279537201, "learning_rate": 4.864152679320656e-05, "loss": 0.4873, "step": 2481 }, { "epoch": 2.827350427350427, "grad_norm": 0.12821900844573975, "learning_rate": 4.8640011431167795e-05, "loss": 0.9162, "step": 2482 }, { "epoch": 2.8284900284900285, "grad_norm": 0.14463935792446136, "learning_rate": 4.863849524804642e-05, "loss": 0.9374, "step": 2483 }, { "epoch": 2.8296296296296295, "grad_norm": 0.13995467126369476, "learning_rate": 4.86369782438951e-05, "loss": 1.0935, "step": 2484 }, { "epoch": 2.830769230769231, "grad_norm": 0.1604464054107666, "learning_rate": 4.863546041876653e-05, "loss": 0.6838, "step": 2485 }, { "epoch": 2.831908831908832, "grad_norm": 0.4254525899887085, "learning_rate": 4.863394177271342e-05, "loss": 0.8499, "step": 2486 }, { "epoch": 2.833048433048433, "grad_norm": 0.1413906067609787, "learning_rate": 4.863242230578851e-05, "loss": 0.8146, "step": 2487 }, { "epoch": 2.8341880341880343, "grad_norm": 0.14763469994068146, "learning_rate": 4.863090201804459e-05, "loss": 0.7791, "step": 2488 }, { "epoch": 2.8353276353276353, "grad_norm": 0.1795123964548111, "learning_rate": 4.8629380909534456e-05, "loss": 0.7705, "step": 2489 }, { "epoch": 2.8364672364672363, "grad_norm": 0.1430339515209198, "learning_rate": 4.862785898031094e-05, "loss": 0.9561, "step": 2490 }, { "epoch": 2.8376068376068377, "grad_norm": 0.192425936460495, "learning_rate": 4.862633623042691e-05, "loss": 0.591, "step": 2491 }, { "epoch": 2.8387464387464387, "grad_norm": 0.19948531687259674, "learning_rate": 4.862481265993525e-05, "loss": 0.7574, "step": 2492 }, { "epoch": 2.83988603988604, "grad_norm": 0.14906612038612366, "learning_rate": 4.8623288268888874e-05, "loss": 0.7663, "step": 2493 }, { "epoch": 2.841025641025641, "grad_norm": 0.13290870189666748, "learning_rate": 4.862176305734073e-05, "loss": 0.9403, "step": 2494 }, { "epoch": 2.842165242165242, "grad_norm": 0.1471358686685562, "learning_rate": 4.8620237025343795e-05, "loss": 0.8925, "step": 2495 }, { "epoch": 2.8433048433048436, "grad_norm": 0.1289387345314026, "learning_rate": 4.861871017295107e-05, "loss": 0.9856, "step": 2496 }, { "epoch": 2.8444444444444446, "grad_norm": 0.12943419814109802, "learning_rate": 4.86171825002156e-05, "loss": 0.9986, "step": 2497 }, { "epoch": 2.8455840455840455, "grad_norm": 0.14328914880752563, "learning_rate": 4.861565400719043e-05, "loss": 0.8184, "step": 2498 }, { "epoch": 2.846723646723647, "grad_norm": 0.17245805263519287, "learning_rate": 4.861412469392865e-05, "loss": 0.7762, "step": 2499 }, { "epoch": 2.847863247863248, "grad_norm": 0.13560296595096588, "learning_rate": 4.861259456048338e-05, "loss": 0.8769, "step": 2500 }, { "epoch": 2.849002849002849, "grad_norm": 0.15914316475391388, "learning_rate": 4.861106360690777e-05, "loss": 0.8049, "step": 2501 }, { "epoch": 2.8501424501424504, "grad_norm": 0.14240868389606476, "learning_rate": 4.8609531833254995e-05, "loss": 1.0095, "step": 2502 }, { "epoch": 2.8512820512820514, "grad_norm": 0.15167224407196045, "learning_rate": 4.860799923957824e-05, "loss": 0.9604, "step": 2503 }, { "epoch": 2.8524216524216524, "grad_norm": 0.13193781673908234, "learning_rate": 4.8606465825930755e-05, "loss": 1.0387, "step": 2504 }, { "epoch": 2.853561253561254, "grad_norm": 0.14292052388191223, "learning_rate": 4.86049315923658e-05, "loss": 0.8779, "step": 2505 }, { "epoch": 2.8547008547008548, "grad_norm": 0.1795503944158554, "learning_rate": 4.860339653893665e-05, "loss": 0.9948, "step": 2506 }, { "epoch": 2.8558404558404558, "grad_norm": 0.13853605091571808, "learning_rate": 4.860186066569663e-05, "loss": 0.8175, "step": 2507 }, { "epoch": 2.856980056980057, "grad_norm": 1.005756139755249, "learning_rate": 4.860032397269908e-05, "loss": 0.944, "step": 2508 }, { "epoch": 2.858119658119658, "grad_norm": 0.1676628142595291, "learning_rate": 4.859878645999738e-05, "loss": 0.7503, "step": 2509 }, { "epoch": 2.859259259259259, "grad_norm": 0.14670723676681519, "learning_rate": 4.859724812764493e-05, "loss": 0.7994, "step": 2510 }, { "epoch": 2.8603988603988606, "grad_norm": 0.13554325699806213, "learning_rate": 4.8595708975695166e-05, "loss": 0.8988, "step": 2511 }, { "epoch": 2.8615384615384616, "grad_norm": 0.15614831447601318, "learning_rate": 4.8594169004201526e-05, "loss": 0.8385, "step": 2512 }, { "epoch": 2.8626780626780626, "grad_norm": 0.15784631669521332, "learning_rate": 4.859262821321753e-05, "loss": 0.819, "step": 2513 }, { "epoch": 2.863817663817664, "grad_norm": 0.14411480724811554, "learning_rate": 4.859108660279667e-05, "loss": 0.9739, "step": 2514 }, { "epoch": 2.864957264957265, "grad_norm": 0.14265736937522888, "learning_rate": 4.8589544172992486e-05, "loss": 0.854, "step": 2515 }, { "epoch": 2.866096866096866, "grad_norm": 0.1601988822221756, "learning_rate": 4.858800092385857e-05, "loss": 0.9041, "step": 2516 }, { "epoch": 2.8672364672364674, "grad_norm": 0.1530003547668457, "learning_rate": 4.858645685544852e-05, "loss": 0.8412, "step": 2517 }, { "epoch": 2.8683760683760684, "grad_norm": 0.14619427919387817, "learning_rate": 4.858491196781595e-05, "loss": 1.0097, "step": 2518 }, { "epoch": 2.8695156695156694, "grad_norm": 0.15662996470928192, "learning_rate": 4.858336626101453e-05, "loss": 0.8877, "step": 2519 }, { "epoch": 2.870655270655271, "grad_norm": 0.13511283695697784, "learning_rate": 4.858181973509795e-05, "loss": 0.7647, "step": 2520 }, { "epoch": 2.871794871794872, "grad_norm": 0.14401453733444214, "learning_rate": 4.858027239011992e-05, "loss": 1.0041, "step": 2521 }, { "epoch": 2.872934472934473, "grad_norm": 0.13081257045269012, "learning_rate": 4.857872422613418e-05, "loss": 0.999, "step": 2522 }, { "epoch": 2.8740740740740742, "grad_norm": 0.13149163126945496, "learning_rate": 4.857717524319451e-05, "loss": 0.8075, "step": 2523 }, { "epoch": 2.875213675213675, "grad_norm": 0.13673201203346252, "learning_rate": 4.85756254413547e-05, "loss": 0.8693, "step": 2524 }, { "epoch": 2.876353276353276, "grad_norm": 0.14889150857925415, "learning_rate": 4.8574074820668595e-05, "loss": 0.9053, "step": 2525 }, { "epoch": 2.8774928774928776, "grad_norm": 0.16127127408981323, "learning_rate": 4.857252338119003e-05, "loss": 0.9123, "step": 2526 }, { "epoch": 2.8786324786324786, "grad_norm": 0.16958686709403992, "learning_rate": 4.8570971122972914e-05, "loss": 0.7225, "step": 2527 }, { "epoch": 2.8797720797720796, "grad_norm": 0.14371536672115326, "learning_rate": 4.8569418046071144e-05, "loss": 0.8774, "step": 2528 }, { "epoch": 2.880911680911681, "grad_norm": 0.15222488343715668, "learning_rate": 4.856786415053868e-05, "loss": 0.8117, "step": 2529 }, { "epoch": 2.882051282051282, "grad_norm": 0.14268170297145844, "learning_rate": 4.856630943642947e-05, "loss": 1.0089, "step": 2530 }, { "epoch": 2.883190883190883, "grad_norm": 0.1648574322462082, "learning_rate": 4.8564753903797536e-05, "loss": 0.8957, "step": 2531 }, { "epoch": 2.8843304843304844, "grad_norm": 0.14225581288337708, "learning_rate": 4.856319755269688e-05, "loss": 0.8577, "step": 2532 }, { "epoch": 2.8854700854700854, "grad_norm": 0.14959944784641266, "learning_rate": 4.856164038318159e-05, "loss": 0.7652, "step": 2533 }, { "epoch": 2.8866096866096864, "grad_norm": 0.1473144292831421, "learning_rate": 4.856008239530573e-05, "loss": 0.8029, "step": 2534 }, { "epoch": 2.887749287749288, "grad_norm": 0.17135225236415863, "learning_rate": 4.855852358912341e-05, "loss": 0.7584, "step": 2535 }, { "epoch": 2.888888888888889, "grad_norm": 0.15319319069385529, "learning_rate": 4.8556963964688786e-05, "loss": 0.8239, "step": 2536 }, { "epoch": 2.89002849002849, "grad_norm": 0.14567650854587555, "learning_rate": 4.855540352205602e-05, "loss": 0.9502, "step": 2537 }, { "epoch": 2.8911680911680913, "grad_norm": 0.16664889454841614, "learning_rate": 4.855384226127932e-05, "loss": 0.9223, "step": 2538 }, { "epoch": 2.8923076923076922, "grad_norm": 0.1748281568288803, "learning_rate": 4.8552280182412893e-05, "loss": 0.7306, "step": 2539 }, { "epoch": 2.8934472934472932, "grad_norm": 0.14994482696056366, "learning_rate": 4.855071728551101e-05, "loss": 0.7669, "step": 2540 }, { "epoch": 2.8945868945868947, "grad_norm": 0.1550205796957016, "learning_rate": 4.8549153570627954e-05, "loss": 0.8358, "step": 2541 }, { "epoch": 2.8957264957264957, "grad_norm": 0.1540808230638504, "learning_rate": 4.854758903781803e-05, "loss": 0.7247, "step": 2542 }, { "epoch": 2.8968660968660966, "grad_norm": 0.14603246748447418, "learning_rate": 4.854602368713559e-05, "loss": 0.9276, "step": 2543 }, { "epoch": 2.898005698005698, "grad_norm": 0.1589769870042801, "learning_rate": 4.854445751863499e-05, "loss": 0.7108, "step": 2544 }, { "epoch": 2.899145299145299, "grad_norm": 0.1363423764705658, "learning_rate": 4.8542890532370634e-05, "loss": 0.9213, "step": 2545 }, { "epoch": 2.9002849002849, "grad_norm": 0.1457771509885788, "learning_rate": 4.8541322728396946e-05, "loss": 0.8932, "step": 2546 }, { "epoch": 2.9014245014245015, "grad_norm": 0.16639792919158936, "learning_rate": 4.8539754106768374e-05, "loss": 0.7764, "step": 2547 }, { "epoch": 2.9025641025641025, "grad_norm": 0.1675666719675064, "learning_rate": 4.853818466753942e-05, "loss": 0.7531, "step": 2548 }, { "epoch": 2.9037037037037035, "grad_norm": 0.18550263345241547, "learning_rate": 4.853661441076457e-05, "loss": 0.9278, "step": 2549 }, { "epoch": 2.904843304843305, "grad_norm": 0.15534579753875732, "learning_rate": 4.8535043336498387e-05, "loss": 0.7667, "step": 2550 }, { "epoch": 2.905982905982906, "grad_norm": 0.17672714591026306, "learning_rate": 4.853347144479542e-05, "loss": 0.8079, "step": 2551 }, { "epoch": 2.907122507122507, "grad_norm": 0.12661369144916534, "learning_rate": 4.8531898735710277e-05, "loss": 0.9358, "step": 2552 }, { "epoch": 2.9082621082621083, "grad_norm": 0.1643446385860443, "learning_rate": 4.853032520929758e-05, "loss": 0.8325, "step": 2553 }, { "epoch": 2.9094017094017093, "grad_norm": 0.1532631665468216, "learning_rate": 4.852875086561197e-05, "loss": 0.8259, "step": 2554 }, { "epoch": 2.9105413105413103, "grad_norm": 0.16986294090747833, "learning_rate": 4.8527175704708146e-05, "loss": 0.6643, "step": 2555 }, { "epoch": 2.9116809116809117, "grad_norm": 0.13494716584682465, "learning_rate": 4.8525599726640806e-05, "loss": 0.8337, "step": 2556 }, { "epoch": 2.9128205128205127, "grad_norm": 0.13411372900009155, "learning_rate": 4.8524022931464695e-05, "loss": 0.9284, "step": 2557 }, { "epoch": 2.913960113960114, "grad_norm": 0.1455051302909851, "learning_rate": 4.852244531923458e-05, "loss": 0.8702, "step": 2558 }, { "epoch": 2.915099715099715, "grad_norm": 0.12225284427404404, "learning_rate": 4.852086689000524e-05, "loss": 0.9424, "step": 2559 }, { "epoch": 2.916239316239316, "grad_norm": 0.15196116268634796, "learning_rate": 4.851928764383152e-05, "loss": 0.8331, "step": 2560 }, { "epoch": 2.9173789173789175, "grad_norm": 0.13254469633102417, "learning_rate": 4.851770758076826e-05, "loss": 0.8626, "step": 2561 }, { "epoch": 2.9185185185185185, "grad_norm": 0.1561303287744522, "learning_rate": 4.851612670087034e-05, "loss": 0.8615, "step": 2562 }, { "epoch": 2.9196581196581195, "grad_norm": 0.1483648121356964, "learning_rate": 4.851454500419267e-05, "loss": 0.7833, "step": 2563 }, { "epoch": 2.920797720797721, "grad_norm": 0.16570700705051422, "learning_rate": 4.851296249079018e-05, "loss": 0.687, "step": 2564 }, { "epoch": 2.921937321937322, "grad_norm": 0.16205599904060364, "learning_rate": 4.8511379160717855e-05, "loss": 0.8039, "step": 2565 }, { "epoch": 2.9230769230769234, "grad_norm": 0.139508917927742, "learning_rate": 4.850979501403067e-05, "loss": 0.8681, "step": 2566 }, { "epoch": 2.9242165242165243, "grad_norm": 0.14752286672592163, "learning_rate": 4.8508210050783655e-05, "loss": 0.6955, "step": 2567 }, { "epoch": 2.9253561253561253, "grad_norm": 0.1667383909225464, "learning_rate": 4.850662427103185e-05, "loss": 0.6762, "step": 2568 }, { "epoch": 2.9264957264957268, "grad_norm": 0.13458852469921112, "learning_rate": 4.850503767483035e-05, "loss": 0.7954, "step": 2569 }, { "epoch": 2.9276353276353277, "grad_norm": 0.16461481153964996, "learning_rate": 4.850345026223424e-05, "loss": 0.796, "step": 2570 }, { "epoch": 2.9287749287749287, "grad_norm": 0.1626298725605011, "learning_rate": 4.8501862033298674e-05, "loss": 0.8109, "step": 2571 }, { "epoch": 2.92991452991453, "grad_norm": 0.13834208250045776, "learning_rate": 4.850027298807881e-05, "loss": 1.0353, "step": 2572 }, { "epoch": 2.931054131054131, "grad_norm": 0.14099299907684326, "learning_rate": 4.849868312662984e-05, "loss": 0.9729, "step": 2573 }, { "epoch": 2.932193732193732, "grad_norm": 0.13429856300354004, "learning_rate": 4.849709244900697e-05, "loss": 0.9534, "step": 2574 }, { "epoch": 2.9333333333333336, "grad_norm": 0.13129566609859467, "learning_rate": 4.8495500955265486e-05, "loss": 0.8689, "step": 2575 }, { "epoch": 2.9344729344729346, "grad_norm": 0.13619905710220337, "learning_rate": 4.849390864546063e-05, "loss": 0.8268, "step": 2576 }, { "epoch": 2.9356125356125355, "grad_norm": 0.1461378037929535, "learning_rate": 4.849231551964771e-05, "loss": 0.771, "step": 2577 }, { "epoch": 2.936752136752137, "grad_norm": 0.17044039070606232, "learning_rate": 4.849072157788207e-05, "loss": 0.8405, "step": 2578 }, { "epoch": 2.937891737891738, "grad_norm": 0.17612218856811523, "learning_rate": 4.848912682021908e-05, "loss": 0.8316, "step": 2579 }, { "epoch": 2.939031339031339, "grad_norm": 0.16972126066684723, "learning_rate": 4.848753124671411e-05, "loss": 0.7985, "step": 2580 }, { "epoch": 2.9401709401709404, "grad_norm": 0.14959603548049927, "learning_rate": 4.84859348574226e-05, "loss": 0.7799, "step": 2581 }, { "epoch": 2.9413105413105414, "grad_norm": 0.15644197165966034, "learning_rate": 4.848433765239998e-05, "loss": 0.723, "step": 2582 }, { "epoch": 2.9424501424501424, "grad_norm": 0.1325509250164032, "learning_rate": 4.848273963170173e-05, "loss": 0.8743, "step": 2583 }, { "epoch": 2.943589743589744, "grad_norm": 0.14713600277900696, "learning_rate": 4.848114079538335e-05, "loss": 0.8541, "step": 2584 }, { "epoch": 2.9447293447293448, "grad_norm": 0.16051732003688812, "learning_rate": 4.847954114350039e-05, "loss": 0.9102, "step": 2585 }, { "epoch": 2.9458689458689458, "grad_norm": 0.17072464525699615, "learning_rate": 4.847794067610839e-05, "loss": 0.777, "step": 2586 }, { "epoch": 2.947008547008547, "grad_norm": 0.12746527791023254, "learning_rate": 4.8476339393262946e-05, "loss": 0.8447, "step": 2587 }, { "epoch": 2.948148148148148, "grad_norm": 0.13438880443572998, "learning_rate": 4.847473729501967e-05, "loss": 0.9381, "step": 2588 }, { "epoch": 2.949287749287749, "grad_norm": 0.14622321724891663, "learning_rate": 4.847313438143422e-05, "loss": 0.8911, "step": 2589 }, { "epoch": 2.9504273504273506, "grad_norm": 0.1536107361316681, "learning_rate": 4.847153065256226e-05, "loss": 0.7733, "step": 2590 }, { "epoch": 2.9515669515669516, "grad_norm": 0.13855865597724915, "learning_rate": 4.8469926108459485e-05, "loss": 0.8919, "step": 2591 }, { "epoch": 2.9527065527065526, "grad_norm": 0.1281176656484604, "learning_rate": 4.846832074918164e-05, "loss": 0.9962, "step": 2592 }, { "epoch": 2.953846153846154, "grad_norm": 0.17547999322414398, "learning_rate": 4.8466714574784476e-05, "loss": 0.9281, "step": 2593 }, { "epoch": 2.954985754985755, "grad_norm": 0.14814116060733795, "learning_rate": 4.8465107585323785e-05, "loss": 0.8159, "step": 2594 }, { "epoch": 2.956125356125356, "grad_norm": 0.17398834228515625, "learning_rate": 4.846349978085537e-05, "loss": 0.7007, "step": 2595 }, { "epoch": 2.9572649572649574, "grad_norm": 0.1629709005355835, "learning_rate": 4.8461891161435094e-05, "loss": 0.7755, "step": 2596 }, { "epoch": 2.9584045584045584, "grad_norm": 0.1796780228614807, "learning_rate": 4.846028172711881e-05, "loss": 0.758, "step": 2597 }, { "epoch": 2.9595441595441594, "grad_norm": 0.1606217622756958, "learning_rate": 4.845867147796243e-05, "loss": 0.7226, "step": 2598 }, { "epoch": 2.960683760683761, "grad_norm": 0.17775005102157593, "learning_rate": 4.845706041402187e-05, "loss": 0.7264, "step": 2599 }, { "epoch": 2.961823361823362, "grad_norm": 0.16189998388290405, "learning_rate": 4.84554485353531e-05, "loss": 0.7123, "step": 2600 }, { "epoch": 2.962962962962963, "grad_norm": 0.16861553490161896, "learning_rate": 4.8453835842012104e-05, "loss": 0.7882, "step": 2601 }, { "epoch": 2.9641025641025642, "grad_norm": 0.14921441674232483, "learning_rate": 4.8452222334054883e-05, "loss": 0.8811, "step": 2602 }, { "epoch": 2.965242165242165, "grad_norm": 0.1653035283088684, "learning_rate": 4.845060801153749e-05, "loss": 0.6865, "step": 2603 }, { "epoch": 2.966381766381766, "grad_norm": 0.12394673377275467, "learning_rate": 4.844899287451599e-05, "loss": 0.9979, "step": 2604 }, { "epoch": 2.9675213675213676, "grad_norm": 0.1545882672071457, "learning_rate": 4.8447376923046484e-05, "loss": 0.7133, "step": 2605 }, { "epoch": 2.9686609686609686, "grad_norm": 0.1520768702030182, "learning_rate": 4.84457601571851e-05, "loss": 0.7198, "step": 2606 }, { "epoch": 2.9698005698005696, "grad_norm": 0.145513653755188, "learning_rate": 4.844414257698798e-05, "loss": 0.8881, "step": 2607 }, { "epoch": 2.970940170940171, "grad_norm": 0.17397546768188477, "learning_rate": 4.844252418251133e-05, "loss": 0.7565, "step": 2608 }, { "epoch": 2.972079772079772, "grad_norm": 0.15827535092830658, "learning_rate": 4.844090497381134e-05, "loss": 0.6552, "step": 2609 }, { "epoch": 2.973219373219373, "grad_norm": 0.16732941567897797, "learning_rate": 4.843928495094425e-05, "loss": 0.7455, "step": 2610 }, { "epoch": 2.9743589743589745, "grad_norm": 0.14165620505809784, "learning_rate": 4.843766411396635e-05, "loss": 0.8492, "step": 2611 }, { "epoch": 2.9754985754985754, "grad_norm": 0.17917455732822418, "learning_rate": 4.8436042462933914e-05, "loss": 0.8732, "step": 2612 }, { "epoch": 2.9766381766381764, "grad_norm": 0.15666763484477997, "learning_rate": 4.8434419997903275e-05, "loss": 1.089, "step": 2613 }, { "epoch": 2.977777777777778, "grad_norm": 0.17236775159835815, "learning_rate": 4.8432796718930785e-05, "loss": 0.7096, "step": 2614 }, { "epoch": 2.978917378917379, "grad_norm": 0.169717937707901, "learning_rate": 4.8431172626072824e-05, "loss": 0.6719, "step": 2615 }, { "epoch": 2.98005698005698, "grad_norm": 0.15024301409721375, "learning_rate": 4.8429547719385803e-05, "loss": 0.8322, "step": 2616 }, { "epoch": 2.9811965811965813, "grad_norm": 0.15195104479789734, "learning_rate": 4.8427921998926156e-05, "loss": 1.0122, "step": 2617 }, { "epoch": 2.9823361823361823, "grad_norm": 0.1863865703344345, "learning_rate": 4.8426295464750356e-05, "loss": 0.7327, "step": 2618 }, { "epoch": 2.9834757834757832, "grad_norm": 0.1663333624601364, "learning_rate": 4.8424668116914884e-05, "loss": 0.7586, "step": 2619 }, { "epoch": 2.9846153846153847, "grad_norm": 0.1752542406320572, "learning_rate": 4.842303995547627e-05, "loss": 0.6552, "step": 2620 }, { "epoch": 2.9857549857549857, "grad_norm": 0.13285940885543823, "learning_rate": 4.8421410980491077e-05, "loss": 0.9676, "step": 2621 }, { "epoch": 2.9868945868945866, "grad_norm": 0.1365329921245575, "learning_rate": 4.841978119201586e-05, "loss": 0.9183, "step": 2622 }, { "epoch": 2.988034188034188, "grad_norm": 0.1425931304693222, "learning_rate": 4.841815059010724e-05, "loss": 0.6172, "step": 2623 }, { "epoch": 2.989173789173789, "grad_norm": 0.15361155569553375, "learning_rate": 4.841651917482185e-05, "loss": 0.8251, "step": 2624 }, { "epoch": 2.99031339031339, "grad_norm": 0.1416916400194168, "learning_rate": 4.8414886946216356e-05, "loss": 0.838, "step": 2625 }, { "epoch": 2.9914529914529915, "grad_norm": 0.1529463678598404, "learning_rate": 4.841325390434744e-05, "loss": 0.8808, "step": 2626 }, { "epoch": 2.9925925925925925, "grad_norm": 0.14615002274513245, "learning_rate": 4.841162004927184e-05, "loss": 0.8699, "step": 2627 }, { "epoch": 2.9937321937321935, "grad_norm": 0.16817350685596466, "learning_rate": 4.840998538104629e-05, "loss": 0.7843, "step": 2628 }, { "epoch": 2.994871794871795, "grad_norm": 0.14813373982906342, "learning_rate": 4.840834989972757e-05, "loss": 0.8616, "step": 2629 }, { "epoch": 2.996011396011396, "grad_norm": 0.12398434430360794, "learning_rate": 4.8406713605372476e-05, "loss": 1.0063, "step": 2630 }, { "epoch": 2.9971509971509973, "grad_norm": 0.14389266073703766, "learning_rate": 4.840507649803786e-05, "loss": 0.8416, "step": 2631 }, { "epoch": 2.9982905982905983, "grad_norm": 0.13971839845180511, "learning_rate": 4.840343857778057e-05, "loss": 0.9507, "step": 2632 }, { "epoch": 2.9994301994301993, "grad_norm": 0.14235249161720276, "learning_rate": 4.84017998446575e-05, "loss": 0.9633, "step": 2633 }, { "epoch": 3.0, "grad_norm": 0.21776600182056427, "learning_rate": 4.840016029872556e-05, "loss": 1.1035, "step": 2634 }, { "epoch": 3.001139601139601, "grad_norm": 0.15960942208766937, "learning_rate": 4.839851994004171e-05, "loss": 0.7122, "step": 2635 }, { "epoch": 3.0022792022792024, "grad_norm": 0.14113770425319672, "learning_rate": 4.8396878768662915e-05, "loss": 0.8827, "step": 2636 }, { "epoch": 3.0034188034188034, "grad_norm": 0.13497541844844818, "learning_rate": 4.839523678464617e-05, "loss": 0.8685, "step": 2637 }, { "epoch": 3.0045584045584044, "grad_norm": 0.15868493914604187, "learning_rate": 4.839359398804852e-05, "loss": 0.9344, "step": 2638 }, { "epoch": 3.005698005698006, "grad_norm": 0.1271446943283081, "learning_rate": 4.839195037892702e-05, "loss": 0.9242, "step": 2639 }, { "epoch": 3.006837606837607, "grad_norm": 0.1592073291540146, "learning_rate": 4.839030595733876e-05, "loss": 0.8411, "step": 2640 }, { "epoch": 3.007977207977208, "grad_norm": 0.16828948259353638, "learning_rate": 4.838866072334084e-05, "loss": 0.8178, "step": 2641 }, { "epoch": 3.0091168091168092, "grad_norm": 0.15163841843605042, "learning_rate": 4.838701467699041e-05, "loss": 0.9099, "step": 2642 }, { "epoch": 3.01025641025641, "grad_norm": 0.15888333320617676, "learning_rate": 4.838536781834466e-05, "loss": 0.7028, "step": 2643 }, { "epoch": 3.011396011396011, "grad_norm": 0.1368103325366974, "learning_rate": 4.8383720147460776e-05, "loss": 0.9294, "step": 2644 }, { "epoch": 3.0125356125356126, "grad_norm": 0.12142717838287354, "learning_rate": 4.838207166439598e-05, "loss": 0.862, "step": 2645 }, { "epoch": 3.0136752136752136, "grad_norm": 0.16460753977298737, "learning_rate": 4.8380422369207535e-05, "loss": 0.8824, "step": 2646 }, { "epoch": 3.0148148148148146, "grad_norm": 0.20473873615264893, "learning_rate": 4.837877226195272e-05, "loss": 0.5814, "step": 2647 }, { "epoch": 3.015954415954416, "grad_norm": 0.16301965713500977, "learning_rate": 4.8377121342688855e-05, "loss": 0.8065, "step": 2648 }, { "epoch": 3.017094017094017, "grad_norm": 0.1365353763103485, "learning_rate": 4.837546961147328e-05, "loss": 0.8704, "step": 2649 }, { "epoch": 3.018233618233618, "grad_norm": 0.1537342667579651, "learning_rate": 4.8373817068363366e-05, "loss": 0.7572, "step": 2650 }, { "epoch": 3.0193732193732195, "grad_norm": 0.15122590959072113, "learning_rate": 4.8372163713416505e-05, "loss": 0.8173, "step": 2651 }, { "epoch": 3.0205128205128204, "grad_norm": 0.12765035033226013, "learning_rate": 4.8370509546690126e-05, "loss": 1.0291, "step": 2652 }, { "epoch": 3.021652421652422, "grad_norm": 0.15918688476085663, "learning_rate": 4.836885456824167e-05, "loss": 0.8528, "step": 2653 }, { "epoch": 3.022792022792023, "grad_norm": 0.1551569253206253, "learning_rate": 4.836719877812864e-05, "loss": 0.7266, "step": 2654 }, { "epoch": 3.023931623931624, "grad_norm": 0.13425129652023315, "learning_rate": 4.8365542176408534e-05, "loss": 0.8123, "step": 2655 }, { "epoch": 3.0250712250712253, "grad_norm": 0.1595241129398346, "learning_rate": 4.83638847631389e-05, "loss": 0.8837, "step": 2656 }, { "epoch": 3.0262108262108263, "grad_norm": 0.16135190427303314, "learning_rate": 4.8362226538377286e-05, "loss": 0.7402, "step": 2657 }, { "epoch": 3.0273504273504273, "grad_norm": 0.1531604379415512, "learning_rate": 4.8360567502181307e-05, "loss": 0.95, "step": 2658 }, { "epoch": 3.0284900284900287, "grad_norm": 0.16332782804965973, "learning_rate": 4.8358907654608564e-05, "loss": 0.8343, "step": 2659 }, { "epoch": 3.0296296296296297, "grad_norm": 0.13291047513484955, "learning_rate": 4.835724699571673e-05, "loss": 0.9438, "step": 2660 }, { "epoch": 3.0307692307692307, "grad_norm": 0.1411600261926651, "learning_rate": 4.835558552556347e-05, "loss": 0.7242, "step": 2661 }, { "epoch": 3.031908831908832, "grad_norm": 0.15683066844940186, "learning_rate": 4.835392324420651e-05, "loss": 0.7056, "step": 2662 }, { "epoch": 3.033048433048433, "grad_norm": 0.178327277302742, "learning_rate": 4.8352260151703565e-05, "loss": 0.6822, "step": 2663 }, { "epoch": 3.034188034188034, "grad_norm": 0.1309361457824707, "learning_rate": 4.83505962481124e-05, "loss": 0.9562, "step": 2664 }, { "epoch": 3.0353276353276355, "grad_norm": 0.1497715264558792, "learning_rate": 4.834893153349082e-05, "loss": 0.9794, "step": 2665 }, { "epoch": 3.0364672364672365, "grad_norm": 0.14094099402427673, "learning_rate": 4.834726600789663e-05, "loss": 0.9215, "step": 2666 }, { "epoch": 3.0376068376068375, "grad_norm": 0.16675636172294617, "learning_rate": 4.83455996713877e-05, "loss": 0.8251, "step": 2667 }, { "epoch": 3.038746438746439, "grad_norm": 0.14439137279987335, "learning_rate": 4.834393252402188e-05, "loss": 0.8335, "step": 2668 }, { "epoch": 3.03988603988604, "grad_norm": 0.15338900685310364, "learning_rate": 4.834226456585709e-05, "loss": 0.9091, "step": 2669 }, { "epoch": 3.041025641025641, "grad_norm": 0.13452865183353424, "learning_rate": 4.834059579695126e-05, "loss": 0.912, "step": 2670 }, { "epoch": 3.0421652421652423, "grad_norm": 0.20547722280025482, "learning_rate": 4.833892621736236e-05, "loss": 0.8714, "step": 2671 }, { "epoch": 3.0433048433048433, "grad_norm": 0.16397899389266968, "learning_rate": 4.833725582714836e-05, "loss": 0.8073, "step": 2672 }, { "epoch": 3.0444444444444443, "grad_norm": 0.13033859431743622, "learning_rate": 4.833558462636729e-05, "loss": 0.8383, "step": 2673 }, { "epoch": 3.0455840455840457, "grad_norm": 0.1302100419998169, "learning_rate": 4.833391261507719e-05, "loss": 0.926, "step": 2674 }, { "epoch": 3.0467236467236467, "grad_norm": 0.14383549988269806, "learning_rate": 4.8332239793336145e-05, "loss": 0.7762, "step": 2675 }, { "epoch": 3.0478632478632477, "grad_norm": 0.19122305512428284, "learning_rate": 4.8330566161202236e-05, "loss": 0.9622, "step": 2676 }, { "epoch": 3.049002849002849, "grad_norm": 0.15395867824554443, "learning_rate": 4.832889171873362e-05, "loss": 0.7845, "step": 2677 }, { "epoch": 3.05014245014245, "grad_norm": 0.12767601013183594, "learning_rate": 4.832721646598843e-05, "loss": 0.9744, "step": 2678 }, { "epoch": 3.051282051282051, "grad_norm": 0.15964336693286896, "learning_rate": 4.832554040302486e-05, "loss": 0.7777, "step": 2679 }, { "epoch": 3.0524216524216525, "grad_norm": 0.15287315845489502, "learning_rate": 4.8323863529901134e-05, "loss": 0.8422, "step": 2680 }, { "epoch": 3.0535612535612535, "grad_norm": 0.17749686539173126, "learning_rate": 4.832218584667548e-05, "loss": 0.7228, "step": 2681 }, { "epoch": 3.0547008547008545, "grad_norm": 0.18201468884944916, "learning_rate": 4.832050735340617e-05, "loss": 0.5886, "step": 2682 }, { "epoch": 3.055840455840456, "grad_norm": 0.13972000777721405, "learning_rate": 4.831882805015152e-05, "loss": 1.1174, "step": 2683 }, { "epoch": 3.056980056980057, "grad_norm": 0.1677132099866867, "learning_rate": 4.831714793696984e-05, "loss": 0.8129, "step": 2684 }, { "epoch": 3.058119658119658, "grad_norm": 0.15444085001945496, "learning_rate": 4.8315467013919494e-05, "loss": 0.8082, "step": 2685 }, { "epoch": 3.0592592592592593, "grad_norm": 0.15181677043437958, "learning_rate": 4.831378528105886e-05, "loss": 0.8809, "step": 2686 }, { "epoch": 3.0603988603988603, "grad_norm": 0.12987425923347473, "learning_rate": 4.8312102738446346e-05, "loss": 0.9548, "step": 2687 }, { "epoch": 3.0615384615384613, "grad_norm": 0.1311892420053482, "learning_rate": 4.8310419386140395e-05, "loss": 0.7892, "step": 2688 }, { "epoch": 3.0626780626780628, "grad_norm": 0.1323302984237671, "learning_rate": 4.830873522419947e-05, "loss": 0.8871, "step": 2689 }, { "epoch": 3.0638176638176637, "grad_norm": 0.1338183581829071, "learning_rate": 4.8307050252682085e-05, "loss": 0.758, "step": 2690 }, { "epoch": 3.064957264957265, "grad_norm": 0.13787806034088135, "learning_rate": 4.8305364471646744e-05, "loss": 0.8731, "step": 2691 }, { "epoch": 3.066096866096866, "grad_norm": 0.18944986164569855, "learning_rate": 4.8303677881152004e-05, "loss": 0.631, "step": 2692 }, { "epoch": 3.067236467236467, "grad_norm": 0.14428669214248657, "learning_rate": 4.8301990481256445e-05, "loss": 0.9133, "step": 2693 }, { "epoch": 3.0683760683760686, "grad_norm": 0.14433634281158447, "learning_rate": 4.8300302272018676e-05, "loss": 0.8105, "step": 2694 }, { "epoch": 3.0695156695156696, "grad_norm": 0.15779542922973633, "learning_rate": 4.829861325349734e-05, "loss": 0.7859, "step": 2695 }, { "epoch": 3.0706552706552706, "grad_norm": 0.15013574063777924, "learning_rate": 4.829692342575109e-05, "loss": 0.7969, "step": 2696 }, { "epoch": 3.071794871794872, "grad_norm": 0.16951070725917816, "learning_rate": 4.829523278883862e-05, "loss": 0.7068, "step": 2697 }, { "epoch": 3.072934472934473, "grad_norm": 0.16703423857688904, "learning_rate": 4.829354134281865e-05, "loss": 0.7581, "step": 2698 }, { "epoch": 3.074074074074074, "grad_norm": 0.13053932785987854, "learning_rate": 4.829184908774993e-05, "loss": 0.9117, "step": 2699 }, { "epoch": 3.0752136752136754, "grad_norm": 0.13885967433452606, "learning_rate": 4.829015602369125e-05, "loss": 0.8018, "step": 2700 }, { "epoch": 3.0763532763532764, "grad_norm": 0.15271960198879242, "learning_rate": 4.82884621507014e-05, "loss": 0.8809, "step": 2701 }, { "epoch": 3.0774928774928774, "grad_norm": 0.13208994269371033, "learning_rate": 4.8286767468839204e-05, "loss": 1.02, "step": 2702 }, { "epoch": 3.078632478632479, "grad_norm": 0.16731829941272736, "learning_rate": 4.8285071978163544e-05, "loss": 0.7642, "step": 2703 }, { "epoch": 3.07977207977208, "grad_norm": 0.17941708862781525, "learning_rate": 4.82833756787333e-05, "loss": 0.7251, "step": 2704 }, { "epoch": 3.0809116809116808, "grad_norm": 0.1853124499320984, "learning_rate": 4.828167857060739e-05, "loss": 0.6653, "step": 2705 }, { "epoch": 3.082051282051282, "grad_norm": 0.14601990580558777, "learning_rate": 4.827998065384476e-05, "loss": 0.8097, "step": 2706 }, { "epoch": 3.083190883190883, "grad_norm": 0.1611698567867279, "learning_rate": 4.8278281928504374e-05, "loss": 0.7889, "step": 2707 }, { "epoch": 3.084330484330484, "grad_norm": 0.11879551410675049, "learning_rate": 4.827658239464525e-05, "loss": 1.0276, "step": 2708 }, { "epoch": 3.0854700854700856, "grad_norm": 0.13108141720294952, "learning_rate": 4.82748820523264e-05, "loss": 0.9315, "step": 2709 }, { "epoch": 3.0866096866096866, "grad_norm": 0.15066450834274292, "learning_rate": 4.8273180901606896e-05, "loss": 1.0759, "step": 2710 }, { "epoch": 3.0877492877492876, "grad_norm": 0.16900579631328583, "learning_rate": 4.8271478942545826e-05, "loss": 0.7612, "step": 2711 }, { "epoch": 3.088888888888889, "grad_norm": 0.14955787360668182, "learning_rate": 4.826977617520228e-05, "loss": 0.8679, "step": 2712 }, { "epoch": 3.09002849002849, "grad_norm": 0.1958359032869339, "learning_rate": 4.8268072599635416e-05, "loss": 0.767, "step": 2713 }, { "epoch": 3.091168091168091, "grad_norm": 0.15889379382133484, "learning_rate": 4.826636821590441e-05, "loss": 0.8347, "step": 2714 }, { "epoch": 3.0923076923076924, "grad_norm": 0.14854945242404938, "learning_rate": 4.826466302406846e-05, "loss": 0.8601, "step": 2715 }, { "epoch": 3.0934472934472934, "grad_norm": 0.1435076892375946, "learning_rate": 4.826295702418677e-05, "loss": 0.9191, "step": 2716 }, { "epoch": 3.0945868945868944, "grad_norm": 0.1630614846944809, "learning_rate": 4.826125021631862e-05, "loss": 0.7881, "step": 2717 }, { "epoch": 3.095726495726496, "grad_norm": 0.16588105261325836, "learning_rate": 4.8259542600523275e-05, "loss": 0.8138, "step": 2718 }, { "epoch": 3.096866096866097, "grad_norm": 0.14578644931316376, "learning_rate": 4.825783417686005e-05, "loss": 0.9703, "step": 2719 }, { "epoch": 3.098005698005698, "grad_norm": 0.1567363291978836, "learning_rate": 4.825612494538829e-05, "loss": 0.6838, "step": 2720 }, { "epoch": 3.0991452991452992, "grad_norm": 0.16741551458835602, "learning_rate": 4.8254414906167355e-05, "loss": 0.8385, "step": 2721 }, { "epoch": 3.1002849002849002, "grad_norm": 0.16887108981609344, "learning_rate": 4.825270405925664e-05, "loss": 0.738, "step": 2722 }, { "epoch": 3.101424501424501, "grad_norm": 0.16314460337162018, "learning_rate": 4.825099240471557e-05, "loss": 0.7969, "step": 2723 }, { "epoch": 3.1025641025641026, "grad_norm": 0.16530047357082367, "learning_rate": 4.824927994260359e-05, "loss": 0.8159, "step": 2724 }, { "epoch": 3.1037037037037036, "grad_norm": 0.1305146962404251, "learning_rate": 4.824756667298018e-05, "loss": 0.9435, "step": 2725 }, { "epoch": 3.104843304843305, "grad_norm": 0.15956594049930573, "learning_rate": 4.8245852595904857e-05, "loss": 0.7105, "step": 2726 }, { "epoch": 3.105982905982906, "grad_norm": 0.1415933072566986, "learning_rate": 4.8244137711437144e-05, "loss": 0.9338, "step": 2727 }, { "epoch": 3.107122507122507, "grad_norm": 0.14600738883018494, "learning_rate": 4.82424220196366e-05, "loss": 0.7956, "step": 2728 }, { "epoch": 3.1082621082621085, "grad_norm": 0.1394665688276291, "learning_rate": 4.824070552056282e-05, "loss": 0.8196, "step": 2729 }, { "epoch": 3.1094017094017095, "grad_norm": 0.13695195317268372, "learning_rate": 4.823898821427544e-05, "loss": 0.8641, "step": 2730 }, { "epoch": 3.1105413105413104, "grad_norm": 0.15620756149291992, "learning_rate": 4.823727010083408e-05, "loss": 0.9084, "step": 2731 }, { "epoch": 3.111680911680912, "grad_norm": 0.14547526836395264, "learning_rate": 4.823555118029843e-05, "loss": 0.8668, "step": 2732 }, { "epoch": 3.112820512820513, "grad_norm": 0.1622859686613083, "learning_rate": 4.823383145272819e-05, "loss": 0.79, "step": 2733 }, { "epoch": 3.113960113960114, "grad_norm": 0.154898002743721, "learning_rate": 4.8232110918183094e-05, "loss": 0.9353, "step": 2734 }, { "epoch": 3.1150997150997153, "grad_norm": 0.1352144479751587, "learning_rate": 4.823038957672289e-05, "loss": 0.932, "step": 2735 }, { "epoch": 3.1162393162393163, "grad_norm": 0.1418062448501587, "learning_rate": 4.822866742840737e-05, "loss": 0.8635, "step": 2736 }, { "epoch": 3.1173789173789173, "grad_norm": 0.15414664149284363, "learning_rate": 4.822694447329636e-05, "loss": 0.9737, "step": 2737 }, { "epoch": 3.1185185185185187, "grad_norm": 0.14848250150680542, "learning_rate": 4.822522071144969e-05, "loss": 1.0357, "step": 2738 }, { "epoch": 3.1196581196581197, "grad_norm": 0.13740673661231995, "learning_rate": 4.822349614292724e-05, "loss": 0.7917, "step": 2739 }, { "epoch": 3.1207977207977207, "grad_norm": 0.13903386890888214, "learning_rate": 4.82217707677889e-05, "loss": 0.8972, "step": 2740 }, { "epoch": 3.121937321937322, "grad_norm": 0.1286192238330841, "learning_rate": 4.82200445860946e-05, "loss": 0.7864, "step": 2741 }, { "epoch": 3.123076923076923, "grad_norm": 0.14623361825942993, "learning_rate": 4.821831759790429e-05, "loss": 1.0075, "step": 2742 }, { "epoch": 3.124216524216524, "grad_norm": 0.14365258812904358, "learning_rate": 4.821658980327797e-05, "loss": 0.7144, "step": 2743 }, { "epoch": 3.1253561253561255, "grad_norm": 0.1389722377061844, "learning_rate": 4.821486120227563e-05, "loss": 0.9085, "step": 2744 }, { "epoch": 3.1264957264957265, "grad_norm": 0.1723526120185852, "learning_rate": 4.8213131794957334e-05, "loss": 0.8972, "step": 2745 }, { "epoch": 3.1276353276353275, "grad_norm": 0.16342268884181976, "learning_rate": 4.821140158138312e-05, "loss": 0.7712, "step": 2746 }, { "epoch": 3.128774928774929, "grad_norm": 0.14794638752937317, "learning_rate": 4.8209670561613105e-05, "loss": 0.9249, "step": 2747 }, { "epoch": 3.12991452991453, "grad_norm": 0.13991481065750122, "learning_rate": 4.82079387357074e-05, "loss": 0.8941, "step": 2748 }, { "epoch": 3.131054131054131, "grad_norm": 0.17690086364746094, "learning_rate": 4.820620610372617e-05, "loss": 0.7206, "step": 2749 }, { "epoch": 3.1321937321937323, "grad_norm": 0.16780176758766174, "learning_rate": 4.8204472665729576e-05, "loss": 0.883, "step": 2750 }, { "epoch": 3.1333333333333333, "grad_norm": 0.17503003776073456, "learning_rate": 4.8202738421777836e-05, "loss": 0.8847, "step": 2751 }, { "epoch": 3.1344729344729343, "grad_norm": 0.14730869233608246, "learning_rate": 4.820100337193119e-05, "loss": 0.8328, "step": 2752 }, { "epoch": 3.1356125356125357, "grad_norm": 0.16876858472824097, "learning_rate": 4.819926751624988e-05, "loss": 0.7394, "step": 2753 }, { "epoch": 3.1367521367521367, "grad_norm": 0.1489054262638092, "learning_rate": 4.819753085479422e-05, "loss": 1.0266, "step": 2754 }, { "epoch": 3.1378917378917377, "grad_norm": 0.1436724215745926, "learning_rate": 4.819579338762452e-05, "loss": 0.9499, "step": 2755 }, { "epoch": 3.139031339031339, "grad_norm": 0.14720487594604492, "learning_rate": 4.819405511480112e-05, "loss": 0.8827, "step": 2756 }, { "epoch": 3.14017094017094, "grad_norm": 0.13117007911205292, "learning_rate": 4.819231603638441e-05, "loss": 0.9022, "step": 2757 }, { "epoch": 3.141310541310541, "grad_norm": 0.13781315088272095, "learning_rate": 4.819057615243479e-05, "loss": 0.936, "step": 2758 }, { "epoch": 3.1424501424501425, "grad_norm": 0.14733007550239563, "learning_rate": 4.818883546301267e-05, "loss": 0.9361, "step": 2759 }, { "epoch": 3.1435897435897435, "grad_norm": 0.16954368352890015, "learning_rate": 4.818709396817853e-05, "loss": 0.6559, "step": 2760 }, { "epoch": 3.1447293447293445, "grad_norm": 0.13638292253017426, "learning_rate": 4.8185351667992856e-05, "loss": 0.9139, "step": 2761 }, { "epoch": 3.145868945868946, "grad_norm": 0.15743222832679749, "learning_rate": 4.818360856251616e-05, "loss": 0.8733, "step": 2762 }, { "epoch": 3.147008547008547, "grad_norm": 0.1818668395280838, "learning_rate": 4.818186465180898e-05, "loss": 0.6358, "step": 2763 }, { "epoch": 3.148148148148148, "grad_norm": 0.1493171751499176, "learning_rate": 4.818011993593189e-05, "loss": 0.9824, "step": 2764 }, { "epoch": 3.1492877492877493, "grad_norm": 0.1341221034526825, "learning_rate": 4.8178374414945484e-05, "loss": 1.0686, "step": 2765 }, { "epoch": 3.1504273504273503, "grad_norm": 0.13915280997753143, "learning_rate": 4.8176628088910404e-05, "loss": 0.8628, "step": 2766 }, { "epoch": 3.1515669515669518, "grad_norm": 0.15209360420703888, "learning_rate": 4.817488095788729e-05, "loss": 0.7546, "step": 2767 }, { "epoch": 3.1527065527065528, "grad_norm": 0.14440672099590302, "learning_rate": 4.817313302193683e-05, "loss": 0.8006, "step": 2768 }, { "epoch": 3.1538461538461537, "grad_norm": 0.2144801765680313, "learning_rate": 4.817138428111973e-05, "loss": 0.4971, "step": 2769 }, { "epoch": 3.154985754985755, "grad_norm": 0.17117416858673096, "learning_rate": 4.816963473549674e-05, "loss": 0.6989, "step": 2770 }, { "epoch": 3.156125356125356, "grad_norm": 0.15620580315589905, "learning_rate": 4.816788438512861e-05, "loss": 0.8888, "step": 2771 }, { "epoch": 3.157264957264957, "grad_norm": 0.14185447990894318, "learning_rate": 4.816613323007615e-05, "loss": 1.0015, "step": 2772 }, { "epoch": 3.1584045584045586, "grad_norm": 0.14057381451129913, "learning_rate": 4.8164381270400175e-05, "loss": 0.8852, "step": 2773 }, { "epoch": 3.1595441595441596, "grad_norm": 0.15467970073223114, "learning_rate": 4.8162628506161534e-05, "loss": 0.9103, "step": 2774 }, { "epoch": 3.1606837606837606, "grad_norm": 0.14638753235340118, "learning_rate": 4.816087493742111e-05, "loss": 0.9498, "step": 2775 }, { "epoch": 3.161823361823362, "grad_norm": 0.14307600259780884, "learning_rate": 4.81591205642398e-05, "loss": 0.9224, "step": 2776 }, { "epoch": 3.162962962962963, "grad_norm": 0.14286182820796967, "learning_rate": 4.815736538667855e-05, "loss": 0.8386, "step": 2777 }, { "epoch": 3.164102564102564, "grad_norm": 0.15866969525814056, "learning_rate": 4.815560940479832e-05, "loss": 0.8296, "step": 2778 }, { "epoch": 3.1652421652421654, "grad_norm": 0.16058185696601868, "learning_rate": 4.8153852618660087e-05, "loss": 0.7985, "step": 2779 }, { "epoch": 3.1663817663817664, "grad_norm": 0.16978701949119568, "learning_rate": 4.815209502832489e-05, "loss": 0.8867, "step": 2780 }, { "epoch": 3.1675213675213674, "grad_norm": 0.1744219809770584, "learning_rate": 4.815033663385376e-05, "loss": 0.8944, "step": 2781 }, { "epoch": 3.168660968660969, "grad_norm": 0.14762021601200104, "learning_rate": 4.8148577435307774e-05, "loss": 0.9251, "step": 2782 }, { "epoch": 3.16980056980057, "grad_norm": 0.1757873147726059, "learning_rate": 4.814681743274804e-05, "loss": 0.7844, "step": 2783 }, { "epoch": 3.1709401709401708, "grad_norm": 0.13869482278823853, "learning_rate": 4.814505662623568e-05, "loss": 0.9691, "step": 2784 }, { "epoch": 3.172079772079772, "grad_norm": 0.1521621197462082, "learning_rate": 4.814329501583185e-05, "loss": 0.7805, "step": 2785 }, { "epoch": 3.173219373219373, "grad_norm": 0.1564890444278717, "learning_rate": 4.814153260159774e-05, "loss": 0.8231, "step": 2786 }, { "epoch": 3.174358974358974, "grad_norm": 0.17055024206638336, "learning_rate": 4.813976938359457e-05, "loss": 0.7321, "step": 2787 }, { "epoch": 3.1754985754985756, "grad_norm": 0.15321950614452362, "learning_rate": 4.813800536188357e-05, "loss": 0.7353, "step": 2788 }, { "epoch": 3.1766381766381766, "grad_norm": 0.15879105031490326, "learning_rate": 4.8136240536526015e-05, "loss": 0.664, "step": 2789 }, { "epoch": 3.1777777777777776, "grad_norm": 0.16343984007835388, "learning_rate": 4.81344749075832e-05, "loss": 0.8369, "step": 2790 }, { "epoch": 3.178917378917379, "grad_norm": 0.1567770391702652, "learning_rate": 4.813270847511645e-05, "loss": 0.9153, "step": 2791 }, { "epoch": 3.18005698005698, "grad_norm": 0.14378564059734344, "learning_rate": 4.8130941239187124e-05, "loss": 0.8609, "step": 2792 }, { "epoch": 3.181196581196581, "grad_norm": 0.14918112754821777, "learning_rate": 4.8129173199856597e-05, "loss": 0.7855, "step": 2793 }, { "epoch": 3.1823361823361824, "grad_norm": 0.17712976038455963, "learning_rate": 4.812740435718628e-05, "loss": 0.862, "step": 2794 }, { "epoch": 3.1834757834757834, "grad_norm": 0.13256049156188965, "learning_rate": 4.812563471123761e-05, "loss": 0.8987, "step": 2795 }, { "epoch": 3.184615384615385, "grad_norm": 0.16576290130615234, "learning_rate": 4.8123864262072045e-05, "loss": 0.8584, "step": 2796 }, { "epoch": 3.185754985754986, "grad_norm": 0.1808508187532425, "learning_rate": 4.812209300975109e-05, "loss": 0.6485, "step": 2797 }, { "epoch": 3.186894586894587, "grad_norm": 0.16196200251579285, "learning_rate": 4.812032095433625e-05, "loss": 0.9194, "step": 2798 }, { "epoch": 3.1880341880341883, "grad_norm": 0.1355491280555725, "learning_rate": 4.811854809588909e-05, "loss": 1.0122, "step": 2799 }, { "epoch": 3.1891737891737892, "grad_norm": 0.14594519138336182, "learning_rate": 4.811677443447118e-05, "loss": 0.9122, "step": 2800 }, { "epoch": 3.1903133903133902, "grad_norm": 0.13732974231243134, "learning_rate": 4.811499997014412e-05, "loss": 0.9927, "step": 2801 }, { "epoch": 3.1914529914529917, "grad_norm": 0.1354142278432846, "learning_rate": 4.811322470296954e-05, "loss": 0.9402, "step": 2802 }, { "epoch": 3.1925925925925926, "grad_norm": 0.17087936401367188, "learning_rate": 4.811144863300911e-05, "loss": 0.7806, "step": 2803 }, { "epoch": 3.1937321937321936, "grad_norm": 0.1560145616531372, "learning_rate": 4.810967176032451e-05, "loss": 0.7925, "step": 2804 }, { "epoch": 3.194871794871795, "grad_norm": 0.13679790496826172, "learning_rate": 4.810789408497745e-05, "loss": 0.9236, "step": 2805 }, { "epoch": 3.196011396011396, "grad_norm": 0.15265654027462006, "learning_rate": 4.810611560702969e-05, "loss": 0.7165, "step": 2806 }, { "epoch": 3.197150997150997, "grad_norm": 0.13758231699466705, "learning_rate": 4.8104336326543e-05, "loss": 0.9037, "step": 2807 }, { "epoch": 3.1982905982905985, "grad_norm": 0.13955584168434143, "learning_rate": 4.810255624357915e-05, "loss": 0.8877, "step": 2808 }, { "epoch": 3.1994301994301995, "grad_norm": 0.16765445470809937, "learning_rate": 4.810077535820001e-05, "loss": 0.7356, "step": 2809 }, { "epoch": 3.2005698005698004, "grad_norm": 0.1510707587003708, "learning_rate": 4.809899367046741e-05, "loss": 0.8906, "step": 2810 }, { "epoch": 3.201709401709402, "grad_norm": 0.1507355272769928, "learning_rate": 4.809721118044323e-05, "loss": 0.8705, "step": 2811 }, { "epoch": 3.202849002849003, "grad_norm": 0.16737888753414154, "learning_rate": 4.8095427888189405e-05, "loss": 0.8396, "step": 2812 }, { "epoch": 3.203988603988604, "grad_norm": 0.2135433703660965, "learning_rate": 4.809364379376784e-05, "loss": 0.6376, "step": 2813 }, { "epoch": 3.2051282051282053, "grad_norm": 0.15428176522254944, "learning_rate": 4.8091858897240525e-05, "loss": 0.7275, "step": 2814 }, { "epoch": 3.2062678062678063, "grad_norm": 0.16375058889389038, "learning_rate": 4.809007319866945e-05, "loss": 0.6918, "step": 2815 }, { "epoch": 3.2074074074074073, "grad_norm": 0.14606980979442596, "learning_rate": 4.808828669811663e-05, "loss": 0.8123, "step": 2816 }, { "epoch": 3.2085470085470087, "grad_norm": 0.13594239950180054, "learning_rate": 4.808649939564412e-05, "loss": 0.7501, "step": 2817 }, { "epoch": 3.2096866096866097, "grad_norm": 0.14622075855731964, "learning_rate": 4.8084711291314e-05, "loss": 0.8996, "step": 2818 }, { "epoch": 3.2108262108262107, "grad_norm": 0.15222690999507904, "learning_rate": 4.808292238518837e-05, "loss": 0.6714, "step": 2819 }, { "epoch": 3.211965811965812, "grad_norm": 0.12604406476020813, "learning_rate": 4.8081132677329365e-05, "loss": 0.794, "step": 2820 }, { "epoch": 3.213105413105413, "grad_norm": 0.12628376483917236, "learning_rate": 4.807934216779916e-05, "loss": 0.983, "step": 2821 }, { "epoch": 3.214245014245014, "grad_norm": 0.34481173753738403, "learning_rate": 4.807755085665992e-05, "loss": 0.7421, "step": 2822 }, { "epoch": 3.2153846153846155, "grad_norm": 0.15877555310726166, "learning_rate": 4.807575874397389e-05, "loss": 0.7, "step": 2823 }, { "epoch": 3.2165242165242165, "grad_norm": 0.13727839291095734, "learning_rate": 4.807396582980329e-05, "loss": 1.0087, "step": 2824 }, { "epoch": 3.2176638176638175, "grad_norm": 0.15775807201862335, "learning_rate": 4.80721721142104e-05, "loss": 0.9107, "step": 2825 }, { "epoch": 3.218803418803419, "grad_norm": 0.14970804750919342, "learning_rate": 4.807037759725753e-05, "loss": 0.8396, "step": 2826 }, { "epoch": 3.21994301994302, "grad_norm": 0.13678164780139923, "learning_rate": 4.8068582279007e-05, "loss": 0.9512, "step": 2827 }, { "epoch": 3.221082621082621, "grad_norm": 0.16693085432052612, "learning_rate": 4.806678615952117e-05, "loss": 0.752, "step": 2828 }, { "epoch": 3.2222222222222223, "grad_norm": 0.139791801571846, "learning_rate": 4.806498923886243e-05, "loss": 0.9341, "step": 2829 }, { "epoch": 3.2233618233618233, "grad_norm": 0.1404886394739151, "learning_rate": 4.806319151709318e-05, "loss": 0.849, "step": 2830 }, { "epoch": 3.2245014245014243, "grad_norm": 0.15113630890846252, "learning_rate": 4.8061392994275854e-05, "loss": 0.9324, "step": 2831 }, { "epoch": 3.2256410256410257, "grad_norm": 0.206705242395401, "learning_rate": 4.805959367047294e-05, "loss": 0.8974, "step": 2832 }, { "epoch": 3.2267806267806267, "grad_norm": 0.13375575840473175, "learning_rate": 4.805779354574692e-05, "loss": 0.8051, "step": 2833 }, { "epoch": 3.2279202279202277, "grad_norm": 0.15902279317378998, "learning_rate": 4.805599262016033e-05, "loss": 0.8863, "step": 2834 }, { "epoch": 3.229059829059829, "grad_norm": 0.1568021923303604, "learning_rate": 4.8054190893775706e-05, "loss": 0.8338, "step": 2835 }, { "epoch": 3.23019943019943, "grad_norm": 0.1916961669921875, "learning_rate": 4.8052388366655634e-05, "loss": 0.8858, "step": 2836 }, { "epoch": 3.231339031339031, "grad_norm": 0.1506609469652176, "learning_rate": 4.805058503886271e-05, "loss": 0.8021, "step": 2837 }, { "epoch": 3.2324786324786325, "grad_norm": 0.13231350481510162, "learning_rate": 4.804878091045959e-05, "loss": 0.7675, "step": 2838 }, { "epoch": 3.2336182336182335, "grad_norm": 0.16221636533737183, "learning_rate": 4.8046975981508915e-05, "loss": 0.8182, "step": 2839 }, { "epoch": 3.234757834757835, "grad_norm": 0.1357022523880005, "learning_rate": 4.804517025207339e-05, "loss": 1.0797, "step": 2840 }, { "epoch": 3.235897435897436, "grad_norm": 0.14520856738090515, "learning_rate": 4.804336372221573e-05, "loss": 0.8627, "step": 2841 }, { "epoch": 3.237037037037037, "grad_norm": 0.19395774602890015, "learning_rate": 4.804155639199868e-05, "loss": 0.6094, "step": 2842 }, { "epoch": 3.2381766381766384, "grad_norm": 0.13953706622123718, "learning_rate": 4.8039748261485005e-05, "loss": 0.9921, "step": 2843 }, { "epoch": 3.2393162393162394, "grad_norm": 0.1515970379114151, "learning_rate": 4.803793933073752e-05, "loss": 0.8693, "step": 2844 }, { "epoch": 3.2404558404558403, "grad_norm": 0.17366208136081696, "learning_rate": 4.803612959981905e-05, "loss": 0.7134, "step": 2845 }, { "epoch": 3.2415954415954418, "grad_norm": 0.1383877545595169, "learning_rate": 4.803431906879243e-05, "loss": 0.9014, "step": 2846 }, { "epoch": 3.2427350427350428, "grad_norm": 0.13188236951828003, "learning_rate": 4.8032507737720585e-05, "loss": 0.988, "step": 2847 }, { "epoch": 3.2438746438746437, "grad_norm": 0.161112442612648, "learning_rate": 4.80306956066664e-05, "loss": 0.7929, "step": 2848 }, { "epoch": 3.245014245014245, "grad_norm": 0.156901016831398, "learning_rate": 4.802888267569282e-05, "loss": 0.7972, "step": 2849 }, { "epoch": 3.246153846153846, "grad_norm": 0.14184769988059998, "learning_rate": 4.802706894486282e-05, "loss": 0.8899, "step": 2850 }, { "epoch": 3.247293447293447, "grad_norm": 0.1341044157743454, "learning_rate": 4.8025254414239386e-05, "loss": 0.9932, "step": 2851 }, { "epoch": 3.2484330484330486, "grad_norm": 0.13208197057247162, "learning_rate": 4.802343908388555e-05, "loss": 0.953, "step": 2852 }, { "epoch": 3.2495726495726496, "grad_norm": 0.15056800842285156, "learning_rate": 4.8021622953864356e-05, "loss": 1.0065, "step": 2853 }, { "epoch": 3.2507122507122506, "grad_norm": 0.14316238462924957, "learning_rate": 4.801980602423889e-05, "loss": 0.7553, "step": 2854 }, { "epoch": 3.251851851851852, "grad_norm": 0.13339664041996002, "learning_rate": 4.801798829507225e-05, "loss": 0.8095, "step": 2855 }, { "epoch": 3.252991452991453, "grad_norm": 0.17552809417247772, "learning_rate": 4.801616976642758e-05, "loss": 0.6818, "step": 2856 }, { "epoch": 3.254131054131054, "grad_norm": 0.16580873727798462, "learning_rate": 4.801435043836804e-05, "loss": 0.7491, "step": 2857 }, { "epoch": 3.2552706552706554, "grad_norm": 0.1647835671901703, "learning_rate": 4.801253031095683e-05, "loss": 0.7981, "step": 2858 }, { "epoch": 3.2564102564102564, "grad_norm": 0.18218526244163513, "learning_rate": 4.801070938425714e-05, "loss": 0.756, "step": 2859 }, { "epoch": 3.2575498575498574, "grad_norm": 0.1507270634174347, "learning_rate": 4.800888765833224e-05, "loss": 0.9235, "step": 2860 }, { "epoch": 3.258689458689459, "grad_norm": 0.1650409698486328, "learning_rate": 4.80070651332454e-05, "loss": 0.8632, "step": 2861 }, { "epoch": 3.25982905982906, "grad_norm": 0.1487380415201187, "learning_rate": 4.800524180905992e-05, "loss": 0.7974, "step": 2862 }, { "epoch": 3.260968660968661, "grad_norm": 0.17483578622341156, "learning_rate": 4.800341768583912e-05, "loss": 0.7623, "step": 2863 }, { "epoch": 3.262108262108262, "grad_norm": 0.1822967827320099, "learning_rate": 4.800159276364637e-05, "loss": 0.7696, "step": 2864 }, { "epoch": 3.263247863247863, "grad_norm": 0.16464319825172424, "learning_rate": 4.7999767042545046e-05, "loss": 0.8615, "step": 2865 }, { "epoch": 3.2643874643874646, "grad_norm": 0.12645751237869263, "learning_rate": 4.799794052259856e-05, "loss": 0.8416, "step": 2866 }, { "epoch": 3.2655270655270656, "grad_norm": 0.1537548303604126, "learning_rate": 4.799611320387036e-05, "loss": 0.8733, "step": 2867 }, { "epoch": 3.2666666666666666, "grad_norm": 0.15415510535240173, "learning_rate": 4.7994285086423904e-05, "loss": 0.8616, "step": 2868 }, { "epoch": 3.267806267806268, "grad_norm": 0.14532047510147095, "learning_rate": 4.7992456170322704e-05, "loss": 0.9884, "step": 2869 }, { "epoch": 3.268945868945869, "grad_norm": 0.182328000664711, "learning_rate": 4.799062645563026e-05, "loss": 0.9378, "step": 2870 }, { "epoch": 3.27008547008547, "grad_norm": 0.15013889968395233, "learning_rate": 4.798879594241014e-05, "loss": 0.8353, "step": 2871 }, { "epoch": 3.2712250712250714, "grad_norm": 0.16455797851085663, "learning_rate": 4.7986964630725914e-05, "loss": 0.6966, "step": 2872 }, { "epoch": 3.2723646723646724, "grad_norm": 0.18213346600532532, "learning_rate": 4.79851325206412e-05, "loss": 0.6468, "step": 2873 }, { "epoch": 3.2735042735042734, "grad_norm": 0.14936354756355286, "learning_rate": 4.7983299612219615e-05, "loss": 0.8894, "step": 2874 }, { "epoch": 3.274643874643875, "grad_norm": 0.17364799976348877, "learning_rate": 4.7981465905524833e-05, "loss": 0.911, "step": 2875 }, { "epoch": 3.275783475783476, "grad_norm": 0.177171528339386, "learning_rate": 4.797963140062054e-05, "loss": 0.7807, "step": 2876 }, { "epoch": 3.276923076923077, "grad_norm": 0.1283562183380127, "learning_rate": 4.797779609757046e-05, "loss": 0.8896, "step": 2877 }, { "epoch": 3.2780626780626783, "grad_norm": 0.15506917238235474, "learning_rate": 4.797595999643832e-05, "loss": 0.8594, "step": 2878 }, { "epoch": 3.2792022792022792, "grad_norm": 0.167799711227417, "learning_rate": 4.7974123097287916e-05, "loss": 0.8762, "step": 2879 }, { "epoch": 3.2803418803418802, "grad_norm": 0.14976146817207336, "learning_rate": 4.7972285400183034e-05, "loss": 0.8167, "step": 2880 }, { "epoch": 3.2814814814814817, "grad_norm": 0.17413875460624695, "learning_rate": 4.797044690518751e-05, "loss": 0.7142, "step": 2881 }, { "epoch": 3.2826210826210827, "grad_norm": 0.14165496826171875, "learning_rate": 4.796860761236519e-05, "loss": 0.9026, "step": 2882 }, { "epoch": 3.2837606837606836, "grad_norm": 0.17734207212924957, "learning_rate": 4.7966767521779966e-05, "loss": 0.7106, "step": 2883 }, { "epoch": 3.284900284900285, "grad_norm": 0.185882568359375, "learning_rate": 4.796492663349575e-05, "loss": 0.7243, "step": 2884 }, { "epoch": 3.286039886039886, "grad_norm": 0.12977924942970276, "learning_rate": 4.7963084947576474e-05, "loss": 0.7925, "step": 2885 }, { "epoch": 3.287179487179487, "grad_norm": 0.14745867252349854, "learning_rate": 4.796124246408611e-05, "loss": 0.9405, "step": 2886 }, { "epoch": 3.2883190883190885, "grad_norm": 0.15374231338500977, "learning_rate": 4.7959399183088656e-05, "loss": 0.9385, "step": 2887 }, { "epoch": 3.2894586894586895, "grad_norm": 0.14674480259418488, "learning_rate": 4.795755510464812e-05, "loss": 0.7354, "step": 2888 }, { "epoch": 3.2905982905982905, "grad_norm": 0.17418181896209717, "learning_rate": 4.795571022882858e-05, "loss": 0.819, "step": 2889 }, { "epoch": 3.291737891737892, "grad_norm": 0.17170394957065582, "learning_rate": 4.795386455569407e-05, "loss": 0.6846, "step": 2890 }, { "epoch": 3.292877492877493, "grad_norm": 0.18094651401042938, "learning_rate": 4.7952018085308734e-05, "loss": 0.9179, "step": 2891 }, { "epoch": 3.294017094017094, "grad_norm": 0.17319831252098083, "learning_rate": 4.795017081773669e-05, "loss": 0.7225, "step": 2892 }, { "epoch": 3.2951566951566953, "grad_norm": 0.1557462215423584, "learning_rate": 4.794832275304211e-05, "loss": 0.792, "step": 2893 }, { "epoch": 3.2962962962962963, "grad_norm": 0.14070087671279907, "learning_rate": 4.794647389128917e-05, "loss": 0.833, "step": 2894 }, { "epoch": 3.2974358974358973, "grad_norm": 0.16469363868236542, "learning_rate": 4.794462423254208e-05, "loss": 0.7882, "step": 2895 }, { "epoch": 3.2985754985754987, "grad_norm": 0.1564418226480484, "learning_rate": 4.794277377686509e-05, "loss": 0.7366, "step": 2896 }, { "epoch": 3.2997150997150997, "grad_norm": 0.1659630686044693, "learning_rate": 4.794092252432248e-05, "loss": 0.8426, "step": 2897 }, { "epoch": 3.3008547008547007, "grad_norm": 0.194271057844162, "learning_rate": 4.7939070474978544e-05, "loss": 0.7006, "step": 2898 }, { "epoch": 3.301994301994302, "grad_norm": 0.2531142234802246, "learning_rate": 4.79372176288976e-05, "loss": 0.7947, "step": 2899 }, { "epoch": 3.303133903133903, "grad_norm": 0.17031803727149963, "learning_rate": 4.793536398614402e-05, "loss": 0.6874, "step": 2900 }, { "epoch": 3.304273504273504, "grad_norm": 0.14180181920528412, "learning_rate": 4.793350954678217e-05, "loss": 0.8639, "step": 2901 }, { "epoch": 3.3054131054131055, "grad_norm": 0.16624847054481506, "learning_rate": 4.7931654310876475e-05, "loss": 0.7003, "step": 2902 }, { "epoch": 3.3065527065527065, "grad_norm": 0.16943198442459106, "learning_rate": 4.792979827849135e-05, "loss": 0.7789, "step": 2903 }, { "epoch": 3.3076923076923075, "grad_norm": 0.1517980694770813, "learning_rate": 4.792794144969128e-05, "loss": 0.9833, "step": 2904 }, { "epoch": 3.308831908831909, "grad_norm": 0.15181303024291992, "learning_rate": 4.7926083824540755e-05, "loss": 0.9133, "step": 2905 }, { "epoch": 3.30997150997151, "grad_norm": 0.1384531408548355, "learning_rate": 4.7924225403104294e-05, "loss": 0.9715, "step": 2906 }, { "epoch": 3.311111111111111, "grad_norm": 0.17269845306873322, "learning_rate": 4.792236618544643e-05, "loss": 0.6893, "step": 2907 }, { "epoch": 3.3122507122507123, "grad_norm": 0.16633005440235138, "learning_rate": 4.7920506171631765e-05, "loss": 0.7204, "step": 2908 }, { "epoch": 3.3133903133903133, "grad_norm": 0.14482691884040833, "learning_rate": 4.791864536172488e-05, "loss": 0.8033, "step": 2909 }, { "epoch": 3.3145299145299143, "grad_norm": 0.15212933719158173, "learning_rate": 4.791678375579042e-05, "loss": 0.8164, "step": 2910 }, { "epoch": 3.3156695156695157, "grad_norm": 0.15326949954032898, "learning_rate": 4.791492135389304e-05, "loss": 0.8251, "step": 2911 }, { "epoch": 3.3168091168091167, "grad_norm": 0.14179466664791107, "learning_rate": 4.791305815609742e-05, "loss": 0.8734, "step": 2912 }, { "epoch": 3.3179487179487177, "grad_norm": 0.16047196090221405, "learning_rate": 4.791119416246828e-05, "loss": 0.7975, "step": 2913 }, { "epoch": 3.319088319088319, "grad_norm": 0.14850234985351562, "learning_rate": 4.790932937307037e-05, "loss": 0.8757, "step": 2914 }, { "epoch": 3.32022792022792, "grad_norm": 0.15505769848823547, "learning_rate": 4.790746378796843e-05, "loss": 0.9229, "step": 2915 }, { "epoch": 3.3213675213675216, "grad_norm": 0.12910988926887512, "learning_rate": 4.7905597407227294e-05, "loss": 0.8803, "step": 2916 }, { "epoch": 3.3225071225071225, "grad_norm": 0.18499715626239777, "learning_rate": 4.790373023091176e-05, "loss": 0.7642, "step": 2917 }, { "epoch": 3.3236467236467235, "grad_norm": 0.15825526416301727, "learning_rate": 4.7901862259086696e-05, "loss": 0.7758, "step": 2918 }, { "epoch": 3.324786324786325, "grad_norm": 0.13876748085021973, "learning_rate": 4.7899993491816975e-05, "loss": 0.9218, "step": 2919 }, { "epoch": 3.325925925925926, "grad_norm": 0.15273845195770264, "learning_rate": 4.789812392916751e-05, "loss": 0.7492, "step": 2920 }, { "epoch": 3.327065527065527, "grad_norm": 0.18696825206279755, "learning_rate": 4.789625357120322e-05, "loss": 0.704, "step": 2921 }, { "epoch": 3.3282051282051284, "grad_norm": 0.14205411076545715, "learning_rate": 4.789438241798908e-05, "loss": 0.707, "step": 2922 }, { "epoch": 3.3293447293447294, "grad_norm": 0.1440926492214203, "learning_rate": 4.789251046959008e-05, "loss": 0.9097, "step": 2923 }, { "epoch": 3.3304843304843303, "grad_norm": 0.14126600325107574, "learning_rate": 4.789063772607124e-05, "loss": 0.9476, "step": 2924 }, { "epoch": 3.331623931623932, "grad_norm": 0.13090485334396362, "learning_rate": 4.7888764187497597e-05, "loss": 1.0064, "step": 2925 }, { "epoch": 3.3327635327635328, "grad_norm": 0.15178431570529938, "learning_rate": 4.788688985393423e-05, "loss": 0.8593, "step": 2926 }, { "epoch": 3.3339031339031338, "grad_norm": 0.16778749227523804, "learning_rate": 4.7885014725446245e-05, "loss": 0.7121, "step": 2927 }, { "epoch": 3.335042735042735, "grad_norm": 0.13816341757774353, "learning_rate": 4.788313880209876e-05, "loss": 0.9397, "step": 2928 }, { "epoch": 3.336182336182336, "grad_norm": 0.1426878273487091, "learning_rate": 4.788126208395694e-05, "loss": 0.8855, "step": 2929 }, { "epoch": 3.337321937321937, "grad_norm": 0.14349904656410217, "learning_rate": 4.787938457108596e-05, "loss": 0.9022, "step": 2930 }, { "epoch": 3.3384615384615386, "grad_norm": 0.14963257312774658, "learning_rate": 4.7877506263551035e-05, "loss": 0.737, "step": 2931 }, { "epoch": 3.3396011396011396, "grad_norm": 0.15363183617591858, "learning_rate": 4.787562716141741e-05, "loss": 0.9313, "step": 2932 }, { "epoch": 3.3407407407407406, "grad_norm": 0.18619300425052643, "learning_rate": 4.7873747264750336e-05, "loss": 0.6705, "step": 2933 }, { "epoch": 3.341880341880342, "grad_norm": 0.14622679352760315, "learning_rate": 4.787186657361512e-05, "loss": 0.8231, "step": 2934 }, { "epoch": 3.343019943019943, "grad_norm": 0.15449830889701843, "learning_rate": 4.786998508807709e-05, "loss": 0.9379, "step": 2935 }, { "epoch": 3.344159544159544, "grad_norm": 0.15847383439540863, "learning_rate": 4.786810280820158e-05, "loss": 0.7817, "step": 2936 }, { "epoch": 3.3452991452991454, "grad_norm": 0.17634394764900208, "learning_rate": 4.786621973405396e-05, "loss": 0.7425, "step": 2937 }, { "epoch": 3.3464387464387464, "grad_norm": 0.13386593759059906, "learning_rate": 4.7864335865699654e-05, "loss": 0.8333, "step": 2938 }, { "epoch": 3.347578347578348, "grad_norm": 0.1562299132347107, "learning_rate": 4.786245120320409e-05, "loss": 0.7517, "step": 2939 }, { "epoch": 3.348717948717949, "grad_norm": 0.1563599556684494, "learning_rate": 4.786056574663272e-05, "loss": 0.8658, "step": 2940 }, { "epoch": 3.34985754985755, "grad_norm": 0.13764537870883942, "learning_rate": 4.7858679496051034e-05, "loss": 0.9505, "step": 2941 }, { "epoch": 3.3509971509971512, "grad_norm": 0.12421752512454987, "learning_rate": 4.7856792451524543e-05, "loss": 1.1381, "step": 2942 }, { "epoch": 3.352136752136752, "grad_norm": 0.15025193989276886, "learning_rate": 4.785490461311881e-05, "loss": 0.7032, "step": 2943 }, { "epoch": 3.353276353276353, "grad_norm": 0.1785949319601059, "learning_rate": 4.7853015980899374e-05, "loss": 0.7779, "step": 2944 }, { "epoch": 3.3544159544159546, "grad_norm": 0.14679111540317535, "learning_rate": 4.785112655493185e-05, "loss": 0.8643, "step": 2945 }, { "epoch": 3.3555555555555556, "grad_norm": 0.13939358294010162, "learning_rate": 4.7849236335281866e-05, "loss": 0.7463, "step": 2946 }, { "epoch": 3.3566951566951566, "grad_norm": 0.14103035628795624, "learning_rate": 4.784734532201506e-05, "loss": 0.7751, "step": 2947 }, { "epoch": 3.357834757834758, "grad_norm": 0.1503453105688095, "learning_rate": 4.7845453515197124e-05, "loss": 0.9789, "step": 2948 }, { "epoch": 3.358974358974359, "grad_norm": 0.1696447730064392, "learning_rate": 4.784356091489376e-05, "loss": 0.7723, "step": 2949 }, { "epoch": 3.36011396011396, "grad_norm": 0.1859482079744339, "learning_rate": 4.784166752117071e-05, "loss": 0.7402, "step": 2950 }, { "epoch": 3.3612535612535615, "grad_norm": 0.1765981763601303, "learning_rate": 4.783977333409373e-05, "loss": 0.735, "step": 2951 }, { "epoch": 3.3623931623931624, "grad_norm": 0.1399850994348526, "learning_rate": 4.7837878353728614e-05, "loss": 0.8872, "step": 2952 }, { "epoch": 3.3635327635327634, "grad_norm": 0.1468261480331421, "learning_rate": 4.783598258014118e-05, "loss": 0.8349, "step": 2953 }, { "epoch": 3.364672364672365, "grad_norm": 0.13761648535728455, "learning_rate": 4.783408601339726e-05, "loss": 0.8262, "step": 2954 }, { "epoch": 3.365811965811966, "grad_norm": 0.1641799658536911, "learning_rate": 4.783218865356275e-05, "loss": 0.659, "step": 2955 }, { "epoch": 3.366951566951567, "grad_norm": 0.13689367473125458, "learning_rate": 4.783029050070354e-05, "loss": 0.8077, "step": 2956 }, { "epoch": 3.3680911680911683, "grad_norm": 0.1391943395137787, "learning_rate": 4.782839155488556e-05, "loss": 1.0299, "step": 2957 }, { "epoch": 3.3692307692307693, "grad_norm": 0.1720588058233261, "learning_rate": 4.7826491816174746e-05, "loss": 0.7851, "step": 2958 }, { "epoch": 3.3703703703703702, "grad_norm": 0.13822168111801147, "learning_rate": 4.782459128463711e-05, "loss": 1.0171, "step": 2959 }, { "epoch": 3.3715099715099717, "grad_norm": 0.15165726840496063, "learning_rate": 4.782268996033865e-05, "loss": 0.8503, "step": 2960 }, { "epoch": 3.3726495726495727, "grad_norm": 0.15746258199214935, "learning_rate": 4.7820787843345405e-05, "loss": 0.8418, "step": 2961 }, { "epoch": 3.3737891737891736, "grad_norm": 0.1569579392671585, "learning_rate": 4.781888493372344e-05, "loss": 0.888, "step": 2962 }, { "epoch": 3.374928774928775, "grad_norm": 0.1676587015390396, "learning_rate": 4.781698123153885e-05, "loss": 0.9519, "step": 2963 }, { "epoch": 3.376068376068376, "grad_norm": 0.14912888407707214, "learning_rate": 4.7815076736857756e-05, "loss": 0.7956, "step": 2964 }, { "epoch": 3.377207977207977, "grad_norm": 0.13699576258659363, "learning_rate": 4.7813171449746305e-05, "loss": 0.7573, "step": 2965 }, { "epoch": 3.3783475783475785, "grad_norm": 0.16007672250270844, "learning_rate": 4.781126537027067e-05, "loss": 0.685, "step": 2966 }, { "epoch": 3.3794871794871795, "grad_norm": 0.18223276734352112, "learning_rate": 4.780935849849706e-05, "loss": 0.7412, "step": 2967 }, { "epoch": 3.3806267806267805, "grad_norm": 0.13427135348320007, "learning_rate": 4.7807450834491705e-05, "loss": 0.8703, "step": 2968 }, { "epoch": 3.381766381766382, "grad_norm": 0.1502491980791092, "learning_rate": 4.780554237832086e-05, "loss": 0.8414, "step": 2969 }, { "epoch": 3.382905982905983, "grad_norm": 0.16403824090957642, "learning_rate": 4.780363313005081e-05, "loss": 0.7574, "step": 2970 }, { "epoch": 3.384045584045584, "grad_norm": 0.16481411457061768, "learning_rate": 4.7801723089747874e-05, "loss": 0.7611, "step": 2971 }, { "epoch": 3.3851851851851853, "grad_norm": 0.15091760456562042, "learning_rate": 4.7799812257478394e-05, "loss": 0.8899, "step": 2972 }, { "epoch": 3.3863247863247863, "grad_norm": 0.15702155232429504, "learning_rate": 4.7797900633308725e-05, "loss": 0.7831, "step": 2973 }, { "epoch": 3.3874643874643873, "grad_norm": 0.15315869450569153, "learning_rate": 4.7795988217305274e-05, "loss": 0.8124, "step": 2974 }, { "epoch": 3.3886039886039887, "grad_norm": 0.17758114635944366, "learning_rate": 4.779407500953447e-05, "loss": 0.8101, "step": 2975 }, { "epoch": 3.3897435897435897, "grad_norm": 0.18814335763454437, "learning_rate": 4.779216101006275e-05, "loss": 0.8026, "step": 2976 }, { "epoch": 3.3908831908831907, "grad_norm": 0.16487222909927368, "learning_rate": 4.779024621895661e-05, "loss": 0.8371, "step": 2977 }, { "epoch": 3.392022792022792, "grad_norm": 0.1344875991344452, "learning_rate": 4.7788330636282544e-05, "loss": 0.9105, "step": 2978 }, { "epoch": 3.393162393162393, "grad_norm": 0.17777496576309204, "learning_rate": 4.778641426210707e-05, "loss": 0.6903, "step": 2979 }, { "epoch": 3.394301994301994, "grad_norm": 0.14538417756557465, "learning_rate": 4.778449709649678e-05, "loss": 0.8707, "step": 2980 }, { "epoch": 3.3954415954415955, "grad_norm": 0.15061908960342407, "learning_rate": 4.778257913951825e-05, "loss": 0.862, "step": 2981 }, { "epoch": 3.3965811965811965, "grad_norm": 0.18131569027900696, "learning_rate": 4.7780660391238086e-05, "loss": 0.7183, "step": 2982 }, { "epoch": 3.3977207977207975, "grad_norm": 0.15029703080654144, "learning_rate": 4.777874085172295e-05, "loss": 0.926, "step": 2983 }, { "epoch": 3.398860398860399, "grad_norm": 0.17785389721393585, "learning_rate": 4.777682052103949e-05, "loss": 0.789, "step": 2984 }, { "epoch": 3.4, "grad_norm": 0.13893821835517883, "learning_rate": 4.777489939925443e-05, "loss": 0.9492, "step": 2985 }, { "epoch": 3.401139601139601, "grad_norm": 0.17500656843185425, "learning_rate": 4.777297748643447e-05, "loss": 0.6401, "step": 2986 }, { "epoch": 3.4022792022792023, "grad_norm": 0.13758200407028198, "learning_rate": 4.7771054782646376e-05, "loss": 0.915, "step": 2987 }, { "epoch": 3.4034188034188033, "grad_norm": 0.14513808488845825, "learning_rate": 4.7769131287956936e-05, "loss": 0.985, "step": 2988 }, { "epoch": 3.4045584045584047, "grad_norm": 0.17476683855056763, "learning_rate": 4.776720700243295e-05, "loss": 0.827, "step": 2989 }, { "epoch": 3.4056980056980057, "grad_norm": 0.15774057805538177, "learning_rate": 4.7765281926141254e-05, "loss": 0.7041, "step": 2990 }, { "epoch": 3.4068376068376067, "grad_norm": 0.19230559468269348, "learning_rate": 4.7763356059148714e-05, "loss": 0.7692, "step": 2991 }, { "epoch": 3.407977207977208, "grad_norm": 0.15212854743003845, "learning_rate": 4.776142940152221e-05, "loss": 0.6771, "step": 2992 }, { "epoch": 3.409116809116809, "grad_norm": 0.16355951130390167, "learning_rate": 4.7759501953328676e-05, "loss": 0.8565, "step": 2993 }, { "epoch": 3.41025641025641, "grad_norm": 0.14680014550685883, "learning_rate": 4.775757371463505e-05, "loss": 0.835, "step": 2994 }, { "epoch": 3.4113960113960116, "grad_norm": 0.16888846457004547, "learning_rate": 4.7755644685508305e-05, "loss": 0.8461, "step": 2995 }, { "epoch": 3.4125356125356126, "grad_norm": 0.13844668865203857, "learning_rate": 4.7753714866015445e-05, "loss": 0.7777, "step": 2996 }, { "epoch": 3.4136752136752135, "grad_norm": 0.15098446607589722, "learning_rate": 4.7751784256223484e-05, "loss": 0.7478, "step": 2997 }, { "epoch": 3.414814814814815, "grad_norm": 0.17515455186367035, "learning_rate": 4.7749852856199494e-05, "loss": 0.6304, "step": 2998 }, { "epoch": 3.415954415954416, "grad_norm": 0.1213260143995285, "learning_rate": 4.774792066601056e-05, "loss": 1.0288, "step": 2999 }, { "epoch": 3.417094017094017, "grad_norm": 0.1554059535264969, "learning_rate": 4.774598768572377e-05, "loss": 0.8015, "step": 3000 }, { "epoch": 3.4182336182336184, "grad_norm": 0.14148254692554474, "learning_rate": 4.774405391540628e-05, "loss": 0.7828, "step": 3001 }, { "epoch": 3.4193732193732194, "grad_norm": 0.19590474665164948, "learning_rate": 4.774211935512526e-05, "loss": 0.5977, "step": 3002 }, { "epoch": 3.4205128205128204, "grad_norm": 0.16398891806602478, "learning_rate": 4.774018400494788e-05, "loss": 0.87, "step": 3003 }, { "epoch": 3.421652421652422, "grad_norm": 0.1326790750026703, "learning_rate": 4.773824786494139e-05, "loss": 0.903, "step": 3004 }, { "epoch": 3.4227920227920228, "grad_norm": 0.1271533966064453, "learning_rate": 4.7736310935173013e-05, "loss": 0.8567, "step": 3005 }, { "epoch": 3.4239316239316238, "grad_norm": 0.17647764086723328, "learning_rate": 4.773437321571003e-05, "loss": 0.7339, "step": 3006 }, { "epoch": 3.425071225071225, "grad_norm": 0.1769576370716095, "learning_rate": 4.773243470661975e-05, "loss": 0.5728, "step": 3007 }, { "epoch": 3.426210826210826, "grad_norm": 0.12879271805286407, "learning_rate": 4.77304954079695e-05, "loss": 0.831, "step": 3008 }, { "epoch": 3.427350427350427, "grad_norm": 0.16009968519210815, "learning_rate": 4.772855531982663e-05, "loss": 0.7828, "step": 3009 }, { "epoch": 3.4284900284900286, "grad_norm": 0.14020909368991852, "learning_rate": 4.772661444225853e-05, "loss": 0.8271, "step": 3010 }, { "epoch": 3.4296296296296296, "grad_norm": 0.15442004799842834, "learning_rate": 4.7724672775332615e-05, "loss": 0.8451, "step": 3011 }, { "epoch": 3.430769230769231, "grad_norm": 0.16702589392662048, "learning_rate": 4.7722730319116314e-05, "loss": 0.8092, "step": 3012 }, { "epoch": 3.431908831908832, "grad_norm": 0.14913535118103027, "learning_rate": 4.772078707367711e-05, "loss": 0.726, "step": 3013 }, { "epoch": 3.433048433048433, "grad_norm": 0.1301717758178711, "learning_rate": 4.7718843039082485e-05, "loss": 1.0495, "step": 3014 }, { "epoch": 3.4341880341880344, "grad_norm": 0.15527473390102386, "learning_rate": 4.771689821539996e-05, "loss": 0.7916, "step": 3015 }, { "epoch": 3.4353276353276354, "grad_norm": 0.1384832262992859, "learning_rate": 4.771495260269709e-05, "loss": 1.0, "step": 3016 }, { "epoch": 3.4364672364672364, "grad_norm": 0.1275806427001953, "learning_rate": 4.771300620104146e-05, "loss": 0.9059, "step": 3017 }, { "epoch": 3.437606837606838, "grad_norm": 0.13449861109256744, "learning_rate": 4.771105901050066e-05, "loss": 0.9651, "step": 3018 }, { "epoch": 3.438746438746439, "grad_norm": 0.14235353469848633, "learning_rate": 4.7709111031142315e-05, "loss": 1.0003, "step": 3019 }, { "epoch": 3.43988603988604, "grad_norm": 0.15162557363510132, "learning_rate": 4.77071622630341e-05, "loss": 0.8483, "step": 3020 }, { "epoch": 3.4410256410256412, "grad_norm": 0.1523604840040207, "learning_rate": 4.7705212706243696e-05, "loss": 0.8053, "step": 3021 }, { "epoch": 3.4421652421652422, "grad_norm": 0.15889766812324524, "learning_rate": 4.770326236083881e-05, "loss": 0.6381, "step": 3022 }, { "epoch": 3.443304843304843, "grad_norm": 0.15443409979343414, "learning_rate": 4.7701311226887194e-05, "loss": 0.9534, "step": 3023 }, { "epoch": 3.4444444444444446, "grad_norm": 0.13383810222148895, "learning_rate": 4.769935930445661e-05, "loss": 0.9529, "step": 3024 }, { "epoch": 3.4455840455840456, "grad_norm": 0.15414220094680786, "learning_rate": 4.769740659361485e-05, "loss": 0.7851, "step": 3025 }, { "epoch": 3.4467236467236466, "grad_norm": 0.12543773651123047, "learning_rate": 4.769545309442974e-05, "loss": 0.9761, "step": 3026 }, { "epoch": 3.447863247863248, "grad_norm": 0.16195324063301086, "learning_rate": 4.769349880696914e-05, "loss": 0.832, "step": 3027 }, { "epoch": 3.449002849002849, "grad_norm": 0.14392708241939545, "learning_rate": 4.769154373130091e-05, "loss": 0.8292, "step": 3028 }, { "epoch": 3.45014245014245, "grad_norm": 0.14125484228134155, "learning_rate": 4.768958786749297e-05, "loss": 0.8413, "step": 3029 }, { "epoch": 3.4512820512820515, "grad_norm": 0.17190024256706238, "learning_rate": 4.768763121561324e-05, "loss": 0.7695, "step": 3030 }, { "epoch": 3.4524216524216524, "grad_norm": 0.1310916244983673, "learning_rate": 4.768567377572969e-05, "loss": 0.9733, "step": 3031 }, { "epoch": 3.4535612535612534, "grad_norm": 0.1735365092754364, "learning_rate": 4.768371554791031e-05, "loss": 0.8854, "step": 3032 }, { "epoch": 3.454700854700855, "grad_norm": 0.16494423151016235, "learning_rate": 4.76817565322231e-05, "loss": 0.8977, "step": 3033 }, { "epoch": 3.455840455840456, "grad_norm": 0.13473758101463318, "learning_rate": 4.7679796728736115e-05, "loss": 0.9667, "step": 3034 }, { "epoch": 3.456980056980057, "grad_norm": 0.16762381792068481, "learning_rate": 4.7677836137517416e-05, "loss": 0.8502, "step": 3035 }, { "epoch": 3.4581196581196583, "grad_norm": 0.15547460317611694, "learning_rate": 4.767587475863511e-05, "loss": 0.8049, "step": 3036 }, { "epoch": 3.4592592592592593, "grad_norm": 0.15720248222351074, "learning_rate": 4.767391259215731e-05, "loss": 0.7653, "step": 3037 }, { "epoch": 3.4603988603988602, "grad_norm": 0.15790072083473206, "learning_rate": 4.767194963815217e-05, "loss": 0.878, "step": 3038 }, { "epoch": 3.4615384615384617, "grad_norm": 0.14522385597229004, "learning_rate": 4.766998589668788e-05, "loss": 0.7098, "step": 3039 }, { "epoch": 3.4626780626780627, "grad_norm": 0.14111749827861786, "learning_rate": 4.7668021367832625e-05, "loss": 0.8254, "step": 3040 }, { "epoch": 3.4638176638176637, "grad_norm": 0.15024295449256897, "learning_rate": 4.7666056051654665e-05, "loss": 0.843, "step": 3041 }, { "epoch": 3.464957264957265, "grad_norm": 0.1455540657043457, "learning_rate": 4.766408994822223e-05, "loss": 0.764, "step": 3042 }, { "epoch": 3.466096866096866, "grad_norm": 0.20096252858638763, "learning_rate": 4.7662123057603636e-05, "loss": 0.6538, "step": 3043 }, { "epoch": 3.467236467236467, "grad_norm": 0.1540260910987854, "learning_rate": 4.7660155379867184e-05, "loss": 0.8088, "step": 3044 }, { "epoch": 3.4683760683760685, "grad_norm": 0.14348721504211426, "learning_rate": 4.7658186915081215e-05, "loss": 0.7288, "step": 3045 }, { "epoch": 3.4695156695156695, "grad_norm": 0.14723597466945648, "learning_rate": 4.765621766331411e-05, "loss": 0.8305, "step": 3046 }, { "epoch": 3.4706552706552705, "grad_norm": 0.15247543156147003, "learning_rate": 4.7654247624634266e-05, "loss": 0.6852, "step": 3047 }, { "epoch": 3.471794871794872, "grad_norm": 0.15918880701065063, "learning_rate": 4.765227679911009e-05, "loss": 0.7662, "step": 3048 }, { "epoch": 3.472934472934473, "grad_norm": 0.17041854560375214, "learning_rate": 4.7650305186810054e-05, "loss": 0.7827, "step": 3049 }, { "epoch": 3.474074074074074, "grad_norm": 0.1634455770254135, "learning_rate": 4.764833278780263e-05, "loss": 0.7868, "step": 3050 }, { "epoch": 3.4752136752136753, "grad_norm": 0.13534197211265564, "learning_rate": 4.764635960215632e-05, "loss": 1.0153, "step": 3051 }, { "epoch": 3.4763532763532763, "grad_norm": 0.17341741919517517, "learning_rate": 4.764438562993967e-05, "loss": 0.6822, "step": 3052 }, { "epoch": 3.4774928774928773, "grad_norm": 0.13949541747570038, "learning_rate": 4.764241087122123e-05, "loss": 1.0058, "step": 3053 }, { "epoch": 3.4786324786324787, "grad_norm": 0.13739712536334991, "learning_rate": 4.7640435326069597e-05, "loss": 0.8821, "step": 3054 }, { "epoch": 3.4797720797720797, "grad_norm": 0.1282612532377243, "learning_rate": 4.7638458994553384e-05, "loss": 0.8529, "step": 3055 }, { "epoch": 3.4809116809116807, "grad_norm": 0.21667225658893585, "learning_rate": 4.763648187674124e-05, "loss": 0.4698, "step": 3056 }, { "epoch": 3.482051282051282, "grad_norm": 0.1563890129327774, "learning_rate": 4.763450397270182e-05, "loss": 0.8811, "step": 3057 }, { "epoch": 3.483190883190883, "grad_norm": 0.17969991266727448, "learning_rate": 4.7632525282503835e-05, "loss": 0.7609, "step": 3058 }, { "epoch": 3.484330484330484, "grad_norm": 0.17439280450344086, "learning_rate": 4.763054580621601e-05, "loss": 0.8125, "step": 3059 }, { "epoch": 3.4854700854700855, "grad_norm": 0.1651092767715454, "learning_rate": 4.762856554390709e-05, "loss": 0.6987, "step": 3060 }, { "epoch": 3.4866096866096865, "grad_norm": 0.17362132668495178, "learning_rate": 4.762658449564586e-05, "loss": 0.8971, "step": 3061 }, { "epoch": 3.487749287749288, "grad_norm": 0.16419439017772675, "learning_rate": 4.762460266150113e-05, "loss": 0.6536, "step": 3062 }, { "epoch": 3.488888888888889, "grad_norm": 0.14855636656284332, "learning_rate": 4.762262004154173e-05, "loss": 0.9623, "step": 3063 }, { "epoch": 3.49002849002849, "grad_norm": 0.16179607808589935, "learning_rate": 4.7620636635836525e-05, "loss": 0.8504, "step": 3064 }, { "epoch": 3.4911680911680913, "grad_norm": 0.14565414190292358, "learning_rate": 4.7618652444454404e-05, "loss": 0.7869, "step": 3065 }, { "epoch": 3.4923076923076923, "grad_norm": 0.17142000794410706, "learning_rate": 4.7616667467464274e-05, "loss": 0.7129, "step": 3066 }, { "epoch": 3.4934472934472933, "grad_norm": 0.13818399608135223, "learning_rate": 4.761468170493509e-05, "loss": 0.9723, "step": 3067 }, { "epoch": 3.4945868945868948, "grad_norm": 0.14472275972366333, "learning_rate": 4.7612695156935824e-05, "loss": 0.8437, "step": 3068 }, { "epoch": 3.4957264957264957, "grad_norm": 0.14463575184345245, "learning_rate": 4.7610707823535475e-05, "loss": 0.8419, "step": 3069 }, { "epoch": 3.4968660968660967, "grad_norm": 0.1572341024875641, "learning_rate": 4.760871970480305e-05, "loss": 0.8705, "step": 3070 }, { "epoch": 3.498005698005698, "grad_norm": 0.17815126478672028, "learning_rate": 4.760673080080762e-05, "loss": 0.6992, "step": 3071 }, { "epoch": 3.499145299145299, "grad_norm": 0.1395833045244217, "learning_rate": 4.760474111161827e-05, "loss": 0.9855, "step": 3072 }, { "epoch": 3.5002849002849, "grad_norm": 0.16465449333190918, "learning_rate": 4.760275063730409e-05, "loss": 0.7593, "step": 3073 }, { "epoch": 3.5014245014245016, "grad_norm": 0.15138347446918488, "learning_rate": 4.7600759377934223e-05, "loss": 0.774, "step": 3074 }, { "epoch": 3.5025641025641026, "grad_norm": 0.1572001874446869, "learning_rate": 4.759876733357783e-05, "loss": 0.7944, "step": 3075 }, { "epoch": 3.5037037037037035, "grad_norm": 0.15404963493347168, "learning_rate": 4.75967745043041e-05, "loss": 0.7147, "step": 3076 }, { "epoch": 3.504843304843305, "grad_norm": 0.13912785053253174, "learning_rate": 4.7594780890182255e-05, "loss": 0.847, "step": 3077 }, { "epoch": 3.505982905982906, "grad_norm": 0.13626809418201447, "learning_rate": 4.7592786491281526e-05, "loss": 0.8828, "step": 3078 }, { "epoch": 3.5071225071225074, "grad_norm": 0.14898303151130676, "learning_rate": 4.75907913076712e-05, "loss": 0.8821, "step": 3079 }, { "epoch": 3.5082621082621084, "grad_norm": 0.1598164439201355, "learning_rate": 4.758879533942057e-05, "loss": 0.7792, "step": 3080 }, { "epoch": 3.5094017094017094, "grad_norm": 0.1513209193944931, "learning_rate": 4.758679858659894e-05, "loss": 0.7868, "step": 3081 }, { "epoch": 3.510541310541311, "grad_norm": 0.12373489141464233, "learning_rate": 4.7584801049275696e-05, "loss": 0.9528, "step": 3082 }, { "epoch": 3.511680911680912, "grad_norm": 0.15823349356651306, "learning_rate": 4.75828027275202e-05, "loss": 0.604, "step": 3083 }, { "epoch": 3.5128205128205128, "grad_norm": 0.15279988944530487, "learning_rate": 4.758080362140186e-05, "loss": 0.8486, "step": 3084 }, { "epoch": 3.513960113960114, "grad_norm": 0.1481696516275406, "learning_rate": 4.757880373099012e-05, "loss": 0.7861, "step": 3085 }, { "epoch": 3.515099715099715, "grad_norm": 0.15903371572494507, "learning_rate": 4.7576803056354427e-05, "loss": 0.7997, "step": 3086 }, { "epoch": 3.516239316239316, "grad_norm": 0.1583215892314911, "learning_rate": 4.7574801597564275e-05, "loss": 0.7687, "step": 3087 }, { "epoch": 3.5173789173789176, "grad_norm": 0.1464768797159195, "learning_rate": 4.757279935468919e-05, "loss": 0.9386, "step": 3088 }, { "epoch": 3.5185185185185186, "grad_norm": 0.17636044323444366, "learning_rate": 4.7570796327798706e-05, "loss": 0.7182, "step": 3089 }, { "epoch": 3.5196581196581196, "grad_norm": 0.1644255369901657, "learning_rate": 4.7568792516962404e-05, "loss": 0.7191, "step": 3090 }, { "epoch": 3.520797720797721, "grad_norm": 0.18549492955207825, "learning_rate": 4.756678792224986e-05, "loss": 0.7867, "step": 3091 }, { "epoch": 3.521937321937322, "grad_norm": 0.1530570387840271, "learning_rate": 4.756478254373071e-05, "loss": 0.9241, "step": 3092 }, { "epoch": 3.523076923076923, "grad_norm": 0.15714332461357117, "learning_rate": 4.756277638147462e-05, "loss": 0.7939, "step": 3093 }, { "epoch": 3.5242165242165244, "grad_norm": 0.16354434192180634, "learning_rate": 4.7560769435551256e-05, "loss": 0.8097, "step": 3094 }, { "epoch": 3.5253561253561254, "grad_norm": 0.14795003831386566, "learning_rate": 4.755876170603032e-05, "loss": 0.7605, "step": 3095 }, { "epoch": 3.5264957264957264, "grad_norm": 0.16275209188461304, "learning_rate": 4.755675319298156e-05, "loss": 0.9156, "step": 3096 }, { "epoch": 3.527635327635328, "grad_norm": 0.12918637692928314, "learning_rate": 4.7554743896474726e-05, "loss": 0.9186, "step": 3097 }, { "epoch": 3.528774928774929, "grad_norm": 0.14242558181285858, "learning_rate": 4.755273381657962e-05, "loss": 0.896, "step": 3098 }, { "epoch": 3.52991452991453, "grad_norm": 0.15556873381137848, "learning_rate": 4.755072295336605e-05, "loss": 0.8993, "step": 3099 }, { "epoch": 3.5310541310541312, "grad_norm": 0.134568452835083, "learning_rate": 4.754871130690384e-05, "loss": 0.9547, "step": 3100 }, { "epoch": 3.5321937321937322, "grad_norm": 0.18947847187519073, "learning_rate": 4.754669887726289e-05, "loss": 0.6513, "step": 3101 }, { "epoch": 3.533333333333333, "grad_norm": 0.1824527382850647, "learning_rate": 4.754468566451308e-05, "loss": 0.724, "step": 3102 }, { "epoch": 3.5344729344729346, "grad_norm": 0.14512591063976288, "learning_rate": 4.7542671668724335e-05, "loss": 0.9582, "step": 3103 }, { "epoch": 3.5356125356125356, "grad_norm": 0.13544780015945435, "learning_rate": 4.754065688996662e-05, "loss": 0.878, "step": 3104 }, { "epoch": 3.5367521367521366, "grad_norm": 0.156879261136055, "learning_rate": 4.7538641328309894e-05, "loss": 0.747, "step": 3105 }, { "epoch": 3.537891737891738, "grad_norm": 0.14823807775974274, "learning_rate": 4.7536624983824174e-05, "loss": 0.7952, "step": 3106 }, { "epoch": 3.539031339031339, "grad_norm": 0.14617039263248444, "learning_rate": 4.75346078565795e-05, "loss": 0.9083, "step": 3107 }, { "epoch": 3.54017094017094, "grad_norm": 0.1773999184370041, "learning_rate": 4.753258994664592e-05, "loss": 0.979, "step": 3108 }, { "epoch": 3.5413105413105415, "grad_norm": 0.17309634387493134, "learning_rate": 4.753057125409352e-05, "loss": 0.7047, "step": 3109 }, { "epoch": 3.5424501424501424, "grad_norm": 0.17175230383872986, "learning_rate": 4.7528551778992434e-05, "loss": 0.6613, "step": 3110 }, { "epoch": 3.5435897435897434, "grad_norm": 0.12826001644134521, "learning_rate": 4.7526531521412785e-05, "loss": 0.9728, "step": 3111 }, { "epoch": 3.544729344729345, "grad_norm": 0.14230799674987793, "learning_rate": 4.752451048142476e-05, "loss": 0.9293, "step": 3112 }, { "epoch": 3.545868945868946, "grad_norm": 0.1300976276397705, "learning_rate": 4.752248865909853e-05, "loss": 1.0119, "step": 3113 }, { "epoch": 3.547008547008547, "grad_norm": 0.13346892595291138, "learning_rate": 4.752046605450433e-05, "loss": 0.9862, "step": 3114 }, { "epoch": 3.5481481481481483, "grad_norm": 0.15452101826667786, "learning_rate": 4.751844266771242e-05, "loss": 0.7844, "step": 3115 }, { "epoch": 3.5492877492877493, "grad_norm": 0.11814556270837784, "learning_rate": 4.751641849879306e-05, "loss": 0.9023, "step": 3116 }, { "epoch": 3.5504273504273502, "grad_norm": 0.14452509582042694, "learning_rate": 4.7514393547816574e-05, "loss": 0.8339, "step": 3117 }, { "epoch": 3.5515669515669517, "grad_norm": 0.1519222855567932, "learning_rate": 4.751236781485328e-05, "loss": 0.8992, "step": 3118 }, { "epoch": 3.5527065527065527, "grad_norm": 0.16835427284240723, "learning_rate": 4.7510341299973544e-05, "loss": 0.6874, "step": 3119 }, { "epoch": 3.5538461538461537, "grad_norm": 0.1623925268650055, "learning_rate": 4.7508314003247753e-05, "loss": 0.5674, "step": 3120 }, { "epoch": 3.554985754985755, "grad_norm": 0.1429547369480133, "learning_rate": 4.750628592474632e-05, "loss": 0.852, "step": 3121 }, { "epoch": 3.556125356125356, "grad_norm": 0.17852966487407684, "learning_rate": 4.7504257064539676e-05, "loss": 0.7445, "step": 3122 }, { "epoch": 3.557264957264957, "grad_norm": 0.15837563574314117, "learning_rate": 4.75022274226983e-05, "loss": 0.7572, "step": 3123 }, { "epoch": 3.5584045584045585, "grad_norm": 0.1362544149160385, "learning_rate": 4.750019699929269e-05, "loss": 0.9731, "step": 3124 }, { "epoch": 3.5595441595441595, "grad_norm": 0.15003323554992676, "learning_rate": 4.7498165794393356e-05, "loss": 0.689, "step": 3125 }, { "epoch": 3.5606837606837605, "grad_norm": 0.19216139614582062, "learning_rate": 4.7496133808070866e-05, "loss": 0.769, "step": 3126 }, { "epoch": 3.561823361823362, "grad_norm": 0.16024665534496307, "learning_rate": 4.749410104039577e-05, "loss": 0.7634, "step": 3127 }, { "epoch": 3.562962962962963, "grad_norm": 0.140433669090271, "learning_rate": 4.74920674914387e-05, "loss": 0.9262, "step": 3128 }, { "epoch": 3.564102564102564, "grad_norm": 0.14482799172401428, "learning_rate": 4.749003316127026e-05, "loss": 0.8689, "step": 3129 }, { "epoch": 3.5652421652421653, "grad_norm": 0.18085375428199768, "learning_rate": 4.7487998049961125e-05, "loss": 0.6887, "step": 3130 }, { "epoch": 3.5663817663817663, "grad_norm": 0.16813300549983978, "learning_rate": 4.748596215758198e-05, "loss": 0.7823, "step": 3131 }, { "epoch": 3.5675213675213673, "grad_norm": 0.1730300337076187, "learning_rate": 4.748392548420352e-05, "loss": 0.6934, "step": 3132 }, { "epoch": 3.5686609686609687, "grad_norm": 0.18341879546642303, "learning_rate": 4.7481888029896506e-05, "loss": 0.843, "step": 3133 }, { "epoch": 3.5698005698005697, "grad_norm": 0.14152836799621582, "learning_rate": 4.747984979473169e-05, "loss": 0.7443, "step": 3134 }, { "epoch": 3.5709401709401707, "grad_norm": 0.16617611050605774, "learning_rate": 4.7477810778779875e-05, "loss": 0.7803, "step": 3135 }, { "epoch": 3.572079772079772, "grad_norm": 0.1385413557291031, "learning_rate": 4.7475770982111875e-05, "loss": 0.7995, "step": 3136 }, { "epoch": 3.573219373219373, "grad_norm": 0.15684014558792114, "learning_rate": 4.7473730404798544e-05, "loss": 0.7581, "step": 3137 }, { "epoch": 3.574358974358974, "grad_norm": 0.16094832122325897, "learning_rate": 4.7471689046910756e-05, "loss": 0.7769, "step": 3138 }, { "epoch": 3.5754985754985755, "grad_norm": 0.13003958761692047, "learning_rate": 4.7469646908519404e-05, "loss": 0.9816, "step": 3139 }, { "epoch": 3.5766381766381765, "grad_norm": 0.16533993184566498, "learning_rate": 4.746760398969543e-05, "loss": 0.7451, "step": 3140 }, { "epoch": 3.5777777777777775, "grad_norm": 0.13551752269268036, "learning_rate": 4.746556029050977e-05, "loss": 0.9916, "step": 3141 }, { "epoch": 3.578917378917379, "grad_norm": 0.17578278481960297, "learning_rate": 4.7463515811033435e-05, "loss": 0.661, "step": 3142 }, { "epoch": 3.58005698005698, "grad_norm": 0.1797582507133484, "learning_rate": 4.746147055133741e-05, "loss": 0.6817, "step": 3143 }, { "epoch": 3.5811965811965814, "grad_norm": 0.12641745805740356, "learning_rate": 4.7459424511492745e-05, "loss": 1.1176, "step": 3144 }, { "epoch": 3.5823361823361823, "grad_norm": 0.16470804810523987, "learning_rate": 4.745737769157051e-05, "loss": 0.8451, "step": 3145 }, { "epoch": 3.5834757834757833, "grad_norm": 0.1253516972064972, "learning_rate": 4.7455330091641784e-05, "loss": 0.8838, "step": 3146 }, { "epoch": 3.5846153846153848, "grad_norm": 0.15218587219715118, "learning_rate": 4.745328171177769e-05, "loss": 0.7625, "step": 3147 }, { "epoch": 3.5857549857549857, "grad_norm": 0.168340802192688, "learning_rate": 4.7451232552049385e-05, "loss": 0.8593, "step": 3148 }, { "epoch": 3.5868945868945867, "grad_norm": 0.18887469172477722, "learning_rate": 4.744918261252802e-05, "loss": 0.7663, "step": 3149 }, { "epoch": 3.588034188034188, "grad_norm": 0.1507597118616104, "learning_rate": 4.7447131893284815e-05, "loss": 0.8658, "step": 3150 }, { "epoch": 3.589173789173789, "grad_norm": 0.15251576900482178, "learning_rate": 4.744508039439099e-05, "loss": 0.6719, "step": 3151 }, { "epoch": 3.5903133903133906, "grad_norm": 0.141212597489357, "learning_rate": 4.74430281159178e-05, "loss": 0.9153, "step": 3152 }, { "epoch": 3.5914529914529916, "grad_norm": 0.17158134281635284, "learning_rate": 4.744097505793652e-05, "loss": 0.5871, "step": 3153 }, { "epoch": 3.5925925925925926, "grad_norm": 0.1644529551267624, "learning_rate": 4.743892122051846e-05, "loss": 0.7573, "step": 3154 }, { "epoch": 3.593732193732194, "grad_norm": 0.12898863852024078, "learning_rate": 4.7436866603734964e-05, "loss": 0.8707, "step": 3155 }, { "epoch": 3.594871794871795, "grad_norm": 0.15508373081684113, "learning_rate": 4.743481120765739e-05, "loss": 0.8554, "step": 3156 }, { "epoch": 3.596011396011396, "grad_norm": 0.1768398880958557, "learning_rate": 4.743275503235712e-05, "loss": 0.6901, "step": 3157 }, { "epoch": 3.5971509971509974, "grad_norm": 0.16949577629566193, "learning_rate": 4.743069807790559e-05, "loss": 0.7631, "step": 3158 }, { "epoch": 3.5982905982905984, "grad_norm": 0.15541768074035645, "learning_rate": 4.742864034437422e-05, "loss": 0.8703, "step": 3159 }, { "epoch": 3.5994301994301994, "grad_norm": 0.1964762657880783, "learning_rate": 4.742658183183449e-05, "loss": 0.6899, "step": 3160 }, { "epoch": 3.600569800569801, "grad_norm": 0.15012037754058838, "learning_rate": 4.742452254035791e-05, "loss": 0.7818, "step": 3161 }, { "epoch": 3.601709401709402, "grad_norm": 0.17907105386257172, "learning_rate": 4.742246247001599e-05, "loss": 0.7718, "step": 3162 }, { "epoch": 3.602849002849003, "grad_norm": 0.16445888578891754, "learning_rate": 4.742040162088029e-05, "loss": 0.7743, "step": 3163 }, { "epoch": 3.603988603988604, "grad_norm": 0.18797078728675842, "learning_rate": 4.741833999302238e-05, "loss": 0.6795, "step": 3164 }, { "epoch": 3.605128205128205, "grad_norm": 0.15027131140232086, "learning_rate": 4.7416277586513876e-05, "loss": 0.8253, "step": 3165 }, { "epoch": 3.606267806267806, "grad_norm": 0.14495904743671417, "learning_rate": 4.7414214401426406e-05, "loss": 0.8755, "step": 3166 }, { "epoch": 3.6074074074074076, "grad_norm": 0.16689111292362213, "learning_rate": 4.7412150437831625e-05, "loss": 0.8236, "step": 3167 }, { "epoch": 3.6085470085470086, "grad_norm": 0.1899695247411728, "learning_rate": 4.7410085695801226e-05, "loss": 0.7957, "step": 3168 }, { "epoch": 3.6096866096866096, "grad_norm": 0.13431382179260254, "learning_rate": 4.740802017540692e-05, "loss": 0.9913, "step": 3169 }, { "epoch": 3.610826210826211, "grad_norm": 0.18161001801490784, "learning_rate": 4.740595387672047e-05, "loss": 0.853, "step": 3170 }, { "epoch": 3.611965811965812, "grad_norm": 0.14907872676849365, "learning_rate": 4.74038867998136e-05, "loss": 0.9096, "step": 3171 }, { "epoch": 3.613105413105413, "grad_norm": 0.14314506947994232, "learning_rate": 4.740181894475815e-05, "loss": 0.9571, "step": 3172 }, { "epoch": 3.6142450142450144, "grad_norm": 0.14771422743797302, "learning_rate": 4.739975031162591e-05, "loss": 0.8377, "step": 3173 }, { "epoch": 3.6153846153846154, "grad_norm": 0.1530153453350067, "learning_rate": 4.739768090048875e-05, "loss": 0.7473, "step": 3174 }, { "epoch": 3.6165242165242164, "grad_norm": 0.15972135961055756, "learning_rate": 4.739561071141854e-05, "loss": 0.8732, "step": 3175 }, { "epoch": 3.617663817663818, "grad_norm": 0.1361907720565796, "learning_rate": 4.739353974448717e-05, "loss": 0.9092, "step": 3176 }, { "epoch": 3.618803418803419, "grad_norm": 0.16396193206310272, "learning_rate": 4.739146799976659e-05, "loss": 0.7726, "step": 3177 }, { "epoch": 3.61994301994302, "grad_norm": 0.16118596494197845, "learning_rate": 4.738939547732875e-05, "loss": 0.8619, "step": 3178 }, { "epoch": 3.6210826210826212, "grad_norm": 0.1634664088487625, "learning_rate": 4.7387322177245635e-05, "loss": 0.6513, "step": 3179 }, { "epoch": 3.6222222222222222, "grad_norm": 0.1703759729862213, "learning_rate": 4.7385248099589255e-05, "loss": 0.6414, "step": 3180 }, { "epoch": 3.623361823361823, "grad_norm": 0.14178255200386047, "learning_rate": 4.738317324443164e-05, "loss": 0.8136, "step": 3181 }, { "epoch": 3.6245014245014247, "grad_norm": 0.1369297355413437, "learning_rate": 4.7381097611844876e-05, "loss": 0.9897, "step": 3182 }, { "epoch": 3.6256410256410256, "grad_norm": 0.15781539678573608, "learning_rate": 4.737902120190104e-05, "loss": 0.7981, "step": 3183 }, { "epoch": 3.6267806267806266, "grad_norm": 0.17928865551948547, "learning_rate": 4.7376944014672255e-05, "loss": 0.6701, "step": 3184 }, { "epoch": 3.627920227920228, "grad_norm": 0.17236638069152832, "learning_rate": 4.737486605023067e-05, "loss": 0.8286, "step": 3185 }, { "epoch": 3.629059829059829, "grad_norm": 0.12563465535640717, "learning_rate": 4.7372787308648444e-05, "loss": 1.1561, "step": 3186 }, { "epoch": 3.63019943019943, "grad_norm": 0.15684127807617188, "learning_rate": 4.7370707789997804e-05, "loss": 0.6536, "step": 3187 }, { "epoch": 3.6313390313390315, "grad_norm": 0.18025460839271545, "learning_rate": 4.736862749435096e-05, "loss": 0.7383, "step": 3188 }, { "epoch": 3.6324786324786325, "grad_norm": 0.13752953708171844, "learning_rate": 4.7366546421780165e-05, "loss": 0.7251, "step": 3189 }, { "epoch": 3.6336182336182334, "grad_norm": 0.14074869453907013, "learning_rate": 4.7364464572357705e-05, "loss": 0.8969, "step": 3190 }, { "epoch": 3.634757834757835, "grad_norm": 0.1559051275253296, "learning_rate": 4.736238194615589e-05, "loss": 0.7639, "step": 3191 }, { "epoch": 3.635897435897436, "grad_norm": 0.15167847275733948, "learning_rate": 4.736029854324705e-05, "loss": 0.8245, "step": 3192 }, { "epoch": 3.637037037037037, "grad_norm": 0.17585301399230957, "learning_rate": 4.7358214363703555e-05, "loss": 0.8256, "step": 3193 }, { "epoch": 3.6381766381766383, "grad_norm": 0.162922203540802, "learning_rate": 4.735612940759778e-05, "loss": 0.7108, "step": 3194 }, { "epoch": 3.6393162393162393, "grad_norm": 0.12826752662658691, "learning_rate": 4.735404367500217e-05, "loss": 0.8953, "step": 3195 }, { "epoch": 3.6404558404558403, "grad_norm": 0.15029852092266083, "learning_rate": 4.7351957165989136e-05, "loss": 0.7577, "step": 3196 }, { "epoch": 3.6415954415954417, "grad_norm": 0.16467316448688507, "learning_rate": 4.734986988063116e-05, "loss": 0.8308, "step": 3197 }, { "epoch": 3.6427350427350427, "grad_norm": 0.1632472723722458, "learning_rate": 4.7347781819000747e-05, "loss": 0.7183, "step": 3198 }, { "epoch": 3.6438746438746437, "grad_norm": 0.18199078738689423, "learning_rate": 4.734569298117041e-05, "loss": 0.7075, "step": 3199 }, { "epoch": 3.645014245014245, "grad_norm": 0.17410241067409515, "learning_rate": 4.734360336721271e-05, "loss": 1.0027, "step": 3200 }, { "epoch": 3.646153846153846, "grad_norm": 0.1600143015384674, "learning_rate": 4.734151297720021e-05, "loss": 0.8422, "step": 3201 }, { "epoch": 3.647293447293447, "grad_norm": 0.17152932286262512, "learning_rate": 4.7339421811205534e-05, "loss": 0.7428, "step": 3202 }, { "epoch": 3.6484330484330485, "grad_norm": 0.1582898199558258, "learning_rate": 4.73373298693013e-05, "loss": 0.9045, "step": 3203 }, { "epoch": 3.6495726495726495, "grad_norm": 0.18901553750038147, "learning_rate": 4.7335237151560176e-05, "loss": 0.6705, "step": 3204 }, { "epoch": 3.6507122507122505, "grad_norm": 0.13323083519935608, "learning_rate": 4.733314365805484e-05, "loss": 0.8926, "step": 3205 }, { "epoch": 3.651851851851852, "grad_norm": 0.13110944628715515, "learning_rate": 4.733104938885801e-05, "loss": 0.8441, "step": 3206 }, { "epoch": 3.652991452991453, "grad_norm": 0.18995679914951324, "learning_rate": 4.732895434404242e-05, "loss": 0.7849, "step": 3207 }, { "epoch": 3.654131054131054, "grad_norm": 0.1365271657705307, "learning_rate": 4.732685852368085e-05, "loss": 1.0407, "step": 3208 }, { "epoch": 3.6552706552706553, "grad_norm": 0.16701480746269226, "learning_rate": 4.732476192784608e-05, "loss": 0.5978, "step": 3209 }, { "epoch": 3.6564102564102563, "grad_norm": 0.1443236917257309, "learning_rate": 4.732266455661093e-05, "loss": 0.8791, "step": 3210 }, { "epoch": 3.6575498575498573, "grad_norm": 0.15609289705753326, "learning_rate": 4.732056641004826e-05, "loss": 0.7923, "step": 3211 }, { "epoch": 3.6586894586894587, "grad_norm": 0.17128822207450867, "learning_rate": 4.731846748823093e-05, "loss": 0.7968, "step": 3212 }, { "epoch": 3.6598290598290597, "grad_norm": 0.16645938158035278, "learning_rate": 4.731636779123185e-05, "loss": 0.7756, "step": 3213 }, { "epoch": 3.6609686609686607, "grad_norm": 0.1768391877412796, "learning_rate": 4.731426731912395e-05, "loss": 0.6578, "step": 3214 }, { "epoch": 3.662108262108262, "grad_norm": 0.14515741169452667, "learning_rate": 4.731216607198018e-05, "loss": 0.9738, "step": 3215 }, { "epoch": 3.663247863247863, "grad_norm": 0.15290328860282898, "learning_rate": 4.7310064049873525e-05, "loss": 0.8501, "step": 3216 }, { "epoch": 3.6643874643874645, "grad_norm": 0.1539739966392517, "learning_rate": 4.7307961252876985e-05, "loss": 0.8498, "step": 3217 }, { "epoch": 3.6655270655270655, "grad_norm": 0.170927956700325, "learning_rate": 4.730585768106362e-05, "loss": 0.7508, "step": 3218 }, { "epoch": 3.6666666666666665, "grad_norm": 0.17547346651554108, "learning_rate": 4.730375333450646e-05, "loss": 0.8683, "step": 3219 }, { "epoch": 3.667806267806268, "grad_norm": 0.1330338418483734, "learning_rate": 4.730164821327861e-05, "loss": 0.9195, "step": 3220 }, { "epoch": 3.668945868945869, "grad_norm": 0.13993962109088898, "learning_rate": 4.72995423174532e-05, "loss": 0.8747, "step": 3221 }, { "epoch": 3.67008547008547, "grad_norm": 0.1791338473558426, "learning_rate": 4.729743564710336e-05, "loss": 0.7849, "step": 3222 }, { "epoch": 3.6712250712250714, "grad_norm": 0.1725768744945526, "learning_rate": 4.729532820230225e-05, "loss": 0.9448, "step": 3223 }, { "epoch": 3.6723646723646723, "grad_norm": 0.16085100173950195, "learning_rate": 4.72932199831231e-05, "loss": 0.7872, "step": 3224 }, { "epoch": 3.6735042735042738, "grad_norm": 0.15714609622955322, "learning_rate": 4.729111098963909e-05, "loss": 0.7453, "step": 3225 }, { "epoch": 3.6746438746438748, "grad_norm": 0.17268367111682892, "learning_rate": 4.728900122192351e-05, "loss": 0.7574, "step": 3226 }, { "epoch": 3.6757834757834758, "grad_norm": 0.14194540679454803, "learning_rate": 4.728689068004961e-05, "loss": 0.6415, "step": 3227 }, { "epoch": 3.676923076923077, "grad_norm": 0.18830282986164093, "learning_rate": 4.7284779364090716e-05, "loss": 0.7544, "step": 3228 }, { "epoch": 3.678062678062678, "grad_norm": 0.14973561465740204, "learning_rate": 4.7282667274120153e-05, "loss": 0.9419, "step": 3229 }, { "epoch": 3.679202279202279, "grad_norm": 0.17502374947071075, "learning_rate": 4.728055441021128e-05, "loss": 0.7105, "step": 3230 }, { "epoch": 3.6803418803418806, "grad_norm": 0.15161217749118805, "learning_rate": 4.7278440772437473e-05, "loss": 0.7876, "step": 3231 }, { "epoch": 3.6814814814814816, "grad_norm": 0.1851918250322342, "learning_rate": 4.727632636087215e-05, "loss": 0.5753, "step": 3232 }, { "epoch": 3.6826210826210826, "grad_norm": 0.17830820381641388, "learning_rate": 4.7274211175588757e-05, "loss": 0.6505, "step": 3233 }, { "epoch": 3.683760683760684, "grad_norm": 0.1459147185087204, "learning_rate": 4.727209521666075e-05, "loss": 0.7728, "step": 3234 }, { "epoch": 3.684900284900285, "grad_norm": 0.14419788122177124, "learning_rate": 4.7269978484161633e-05, "loss": 0.9277, "step": 3235 }, { "epoch": 3.686039886039886, "grad_norm": 0.12847690284252167, "learning_rate": 4.726786097816492e-05, "loss": 1.0063, "step": 3236 }, { "epoch": 3.6871794871794874, "grad_norm": 0.16791048645973206, "learning_rate": 4.726574269874416e-05, "loss": 0.8008, "step": 3237 }, { "epoch": 3.6883190883190884, "grad_norm": 0.13223494589328766, "learning_rate": 4.7263623645972935e-05, "loss": 0.8122, "step": 3238 }, { "epoch": 3.6894586894586894, "grad_norm": 0.16067475080490112, "learning_rate": 4.726150381992482e-05, "loss": 0.7976, "step": 3239 }, { "epoch": 3.690598290598291, "grad_norm": 0.17711344361305237, "learning_rate": 4.725938322067346e-05, "loss": 0.8262, "step": 3240 }, { "epoch": 3.691737891737892, "grad_norm": 0.1587996780872345, "learning_rate": 4.725726184829251e-05, "loss": 0.7744, "step": 3241 }, { "epoch": 3.692877492877493, "grad_norm": 0.14792990684509277, "learning_rate": 4.725513970285565e-05, "loss": 0.9333, "step": 3242 }, { "epoch": 3.694017094017094, "grad_norm": 0.16375887393951416, "learning_rate": 4.725301678443659e-05, "loss": 0.8978, "step": 3243 }, { "epoch": 3.695156695156695, "grad_norm": 0.1641547977924347, "learning_rate": 4.725089309310905e-05, "loss": 0.7527, "step": 3244 }, { "epoch": 3.696296296296296, "grad_norm": 0.1527947634458542, "learning_rate": 4.7248768628946805e-05, "loss": 0.8253, "step": 3245 }, { "epoch": 3.6974358974358976, "grad_norm": 0.17005781829357147, "learning_rate": 4.7246643392023647e-05, "loss": 0.6966, "step": 3246 }, { "epoch": 3.6985754985754986, "grad_norm": 0.19834214448928833, "learning_rate": 4.7244517382413385e-05, "loss": 0.7864, "step": 3247 }, { "epoch": 3.6997150997150996, "grad_norm": 0.1645403653383255, "learning_rate": 4.724239060018985e-05, "loss": 0.8421, "step": 3248 }, { "epoch": 3.700854700854701, "grad_norm": 0.15105541050434113, "learning_rate": 4.7240263045426934e-05, "loss": 0.8767, "step": 3249 }, { "epoch": 3.701994301994302, "grad_norm": 0.13681286573410034, "learning_rate": 4.723813471819852e-05, "loss": 0.8714, "step": 3250 }, { "epoch": 3.703133903133903, "grad_norm": 0.15862339735031128, "learning_rate": 4.723600561857854e-05, "loss": 0.6707, "step": 3251 }, { "epoch": 3.7042735042735044, "grad_norm": 0.14312617480754852, "learning_rate": 4.723387574664092e-05, "loss": 0.8925, "step": 3252 }, { "epoch": 3.7054131054131054, "grad_norm": 0.1338813751935959, "learning_rate": 4.723174510245966e-05, "loss": 1.0258, "step": 3253 }, { "epoch": 3.7065527065527064, "grad_norm": 0.1546410769224167, "learning_rate": 4.722961368610876e-05, "loss": 0.78, "step": 3254 }, { "epoch": 3.707692307692308, "grad_norm": 0.1816253811120987, "learning_rate": 4.722748149766224e-05, "loss": 0.8125, "step": 3255 }, { "epoch": 3.708831908831909, "grad_norm": 0.1573161780834198, "learning_rate": 4.722534853719416e-05, "loss": 0.8516, "step": 3256 }, { "epoch": 3.70997150997151, "grad_norm": 0.158403217792511, "learning_rate": 4.7223214804778604e-05, "loss": 0.8635, "step": 3257 }, { "epoch": 3.7111111111111112, "grad_norm": 0.17837317287921906, "learning_rate": 4.722108030048968e-05, "loss": 0.9281, "step": 3258 }, { "epoch": 3.7122507122507122, "grad_norm": 0.14864912629127502, "learning_rate": 4.721894502440154e-05, "loss": 0.8862, "step": 3259 }, { "epoch": 3.7133903133903132, "grad_norm": 0.17327450215816498, "learning_rate": 4.721680897658833e-05, "loss": 0.7906, "step": 3260 }, { "epoch": 3.7145299145299147, "grad_norm": 0.1560973972082138, "learning_rate": 4.721467215712425e-05, "loss": 0.9117, "step": 3261 }, { "epoch": 3.7156695156695156, "grad_norm": 0.1419924944639206, "learning_rate": 4.721253456608352e-05, "loss": 0.9314, "step": 3262 }, { "epoch": 3.7168091168091166, "grad_norm": 0.16928216814994812, "learning_rate": 4.721039620354038e-05, "loss": 0.6262, "step": 3263 }, { "epoch": 3.717948717948718, "grad_norm": 0.15330585837364197, "learning_rate": 4.720825706956909e-05, "loss": 0.8932, "step": 3264 }, { "epoch": 3.719088319088319, "grad_norm": 0.14610134065151215, "learning_rate": 4.7206117164243966e-05, "loss": 0.8271, "step": 3265 }, { "epoch": 3.72022792022792, "grad_norm": 0.1372208148241043, "learning_rate": 4.7203976487639326e-05, "loss": 0.8912, "step": 3266 }, { "epoch": 3.7213675213675215, "grad_norm": 0.1757371574640274, "learning_rate": 4.7201835039829524e-05, "loss": 0.8377, "step": 3267 }, { "epoch": 3.7225071225071225, "grad_norm": 0.17159296572208405, "learning_rate": 4.719969282088893e-05, "loss": 0.7907, "step": 3268 }, { "epoch": 3.7236467236467234, "grad_norm": 0.13616004586219788, "learning_rate": 4.719754983089195e-05, "loss": 0.8665, "step": 3269 }, { "epoch": 3.724786324786325, "grad_norm": 0.14465661346912384, "learning_rate": 4.719540606991303e-05, "loss": 0.8498, "step": 3270 }, { "epoch": 3.725925925925926, "grad_norm": 0.1675175577402115, "learning_rate": 4.719326153802662e-05, "loss": 0.6964, "step": 3271 }, { "epoch": 3.727065527065527, "grad_norm": 0.14356575906276703, "learning_rate": 4.71911162353072e-05, "loss": 0.8681, "step": 3272 }, { "epoch": 3.7282051282051283, "grad_norm": 0.14403332769870758, "learning_rate": 4.718897016182929e-05, "loss": 0.8282, "step": 3273 }, { "epoch": 3.7293447293447293, "grad_norm": 0.13207969069480896, "learning_rate": 4.7186823317667425e-05, "loss": 0.8278, "step": 3274 }, { "epoch": 3.7304843304843303, "grad_norm": 0.1665349304676056, "learning_rate": 4.718467570289618e-05, "loss": 0.7893, "step": 3275 }, { "epoch": 3.7316239316239317, "grad_norm": 0.16078945994377136, "learning_rate": 4.718252731759013e-05, "loss": 0.7598, "step": 3276 }, { "epoch": 3.7327635327635327, "grad_norm": 0.18205560743808746, "learning_rate": 4.718037816182391e-05, "loss": 0.7266, "step": 3277 }, { "epoch": 3.7339031339031337, "grad_norm": 0.2010476142168045, "learning_rate": 4.717822823567216e-05, "loss": 0.6091, "step": 3278 }, { "epoch": 3.735042735042735, "grad_norm": 0.16430054605007172, "learning_rate": 4.717607753920955e-05, "loss": 0.8155, "step": 3279 }, { "epoch": 3.736182336182336, "grad_norm": 0.1744856834411621, "learning_rate": 4.7173926072510774e-05, "loss": 0.7571, "step": 3280 }, { "epoch": 3.737321937321937, "grad_norm": 0.15343201160430908, "learning_rate": 4.717177383565058e-05, "loss": 0.8475, "step": 3281 }, { "epoch": 3.7384615384615385, "grad_norm": 0.1410541534423828, "learning_rate": 4.716962082870371e-05, "loss": 0.8938, "step": 3282 }, { "epoch": 3.7396011396011395, "grad_norm": 0.14926479756832123, "learning_rate": 4.7167467051744926e-05, "loss": 0.7719, "step": 3283 }, { "epoch": 3.7407407407407405, "grad_norm": 0.1376180797815323, "learning_rate": 4.7165312504849065e-05, "loss": 0.8868, "step": 3284 }, { "epoch": 3.741880341880342, "grad_norm": 0.1488949954509735, "learning_rate": 4.716315718809094e-05, "loss": 0.8712, "step": 3285 }, { "epoch": 3.743019943019943, "grad_norm": 0.13525907695293427, "learning_rate": 4.716100110154542e-05, "loss": 0.8757, "step": 3286 }, { "epoch": 3.744159544159544, "grad_norm": 0.1648627072572708, "learning_rate": 4.715884424528739e-05, "loss": 0.7802, "step": 3287 }, { "epoch": 3.7452991452991453, "grad_norm": 0.13732190430164337, "learning_rate": 4.7156686619391755e-05, "loss": 1.07, "step": 3288 }, { "epoch": 3.7464387464387463, "grad_norm": 0.1729089766740799, "learning_rate": 4.715452822393347e-05, "loss": 0.7759, "step": 3289 }, { "epoch": 3.7475783475783477, "grad_norm": 0.14817307889461517, "learning_rate": 4.715236905898749e-05, "loss": 0.9599, "step": 3290 }, { "epoch": 3.7487179487179487, "grad_norm": 0.1385723501443863, "learning_rate": 4.715020912462882e-05, "loss": 0.9544, "step": 3291 }, { "epoch": 3.7498575498575497, "grad_norm": 0.1851496547460556, "learning_rate": 4.714804842093247e-05, "loss": 0.7674, "step": 3292 }, { "epoch": 3.750997150997151, "grad_norm": 0.1579301357269287, "learning_rate": 4.7145886947973485e-05, "loss": 0.7869, "step": 3293 }, { "epoch": 3.752136752136752, "grad_norm": 0.19079633057117462, "learning_rate": 4.714372470582695e-05, "loss": 0.7055, "step": 3294 }, { "epoch": 3.753276353276353, "grad_norm": 0.15140880644321442, "learning_rate": 4.714156169456796e-05, "loss": 0.9413, "step": 3295 }, { "epoch": 3.7544159544159545, "grad_norm": 0.17241929471492767, "learning_rate": 4.7139397914271646e-05, "loss": 0.8051, "step": 3296 }, { "epoch": 3.7555555555555555, "grad_norm": 0.1487005650997162, "learning_rate": 4.713723336501316e-05, "loss": 0.9048, "step": 3297 }, { "epoch": 3.756695156695157, "grad_norm": 0.1777118444442749, "learning_rate": 4.713506804686767e-05, "loss": 0.7942, "step": 3298 }, { "epoch": 3.757834757834758, "grad_norm": 0.12350175529718399, "learning_rate": 4.71329019599104e-05, "loss": 0.965, "step": 3299 }, { "epoch": 3.758974358974359, "grad_norm": 0.1593742072582245, "learning_rate": 4.7130735104216584e-05, "loss": 0.7879, "step": 3300 }, { "epoch": 3.7601139601139604, "grad_norm": 0.16756659746170044, "learning_rate": 4.7128567479861476e-05, "loss": 0.778, "step": 3301 }, { "epoch": 3.7612535612535614, "grad_norm": 0.16091838479042053, "learning_rate": 4.712639908692037e-05, "loss": 0.7816, "step": 3302 }, { "epoch": 3.7623931623931623, "grad_norm": 0.1462317407131195, "learning_rate": 4.712422992546857e-05, "loss": 0.7826, "step": 3303 }, { "epoch": 3.763532763532764, "grad_norm": 0.16462798416614532, "learning_rate": 4.712205999558143e-05, "loss": 0.9022, "step": 3304 }, { "epoch": 3.7646723646723648, "grad_norm": 0.16236019134521484, "learning_rate": 4.711988929733429e-05, "loss": 0.7944, "step": 3305 }, { "epoch": 3.7658119658119658, "grad_norm": 0.14094164967536926, "learning_rate": 4.711771783080259e-05, "loss": 0.8613, "step": 3306 }, { "epoch": 3.766951566951567, "grad_norm": 0.18130993843078613, "learning_rate": 4.711554559606171e-05, "loss": 0.6142, "step": 3307 }, { "epoch": 3.768091168091168, "grad_norm": 0.14163918793201447, "learning_rate": 4.711337259318712e-05, "loss": 1.0577, "step": 3308 }, { "epoch": 3.769230769230769, "grad_norm": 0.14657098054885864, "learning_rate": 4.711119882225429e-05, "loss": 0.8357, "step": 3309 }, { "epoch": 3.7703703703703706, "grad_norm": 0.12951062619686127, "learning_rate": 4.710902428333871e-05, "loss": 0.852, "step": 3310 }, { "epoch": 3.7715099715099716, "grad_norm": 0.1622898429632187, "learning_rate": 4.710684897651592e-05, "loss": 0.704, "step": 3311 }, { "epoch": 3.7726495726495726, "grad_norm": 0.16411170363426208, "learning_rate": 4.710467290186148e-05, "loss": 0.7952, "step": 3312 }, { "epoch": 3.773789173789174, "grad_norm": 0.15382306277751923, "learning_rate": 4.710249605945095e-05, "loss": 0.6917, "step": 3313 }, { "epoch": 3.774928774928775, "grad_norm": 0.14002524316310883, "learning_rate": 4.710031844935996e-05, "loss": 1.0518, "step": 3314 }, { "epoch": 3.776068376068376, "grad_norm": 0.1697578728199005, "learning_rate": 4.709814007166412e-05, "loss": 0.6775, "step": 3315 }, { "epoch": 3.7772079772079774, "grad_norm": 0.15462514758110046, "learning_rate": 4.709596092643912e-05, "loss": 0.8434, "step": 3316 }, { "epoch": 3.7783475783475784, "grad_norm": 0.19596315920352936, "learning_rate": 4.709378101376063e-05, "loss": 0.7472, "step": 3317 }, { "epoch": 3.7794871794871794, "grad_norm": 0.1463899463415146, "learning_rate": 4.709160033370435e-05, "loss": 0.9265, "step": 3318 }, { "epoch": 3.780626780626781, "grad_norm": 0.16193729639053345, "learning_rate": 4.7089418886346056e-05, "loss": 0.8363, "step": 3319 }, { "epoch": 3.781766381766382, "grad_norm": 0.13628825545310974, "learning_rate": 4.7087236671761506e-05, "loss": 0.8596, "step": 3320 }, { "epoch": 3.782905982905983, "grad_norm": 0.17029441893100739, "learning_rate": 4.708505369002647e-05, "loss": 0.8404, "step": 3321 }, { "epoch": 3.784045584045584, "grad_norm": 0.17348740994930267, "learning_rate": 4.708286994121679e-05, "loss": 0.6665, "step": 3322 }, { "epoch": 3.785185185185185, "grad_norm": 0.13818663358688354, "learning_rate": 4.708068542540831e-05, "loss": 0.8523, "step": 3323 }, { "epoch": 3.786324786324786, "grad_norm": 0.13160809874534607, "learning_rate": 4.707850014267689e-05, "loss": 0.9992, "step": 3324 }, { "epoch": 3.7874643874643876, "grad_norm": 0.14526665210723877, "learning_rate": 4.707631409309846e-05, "loss": 0.8231, "step": 3325 }, { "epoch": 3.7886039886039886, "grad_norm": 0.1624801754951477, "learning_rate": 4.707412727674893e-05, "loss": 0.8036, "step": 3326 }, { "epoch": 3.7897435897435896, "grad_norm": 0.16265356540679932, "learning_rate": 4.7071939693704255e-05, "loss": 0.9489, "step": 3327 }, { "epoch": 3.790883190883191, "grad_norm": 0.1629178673028946, "learning_rate": 4.7069751344040416e-05, "loss": 0.8472, "step": 3328 }, { "epoch": 3.792022792022792, "grad_norm": 0.16582542657852173, "learning_rate": 4.706756222783342e-05, "loss": 0.8367, "step": 3329 }, { "epoch": 3.793162393162393, "grad_norm": 0.15202531218528748, "learning_rate": 4.70653723451593e-05, "loss": 1.01, "step": 3330 }, { "epoch": 3.7943019943019944, "grad_norm": 0.13972711563110352, "learning_rate": 4.706318169609412e-05, "loss": 0.8285, "step": 3331 }, { "epoch": 3.7954415954415954, "grad_norm": 0.14830423891544342, "learning_rate": 4.706099028071396e-05, "loss": 0.8479, "step": 3332 }, { "epoch": 3.7965811965811964, "grad_norm": 0.14009328186511993, "learning_rate": 4.7058798099094946e-05, "loss": 0.9931, "step": 3333 }, { "epoch": 3.797720797720798, "grad_norm": 0.18037128448486328, "learning_rate": 4.705660515131321e-05, "loss": 0.7468, "step": 3334 }, { "epoch": 3.798860398860399, "grad_norm": 0.13112333416938782, "learning_rate": 4.7054411437444926e-05, "loss": 0.8557, "step": 3335 }, { "epoch": 3.8, "grad_norm": 0.1434762328863144, "learning_rate": 4.7052216957566275e-05, "loss": 0.7453, "step": 3336 }, { "epoch": 3.8011396011396013, "grad_norm": 0.2030930519104004, "learning_rate": 4.705002171175349e-05, "loss": 0.6288, "step": 3337 }, { "epoch": 3.8022792022792022, "grad_norm": 0.13329127430915833, "learning_rate": 4.704782570008281e-05, "loss": 0.8263, "step": 3338 }, { "epoch": 3.8034188034188032, "grad_norm": 0.16466042399406433, "learning_rate": 4.7045628922630515e-05, "loss": 0.7695, "step": 3339 }, { "epoch": 3.8045584045584047, "grad_norm": 0.18540437519550323, "learning_rate": 4.70434313794729e-05, "loss": 0.8276, "step": 3340 }, { "epoch": 3.8056980056980056, "grad_norm": 0.17809467017650604, "learning_rate": 4.704123307068629e-05, "loss": 0.7119, "step": 3341 }, { "epoch": 3.8068376068376066, "grad_norm": 0.14918246865272522, "learning_rate": 4.7039033996347046e-05, "loss": 0.8541, "step": 3342 }, { "epoch": 3.807977207977208, "grad_norm": 0.151133731007576, "learning_rate": 4.703683415653154e-05, "loss": 0.8092, "step": 3343 }, { "epoch": 3.809116809116809, "grad_norm": 0.15425163507461548, "learning_rate": 4.703463355131619e-05, "loss": 0.899, "step": 3344 }, { "epoch": 3.81025641025641, "grad_norm": 0.16399483382701874, "learning_rate": 4.703243218077742e-05, "loss": 0.823, "step": 3345 }, { "epoch": 3.8113960113960115, "grad_norm": 0.1537674367427826, "learning_rate": 4.7030230044991684e-05, "loss": 0.8778, "step": 3346 }, { "epoch": 3.8125356125356125, "grad_norm": 0.1560267060995102, "learning_rate": 4.702802714403548e-05, "loss": 0.8398, "step": 3347 }, { "epoch": 3.8136752136752134, "grad_norm": 0.16034267842769623, "learning_rate": 4.7025823477985316e-05, "loss": 0.7942, "step": 3348 }, { "epoch": 3.814814814814815, "grad_norm": 0.17743459343910217, "learning_rate": 4.702361904691773e-05, "loss": 0.807, "step": 3349 }, { "epoch": 3.815954415954416, "grad_norm": 0.13582278788089752, "learning_rate": 4.702141385090929e-05, "loss": 0.8062, "step": 3350 }, { "epoch": 3.817094017094017, "grad_norm": 0.1425933688879013, "learning_rate": 4.7019207890036584e-05, "loss": 0.8535, "step": 3351 }, { "epoch": 3.8182336182336183, "grad_norm": 0.145987406373024, "learning_rate": 4.701700116437624e-05, "loss": 0.9585, "step": 3352 }, { "epoch": 3.8193732193732193, "grad_norm": 0.16236691176891327, "learning_rate": 4.7014793674004896e-05, "loss": 0.7493, "step": 3353 }, { "epoch": 3.8205128205128203, "grad_norm": 0.12828007340431213, "learning_rate": 4.701258541899923e-05, "loss": 0.9837, "step": 3354 }, { "epoch": 3.8216524216524217, "grad_norm": 0.16995170712471008, "learning_rate": 4.701037639943594e-05, "loss": 0.6957, "step": 3355 }, { "epoch": 3.8227920227920227, "grad_norm": 0.1511501967906952, "learning_rate": 4.700816661539174e-05, "loss": 0.695, "step": 3356 }, { "epoch": 3.8239316239316237, "grad_norm": 0.16818365454673767, "learning_rate": 4.7005956066943396e-05, "loss": 0.7185, "step": 3357 }, { "epoch": 3.825071225071225, "grad_norm": 0.15481878817081451, "learning_rate": 4.7003744754167676e-05, "loss": 0.7676, "step": 3358 }, { "epoch": 3.826210826210826, "grad_norm": 0.1362566351890564, "learning_rate": 4.70015326771414e-05, "loss": 0.8843, "step": 3359 }, { "epoch": 3.827350427350427, "grad_norm": 0.16790726780891418, "learning_rate": 4.699931983594138e-05, "loss": 0.8681, "step": 3360 }, { "epoch": 3.8284900284900285, "grad_norm": 0.1538618505001068, "learning_rate": 4.69971062306445e-05, "loss": 0.6958, "step": 3361 }, { "epoch": 3.8296296296296295, "grad_norm": 0.14279936254024506, "learning_rate": 4.6994891861327606e-05, "loss": 0.9614, "step": 3362 }, { "epoch": 3.830769230769231, "grad_norm": 0.13365939259529114, "learning_rate": 4.699267672806764e-05, "loss": 1.2272, "step": 3363 }, { "epoch": 3.831908831908832, "grad_norm": 0.18748198449611664, "learning_rate": 4.699046083094154e-05, "loss": 0.6171, "step": 3364 }, { "epoch": 3.833048433048433, "grad_norm": 0.1396261751651764, "learning_rate": 4.698824417002625e-05, "loss": 0.944, "step": 3365 }, { "epoch": 3.8341880341880343, "grad_norm": 0.15107348561286926, "learning_rate": 4.698602674539878e-05, "loss": 0.9461, "step": 3366 }, { "epoch": 3.8353276353276353, "grad_norm": 0.15369813144207, "learning_rate": 4.6983808557136135e-05, "loss": 0.8081, "step": 3367 }, { "epoch": 3.8364672364672363, "grad_norm": 0.1463460773229599, "learning_rate": 4.698158960531536e-05, "loss": 0.8933, "step": 3368 }, { "epoch": 3.8376068376068377, "grad_norm": 0.1420212984085083, "learning_rate": 4.697936989001353e-05, "loss": 0.9669, "step": 3369 }, { "epoch": 3.8387464387464387, "grad_norm": 0.16711066663265228, "learning_rate": 4.697714941130774e-05, "loss": 0.8245, "step": 3370 }, { "epoch": 3.83988603988604, "grad_norm": 0.1338273137807846, "learning_rate": 4.697492816927512e-05, "loss": 0.9024, "step": 3371 }, { "epoch": 3.841025641025641, "grad_norm": 0.16420644521713257, "learning_rate": 4.697270616399281e-05, "loss": 0.7058, "step": 3372 }, { "epoch": 3.842165242165242, "grad_norm": 0.1521245837211609, "learning_rate": 4.6970483395537987e-05, "loss": 0.7781, "step": 3373 }, { "epoch": 3.8433048433048436, "grad_norm": 0.14421981573104858, "learning_rate": 4.696825986398786e-05, "loss": 0.7919, "step": 3374 }, { "epoch": 3.8444444444444446, "grad_norm": 0.16730935871601105, "learning_rate": 4.6966035569419654e-05, "loss": 0.694, "step": 3375 }, { "epoch": 3.8455840455840455, "grad_norm": 0.15459780395030975, "learning_rate": 4.696381051191062e-05, "loss": 0.9161, "step": 3376 }, { "epoch": 3.846723646723647, "grad_norm": 0.1502046287059784, "learning_rate": 4.696158469153805e-05, "loss": 0.7694, "step": 3377 }, { "epoch": 3.847863247863248, "grad_norm": 0.1428917646408081, "learning_rate": 4.6959358108379256e-05, "loss": 0.953, "step": 3378 }, { "epoch": 3.849002849002849, "grad_norm": 0.19488106667995453, "learning_rate": 4.695713076251156e-05, "loss": 0.6441, "step": 3379 }, { "epoch": 3.8501424501424504, "grad_norm": 0.18286015093326569, "learning_rate": 4.695490265401233e-05, "loss": 0.7315, "step": 3380 }, { "epoch": 3.8512820512820514, "grad_norm": 0.16389550268650055, "learning_rate": 4.695267378295896e-05, "loss": 0.667, "step": 3381 }, { "epoch": 3.8524216524216524, "grad_norm": 0.1413779854774475, "learning_rate": 4.6950444149428854e-05, "loss": 0.7685, "step": 3382 }, { "epoch": 3.853561253561254, "grad_norm": 0.1864253282546997, "learning_rate": 4.694821375349946e-05, "loss": 0.6877, "step": 3383 }, { "epoch": 3.8547008547008548, "grad_norm": 0.160108283162117, "learning_rate": 4.694598259524824e-05, "loss": 0.8099, "step": 3384 }, { "epoch": 3.8558404558404558, "grad_norm": 0.14981703460216522, "learning_rate": 4.69437506747527e-05, "loss": 0.854, "step": 3385 }, { "epoch": 3.856980056980057, "grad_norm": 0.1802595853805542, "learning_rate": 4.694151799209035e-05, "loss": 0.7782, "step": 3386 }, { "epoch": 3.858119658119658, "grad_norm": 0.13733747601509094, "learning_rate": 4.693928454733875e-05, "loss": 1.0103, "step": 3387 }, { "epoch": 3.859259259259259, "grad_norm": 0.1732407510280609, "learning_rate": 4.693705034057545e-05, "loss": 0.8377, "step": 3388 }, { "epoch": 3.8603988603988606, "grad_norm": 0.14769984781742096, "learning_rate": 4.693481537187807e-05, "loss": 0.9164, "step": 3389 }, { "epoch": 3.8615384615384616, "grad_norm": 0.15641845762729645, "learning_rate": 4.6932579641324234e-05, "loss": 0.9101, "step": 3390 }, { "epoch": 3.8626780626780626, "grad_norm": 0.144445538520813, "learning_rate": 4.693034314899159e-05, "loss": 0.877, "step": 3391 }, { "epoch": 3.863817663817664, "grad_norm": 0.15623387694358826, "learning_rate": 4.6928105894957816e-05, "loss": 0.8651, "step": 3392 }, { "epoch": 3.864957264957265, "grad_norm": 0.14462868869304657, "learning_rate": 4.692586787930063e-05, "loss": 0.8706, "step": 3393 }, { "epoch": 3.866096866096866, "grad_norm": 0.16158561408519745, "learning_rate": 4.692362910209775e-05, "loss": 0.8211, "step": 3394 }, { "epoch": 3.8672364672364674, "grad_norm": 0.14164027571678162, "learning_rate": 4.692138956342694e-05, "loss": 0.9125, "step": 3395 }, { "epoch": 3.8683760683760684, "grad_norm": 0.16288837790489197, "learning_rate": 4.691914926336599e-05, "loss": 0.8973, "step": 3396 }, { "epoch": 3.8695156695156694, "grad_norm": 0.21154040098190308, "learning_rate": 4.691690820199271e-05, "loss": 0.6815, "step": 3397 }, { "epoch": 3.870655270655271, "grad_norm": 0.130178302526474, "learning_rate": 4.691466637938493e-05, "loss": 0.9517, "step": 3398 }, { "epoch": 3.871794871794872, "grad_norm": 0.18135111033916473, "learning_rate": 4.691242379562053e-05, "loss": 0.7641, "step": 3399 }, { "epoch": 3.872934472934473, "grad_norm": 0.1670042723417282, "learning_rate": 4.6910180450777384e-05, "loss": 0.7716, "step": 3400 }, { "epoch": 3.8740740740740742, "grad_norm": 0.14281009137630463, "learning_rate": 4.690793634493342e-05, "loss": 0.9608, "step": 3401 }, { "epoch": 3.875213675213675, "grad_norm": 0.17043545842170715, "learning_rate": 4.690569147816658e-05, "loss": 0.7808, "step": 3402 }, { "epoch": 3.876353276353276, "grad_norm": 0.18869633972644806, "learning_rate": 4.690344585055484e-05, "loss": 0.7139, "step": 3403 }, { "epoch": 3.8774928774928776, "grad_norm": 0.13043910264968872, "learning_rate": 4.6901199462176184e-05, "loss": 1.0668, "step": 3404 }, { "epoch": 3.8786324786324786, "grad_norm": 0.13791286945343018, "learning_rate": 4.689895231310864e-05, "loss": 0.8447, "step": 3405 }, { "epoch": 3.8797720797720796, "grad_norm": 0.14306683838367462, "learning_rate": 4.6896704403430265e-05, "loss": 0.9538, "step": 3406 }, { "epoch": 3.880911680911681, "grad_norm": 0.16624945402145386, "learning_rate": 4.689445573321913e-05, "loss": 0.8153, "step": 3407 }, { "epoch": 3.882051282051282, "grad_norm": 0.16687414050102234, "learning_rate": 4.689220630255333e-05, "loss": 0.8279, "step": 3408 }, { "epoch": 3.883190883190883, "grad_norm": 0.16535744071006775, "learning_rate": 4.688995611151101e-05, "loss": 0.6107, "step": 3409 }, { "epoch": 3.8843304843304844, "grad_norm": 0.14052452147006989, "learning_rate": 4.6887705160170314e-05, "loss": 0.7906, "step": 3410 }, { "epoch": 3.8854700854700854, "grad_norm": 0.15893520414829254, "learning_rate": 4.688545344860943e-05, "loss": 0.72, "step": 3411 }, { "epoch": 3.8866096866096864, "grad_norm": 0.16928741335868835, "learning_rate": 4.6883200976906556e-05, "loss": 0.723, "step": 3412 }, { "epoch": 3.887749287749288, "grad_norm": 0.16943496465682983, "learning_rate": 4.6880947745139934e-05, "loss": 0.7667, "step": 3413 }, { "epoch": 3.888888888888889, "grad_norm": 0.15176483988761902, "learning_rate": 4.687869375338783e-05, "loss": 0.7118, "step": 3414 }, { "epoch": 3.89002849002849, "grad_norm": 0.17340417206287384, "learning_rate": 4.687643900172852e-05, "loss": 0.8506, "step": 3415 }, { "epoch": 3.8911680911680913, "grad_norm": 0.15264245867729187, "learning_rate": 4.687418349024032e-05, "loss": 0.9093, "step": 3416 }, { "epoch": 3.8923076923076922, "grad_norm": 0.14344799518585205, "learning_rate": 4.6871927219001576e-05, "loss": 0.8267, "step": 3417 }, { "epoch": 3.8934472934472932, "grad_norm": 0.16674695909023285, "learning_rate": 4.686967018809065e-05, "loss": 0.8371, "step": 3418 }, { "epoch": 3.8945868945868947, "grad_norm": 0.1460442692041397, "learning_rate": 4.686741239758594e-05, "loss": 1.0146, "step": 3419 }, { "epoch": 3.8957264957264957, "grad_norm": 0.17456045746803284, "learning_rate": 4.686515384756587e-05, "loss": 0.6636, "step": 3420 }, { "epoch": 3.8968660968660966, "grad_norm": 0.24361936748027802, "learning_rate": 4.686289453810886e-05, "loss": 0.4847, "step": 3421 }, { "epoch": 3.898005698005698, "grad_norm": 0.12225434184074402, "learning_rate": 4.686063446929341e-05, "loss": 1.0733, "step": 3422 }, { "epoch": 3.899145299145299, "grad_norm": 0.1633254885673523, "learning_rate": 4.6858373641198e-05, "loss": 0.8836, "step": 3423 }, { "epoch": 3.9002849002849, "grad_norm": 0.16331011056900024, "learning_rate": 4.685611205390117e-05, "loss": 0.8265, "step": 3424 }, { "epoch": 3.9014245014245015, "grad_norm": 0.13616515696048737, "learning_rate": 4.6853849707481466e-05, "loss": 0.893, "step": 3425 }, { "epoch": 3.9025641025641025, "grad_norm": 0.14399182796478271, "learning_rate": 4.685158660201746e-05, "loss": 0.7359, "step": 3426 }, { "epoch": 3.9037037037037035, "grad_norm": 0.16251274943351746, "learning_rate": 4.6849322737587765e-05, "loss": 0.8572, "step": 3427 }, { "epoch": 3.904843304843305, "grad_norm": 0.13758879899978638, "learning_rate": 4.684705811427099e-05, "loss": 0.9263, "step": 3428 }, { "epoch": 3.905982905982906, "grad_norm": 0.1321915239095688, "learning_rate": 4.684479273214582e-05, "loss": 0.8173, "step": 3429 }, { "epoch": 3.907122507122507, "grad_norm": 0.172994464635849, "learning_rate": 4.684252659129093e-05, "loss": 0.7523, "step": 3430 }, { "epoch": 3.9082621082621083, "grad_norm": 0.1446010023355484, "learning_rate": 4.684025969178501e-05, "loss": 0.9883, "step": 3431 }, { "epoch": 3.9094017094017093, "grad_norm": 0.15167920291423798, "learning_rate": 4.6837992033706815e-05, "loss": 0.7456, "step": 3432 }, { "epoch": 3.9105413105413103, "grad_norm": 0.16871385276317596, "learning_rate": 4.68357236171351e-05, "loss": 0.8245, "step": 3433 }, { "epoch": 3.9116809116809117, "grad_norm": 0.1432938277721405, "learning_rate": 4.683345444214866e-05, "loss": 0.9974, "step": 3434 }, { "epoch": 3.9128205128205127, "grad_norm": 0.16849583387374878, "learning_rate": 4.68311845088263e-05, "loss": 0.8222, "step": 3435 }, { "epoch": 3.913960113960114, "grad_norm": 0.1425115168094635, "learning_rate": 4.6828913817246875e-05, "loss": 0.9264, "step": 3436 }, { "epoch": 3.915099715099715, "grad_norm": 0.1551201045513153, "learning_rate": 4.682664236748924e-05, "loss": 0.9058, "step": 3437 }, { "epoch": 3.916239316239316, "grad_norm": 0.16255636513233185, "learning_rate": 4.682437015963228e-05, "loss": 0.7827, "step": 3438 }, { "epoch": 3.9173789173789175, "grad_norm": 0.135501891374588, "learning_rate": 4.682209719375494e-05, "loss": 0.9333, "step": 3439 }, { "epoch": 3.9185185185185185, "grad_norm": 0.14796842634677887, "learning_rate": 4.6819823469936154e-05, "loss": 0.8812, "step": 3440 }, { "epoch": 3.9196581196581195, "grad_norm": 0.17937713861465454, "learning_rate": 4.681754898825489e-05, "loss": 0.6784, "step": 3441 }, { "epoch": 3.920797720797721, "grad_norm": 0.19107134640216827, "learning_rate": 4.681527374879015e-05, "loss": 0.7208, "step": 3442 }, { "epoch": 3.921937321937322, "grad_norm": 0.12797027826309204, "learning_rate": 4.681299775162096e-05, "loss": 0.9502, "step": 3443 }, { "epoch": 3.9230769230769234, "grad_norm": 0.15293848514556885, "learning_rate": 4.681072099682637e-05, "loss": 0.7478, "step": 3444 }, { "epoch": 3.9242165242165243, "grad_norm": 0.15323732793331146, "learning_rate": 4.6808443484485466e-05, "loss": 0.967, "step": 3445 }, { "epoch": 3.9253561253561253, "grad_norm": 0.14220960438251495, "learning_rate": 4.6806165214677346e-05, "loss": 0.8756, "step": 3446 }, { "epoch": 3.9264957264957268, "grad_norm": 0.16512326896190643, "learning_rate": 4.680388618748114e-05, "loss": 0.9124, "step": 3447 }, { "epoch": 3.9276353276353277, "grad_norm": 0.16266563534736633, "learning_rate": 4.6801606402976005e-05, "loss": 0.8936, "step": 3448 }, { "epoch": 3.9287749287749287, "grad_norm": 0.15460358560085297, "learning_rate": 4.6799325861241125e-05, "loss": 0.7959, "step": 3449 }, { "epoch": 3.92991452991453, "grad_norm": 0.13435477018356323, "learning_rate": 4.679704456235571e-05, "loss": 1.0186, "step": 3450 }, { "epoch": 3.931054131054131, "grad_norm": 0.15804614126682281, "learning_rate": 4.679476250639899e-05, "loss": 0.942, "step": 3451 }, { "epoch": 3.932193732193732, "grad_norm": 0.17201738059520721, "learning_rate": 4.679247969345024e-05, "loss": 0.7787, "step": 3452 }, { "epoch": 3.9333333333333336, "grad_norm": 0.15990282595157623, "learning_rate": 4.6790196123588735e-05, "loss": 0.8476, "step": 3453 }, { "epoch": 3.9344729344729346, "grad_norm": 0.14648666977882385, "learning_rate": 4.6787911796893804e-05, "loss": 0.8386, "step": 3454 }, { "epoch": 3.9356125356125355, "grad_norm": 0.14609716832637787, "learning_rate": 4.678562671344477e-05, "loss": 0.9798, "step": 3455 }, { "epoch": 3.936752136752137, "grad_norm": 0.14763420820236206, "learning_rate": 4.6783340873321014e-05, "loss": 0.8774, "step": 3456 }, { "epoch": 3.937891737891738, "grad_norm": 0.14125679433345795, "learning_rate": 4.678105427660192e-05, "loss": 0.739, "step": 3457 }, { "epoch": 3.939031339031339, "grad_norm": 0.13715296983718872, "learning_rate": 4.677876692336693e-05, "loss": 0.8174, "step": 3458 }, { "epoch": 3.9401709401709404, "grad_norm": 0.14227436482906342, "learning_rate": 4.6776478813695446e-05, "loss": 0.8349, "step": 3459 }, { "epoch": 3.9413105413105414, "grad_norm": 0.15223047137260437, "learning_rate": 4.677418994766698e-05, "loss": 0.8952, "step": 3460 }, { "epoch": 3.9424501424501424, "grad_norm": 0.1365755945444107, "learning_rate": 4.677190032536102e-05, "loss": 0.8954, "step": 3461 }, { "epoch": 3.943589743589744, "grad_norm": 0.16250252723693848, "learning_rate": 4.676960994685709e-05, "loss": 0.8019, "step": 3462 }, { "epoch": 3.9447293447293448, "grad_norm": 0.16038726270198822, "learning_rate": 4.676731881223474e-05, "loss": 0.7893, "step": 3463 }, { "epoch": 3.9458689458689458, "grad_norm": 0.1472318321466446, "learning_rate": 4.676502692157354e-05, "loss": 0.8278, "step": 3464 }, { "epoch": 3.947008547008547, "grad_norm": 0.1726992130279541, "learning_rate": 4.676273427495311e-05, "loss": 0.7704, "step": 3465 }, { "epoch": 3.948148148148148, "grad_norm": 0.22516140341758728, "learning_rate": 4.676044087245306e-05, "loss": 0.8842, "step": 3466 }, { "epoch": 3.949287749287749, "grad_norm": 0.14022859930992126, "learning_rate": 4.675814671415307e-05, "loss": 0.7451, "step": 3467 }, { "epoch": 3.9504273504273506, "grad_norm": 0.18753965198993683, "learning_rate": 4.67558518001328e-05, "loss": 0.7253, "step": 3468 }, { "epoch": 3.9515669515669516, "grad_norm": 0.14902377128601074, "learning_rate": 4.675355613047197e-05, "loss": 0.9185, "step": 3469 }, { "epoch": 3.9527065527065526, "grad_norm": 0.16637928783893585, "learning_rate": 4.675125970525031e-05, "loss": 0.7003, "step": 3470 }, { "epoch": 3.953846153846154, "grad_norm": 0.15486907958984375, "learning_rate": 4.6748962524547594e-05, "loss": 0.8815, "step": 3471 }, { "epoch": 3.954985754985755, "grad_norm": 0.14602243900299072, "learning_rate": 4.674666458844359e-05, "loss": 0.6844, "step": 3472 }, { "epoch": 3.956125356125356, "grad_norm": 0.18017630279064178, "learning_rate": 4.674436589701813e-05, "loss": 0.7329, "step": 3473 }, { "epoch": 3.9572649572649574, "grad_norm": 0.15623396635055542, "learning_rate": 4.674206645035104e-05, "loss": 0.8304, "step": 3474 }, { "epoch": 3.9584045584045584, "grad_norm": 0.1459084302186966, "learning_rate": 4.6739766248522194e-05, "loss": 0.9101, "step": 3475 }, { "epoch": 3.9595441595441594, "grad_norm": 0.1555999517440796, "learning_rate": 4.6737465291611484e-05, "loss": 0.8722, "step": 3476 }, { "epoch": 3.960683760683761, "grad_norm": 0.166189044713974, "learning_rate": 4.673516357969882e-05, "loss": 0.7908, "step": 3477 }, { "epoch": 3.961823361823362, "grad_norm": 0.16147474944591522, "learning_rate": 4.673286111286415e-05, "loss": 0.8033, "step": 3478 }, { "epoch": 3.962962962962963, "grad_norm": 0.14976289868354797, "learning_rate": 4.673055789118745e-05, "loss": 0.7987, "step": 3479 }, { "epoch": 3.9641025641025642, "grad_norm": 0.1402273029088974, "learning_rate": 4.672825391474872e-05, "loss": 1.0181, "step": 3480 }, { "epoch": 3.965242165242165, "grad_norm": 0.15403476357460022, "learning_rate": 4.6725949183627975e-05, "loss": 0.6773, "step": 3481 }, { "epoch": 3.966381766381766, "grad_norm": 0.16899093985557556, "learning_rate": 4.6723643697905274e-05, "loss": 0.725, "step": 3482 }, { "epoch": 3.9675213675213676, "grad_norm": 0.15205314755439758, "learning_rate": 4.672133745766068e-05, "loss": 0.8625, "step": 3483 }, { "epoch": 3.9686609686609686, "grad_norm": 0.13682928681373596, "learning_rate": 4.6719030462974304e-05, "loss": 0.8228, "step": 3484 }, { "epoch": 3.9698005698005696, "grad_norm": 0.16801244020462036, "learning_rate": 4.671672271392627e-05, "loss": 0.9002, "step": 3485 }, { "epoch": 3.970940170940171, "grad_norm": 0.15570217370986938, "learning_rate": 4.6714414210596744e-05, "loss": 0.8369, "step": 3486 }, { "epoch": 3.972079772079772, "grad_norm": 0.14175504446029663, "learning_rate": 4.6712104953065894e-05, "loss": 0.932, "step": 3487 }, { "epoch": 3.973219373219373, "grad_norm": 0.14280645549297333, "learning_rate": 4.670979494141393e-05, "loss": 0.6716, "step": 3488 }, { "epoch": 3.9743589743589745, "grad_norm": 0.18303732573986053, "learning_rate": 4.670748417572108e-05, "loss": 0.6283, "step": 3489 }, { "epoch": 3.9754985754985754, "grad_norm": 0.1463060826063156, "learning_rate": 4.6705172656067606e-05, "loss": 0.9817, "step": 3490 }, { "epoch": 3.9766381766381764, "grad_norm": 0.13618658483028412, "learning_rate": 4.670286038253381e-05, "loss": 0.6886, "step": 3491 }, { "epoch": 3.977777777777778, "grad_norm": 0.18522575497627258, "learning_rate": 4.670054735519998e-05, "loss": 0.8265, "step": 3492 }, { "epoch": 3.978917378917379, "grad_norm": 0.14105400443077087, "learning_rate": 4.669823357414647e-05, "loss": 0.7428, "step": 3493 }, { "epoch": 3.98005698005698, "grad_norm": 0.13627874851226807, "learning_rate": 4.669591903945363e-05, "loss": 0.8517, "step": 3494 }, { "epoch": 3.9811965811965813, "grad_norm": 0.15071073174476624, "learning_rate": 4.669360375120187e-05, "loss": 0.9303, "step": 3495 }, { "epoch": 3.9823361823361823, "grad_norm": 0.14497306942939758, "learning_rate": 4.6691287709471574e-05, "loss": 0.8023, "step": 3496 }, { "epoch": 3.9834757834757832, "grad_norm": 0.16087129712104797, "learning_rate": 4.6688970914343224e-05, "loss": 0.7488, "step": 3497 }, { "epoch": 3.9846153846153847, "grad_norm": 0.11948225647211075, "learning_rate": 4.668665336589726e-05, "loss": 0.9906, "step": 3498 }, { "epoch": 3.9857549857549857, "grad_norm": 0.14590954780578613, "learning_rate": 4.668433506421419e-05, "loss": 0.9324, "step": 3499 }, { "epoch": 3.9868945868945866, "grad_norm": 0.14911605417728424, "learning_rate": 4.668201600937453e-05, "loss": 0.82, "step": 3500 }, { "epoch": 3.988034188034188, "grad_norm": 0.16907189786434174, "learning_rate": 4.667969620145882e-05, "loss": 0.8538, "step": 3501 }, { "epoch": 3.989173789173789, "grad_norm": 0.1457272469997406, "learning_rate": 4.6677375640547646e-05, "loss": 0.8234, "step": 3502 }, { "epoch": 3.99031339031339, "grad_norm": 0.20971538126468658, "learning_rate": 4.667505432672161e-05, "loss": 0.6387, "step": 3503 }, { "epoch": 3.9914529914529915, "grad_norm": 0.1684182733297348, "learning_rate": 4.6672732260061326e-05, "loss": 0.8368, "step": 3504 }, { "epoch": 3.9925925925925925, "grad_norm": 0.13544008135795593, "learning_rate": 4.6670409440647455e-05, "loss": 0.8769, "step": 3505 }, { "epoch": 3.9937321937321935, "grad_norm": 0.15714909136295319, "learning_rate": 4.666808586856066e-05, "loss": 0.7932, "step": 3506 }, { "epoch": 3.994871794871795, "grad_norm": 0.16212841868400574, "learning_rate": 4.666576154388166e-05, "loss": 0.8595, "step": 3507 }, { "epoch": 3.996011396011396, "grad_norm": 0.20602630078792572, "learning_rate": 4.666343646669118e-05, "loss": 0.8981, "step": 3508 }, { "epoch": 3.9971509971509973, "grad_norm": 0.15896059572696686, "learning_rate": 4.666111063706998e-05, "loss": 0.8023, "step": 3509 }, { "epoch": 3.9982905982905983, "grad_norm": 0.148764505982399, "learning_rate": 4.665878405509883e-05, "loss": 0.727, "step": 3510 }, { "epoch": 3.9994301994301993, "grad_norm": 0.16278378665447235, "learning_rate": 4.665645672085856e-05, "loss": 0.7494, "step": 3511 }, { "epoch": 4.0, "grad_norm": 0.3494948446750641, "learning_rate": 4.6654128634429996e-05, "loss": 0.6898, "step": 3512 }, { "epoch": 4.001139601139601, "grad_norm": 0.156687393784523, "learning_rate": 4.665179979589399e-05, "loss": 0.7136, "step": 3513 }, { "epoch": 4.002279202279202, "grad_norm": 0.16005277633666992, "learning_rate": 4.6649470205331424e-05, "loss": 0.8428, "step": 3514 }, { "epoch": 4.003418803418803, "grad_norm": 0.17963695526123047, "learning_rate": 4.664713986282323e-05, "loss": 0.7718, "step": 3515 }, { "epoch": 4.004558404558405, "grad_norm": 0.16050583124160767, "learning_rate": 4.6644808768450346e-05, "loss": 0.8189, "step": 3516 }, { "epoch": 4.005698005698005, "grad_norm": 0.1456081122159958, "learning_rate": 4.664247692229372e-05, "loss": 0.7237, "step": 3517 }, { "epoch": 4.006837606837607, "grad_norm": 0.16313639283180237, "learning_rate": 4.6640144324434355e-05, "loss": 0.8514, "step": 3518 }, { "epoch": 4.007977207977208, "grad_norm": 0.16933190822601318, "learning_rate": 4.663781097495327e-05, "loss": 0.8514, "step": 3519 }, { "epoch": 4.009116809116809, "grad_norm": 0.14906363189220428, "learning_rate": 4.66354768739315e-05, "loss": 0.935, "step": 3520 }, { "epoch": 4.01025641025641, "grad_norm": 0.15414398908615112, "learning_rate": 4.663314202145012e-05, "loss": 0.7571, "step": 3521 }, { "epoch": 4.011396011396012, "grad_norm": 0.15351127088069916, "learning_rate": 4.6630806417590235e-05, "loss": 0.8924, "step": 3522 }, { "epoch": 4.012535612535612, "grad_norm": 0.17059801518917084, "learning_rate": 4.662847006243295e-05, "loss": 0.5921, "step": 3523 }, { "epoch": 4.013675213675214, "grad_norm": 0.1579991728067398, "learning_rate": 4.662613295605942e-05, "loss": 0.6791, "step": 3524 }, { "epoch": 4.014814814814815, "grad_norm": 0.15408803522586823, "learning_rate": 4.6623795098550825e-05, "loss": 0.8667, "step": 3525 }, { "epoch": 4.015954415954416, "grad_norm": 0.15600430965423584, "learning_rate": 4.662145648998836e-05, "loss": 0.7489, "step": 3526 }, { "epoch": 4.017094017094017, "grad_norm": 0.13071642816066742, "learning_rate": 4.6619117130453254e-05, "loss": 0.8093, "step": 3527 }, { "epoch": 4.0182336182336185, "grad_norm": 0.12814581394195557, "learning_rate": 4.661677702002676e-05, "loss": 0.9089, "step": 3528 }, { "epoch": 4.019373219373219, "grad_norm": 0.15070417523384094, "learning_rate": 4.661443615879015e-05, "loss": 0.6987, "step": 3529 }, { "epoch": 4.02051282051282, "grad_norm": 0.1645977795124054, "learning_rate": 4.661209454682472e-05, "loss": 0.7235, "step": 3530 }, { "epoch": 4.021652421652422, "grad_norm": 0.14421582221984863, "learning_rate": 4.660975218421183e-05, "loss": 0.8332, "step": 3531 }, { "epoch": 4.022792022792022, "grad_norm": 0.189396470785141, "learning_rate": 4.6607409071032815e-05, "loss": 0.7734, "step": 3532 }, { "epoch": 4.023931623931624, "grad_norm": 0.14422671496868134, "learning_rate": 4.660506520736906e-05, "loss": 0.8768, "step": 3533 }, { "epoch": 4.025071225071225, "grad_norm": 0.17115554213523865, "learning_rate": 4.660272059330198e-05, "loss": 0.8174, "step": 3534 }, { "epoch": 4.026210826210826, "grad_norm": 0.17225325107574463, "learning_rate": 4.6600375228913e-05, "loss": 0.8584, "step": 3535 }, { "epoch": 4.027350427350427, "grad_norm": 0.16570670902729034, "learning_rate": 4.659802911428359e-05, "loss": 0.9189, "step": 3536 }, { "epoch": 4.028490028490029, "grad_norm": 0.187309131026268, "learning_rate": 4.659568224949524e-05, "loss": 0.6821, "step": 3537 }, { "epoch": 4.029629629629629, "grad_norm": 0.11738475412130356, "learning_rate": 4.659333463462945e-05, "loss": 0.9917, "step": 3538 }, { "epoch": 4.030769230769231, "grad_norm": 0.17785432934761047, "learning_rate": 4.6590986269767766e-05, "loss": 0.8195, "step": 3539 }, { "epoch": 4.031908831908832, "grad_norm": 0.17022278904914856, "learning_rate": 4.658863715499175e-05, "loss": 0.8277, "step": 3540 }, { "epoch": 4.033048433048433, "grad_norm": 0.13847829401493073, "learning_rate": 4.658628729038301e-05, "loss": 0.792, "step": 3541 }, { "epoch": 4.034188034188034, "grad_norm": 0.15126143395900726, "learning_rate": 4.658393667602314e-05, "loss": 0.8249, "step": 3542 }, { "epoch": 4.0353276353276355, "grad_norm": 0.18600907921791077, "learning_rate": 4.658158531199379e-05, "loss": 0.7253, "step": 3543 }, { "epoch": 4.036467236467236, "grad_norm": 0.13330426812171936, "learning_rate": 4.6579233198376634e-05, "loss": 0.896, "step": 3544 }, { "epoch": 4.0376068376068375, "grad_norm": 0.16327106952667236, "learning_rate": 4.6576880335253374e-05, "loss": 0.7623, "step": 3545 }, { "epoch": 4.038746438746439, "grad_norm": 0.2002708464860916, "learning_rate": 4.6574526722705726e-05, "loss": 0.6612, "step": 3546 }, { "epoch": 4.0398860398860394, "grad_norm": 0.1520686000585556, "learning_rate": 4.657217236081543e-05, "loss": 1.0156, "step": 3547 }, { "epoch": 4.041025641025641, "grad_norm": 0.14421986043453217, "learning_rate": 4.656981724966426e-05, "loss": 0.9037, "step": 3548 }, { "epoch": 4.042165242165242, "grad_norm": 0.14663898944854736, "learning_rate": 4.6567461389334016e-05, "loss": 0.9552, "step": 3549 }, { "epoch": 4.043304843304844, "grad_norm": 0.15126721560955048, "learning_rate": 4.656510477990653e-05, "loss": 0.8096, "step": 3550 }, { "epoch": 4.044444444444444, "grad_norm": 0.15986499190330505, "learning_rate": 4.6562747421463655e-05, "loss": 0.7929, "step": 3551 }, { "epoch": 4.045584045584046, "grad_norm": 0.17100024223327637, "learning_rate": 4.656038931408726e-05, "loss": 0.6964, "step": 3552 }, { "epoch": 4.046723646723647, "grad_norm": 0.14742796123027802, "learning_rate": 4.655803045785926e-05, "loss": 0.9442, "step": 3553 }, { "epoch": 4.047863247863248, "grad_norm": 0.15347158908843994, "learning_rate": 4.6555670852861574e-05, "loss": 0.7916, "step": 3554 }, { "epoch": 4.049002849002849, "grad_norm": 0.16807137429714203, "learning_rate": 4.6553310499176154e-05, "loss": 0.7506, "step": 3555 }, { "epoch": 4.050142450142451, "grad_norm": 0.15673980116844177, "learning_rate": 4.6550949396884995e-05, "loss": 0.8105, "step": 3556 }, { "epoch": 4.051282051282051, "grad_norm": 0.13703081011772156, "learning_rate": 4.65485875460701e-05, "loss": 0.9299, "step": 3557 }, { "epoch": 4.0524216524216525, "grad_norm": 0.14694665372371674, "learning_rate": 4.65462249468135e-05, "loss": 0.8278, "step": 3558 }, { "epoch": 4.053561253561254, "grad_norm": 0.15886713564395905, "learning_rate": 4.6543861599197246e-05, "loss": 0.8094, "step": 3559 }, { "epoch": 4.0547008547008545, "grad_norm": 0.1390674114227295, "learning_rate": 4.654149750330345e-05, "loss": 0.8836, "step": 3560 }, { "epoch": 4.055840455840456, "grad_norm": 0.14855355024337769, "learning_rate": 4.653913265921419e-05, "loss": 0.9287, "step": 3561 }, { "epoch": 4.056980056980057, "grad_norm": 0.14535707235336304, "learning_rate": 4.653676706701163e-05, "loss": 0.8883, "step": 3562 }, { "epoch": 4.058119658119658, "grad_norm": 0.13472265005111694, "learning_rate": 4.653440072677792e-05, "loss": 0.9482, "step": 3563 }, { "epoch": 4.059259259259259, "grad_norm": 0.15425075590610504, "learning_rate": 4.6532033638595254e-05, "loss": 0.8602, "step": 3564 }, { "epoch": 4.060398860398861, "grad_norm": 0.19460026919841766, "learning_rate": 4.652966580254584e-05, "loss": 0.7565, "step": 3565 }, { "epoch": 4.061538461538461, "grad_norm": 0.14792226254940033, "learning_rate": 4.6527297218711926e-05, "loss": 0.8495, "step": 3566 }, { "epoch": 4.062678062678063, "grad_norm": 0.16837316751480103, "learning_rate": 4.652492788717579e-05, "loss": 0.8988, "step": 3567 }, { "epoch": 4.063817663817664, "grad_norm": 0.15385596454143524, "learning_rate": 4.6522557808019704e-05, "loss": 0.866, "step": 3568 }, { "epoch": 4.064957264957265, "grad_norm": 0.17028549313545227, "learning_rate": 4.652018698132601e-05, "loss": 0.8137, "step": 3569 }, { "epoch": 4.066096866096866, "grad_norm": 0.14771980047225952, "learning_rate": 4.651781540717704e-05, "loss": 0.867, "step": 3570 }, { "epoch": 4.067236467236468, "grad_norm": 0.15428456664085388, "learning_rate": 4.651544308565515e-05, "loss": 0.784, "step": 3571 }, { "epoch": 4.068376068376068, "grad_norm": 0.13995905220508575, "learning_rate": 4.651307001684276e-05, "loss": 0.9278, "step": 3572 }, { "epoch": 4.06951566951567, "grad_norm": 0.1955014020204544, "learning_rate": 4.65106962008223e-05, "loss": 0.5831, "step": 3573 }, { "epoch": 4.070655270655271, "grad_norm": 0.17508552968502045, "learning_rate": 4.65083216376762e-05, "loss": 0.7284, "step": 3574 }, { "epoch": 4.0717948717948715, "grad_norm": 0.16214320063591003, "learning_rate": 4.6505946327486936e-05, "loss": 0.8925, "step": 3575 }, { "epoch": 4.072934472934473, "grad_norm": 0.1865091770887375, "learning_rate": 4.650357027033702e-05, "loss": 0.7345, "step": 3576 }, { "epoch": 4.074074074074074, "grad_norm": 0.19175677001476288, "learning_rate": 4.650119346630897e-05, "loss": 0.7089, "step": 3577 }, { "epoch": 4.075213675213675, "grad_norm": 0.1551404446363449, "learning_rate": 4.649881591548535e-05, "loss": 0.8324, "step": 3578 }, { "epoch": 4.076353276353276, "grad_norm": 0.14528369903564453, "learning_rate": 4.6496437617948725e-05, "loss": 0.8641, "step": 3579 }, { "epoch": 4.077492877492878, "grad_norm": 0.15619570016860962, "learning_rate": 4.649405857378171e-05, "loss": 0.7039, "step": 3580 }, { "epoch": 4.078632478632478, "grad_norm": 0.17114530503749847, "learning_rate": 4.6491678783066924e-05, "loss": 0.7716, "step": 3581 }, { "epoch": 4.07977207977208, "grad_norm": 0.15298311412334442, "learning_rate": 4.6489298245887034e-05, "loss": 0.7569, "step": 3582 }, { "epoch": 4.080911680911681, "grad_norm": 0.2157907336950302, "learning_rate": 4.6486916962324724e-05, "loss": 0.5514, "step": 3583 }, { "epoch": 4.082051282051282, "grad_norm": 0.1464688926935196, "learning_rate": 4.6484534932462696e-05, "loss": 0.9838, "step": 3584 }, { "epoch": 4.083190883190883, "grad_norm": 0.16757918894290924, "learning_rate": 4.6482152156383684e-05, "loss": 0.7074, "step": 3585 }, { "epoch": 4.084330484330485, "grad_norm": 0.1393502950668335, "learning_rate": 4.647976863417046e-05, "loss": 0.9847, "step": 3586 }, { "epoch": 4.085470085470085, "grad_norm": 0.16414041817188263, "learning_rate": 4.64773843659058e-05, "loss": 0.8177, "step": 3587 }, { "epoch": 4.086609686609687, "grad_norm": 0.1699393391609192, "learning_rate": 4.647499935167251e-05, "loss": 0.9266, "step": 3588 }, { "epoch": 4.087749287749288, "grad_norm": 0.1846645325422287, "learning_rate": 4.647261359155344e-05, "loss": 0.8353, "step": 3589 }, { "epoch": 4.088888888888889, "grad_norm": 0.16945374011993408, "learning_rate": 4.6470227085631455e-05, "loss": 0.8741, "step": 3590 }, { "epoch": 4.09002849002849, "grad_norm": 0.11592093110084534, "learning_rate": 4.6467839833989435e-05, "loss": 0.8984, "step": 3591 }, { "epoch": 4.091168091168091, "grad_norm": 0.19027556478977203, "learning_rate": 4.64654518367103e-05, "loss": 0.8381, "step": 3592 }, { "epoch": 4.092307692307692, "grad_norm": 0.17370308935642242, "learning_rate": 4.646306309387699e-05, "loss": 0.7826, "step": 3593 }, { "epoch": 4.093447293447293, "grad_norm": 0.17760266363620758, "learning_rate": 4.6460673605572484e-05, "loss": 0.7861, "step": 3594 }, { "epoch": 4.094586894586895, "grad_norm": 0.18767882883548737, "learning_rate": 4.645828337187976e-05, "loss": 0.6726, "step": 3595 }, { "epoch": 4.095726495726495, "grad_norm": 0.19562728703022003, "learning_rate": 4.645589239288185e-05, "loss": 0.7954, "step": 3596 }, { "epoch": 4.096866096866097, "grad_norm": 0.16158398985862732, "learning_rate": 4.6453500668661786e-05, "loss": 0.7515, "step": 3597 }, { "epoch": 4.098005698005698, "grad_norm": 0.17128139734268188, "learning_rate": 4.645110819930265e-05, "loss": 0.6846, "step": 3598 }, { "epoch": 4.099145299145299, "grad_norm": 0.1473407745361328, "learning_rate": 4.644871498488753e-05, "loss": 0.6415, "step": 3599 }, { "epoch": 4.1002849002849, "grad_norm": 0.14364983141422272, "learning_rate": 4.644632102549956e-05, "loss": 0.8383, "step": 3600 }, { "epoch": 4.101424501424502, "grad_norm": 0.13314323127269745, "learning_rate": 4.6443926321221886e-05, "loss": 0.9247, "step": 3601 }, { "epoch": 4.102564102564102, "grad_norm": 0.13570788502693176, "learning_rate": 4.644153087213768e-05, "loss": 0.9356, "step": 3602 }, { "epoch": 4.103703703703704, "grad_norm": 0.1606551706790924, "learning_rate": 4.6439134678330134e-05, "loss": 0.78, "step": 3603 }, { "epoch": 4.104843304843305, "grad_norm": 0.17509931325912476, "learning_rate": 4.643673773988249e-05, "loss": 0.8968, "step": 3604 }, { "epoch": 4.105982905982906, "grad_norm": 0.19881106913089752, "learning_rate": 4.643434005687799e-05, "loss": 0.6532, "step": 3605 }, { "epoch": 4.107122507122507, "grad_norm": 0.14632968604564667, "learning_rate": 4.643194162939992e-05, "loss": 0.9583, "step": 3606 }, { "epoch": 4.1082621082621085, "grad_norm": 0.15079665184020996, "learning_rate": 4.642954245753157e-05, "loss": 0.8201, "step": 3607 }, { "epoch": 4.109401709401709, "grad_norm": 0.1575961709022522, "learning_rate": 4.642714254135628e-05, "loss": 0.6698, "step": 3608 }, { "epoch": 4.1105413105413104, "grad_norm": 0.18284007906913757, "learning_rate": 4.642474188095741e-05, "loss": 0.6156, "step": 3609 }, { "epoch": 4.111680911680912, "grad_norm": 0.16812333464622498, "learning_rate": 4.6422340476418344e-05, "loss": 0.861, "step": 3610 }, { "epoch": 4.112820512820512, "grad_norm": 0.1552661657333374, "learning_rate": 4.641993832782246e-05, "loss": 0.6791, "step": 3611 }, { "epoch": 4.113960113960114, "grad_norm": 0.16001765429973602, "learning_rate": 4.6417535435253236e-05, "loss": 0.7138, "step": 3612 }, { "epoch": 4.115099715099715, "grad_norm": 0.13813526928424835, "learning_rate": 4.6415131798794096e-05, "loss": 0.9783, "step": 3613 }, { "epoch": 4.116239316239316, "grad_norm": 0.14909638464450836, "learning_rate": 4.641272741852853e-05, "loss": 0.9292, "step": 3614 }, { "epoch": 4.117378917378917, "grad_norm": 0.15766140818595886, "learning_rate": 4.641032229454007e-05, "loss": 0.876, "step": 3615 }, { "epoch": 4.118518518518519, "grad_norm": 0.1870744526386261, "learning_rate": 4.640791642691223e-05, "loss": 0.7223, "step": 3616 }, { "epoch": 4.119658119658119, "grad_norm": 0.18714767694473267, "learning_rate": 4.640550981572858e-05, "loss": 0.6913, "step": 3617 }, { "epoch": 4.120797720797721, "grad_norm": 0.16612930595874786, "learning_rate": 4.640310246107271e-05, "loss": 0.9073, "step": 3618 }, { "epoch": 4.121937321937322, "grad_norm": 0.19368138909339905, "learning_rate": 4.640069436302823e-05, "loss": 0.6559, "step": 3619 }, { "epoch": 4.123076923076923, "grad_norm": 0.16957665979862213, "learning_rate": 4.6398285521678795e-05, "loss": 0.7556, "step": 3620 }, { "epoch": 4.124216524216524, "grad_norm": 0.14673933386802673, "learning_rate": 4.639587593710805e-05, "loss": 0.8789, "step": 3621 }, { "epoch": 4.1253561253561255, "grad_norm": 0.1646455079317093, "learning_rate": 4.639346560939969e-05, "loss": 0.7582, "step": 3622 }, { "epoch": 4.126495726495726, "grad_norm": 0.1604212373495102, "learning_rate": 4.6391054538637444e-05, "loss": 0.8656, "step": 3623 }, { "epoch": 4.1276353276353275, "grad_norm": 0.17970581352710724, "learning_rate": 4.638864272490505e-05, "loss": 0.7821, "step": 3624 }, { "epoch": 4.128774928774929, "grad_norm": 0.17448319494724274, "learning_rate": 4.6386230168286265e-05, "loss": 0.841, "step": 3625 }, { "epoch": 4.12991452991453, "grad_norm": 0.15706060826778412, "learning_rate": 4.63838168688649e-05, "loss": 0.954, "step": 3626 }, { "epoch": 4.131054131054131, "grad_norm": 0.12947851419448853, "learning_rate": 4.638140282672477e-05, "loss": 1.0501, "step": 3627 }, { "epoch": 4.132193732193732, "grad_norm": 0.14992794394493103, "learning_rate": 4.6378988041949725e-05, "loss": 0.8091, "step": 3628 }, { "epoch": 4.133333333333334, "grad_norm": 0.16071423888206482, "learning_rate": 4.6376572514623625e-05, "loss": 0.8783, "step": 3629 }, { "epoch": 4.134472934472934, "grad_norm": 0.13396671414375305, "learning_rate": 4.6374156244830374e-05, "loss": 0.8796, "step": 3630 }, { "epoch": 4.135612535612536, "grad_norm": 0.13779418170452118, "learning_rate": 4.6371739232653905e-05, "loss": 0.9137, "step": 3631 }, { "epoch": 4.136752136752137, "grad_norm": 0.17061258852481842, "learning_rate": 4.636932147817816e-05, "loss": 0.7282, "step": 3632 }, { "epoch": 4.137891737891738, "grad_norm": 0.14861363172531128, "learning_rate": 4.6366902981487105e-05, "loss": 0.8182, "step": 3633 }, { "epoch": 4.139031339031339, "grad_norm": 0.16506506502628326, "learning_rate": 4.636448374266475e-05, "loss": 0.7298, "step": 3634 }, { "epoch": 4.140170940170941, "grad_norm": 0.15416599810123444, "learning_rate": 4.636206376179514e-05, "loss": 0.8121, "step": 3635 }, { "epoch": 4.141310541310541, "grad_norm": 0.15596051514148712, "learning_rate": 4.6359643038962296e-05, "loss": 0.8538, "step": 3636 }, { "epoch": 4.1424501424501425, "grad_norm": 0.1777096390724182, "learning_rate": 4.635722157425031e-05, "loss": 0.7866, "step": 3637 }, { "epoch": 4.143589743589744, "grad_norm": 0.1438266932964325, "learning_rate": 4.635479936774329e-05, "loss": 0.8084, "step": 3638 }, { "epoch": 4.1447293447293445, "grad_norm": 0.14145760238170624, "learning_rate": 4.635237641952536e-05, "loss": 0.8315, "step": 3639 }, { "epoch": 4.145868945868946, "grad_norm": 0.16704396903514862, "learning_rate": 4.6349952729680677e-05, "loss": 0.7879, "step": 3640 }, { "epoch": 4.147008547008547, "grad_norm": 0.18530404567718506, "learning_rate": 4.6347528298293426e-05, "loss": 0.6051, "step": 3641 }, { "epoch": 4.148148148148148, "grad_norm": 0.18084098398685455, "learning_rate": 4.634510312544781e-05, "loss": 0.7673, "step": 3642 }, { "epoch": 4.149287749287749, "grad_norm": 0.24151214957237244, "learning_rate": 4.634267721122806e-05, "loss": 0.8762, "step": 3643 }, { "epoch": 4.150427350427351, "grad_norm": 0.17945998907089233, "learning_rate": 4.634025055571844e-05, "loss": 0.7239, "step": 3644 }, { "epoch": 4.151566951566951, "grad_norm": 0.1628548949956894, "learning_rate": 4.6337823159003234e-05, "loss": 0.8899, "step": 3645 }, { "epoch": 4.152706552706553, "grad_norm": 0.1715235561132431, "learning_rate": 4.633539502116675e-05, "loss": 0.7984, "step": 3646 }, { "epoch": 4.153846153846154, "grad_norm": 0.15338733792304993, "learning_rate": 4.633296614229332e-05, "loss": 0.7406, "step": 3647 }, { "epoch": 4.154985754985755, "grad_norm": 0.1404416561126709, "learning_rate": 4.6330536522467315e-05, "loss": 0.8761, "step": 3648 }, { "epoch": 4.156125356125356, "grad_norm": 0.17968951165676117, "learning_rate": 4.632810616177312e-05, "loss": 0.7429, "step": 3649 }, { "epoch": 4.157264957264958, "grad_norm": 0.16005079448223114, "learning_rate": 4.632567506029514e-05, "loss": 0.8709, "step": 3650 }, { "epoch": 4.158404558404558, "grad_norm": 0.18263664841651917, "learning_rate": 4.6323243218117816e-05, "loss": 0.7205, "step": 3651 }, { "epoch": 4.15954415954416, "grad_norm": 0.14968404173851013, "learning_rate": 4.632081063532562e-05, "loss": 0.8413, "step": 3652 }, { "epoch": 4.160683760683761, "grad_norm": 0.15817460417747498, "learning_rate": 4.631837731200304e-05, "loss": 0.8515, "step": 3653 }, { "epoch": 4.1618233618233615, "grad_norm": 0.15472495555877686, "learning_rate": 4.631594324823459e-05, "loss": 0.764, "step": 3654 }, { "epoch": 4.162962962962963, "grad_norm": 0.14850182831287384, "learning_rate": 4.6313508444104814e-05, "loss": 1.0525, "step": 3655 }, { "epoch": 4.164102564102564, "grad_norm": 0.12211723625659943, "learning_rate": 4.631107289969827e-05, "loss": 0.9301, "step": 3656 }, { "epoch": 4.165242165242165, "grad_norm": 0.17291347682476044, "learning_rate": 4.630863661509956e-05, "loss": 0.7622, "step": 3657 }, { "epoch": 4.166381766381766, "grad_norm": 0.15008722245693207, "learning_rate": 4.6306199590393306e-05, "loss": 0.737, "step": 3658 }, { "epoch": 4.167521367521368, "grad_norm": 0.19958460330963135, "learning_rate": 4.630376182566415e-05, "loss": 0.5149, "step": 3659 }, { "epoch": 4.168660968660968, "grad_norm": 0.18100185692310333, "learning_rate": 4.630132332099676e-05, "loss": 0.9104, "step": 3660 }, { "epoch": 4.16980056980057, "grad_norm": 0.16496123373508453, "learning_rate": 4.629888407647582e-05, "loss": 0.7213, "step": 3661 }, { "epoch": 4.170940170940171, "grad_norm": 0.1654644012451172, "learning_rate": 4.629644409218608e-05, "loss": 0.9031, "step": 3662 }, { "epoch": 4.172079772079772, "grad_norm": 0.15994341671466827, "learning_rate": 4.629400336821226e-05, "loss": 0.755, "step": 3663 }, { "epoch": 4.173219373219373, "grad_norm": 0.1428716480731964, "learning_rate": 4.6291561904639145e-05, "loss": 0.8363, "step": 3664 }, { "epoch": 4.174358974358975, "grad_norm": 0.18579061329364777, "learning_rate": 4.628911970155154e-05, "loss": 0.6596, "step": 3665 }, { "epoch": 4.175498575498575, "grad_norm": 0.1424458920955658, "learning_rate": 4.6286676759034253e-05, "loss": 0.9963, "step": 3666 }, { "epoch": 4.176638176638177, "grad_norm": 0.1387307345867157, "learning_rate": 4.628423307717215e-05, "loss": 1.0031, "step": 3667 }, { "epoch": 4.177777777777778, "grad_norm": 0.16689176857471466, "learning_rate": 4.62817886560501e-05, "loss": 0.7465, "step": 3668 }, { "epoch": 4.178917378917379, "grad_norm": 0.16913136839866638, "learning_rate": 4.6279343495753e-05, "loss": 0.6266, "step": 3669 }, { "epoch": 4.18005698005698, "grad_norm": 0.15506194531917572, "learning_rate": 4.6276897596365794e-05, "loss": 0.7579, "step": 3670 }, { "epoch": 4.181196581196581, "grad_norm": 0.152205690741539, "learning_rate": 4.6274450957973415e-05, "loss": 0.9007, "step": 3671 }, { "epoch": 4.182336182336182, "grad_norm": 0.1602986454963684, "learning_rate": 4.627200358066085e-05, "loss": 0.7788, "step": 3672 }, { "epoch": 4.183475783475783, "grad_norm": 0.17027804255485535, "learning_rate": 4.62695554645131e-05, "loss": 0.6838, "step": 3673 }, { "epoch": 4.184615384615385, "grad_norm": 0.14730262756347656, "learning_rate": 4.6267106609615195e-05, "loss": 0.8341, "step": 3674 }, { "epoch": 4.185754985754985, "grad_norm": 0.16242659091949463, "learning_rate": 4.6264657016052196e-05, "loss": 0.792, "step": 3675 }, { "epoch": 4.186894586894587, "grad_norm": 0.13900625705718994, "learning_rate": 4.626220668390918e-05, "loss": 0.9605, "step": 3676 }, { "epoch": 4.188034188034188, "grad_norm": 0.13298556208610535, "learning_rate": 4.625975561327126e-05, "loss": 0.8414, "step": 3677 }, { "epoch": 4.189173789173789, "grad_norm": 0.1835649162530899, "learning_rate": 4.625730380422356e-05, "loss": 0.594, "step": 3678 }, { "epoch": 4.19031339031339, "grad_norm": 0.19300466775894165, "learning_rate": 4.625485125685124e-05, "loss": 0.7097, "step": 3679 }, { "epoch": 4.191452991452992, "grad_norm": 0.17401108145713806, "learning_rate": 4.625239797123948e-05, "loss": 0.7094, "step": 3680 }, { "epoch": 4.192592592592592, "grad_norm": 0.15717926621437073, "learning_rate": 4.62499439474735e-05, "loss": 0.7873, "step": 3681 }, { "epoch": 4.193732193732194, "grad_norm": 0.17586669325828552, "learning_rate": 4.624748918563852e-05, "loss": 0.83, "step": 3682 }, { "epoch": 4.194871794871795, "grad_norm": 0.17962506413459778, "learning_rate": 4.624503368581983e-05, "loss": 0.7088, "step": 3683 }, { "epoch": 4.196011396011396, "grad_norm": 0.22976629436016083, "learning_rate": 4.624257744810268e-05, "loss": 0.5079, "step": 3684 }, { "epoch": 4.197150997150997, "grad_norm": 0.13983486592769623, "learning_rate": 4.62401204725724e-05, "loss": 0.7997, "step": 3685 }, { "epoch": 4.1982905982905985, "grad_norm": 0.1361929327249527, "learning_rate": 4.6237662759314326e-05, "loss": 0.8305, "step": 3686 }, { "epoch": 4.199430199430199, "grad_norm": 0.17344947159290314, "learning_rate": 4.623520430841383e-05, "loss": 0.7241, "step": 3687 }, { "epoch": 4.2005698005698004, "grad_norm": 0.17485690116882324, "learning_rate": 4.6232745119956276e-05, "loss": 0.7006, "step": 3688 }, { "epoch": 4.201709401709402, "grad_norm": 0.12447243183851242, "learning_rate": 4.62302851940271e-05, "loss": 0.9402, "step": 3689 }, { "epoch": 4.202849002849002, "grad_norm": 0.1367679089307785, "learning_rate": 4.6227824530711736e-05, "loss": 0.9485, "step": 3690 }, { "epoch": 4.203988603988604, "grad_norm": 0.18233831226825714, "learning_rate": 4.622536313009565e-05, "loss": 0.618, "step": 3691 }, { "epoch": 4.205128205128205, "grad_norm": 0.16943730413913727, "learning_rate": 4.6222900992264336e-05, "loss": 0.8441, "step": 3692 }, { "epoch": 4.206267806267807, "grad_norm": 0.17953555285930634, "learning_rate": 4.62204381173033e-05, "loss": 0.6973, "step": 3693 }, { "epoch": 4.207407407407407, "grad_norm": 0.15341761708259583, "learning_rate": 4.62179745052981e-05, "loss": 0.8015, "step": 3694 }, { "epoch": 4.208547008547009, "grad_norm": 0.15298165380954742, "learning_rate": 4.621551015633429e-05, "loss": 0.8267, "step": 3695 }, { "epoch": 4.20968660968661, "grad_norm": 0.1532493233680725, "learning_rate": 4.621304507049747e-05, "loss": 0.8052, "step": 3696 }, { "epoch": 4.210826210826211, "grad_norm": 0.13848675787448883, "learning_rate": 4.621057924787325e-05, "loss": 1.0222, "step": 3697 }, { "epoch": 4.211965811965812, "grad_norm": 0.1508934497833252, "learning_rate": 4.62081126885473e-05, "loss": 0.8087, "step": 3698 }, { "epoch": 4.2131054131054135, "grad_norm": 0.1419859081506729, "learning_rate": 4.6205645392605267e-05, "loss": 0.8482, "step": 3699 }, { "epoch": 4.214245014245014, "grad_norm": 0.15996615588665009, "learning_rate": 4.620317736013285e-05, "loss": 0.8327, "step": 3700 }, { "epoch": 4.2153846153846155, "grad_norm": 0.15426044166088104, "learning_rate": 4.620070859121578e-05, "loss": 0.7603, "step": 3701 }, { "epoch": 4.216524216524217, "grad_norm": 0.17264823615550995, "learning_rate": 4.61982390859398e-05, "loss": 0.8172, "step": 3702 }, { "epoch": 4.2176638176638175, "grad_norm": 0.19376209378242493, "learning_rate": 4.6195768844390674e-05, "loss": 0.6843, "step": 3703 }, { "epoch": 4.218803418803419, "grad_norm": 0.14135196805000305, "learning_rate": 4.6193297866654203e-05, "loss": 0.9739, "step": 3704 }, { "epoch": 4.21994301994302, "grad_norm": 0.18155446648597717, "learning_rate": 4.6190826152816225e-05, "loss": 0.7857, "step": 3705 }, { "epoch": 4.221082621082621, "grad_norm": 0.14685101807117462, "learning_rate": 4.618835370296258e-05, "loss": 0.8411, "step": 3706 }, { "epoch": 4.222222222222222, "grad_norm": 0.1883654147386551, "learning_rate": 4.618588051717914e-05, "loss": 0.7015, "step": 3707 }, { "epoch": 4.223361823361824, "grad_norm": 0.16308410465717316, "learning_rate": 4.618340659555181e-05, "loss": 0.8265, "step": 3708 }, { "epoch": 4.224501424501424, "grad_norm": 0.1645018309354782, "learning_rate": 4.618093193816652e-05, "loss": 0.6949, "step": 3709 }, { "epoch": 4.225641025641026, "grad_norm": 0.17556124925613403, "learning_rate": 4.61784565451092e-05, "loss": 0.6353, "step": 3710 }, { "epoch": 4.226780626780627, "grad_norm": 0.15007174015045166, "learning_rate": 4.617598041646585e-05, "loss": 0.7372, "step": 3711 }, { "epoch": 4.227920227920228, "grad_norm": 0.14627604186534882, "learning_rate": 4.617350355232247e-05, "loss": 0.9112, "step": 3712 }, { "epoch": 4.229059829059829, "grad_norm": 0.1603684425354004, "learning_rate": 4.617102595276508e-05, "loss": 0.84, "step": 3713 }, { "epoch": 4.230199430199431, "grad_norm": 0.15586189925670624, "learning_rate": 4.616854761787974e-05, "loss": 0.7862, "step": 3714 }, { "epoch": 4.231339031339031, "grad_norm": 0.16218826174736023, "learning_rate": 4.616606854775253e-05, "loss": 0.7294, "step": 3715 }, { "epoch": 4.2324786324786325, "grad_norm": 0.142206072807312, "learning_rate": 4.6163588742469556e-05, "loss": 0.8574, "step": 3716 }, { "epoch": 4.233618233618234, "grad_norm": 0.1761651635169983, "learning_rate": 4.616110820211693e-05, "loss": 0.7301, "step": 3717 }, { "epoch": 4.2347578347578345, "grad_norm": 0.14478951692581177, "learning_rate": 4.6158626926780835e-05, "loss": 0.8113, "step": 3718 }, { "epoch": 4.235897435897436, "grad_norm": 0.18671946227550507, "learning_rate": 4.615614491654744e-05, "loss": 0.6897, "step": 3719 }, { "epoch": 4.237037037037037, "grad_norm": 0.16866810619831085, "learning_rate": 4.615366217150295e-05, "loss": 0.8514, "step": 3720 }, { "epoch": 4.238176638176638, "grad_norm": 0.16729912161827087, "learning_rate": 4.6151178691733596e-05, "loss": 0.8627, "step": 3721 }, { "epoch": 4.239316239316239, "grad_norm": 0.16953368484973907, "learning_rate": 4.614869447732564e-05, "loss": 0.8992, "step": 3722 }, { "epoch": 4.240455840455841, "grad_norm": 0.12552765011787415, "learning_rate": 4.6146209528365376e-05, "loss": 0.9473, "step": 3723 }, { "epoch": 4.241595441595441, "grad_norm": 0.16287629306316376, "learning_rate": 4.614372384493909e-05, "loss": 0.7719, "step": 3724 }, { "epoch": 4.242735042735043, "grad_norm": 0.1784881204366684, "learning_rate": 4.614123742713314e-05, "loss": 0.7364, "step": 3725 }, { "epoch": 4.243874643874644, "grad_norm": 0.15355971455574036, "learning_rate": 4.613875027503387e-05, "loss": 0.8259, "step": 3726 }, { "epoch": 4.245014245014245, "grad_norm": 0.1835944652557373, "learning_rate": 4.613626238872767e-05, "loss": 0.7589, "step": 3727 }, { "epoch": 4.246153846153846, "grad_norm": 0.1890999972820282, "learning_rate": 4.613377376830096e-05, "loss": 0.5468, "step": 3728 }, { "epoch": 4.247293447293448, "grad_norm": 0.16878777742385864, "learning_rate": 4.613128441384016e-05, "loss": 0.7012, "step": 3729 }, { "epoch": 4.248433048433048, "grad_norm": 0.1307375282049179, "learning_rate": 4.612879432543175e-05, "loss": 0.9886, "step": 3730 }, { "epoch": 4.24957264957265, "grad_norm": 0.1786198765039444, "learning_rate": 4.6126303503162196e-05, "loss": 0.8062, "step": 3731 }, { "epoch": 4.250712250712251, "grad_norm": 0.192620649933815, "learning_rate": 4.612381194711803e-05, "loss": 0.6136, "step": 3732 }, { "epoch": 4.2518518518518515, "grad_norm": 0.15778356790542603, "learning_rate": 4.612131965738579e-05, "loss": 1.0118, "step": 3733 }, { "epoch": 4.252991452991453, "grad_norm": 0.17104782164096832, "learning_rate": 4.611882663405203e-05, "loss": 0.6261, "step": 3734 }, { "epoch": 4.254131054131054, "grad_norm": 0.17603422701358795, "learning_rate": 4.6116332877203344e-05, "loss": 0.913, "step": 3735 }, { "epoch": 4.255270655270655, "grad_norm": 0.15426822006702423, "learning_rate": 4.611383838692635e-05, "loss": 0.7447, "step": 3736 }, { "epoch": 4.256410256410256, "grad_norm": 0.19463273882865906, "learning_rate": 4.611134316330768e-05, "loss": 0.7212, "step": 3737 }, { "epoch": 4.257549857549858, "grad_norm": 0.16557398438453674, "learning_rate": 4.6108847206434e-05, "loss": 0.8571, "step": 3738 }, { "epoch": 4.258689458689458, "grad_norm": 0.17092663049697876, "learning_rate": 4.610635051639202e-05, "loss": 0.7672, "step": 3739 }, { "epoch": 4.25982905982906, "grad_norm": 0.1692628413438797, "learning_rate": 4.610385309326844e-05, "loss": 0.6786, "step": 3740 }, { "epoch": 4.260968660968661, "grad_norm": 0.18885384500026703, "learning_rate": 4.610135493715e-05, "loss": 0.8426, "step": 3741 }, { "epoch": 4.262108262108262, "grad_norm": 0.20354245603084564, "learning_rate": 4.6098856048123485e-05, "loss": 0.7192, "step": 3742 }, { "epoch": 4.263247863247863, "grad_norm": 0.14963997900485992, "learning_rate": 4.6096356426275674e-05, "loss": 0.8953, "step": 3743 }, { "epoch": 4.264387464387465, "grad_norm": 0.16042745113372803, "learning_rate": 4.6093856071693386e-05, "loss": 0.7398, "step": 3744 }, { "epoch": 4.265527065527065, "grad_norm": 0.1531853973865509, "learning_rate": 4.6091354984463465e-05, "loss": 0.8418, "step": 3745 }, { "epoch": 4.266666666666667, "grad_norm": 0.14608848094940186, "learning_rate": 4.608885316467279e-05, "loss": 0.8293, "step": 3746 }, { "epoch": 4.267806267806268, "grad_norm": 0.1838957965373993, "learning_rate": 4.6086350612408246e-05, "loss": 0.7711, "step": 3747 }, { "epoch": 4.268945868945869, "grad_norm": 0.16072875261306763, "learning_rate": 4.6083847327756766e-05, "loss": 0.7368, "step": 3748 }, { "epoch": 4.27008547008547, "grad_norm": 0.1710791140794754, "learning_rate": 4.6081343310805275e-05, "loss": 0.7877, "step": 3749 }, { "epoch": 4.2712250712250714, "grad_norm": 0.18032675981521606, "learning_rate": 4.607883856164076e-05, "loss": 0.7567, "step": 3750 }, { "epoch": 4.272364672364672, "grad_norm": 0.16010992228984833, "learning_rate": 4.607633308035022e-05, "loss": 0.8425, "step": 3751 }, { "epoch": 4.273504273504273, "grad_norm": 0.15786418318748474, "learning_rate": 4.607382686702067e-05, "loss": 0.8954, "step": 3752 }, { "epoch": 4.274643874643875, "grad_norm": 0.18093998730182648, "learning_rate": 4.607131992173916e-05, "loss": 0.8716, "step": 3753 }, { "epoch": 4.275783475783475, "grad_norm": 0.1507583111524582, "learning_rate": 4.6068812244592766e-05, "loss": 0.7992, "step": 3754 }, { "epoch": 4.276923076923077, "grad_norm": 0.15738655626773834, "learning_rate": 4.6066303835668575e-05, "loss": 0.8685, "step": 3755 }, { "epoch": 4.278062678062678, "grad_norm": 0.14686685800552368, "learning_rate": 4.6063794695053716e-05, "loss": 0.935, "step": 3756 }, { "epoch": 4.279202279202279, "grad_norm": 0.14715251326560974, "learning_rate": 4.606128482283535e-05, "loss": 0.8501, "step": 3757 }, { "epoch": 4.28034188034188, "grad_norm": 0.16606487333774567, "learning_rate": 4.6058774219100646e-05, "loss": 0.7991, "step": 3758 }, { "epoch": 4.281481481481482, "grad_norm": 0.16110959649085999, "learning_rate": 4.60562628839368e-05, "loss": 0.7071, "step": 3759 }, { "epoch": 4.282621082621082, "grad_norm": 0.19022086262702942, "learning_rate": 4.605375081743104e-05, "loss": 0.6373, "step": 3760 }, { "epoch": 4.283760683760684, "grad_norm": 0.17297887802124023, "learning_rate": 4.605123801967061e-05, "loss": 0.8084, "step": 3761 }, { "epoch": 4.284900284900285, "grad_norm": 0.15393395721912384, "learning_rate": 4.60487244907428e-05, "loss": 0.7803, "step": 3762 }, { "epoch": 4.286039886039886, "grad_norm": 0.14357399940490723, "learning_rate": 4.60462102307349e-05, "loss": 0.7516, "step": 3763 }, { "epoch": 4.287179487179487, "grad_norm": 0.17141439020633698, "learning_rate": 4.604369523973424e-05, "loss": 0.7055, "step": 3764 }, { "epoch": 4.2883190883190885, "grad_norm": 0.15102678537368774, "learning_rate": 4.604117951782817e-05, "loss": 0.8778, "step": 3765 }, { "epoch": 4.289458689458689, "grad_norm": 0.1764611452817917, "learning_rate": 4.603866306510408e-05, "loss": 0.8287, "step": 3766 }, { "epoch": 4.2905982905982905, "grad_norm": 0.1734817773103714, "learning_rate": 4.603614588164936e-05, "loss": 0.7691, "step": 3767 }, { "epoch": 4.291737891737892, "grad_norm": 0.1688023954629898, "learning_rate": 4.603362796755145e-05, "loss": 0.8033, "step": 3768 }, { "epoch": 4.292877492877492, "grad_norm": 0.19275137782096863, "learning_rate": 4.6031109322897794e-05, "loss": 0.5918, "step": 3769 }, { "epoch": 4.294017094017094, "grad_norm": 0.18453450500965118, "learning_rate": 4.6028589947775866e-05, "loss": 0.575, "step": 3770 }, { "epoch": 4.295156695156695, "grad_norm": 0.16129399836063385, "learning_rate": 4.6026069842273196e-05, "loss": 0.7724, "step": 3771 }, { "epoch": 4.296296296296296, "grad_norm": 0.1554025113582611, "learning_rate": 4.602354900647729e-05, "loss": 0.9147, "step": 3772 }, { "epoch": 4.297435897435897, "grad_norm": 0.18058346211910248, "learning_rate": 4.602102744047572e-05, "loss": 0.7304, "step": 3773 }, { "epoch": 4.298575498575499, "grad_norm": 0.1559993177652359, "learning_rate": 4.601850514435605e-05, "loss": 0.8581, "step": 3774 }, { "epoch": 4.2997150997151, "grad_norm": 0.20117342472076416, "learning_rate": 4.60159821182059e-05, "loss": 0.7591, "step": 3775 }, { "epoch": 4.300854700854701, "grad_norm": 0.13583272695541382, "learning_rate": 4.6013458362112895e-05, "loss": 1.0527, "step": 3776 }, { "epoch": 4.301994301994302, "grad_norm": 0.14380411803722382, "learning_rate": 4.6010933876164694e-05, "loss": 0.9164, "step": 3777 }, { "epoch": 4.3031339031339035, "grad_norm": 0.17932040989398956, "learning_rate": 4.600840866044898e-05, "loss": 0.8558, "step": 3778 }, { "epoch": 4.304273504273504, "grad_norm": 0.17651857435703278, "learning_rate": 4.600588271505346e-05, "loss": 0.7339, "step": 3779 }, { "epoch": 4.3054131054131055, "grad_norm": 0.18811674416065216, "learning_rate": 4.600335604006586e-05, "loss": 0.7801, "step": 3780 }, { "epoch": 4.306552706552707, "grad_norm": 0.15561941266059875, "learning_rate": 4.6000828635573955e-05, "loss": 0.9072, "step": 3781 }, { "epoch": 4.3076923076923075, "grad_norm": 0.1979294717311859, "learning_rate": 4.599830050166551e-05, "loss": 0.7837, "step": 3782 }, { "epoch": 4.308831908831909, "grad_norm": 0.15381856262683868, "learning_rate": 4.5995771638428355e-05, "loss": 0.979, "step": 3783 }, { "epoch": 4.30997150997151, "grad_norm": 0.1749049872159958, "learning_rate": 4.5993242045950304e-05, "loss": 0.7201, "step": 3784 }, { "epoch": 4.311111111111111, "grad_norm": 0.1437012404203415, "learning_rate": 4.599071172431922e-05, "loss": 0.7694, "step": 3785 }, { "epoch": 4.312250712250712, "grad_norm": 0.1626712679862976, "learning_rate": 4.5988180673623e-05, "loss": 0.7283, "step": 3786 }, { "epoch": 4.313390313390314, "grad_norm": 0.18755604326725006, "learning_rate": 4.5985648893949554e-05, "loss": 0.595, "step": 3787 }, { "epoch": 4.314529914529914, "grad_norm": 0.16814053058624268, "learning_rate": 4.5983116385386805e-05, "loss": 0.6691, "step": 3788 }, { "epoch": 4.315669515669516, "grad_norm": 0.15925413370132446, "learning_rate": 4.598058314802271e-05, "loss": 0.9136, "step": 3789 }, { "epoch": 4.316809116809117, "grad_norm": 0.1466931253671646, "learning_rate": 4.597804918194528e-05, "loss": 0.8129, "step": 3790 }, { "epoch": 4.317948717948718, "grad_norm": 0.1601477861404419, "learning_rate": 4.597551448724251e-05, "loss": 0.7165, "step": 3791 }, { "epoch": 4.319088319088319, "grad_norm": 0.17426732182502747, "learning_rate": 4.5972979064002444e-05, "loss": 0.7991, "step": 3792 }, { "epoch": 4.320227920227921, "grad_norm": 0.1685943603515625, "learning_rate": 4.597044291231313e-05, "loss": 0.9124, "step": 3793 }, { "epoch": 4.321367521367521, "grad_norm": 0.16514858603477478, "learning_rate": 4.5967906032262666e-05, "loss": 0.8468, "step": 3794 }, { "epoch": 4.3225071225071225, "grad_norm": 0.15124280750751495, "learning_rate": 4.596536842393916e-05, "loss": 0.7713, "step": 3795 }, { "epoch": 4.323646723646724, "grad_norm": 0.19503721594810486, "learning_rate": 4.596283008743075e-05, "loss": 0.5219, "step": 3796 }, { "epoch": 4.3247863247863245, "grad_norm": 0.1756066083908081, "learning_rate": 4.596029102282562e-05, "loss": 0.8621, "step": 3797 }, { "epoch": 4.325925925925926, "grad_norm": 0.1294962763786316, "learning_rate": 4.595775123021193e-05, "loss": 0.9163, "step": 3798 }, { "epoch": 4.327065527065527, "grad_norm": 0.14610126614570618, "learning_rate": 4.595521070967791e-05, "loss": 0.8526, "step": 3799 }, { "epoch": 4.328205128205128, "grad_norm": 0.15656009316444397, "learning_rate": 4.5952669461311795e-05, "loss": 0.7453, "step": 3800 }, { "epoch": 4.329344729344729, "grad_norm": 0.141379714012146, "learning_rate": 4.5950127485201844e-05, "loss": 0.8304, "step": 3801 }, { "epoch": 4.330484330484331, "grad_norm": 0.15407662093639374, "learning_rate": 4.5947584781436354e-05, "loss": 0.6151, "step": 3802 }, { "epoch": 4.331623931623931, "grad_norm": 0.18312354385852814, "learning_rate": 4.594504135010364e-05, "loss": 0.6962, "step": 3803 }, { "epoch": 4.332763532763533, "grad_norm": 0.162358358502388, "learning_rate": 4.5942497191292045e-05, "loss": 0.741, "step": 3804 }, { "epoch": 4.333903133903134, "grad_norm": 0.13303545117378235, "learning_rate": 4.5939952305089923e-05, "loss": 0.9005, "step": 3805 }, { "epoch": 4.335042735042735, "grad_norm": 0.16208922863006592, "learning_rate": 4.593740669158568e-05, "loss": 0.7788, "step": 3806 }, { "epoch": 4.336182336182336, "grad_norm": 0.21589314937591553, "learning_rate": 4.5934860350867715e-05, "loss": 0.5786, "step": 3807 }, { "epoch": 4.337321937321938, "grad_norm": 0.17553921043872833, "learning_rate": 4.593231328302449e-05, "loss": 0.8094, "step": 3808 }, { "epoch": 4.338461538461538, "grad_norm": 0.15923000872135162, "learning_rate": 4.592976548814445e-05, "loss": 0.8614, "step": 3809 }, { "epoch": 4.33960113960114, "grad_norm": 0.16006356477737427, "learning_rate": 4.59272169663161e-05, "loss": 0.8729, "step": 3810 }, { "epoch": 4.340740740740741, "grad_norm": 0.17715489864349365, "learning_rate": 4.5924667717627964e-05, "loss": 0.733, "step": 3811 }, { "epoch": 4.3418803418803416, "grad_norm": 0.13069522380828857, "learning_rate": 4.5922117742168566e-05, "loss": 1.0037, "step": 3812 }, { "epoch": 4.343019943019943, "grad_norm": 0.20016714930534363, "learning_rate": 4.5919567040026484e-05, "loss": 0.5621, "step": 3813 }, { "epoch": 4.344159544159544, "grad_norm": 0.16844871640205383, "learning_rate": 4.591701561129031e-05, "loss": 0.6644, "step": 3814 }, { "epoch": 4.345299145299145, "grad_norm": 0.16659404337406158, "learning_rate": 4.591446345604866e-05, "loss": 0.885, "step": 3815 }, { "epoch": 4.346438746438746, "grad_norm": 0.13851837813854218, "learning_rate": 4.591191057439018e-05, "loss": 0.8889, "step": 3816 }, { "epoch": 4.347578347578348, "grad_norm": 0.14378002285957336, "learning_rate": 4.590935696640354e-05, "loss": 0.8223, "step": 3817 }, { "epoch": 4.348717948717948, "grad_norm": 0.1335776001214981, "learning_rate": 4.5906802632177434e-05, "loss": 0.8589, "step": 3818 }, { "epoch": 4.34985754985755, "grad_norm": 0.15075504779815674, "learning_rate": 4.590424757180057e-05, "loss": 0.8288, "step": 3819 }, { "epoch": 4.350997150997151, "grad_norm": 0.17949166893959045, "learning_rate": 4.590169178536171e-05, "loss": 0.7027, "step": 3820 }, { "epoch": 4.352136752136752, "grad_norm": 0.1760595142841339, "learning_rate": 4.589913527294961e-05, "loss": 0.8434, "step": 3821 }, { "epoch": 4.353276353276353, "grad_norm": 0.1611568033695221, "learning_rate": 4.589657803465308e-05, "loss": 0.8463, "step": 3822 }, { "epoch": 4.354415954415955, "grad_norm": 0.1709708869457245, "learning_rate": 4.5894020070560914e-05, "loss": 0.7497, "step": 3823 }, { "epoch": 4.355555555555555, "grad_norm": 0.19568359851837158, "learning_rate": 4.589146138076198e-05, "loss": 0.7625, "step": 3824 }, { "epoch": 4.356695156695157, "grad_norm": 0.1605634093284607, "learning_rate": 4.588890196534513e-05, "loss": 0.851, "step": 3825 }, { "epoch": 4.357834757834758, "grad_norm": 0.1573217362165451, "learning_rate": 4.588634182439928e-05, "loss": 0.8307, "step": 3826 }, { "epoch": 4.358974358974359, "grad_norm": 0.15628954768180847, "learning_rate": 4.588378095801335e-05, "loss": 0.8441, "step": 3827 }, { "epoch": 4.36011396011396, "grad_norm": 0.14356233179569244, "learning_rate": 4.588121936627626e-05, "loss": 0.9888, "step": 3828 }, { "epoch": 4.3612535612535615, "grad_norm": 0.1678401380777359, "learning_rate": 4.5878657049276996e-05, "loss": 0.6547, "step": 3829 }, { "epoch": 4.362393162393162, "grad_norm": 0.17374159395694733, "learning_rate": 4.587609400710456e-05, "loss": 0.7695, "step": 3830 }, { "epoch": 4.363532763532763, "grad_norm": 0.1446637511253357, "learning_rate": 4.5873530239847986e-05, "loss": 0.8876, "step": 3831 }, { "epoch": 4.364672364672365, "grad_norm": 0.14191895723342896, "learning_rate": 4.5870965747596286e-05, "loss": 0.8637, "step": 3832 }, { "epoch": 4.365811965811965, "grad_norm": 0.18284063041210175, "learning_rate": 4.586840053043856e-05, "loss": 0.7768, "step": 3833 }, { "epoch": 4.366951566951567, "grad_norm": 0.13698497414588928, "learning_rate": 4.5865834588463885e-05, "loss": 0.8514, "step": 3834 }, { "epoch": 4.368091168091168, "grad_norm": 0.21228642761707306, "learning_rate": 4.58632679217614e-05, "loss": 0.5781, "step": 3835 }, { "epoch": 4.36923076923077, "grad_norm": 0.15032905340194702, "learning_rate": 4.5860700530420244e-05, "loss": 0.8327, "step": 3836 }, { "epoch": 4.37037037037037, "grad_norm": 0.15202458202838898, "learning_rate": 4.58581324145296e-05, "loss": 0.8469, "step": 3837 }, { "epoch": 4.371509971509972, "grad_norm": 0.16406792402267456, "learning_rate": 4.5855563574178645e-05, "loss": 0.6976, "step": 3838 }, { "epoch": 4.372649572649573, "grad_norm": 0.1301998645067215, "learning_rate": 4.585299400945662e-05, "loss": 0.9388, "step": 3839 }, { "epoch": 4.373789173789174, "grad_norm": 0.1878965049982071, "learning_rate": 4.5850423720452764e-05, "loss": 0.7147, "step": 3840 }, { "epoch": 4.374928774928775, "grad_norm": 0.14204874634742737, "learning_rate": 4.584785270725636e-05, "loss": 0.8793, "step": 3841 }, { "epoch": 4.3760683760683765, "grad_norm": 0.16943851113319397, "learning_rate": 4.584528096995669e-05, "loss": 0.6463, "step": 3842 }, { "epoch": 4.377207977207977, "grad_norm": 0.17809520661830902, "learning_rate": 4.58427085086431e-05, "loss": 0.7687, "step": 3843 }, { "epoch": 4.3783475783475785, "grad_norm": 0.16223205626010895, "learning_rate": 4.584013532340491e-05, "loss": 0.892, "step": 3844 }, { "epoch": 4.37948717948718, "grad_norm": 0.19467252492904663, "learning_rate": 4.583756141433152e-05, "loss": 0.7755, "step": 3845 }, { "epoch": 4.3806267806267805, "grad_norm": 0.18585580587387085, "learning_rate": 4.583498678151231e-05, "loss": 0.6314, "step": 3846 }, { "epoch": 4.381766381766382, "grad_norm": 0.1589478999376297, "learning_rate": 4.5832411425036724e-05, "loss": 0.7406, "step": 3847 }, { "epoch": 4.382905982905983, "grad_norm": 0.17925570905208588, "learning_rate": 4.582983534499419e-05, "loss": 0.6538, "step": 3848 }, { "epoch": 4.384045584045584, "grad_norm": 0.16629046201705933, "learning_rate": 4.58272585414742e-05, "loss": 0.7406, "step": 3849 }, { "epoch": 4.385185185185185, "grad_norm": 0.16629096865653992, "learning_rate": 4.582468101456624e-05, "loss": 0.8754, "step": 3850 }, { "epoch": 4.386324786324787, "grad_norm": 0.1374756097793579, "learning_rate": 4.582210276435984e-05, "loss": 0.8782, "step": 3851 }, { "epoch": 4.387464387464387, "grad_norm": 0.2001880705356598, "learning_rate": 4.581952379094456e-05, "loss": 0.676, "step": 3852 }, { "epoch": 4.388603988603989, "grad_norm": 0.16196000576019287, "learning_rate": 4.581694409440995e-05, "loss": 0.7972, "step": 3853 }, { "epoch": 4.38974358974359, "grad_norm": 0.1636325716972351, "learning_rate": 4.581436367484564e-05, "loss": 0.7659, "step": 3854 }, { "epoch": 4.390883190883191, "grad_norm": 0.14268454909324646, "learning_rate": 4.581178253234123e-05, "loss": 0.7647, "step": 3855 }, { "epoch": 4.392022792022792, "grad_norm": 0.17906972765922546, "learning_rate": 4.5809200666986386e-05, "loss": 0.7858, "step": 3856 }, { "epoch": 4.3931623931623935, "grad_norm": 0.15701225399971008, "learning_rate": 4.580661807887077e-05, "loss": 0.6254, "step": 3857 }, { "epoch": 4.394301994301994, "grad_norm": 0.18031233549118042, "learning_rate": 4.580403476808409e-05, "loss": 0.8396, "step": 3858 }, { "epoch": 4.3954415954415955, "grad_norm": 0.17270846664905548, "learning_rate": 4.5801450734716075e-05, "loss": 0.7001, "step": 3859 }, { "epoch": 4.396581196581197, "grad_norm": 0.15081731975078583, "learning_rate": 4.579886597885648e-05, "loss": 0.9368, "step": 3860 }, { "epoch": 4.3977207977207975, "grad_norm": 0.16598904132843018, "learning_rate": 4.5796280500595055e-05, "loss": 0.8393, "step": 3861 }, { "epoch": 4.398860398860399, "grad_norm": 0.16655343770980835, "learning_rate": 4.579369430002163e-05, "loss": 0.7856, "step": 3862 }, { "epoch": 4.4, "grad_norm": 0.1844964474439621, "learning_rate": 4.5791107377226017e-05, "loss": 0.8161, "step": 3863 }, { "epoch": 4.401139601139601, "grad_norm": 0.15868549048900604, "learning_rate": 4.578851973229806e-05, "loss": 0.9674, "step": 3864 }, { "epoch": 4.402279202279202, "grad_norm": 0.17895255982875824, "learning_rate": 4.578593136532766e-05, "loss": 0.8309, "step": 3865 }, { "epoch": 4.403418803418804, "grad_norm": 0.16036298871040344, "learning_rate": 4.578334227640469e-05, "loss": 0.8343, "step": 3866 }, { "epoch": 4.404558404558404, "grad_norm": 0.15244902670383453, "learning_rate": 4.578075246561909e-05, "loss": 0.8033, "step": 3867 }, { "epoch": 4.405698005698006, "grad_norm": 0.18762405216693878, "learning_rate": 4.577816193306081e-05, "loss": 0.7284, "step": 3868 }, { "epoch": 4.406837606837607, "grad_norm": 0.17961227893829346, "learning_rate": 4.5775570678819835e-05, "loss": 0.8169, "step": 3869 }, { "epoch": 4.407977207977208, "grad_norm": 0.16521014273166656, "learning_rate": 4.5772978702986155e-05, "loss": 0.8817, "step": 3870 }, { "epoch": 4.409116809116809, "grad_norm": 0.16002601385116577, "learning_rate": 4.577038600564979e-05, "loss": 0.8827, "step": 3871 }, { "epoch": 4.410256410256411, "grad_norm": 0.20933003723621368, "learning_rate": 4.5767792586900805e-05, "loss": 0.5519, "step": 3872 }, { "epoch": 4.411396011396011, "grad_norm": 0.16851255297660828, "learning_rate": 4.5765198446829275e-05, "loss": 0.9285, "step": 3873 }, { "epoch": 4.4125356125356126, "grad_norm": 0.16128350794315338, "learning_rate": 4.576260358552529e-05, "loss": 0.8966, "step": 3874 }, { "epoch": 4.413675213675214, "grad_norm": 0.13369476795196533, "learning_rate": 4.576000800307899e-05, "loss": 0.8694, "step": 3875 }, { "epoch": 4.4148148148148145, "grad_norm": 0.14267423748970032, "learning_rate": 4.5757411699580524e-05, "loss": 0.8961, "step": 3876 }, { "epoch": 4.415954415954416, "grad_norm": 0.12827791273593903, "learning_rate": 4.5754814675120065e-05, "loss": 0.9615, "step": 3877 }, { "epoch": 4.417094017094017, "grad_norm": 0.17608094215393066, "learning_rate": 4.575221692978781e-05, "loss": 0.8662, "step": 3878 }, { "epoch": 4.418233618233618, "grad_norm": 0.13462728261947632, "learning_rate": 4.5749618463673996e-05, "loss": 0.8707, "step": 3879 }, { "epoch": 4.419373219373219, "grad_norm": 0.13011795282363892, "learning_rate": 4.574701927686887e-05, "loss": 0.9188, "step": 3880 }, { "epoch": 4.420512820512821, "grad_norm": 0.1956792026758194, "learning_rate": 4.5744419369462714e-05, "loss": 0.7091, "step": 3881 }, { "epoch": 4.421652421652421, "grad_norm": 0.17394132912158966, "learning_rate": 4.574181874154583e-05, "loss": 0.6981, "step": 3882 }, { "epoch": 4.422792022792023, "grad_norm": 0.17281357944011688, "learning_rate": 4.5739217393208534e-05, "loss": 0.8199, "step": 3883 }, { "epoch": 4.423931623931624, "grad_norm": 0.16370278596878052, "learning_rate": 4.573661532454119e-05, "loss": 0.8011, "step": 3884 }, { "epoch": 4.425071225071225, "grad_norm": 0.19170215725898743, "learning_rate": 4.5734012535634165e-05, "loss": 0.7614, "step": 3885 }, { "epoch": 4.426210826210826, "grad_norm": 0.15054161846637726, "learning_rate": 4.5731409026577866e-05, "loss": 0.8119, "step": 3886 }, { "epoch": 4.427350427350428, "grad_norm": 0.1631488800048828, "learning_rate": 4.572880479746272e-05, "loss": 0.7561, "step": 3887 }, { "epoch": 4.428490028490028, "grad_norm": 0.1880662888288498, "learning_rate": 4.572619984837918e-05, "loss": 0.7763, "step": 3888 }, { "epoch": 4.42962962962963, "grad_norm": 0.16619880497455597, "learning_rate": 4.572359417941772e-05, "loss": 0.8941, "step": 3889 }, { "epoch": 4.430769230769231, "grad_norm": 0.14971984922885895, "learning_rate": 4.572098779066884e-05, "loss": 0.8543, "step": 3890 }, { "epoch": 4.431908831908832, "grad_norm": 0.15457306802272797, "learning_rate": 4.571838068222308e-05, "loss": 0.9068, "step": 3891 }, { "epoch": 4.433048433048433, "grad_norm": 0.1719190776348114, "learning_rate": 4.571577285417098e-05, "loss": 0.78, "step": 3892 }, { "epoch": 4.434188034188034, "grad_norm": 0.14716239273548126, "learning_rate": 4.571316430660312e-05, "loss": 0.9203, "step": 3893 }, { "epoch": 4.435327635327635, "grad_norm": 0.14677460491657257, "learning_rate": 4.5710555039610106e-05, "loss": 0.7262, "step": 3894 }, { "epoch": 4.436467236467236, "grad_norm": 0.14483967423439026, "learning_rate": 4.570794505328256e-05, "loss": 0.8542, "step": 3895 }, { "epoch": 4.437606837606838, "grad_norm": 0.16969621181488037, "learning_rate": 4.570533434771113e-05, "loss": 0.86, "step": 3896 }, { "epoch": 4.438746438746438, "grad_norm": 0.15677432715892792, "learning_rate": 4.57027229229865e-05, "loss": 0.946, "step": 3897 }, { "epoch": 4.43988603988604, "grad_norm": 0.13845199346542358, "learning_rate": 4.570011077919937e-05, "loss": 0.9291, "step": 3898 }, { "epoch": 4.441025641025641, "grad_norm": 0.1764078140258789, "learning_rate": 4.5697497916440466e-05, "loss": 0.8655, "step": 3899 }, { "epoch": 4.442165242165242, "grad_norm": 0.1658494621515274, "learning_rate": 4.5694884334800545e-05, "loss": 0.7908, "step": 3900 }, { "epoch": 4.443304843304843, "grad_norm": 0.14494885504245758, "learning_rate": 4.569227003437038e-05, "loss": 0.8803, "step": 3901 }, { "epoch": 4.444444444444445, "grad_norm": 0.19196559488773346, "learning_rate": 4.568965501524077e-05, "loss": 0.7165, "step": 3902 }, { "epoch": 4.445584045584045, "grad_norm": 0.17780642211437225, "learning_rate": 4.5687039277502554e-05, "loss": 0.6957, "step": 3903 }, { "epoch": 4.446723646723647, "grad_norm": 0.14551912248134613, "learning_rate": 4.568442282124657e-05, "loss": 0.7998, "step": 3904 }, { "epoch": 4.447863247863248, "grad_norm": 0.1480732411146164, "learning_rate": 4.568180564656369e-05, "loss": 0.9287, "step": 3905 }, { "epoch": 4.449002849002849, "grad_norm": 0.16455687582492828, "learning_rate": 4.567918775354483e-05, "loss": 0.8353, "step": 3906 }, { "epoch": 4.45014245014245, "grad_norm": 0.15225185453891754, "learning_rate": 4.5676569142280906e-05, "loss": 0.96, "step": 3907 }, { "epoch": 4.4512820512820515, "grad_norm": 0.19792546331882477, "learning_rate": 4.567394981286288e-05, "loss": 0.6075, "step": 3908 }, { "epoch": 4.452421652421652, "grad_norm": 0.17776194214820862, "learning_rate": 4.567132976538172e-05, "loss": 0.6962, "step": 3909 }, { "epoch": 4.453561253561253, "grad_norm": 0.18801139295101166, "learning_rate": 4.566870899992844e-05, "loss": 0.691, "step": 3910 }, { "epoch": 4.454700854700855, "grad_norm": 0.13846080005168915, "learning_rate": 4.566608751659405e-05, "loss": 0.8789, "step": 3911 }, { "epoch": 4.455840455840455, "grad_norm": 0.14512869715690613, "learning_rate": 4.566346531546961e-05, "loss": 0.8805, "step": 3912 }, { "epoch": 4.456980056980057, "grad_norm": 0.17105993628501892, "learning_rate": 4.56608423966462e-05, "loss": 0.8784, "step": 3913 }, { "epoch": 4.458119658119658, "grad_norm": 0.1698828637599945, "learning_rate": 4.565821876021491e-05, "loss": 0.8107, "step": 3914 }, { "epoch": 4.459259259259259, "grad_norm": 0.1532195657491684, "learning_rate": 4.5655594406266865e-05, "loss": 0.9044, "step": 3915 }, { "epoch": 4.46039886039886, "grad_norm": 0.17213976383209229, "learning_rate": 4.565296933489324e-05, "loss": 0.8151, "step": 3916 }, { "epoch": 4.461538461538462, "grad_norm": 0.17563296854496002, "learning_rate": 4.5650343546185184e-05, "loss": 0.8405, "step": 3917 }, { "epoch": 4.462678062678062, "grad_norm": 0.1937597543001175, "learning_rate": 4.564771704023391e-05, "loss": 0.626, "step": 3918 }, { "epoch": 4.463817663817664, "grad_norm": 0.1611264944076538, "learning_rate": 4.5645089817130635e-05, "loss": 0.7432, "step": 3919 }, { "epoch": 4.464957264957265, "grad_norm": 0.17464205622673035, "learning_rate": 4.5642461876966626e-05, "loss": 0.8682, "step": 3920 }, { "epoch": 4.466096866096866, "grad_norm": 0.1247478649020195, "learning_rate": 4.5639833219833144e-05, "loss": 0.9462, "step": 3921 }, { "epoch": 4.467236467236467, "grad_norm": 0.1708550900220871, "learning_rate": 4.56372038458215e-05, "loss": 0.8219, "step": 3922 }, { "epoch": 4.4683760683760685, "grad_norm": 0.16138328611850739, "learning_rate": 4.563457375502301e-05, "loss": 0.9186, "step": 3923 }, { "epoch": 4.46951566951567, "grad_norm": 0.17023847997188568, "learning_rate": 4.563194294752903e-05, "loss": 0.8432, "step": 3924 }, { "epoch": 4.4706552706552705, "grad_norm": 0.17982469499111176, "learning_rate": 4.562931142343093e-05, "loss": 0.8291, "step": 3925 }, { "epoch": 4.471794871794872, "grad_norm": 0.17798741161823273, "learning_rate": 4.562667918282011e-05, "loss": 0.7993, "step": 3926 }, { "epoch": 4.472934472934473, "grad_norm": 0.1720968335866928, "learning_rate": 4.562404622578801e-05, "loss": 0.7403, "step": 3927 }, { "epoch": 4.474074074074074, "grad_norm": 0.1429809182882309, "learning_rate": 4.5621412552426055e-05, "loss": 0.9623, "step": 3928 }, { "epoch": 4.475213675213675, "grad_norm": 0.1690247803926468, "learning_rate": 4.5618778162825744e-05, "loss": 0.8427, "step": 3929 }, { "epoch": 4.476353276353277, "grad_norm": 0.15227466821670532, "learning_rate": 4.5616143057078565e-05, "loss": 0.845, "step": 3930 }, { "epoch": 4.477492877492877, "grad_norm": 0.1729937195777893, "learning_rate": 4.561350723527603e-05, "loss": 0.8234, "step": 3931 }, { "epoch": 4.478632478632479, "grad_norm": 0.20464777946472168, "learning_rate": 4.561087069750971e-05, "loss": 0.6496, "step": 3932 }, { "epoch": 4.47977207977208, "grad_norm": 0.171608105301857, "learning_rate": 4.5608233443871175e-05, "loss": 0.7075, "step": 3933 }, { "epoch": 4.480911680911681, "grad_norm": 0.17307965457439423, "learning_rate": 4.560559547445201e-05, "loss": 0.8016, "step": 3934 }, { "epoch": 4.482051282051282, "grad_norm": 0.18997186422348022, "learning_rate": 4.560295678934386e-05, "loss": 0.7345, "step": 3935 }, { "epoch": 4.4831908831908835, "grad_norm": 0.15222175419330597, "learning_rate": 4.560031738863836e-05, "loss": 0.8991, "step": 3936 }, { "epoch": 4.484330484330484, "grad_norm": 0.15029212832450867, "learning_rate": 4.559767727242718e-05, "loss": 0.9076, "step": 3937 }, { "epoch": 4.4854700854700855, "grad_norm": 0.16409356892108917, "learning_rate": 4.559503644080203e-05, "loss": 0.763, "step": 3938 }, { "epoch": 4.486609686609687, "grad_norm": 0.172045037150383, "learning_rate": 4.559239489385462e-05, "loss": 0.7699, "step": 3939 }, { "epoch": 4.4877492877492875, "grad_norm": 0.1409396380186081, "learning_rate": 4.5589752631676715e-05, "loss": 0.8871, "step": 3940 }, { "epoch": 4.488888888888889, "grad_norm": 0.1952497512102127, "learning_rate": 4.558710965436007e-05, "loss": 0.6181, "step": 3941 }, { "epoch": 4.49002849002849, "grad_norm": 0.16855819523334503, "learning_rate": 4.55844659619965e-05, "loss": 0.8144, "step": 3942 }, { "epoch": 4.491168091168091, "grad_norm": 0.19144080579280853, "learning_rate": 4.558182155467782e-05, "loss": 0.7071, "step": 3943 }, { "epoch": 4.492307692307692, "grad_norm": 0.20134466886520386, "learning_rate": 4.557917643249588e-05, "loss": 0.5634, "step": 3944 }, { "epoch": 4.493447293447294, "grad_norm": 0.1740138977766037, "learning_rate": 4.557653059554253e-05, "loss": 0.9931, "step": 3945 }, { "epoch": 4.494586894586894, "grad_norm": 0.14771761000156403, "learning_rate": 4.5573884043909704e-05, "loss": 0.8312, "step": 3946 }, { "epoch": 4.495726495726496, "grad_norm": 0.16895060241222382, "learning_rate": 4.55712367776893e-05, "loss": 0.8973, "step": 3947 }, { "epoch": 4.496866096866097, "grad_norm": 0.15613681077957153, "learning_rate": 4.5568588796973276e-05, "loss": 0.7486, "step": 3948 }, { "epoch": 4.498005698005698, "grad_norm": 0.19161924719810486, "learning_rate": 4.55659401018536e-05, "loss": 0.8637, "step": 3949 }, { "epoch": 4.499145299145299, "grad_norm": 0.15739668905735016, "learning_rate": 4.5563290692422266e-05, "loss": 0.8484, "step": 3950 }, { "epoch": 4.500284900284901, "grad_norm": 0.13696597516536713, "learning_rate": 4.5560640568771296e-05, "loss": 0.9657, "step": 3951 }, { "epoch": 4.501424501424501, "grad_norm": 0.16999183595180511, "learning_rate": 4.555798973099274e-05, "loss": 0.7686, "step": 3952 }, { "epoch": 4.5025641025641026, "grad_norm": 0.1501314640045166, "learning_rate": 4.5555338179178664e-05, "loss": 0.8097, "step": 3953 }, { "epoch": 4.503703703703704, "grad_norm": 0.15649497509002686, "learning_rate": 4.555268591342116e-05, "loss": 0.7179, "step": 3954 }, { "epoch": 4.5048433048433045, "grad_norm": 0.18868868052959442, "learning_rate": 4.555003293381236e-05, "loss": 0.7413, "step": 3955 }, { "epoch": 4.505982905982906, "grad_norm": 0.1577426791191101, "learning_rate": 4.5547379240444396e-05, "loss": 0.809, "step": 3956 }, { "epoch": 4.507122507122507, "grad_norm": 0.16229870915412903, "learning_rate": 4.5544724833409456e-05, "loss": 0.8509, "step": 3957 }, { "epoch": 4.508262108262108, "grad_norm": 0.15741589665412903, "learning_rate": 4.554206971279972e-05, "loss": 1.0146, "step": 3958 }, { "epoch": 4.509401709401709, "grad_norm": 0.14903536438941956, "learning_rate": 4.5539413878707414e-05, "loss": 0.8996, "step": 3959 }, { "epoch": 4.510541310541311, "grad_norm": 0.17023679614067078, "learning_rate": 4.5536757331224777e-05, "loss": 0.8998, "step": 3960 }, { "epoch": 4.511680911680911, "grad_norm": 0.1575339436531067, "learning_rate": 4.5534100070444086e-05, "loss": 0.875, "step": 3961 }, { "epoch": 4.512820512820513, "grad_norm": 0.1648140400648117, "learning_rate": 4.553144209645763e-05, "loss": 0.756, "step": 3962 }, { "epoch": 4.513960113960114, "grad_norm": 0.151093527674675, "learning_rate": 4.5528783409357725e-05, "loss": 0.8153, "step": 3963 }, { "epoch": 4.515099715099715, "grad_norm": 0.1625763326883316, "learning_rate": 4.552612400923672e-05, "loss": 0.7967, "step": 3964 }, { "epoch": 4.516239316239316, "grad_norm": 0.20917679369449615, "learning_rate": 4.552346389618698e-05, "loss": 0.5582, "step": 3965 }, { "epoch": 4.517378917378918, "grad_norm": 0.18874263763427734, "learning_rate": 4.5520803070300897e-05, "loss": 0.69, "step": 3966 }, { "epoch": 4.518518518518518, "grad_norm": 0.13929365575313568, "learning_rate": 4.551814153167089e-05, "loss": 0.8557, "step": 3967 }, { "epoch": 4.51965811965812, "grad_norm": 0.16986872255802155, "learning_rate": 4.5515479280389404e-05, "loss": 0.7729, "step": 3968 }, { "epoch": 4.520797720797721, "grad_norm": 0.1871488243341446, "learning_rate": 4.551281631654891e-05, "loss": 0.6171, "step": 3969 }, { "epoch": 4.521937321937322, "grad_norm": 0.167724147439003, "learning_rate": 4.551015264024189e-05, "loss": 0.8391, "step": 3970 }, { "epoch": 4.523076923076923, "grad_norm": 0.14339779317378998, "learning_rate": 4.550748825156087e-05, "loss": 1.005, "step": 3971 }, { "epoch": 4.524216524216524, "grad_norm": 0.166597381234169, "learning_rate": 4.550482315059839e-05, "loss": 0.7027, "step": 3972 }, { "epoch": 4.525356125356125, "grad_norm": 0.15130047500133514, "learning_rate": 4.5502157337447005e-05, "loss": 0.7981, "step": 3973 }, { "epoch": 4.526495726495726, "grad_norm": 0.1574205756187439, "learning_rate": 4.549949081219931e-05, "loss": 0.8686, "step": 3974 }, { "epoch": 4.527635327635328, "grad_norm": 0.16577941179275513, "learning_rate": 4.549682357494793e-05, "loss": 0.8758, "step": 3975 }, { "epoch": 4.528774928774929, "grad_norm": 0.17792902886867523, "learning_rate": 4.5494155625785515e-05, "loss": 0.8144, "step": 3976 }, { "epoch": 4.52991452991453, "grad_norm": 0.1614692211151123, "learning_rate": 4.54914869648047e-05, "loss": 0.9307, "step": 3977 }, { "epoch": 4.531054131054131, "grad_norm": 0.16494810581207275, "learning_rate": 4.54888175920982e-05, "loss": 0.8361, "step": 3978 }, { "epoch": 4.532193732193733, "grad_norm": 0.1610734611749649, "learning_rate": 4.548614750775871e-05, "loss": 0.7365, "step": 3979 }, { "epoch": 4.533333333333333, "grad_norm": 0.153951495885849, "learning_rate": 4.548347671187899e-05, "loss": 0.7872, "step": 3980 }, { "epoch": 4.534472934472935, "grad_norm": 0.20417840778827667, "learning_rate": 4.548080520455178e-05, "loss": 0.6667, "step": 3981 }, { "epoch": 4.535612535612536, "grad_norm": 0.19135449826717377, "learning_rate": 4.5478132985869896e-05, "loss": 0.6333, "step": 3982 }, { "epoch": 4.536752136752137, "grad_norm": 0.13388732075691223, "learning_rate": 4.547546005592614e-05, "loss": 0.8846, "step": 3983 }, { "epoch": 4.537891737891738, "grad_norm": 0.21345093846321106, "learning_rate": 4.5472786414813337e-05, "loss": 0.6624, "step": 3984 }, { "epoch": 4.5390313390313395, "grad_norm": 0.1408410519361496, "learning_rate": 4.547011206262437e-05, "loss": 0.9532, "step": 3985 }, { "epoch": 4.54017094017094, "grad_norm": 0.17190605401992798, "learning_rate": 4.546743699945212e-05, "loss": 0.8389, "step": 3986 }, { "epoch": 4.5413105413105415, "grad_norm": 0.15608234703540802, "learning_rate": 4.546476122538949e-05, "loss": 0.751, "step": 3987 }, { "epoch": 4.542450142450143, "grad_norm": 0.18294699490070343, "learning_rate": 4.546208474052943e-05, "loss": 0.797, "step": 3988 }, { "epoch": 4.543589743589743, "grad_norm": 0.1740257441997528, "learning_rate": 4.545940754496489e-05, "loss": 0.7203, "step": 3989 }, { "epoch": 4.544729344729345, "grad_norm": 0.20635540783405304, "learning_rate": 4.545672963878886e-05, "loss": 0.7776, "step": 3990 }, { "epoch": 4.545868945868946, "grad_norm": 0.15014831721782684, "learning_rate": 4.545405102209437e-05, "loss": 0.9808, "step": 3991 }, { "epoch": 4.547008547008547, "grad_norm": 0.15490661561489105, "learning_rate": 4.545137169497442e-05, "loss": 0.9244, "step": 3992 }, { "epoch": 4.548148148148148, "grad_norm": 0.17073404788970947, "learning_rate": 4.5448691657522105e-05, "loss": 0.7085, "step": 3993 }, { "epoch": 4.54928774928775, "grad_norm": 0.15568168461322784, "learning_rate": 4.5446010909830485e-05, "loss": 0.8868, "step": 3994 }, { "epoch": 4.55042735042735, "grad_norm": 0.17152468860149384, "learning_rate": 4.544332945199268e-05, "loss": 0.7253, "step": 3995 }, { "epoch": 4.551566951566952, "grad_norm": 0.15522730350494385, "learning_rate": 4.5440647284101825e-05, "loss": 0.8265, "step": 3996 }, { "epoch": 4.552706552706553, "grad_norm": 0.15751118957996368, "learning_rate": 4.5437964406251085e-05, "loss": 0.9217, "step": 3997 }, { "epoch": 4.553846153846154, "grad_norm": 0.15734221041202545, "learning_rate": 4.543528081853363e-05, "loss": 0.7803, "step": 3998 }, { "epoch": 4.554985754985755, "grad_norm": 0.21027764678001404, "learning_rate": 4.5432596521042666e-05, "loss": 0.5735, "step": 3999 }, { "epoch": 4.5561253561253565, "grad_norm": 0.17011325061321259, "learning_rate": 4.542991151387145e-05, "loss": 0.6783, "step": 4000 }, { "epoch": 4.557264957264957, "grad_norm": 0.21157589554786682, "learning_rate": 4.5427225797113216e-05, "loss": 0.6127, "step": 4001 }, { "epoch": 4.5584045584045585, "grad_norm": 0.17877617478370667, "learning_rate": 4.5424539370861265e-05, "loss": 0.8501, "step": 4002 }, { "epoch": 4.55954415954416, "grad_norm": 0.1595057249069214, "learning_rate": 4.542185223520888e-05, "loss": 0.8112, "step": 4003 }, { "epoch": 4.5606837606837605, "grad_norm": 0.14397701621055603, "learning_rate": 4.541916439024941e-05, "loss": 0.9503, "step": 4004 }, { "epoch": 4.561823361823362, "grad_norm": 0.18094944953918457, "learning_rate": 4.5416475836076216e-05, "loss": 0.8415, "step": 4005 }, { "epoch": 4.562962962962963, "grad_norm": 0.13389381766319275, "learning_rate": 4.541378657278267e-05, "loss": 0.9761, "step": 4006 }, { "epoch": 4.564102564102564, "grad_norm": 0.16142316162586212, "learning_rate": 4.541109660046216e-05, "loss": 0.7702, "step": 4007 }, { "epoch": 4.565242165242165, "grad_norm": 0.15188150107860565, "learning_rate": 4.5408405919208155e-05, "loss": 0.9682, "step": 4008 }, { "epoch": 4.566381766381767, "grad_norm": 0.1972832828760147, "learning_rate": 4.540571452911408e-05, "loss": 0.7068, "step": 4009 }, { "epoch": 4.567521367521367, "grad_norm": 0.20728431642055511, "learning_rate": 4.540302243027344e-05, "loss": 0.8092, "step": 4010 }, { "epoch": 4.568660968660969, "grad_norm": 0.1817607283592224, "learning_rate": 4.54003296227797e-05, "loss": 0.6995, "step": 4011 }, { "epoch": 4.56980056980057, "grad_norm": 0.14294931292533875, "learning_rate": 4.5397636106726415e-05, "loss": 0.936, "step": 4012 }, { "epoch": 4.570940170940171, "grad_norm": 0.17138810455799103, "learning_rate": 4.539494188220714e-05, "loss": 0.7743, "step": 4013 }, { "epoch": 4.572079772079772, "grad_norm": 0.15707600116729736, "learning_rate": 4.539224694931545e-05, "loss": 0.7838, "step": 4014 }, { "epoch": 4.5732193732193736, "grad_norm": 0.1790095716714859, "learning_rate": 4.538955130814494e-05, "loss": 0.7232, "step": 4015 }, { "epoch": 4.574358974358974, "grad_norm": 0.17105311155319214, "learning_rate": 4.538685495878924e-05, "loss": 0.681, "step": 4016 }, { "epoch": 4.5754985754985755, "grad_norm": 0.1521432250738144, "learning_rate": 4.5384157901342004e-05, "loss": 0.8735, "step": 4017 }, { "epoch": 4.576638176638177, "grad_norm": 0.14876161515712738, "learning_rate": 4.5381460135896905e-05, "loss": 0.8849, "step": 4018 }, { "epoch": 4.5777777777777775, "grad_norm": 0.20951324701309204, "learning_rate": 4.5378761662547653e-05, "loss": 0.6883, "step": 4019 }, { "epoch": 4.578917378917379, "grad_norm": 0.1732478141784668, "learning_rate": 4.537606248138796e-05, "loss": 0.6288, "step": 4020 }, { "epoch": 4.58005698005698, "grad_norm": 0.18079932034015656, "learning_rate": 4.537336259251159e-05, "loss": 0.6669, "step": 4021 }, { "epoch": 4.581196581196581, "grad_norm": 0.20264782011508942, "learning_rate": 4.537066199601231e-05, "loss": 0.5511, "step": 4022 }, { "epoch": 4.582336182336182, "grad_norm": 0.16815167665481567, "learning_rate": 4.536796069198391e-05, "loss": 0.8066, "step": 4023 }, { "epoch": 4.583475783475784, "grad_norm": 0.17291642725467682, "learning_rate": 4.536525868052023e-05, "loss": 0.7432, "step": 4024 }, { "epoch": 4.584615384615384, "grad_norm": 0.14452853798866272, "learning_rate": 4.536255596171511e-05, "loss": 0.8859, "step": 4025 }, { "epoch": 4.585754985754986, "grad_norm": 0.14809772372245789, "learning_rate": 4.535985253566243e-05, "loss": 0.7038, "step": 4026 }, { "epoch": 4.586894586894587, "grad_norm": 0.1389678716659546, "learning_rate": 4.5357148402456075e-05, "loss": 0.8257, "step": 4027 }, { "epoch": 4.588034188034188, "grad_norm": 0.11735313385725021, "learning_rate": 4.535444356218997e-05, "loss": 0.9523, "step": 4028 }, { "epoch": 4.589173789173789, "grad_norm": 0.16080349683761597, "learning_rate": 4.5351738014958065e-05, "loss": 0.8046, "step": 4029 }, { "epoch": 4.590313390313391, "grad_norm": 0.16313500702381134, "learning_rate": 4.5349031760854345e-05, "loss": 0.8245, "step": 4030 }, { "epoch": 4.591452991452991, "grad_norm": 0.16148072481155396, "learning_rate": 4.5346324799972775e-05, "loss": 0.8581, "step": 4031 }, { "epoch": 4.592592592592593, "grad_norm": 0.17064674198627472, "learning_rate": 4.53436171324074e-05, "loss": 0.7685, "step": 4032 }, { "epoch": 4.593732193732194, "grad_norm": 0.15386755764484406, "learning_rate": 4.534090875825226e-05, "loss": 0.924, "step": 4033 }, { "epoch": 4.5948717948717945, "grad_norm": 0.15219347178936005, "learning_rate": 4.533819967760141e-05, "loss": 0.8111, "step": 4034 }, { "epoch": 4.596011396011396, "grad_norm": 0.14534282684326172, "learning_rate": 4.533548989054896e-05, "loss": 0.7756, "step": 4035 }, { "epoch": 4.597150997150997, "grad_norm": 0.191035658121109, "learning_rate": 4.533277939718903e-05, "loss": 0.6912, "step": 4036 }, { "epoch": 4.598290598290598, "grad_norm": 0.15240877866744995, "learning_rate": 4.533006819761576e-05, "loss": 0.9699, "step": 4037 }, { "epoch": 4.599430199430199, "grad_norm": 0.20577841997146606, "learning_rate": 4.532735629192331e-05, "loss": 0.6379, "step": 4038 }, { "epoch": 4.600569800569801, "grad_norm": 0.14312636852264404, "learning_rate": 4.5324643680205865e-05, "loss": 0.839, "step": 4039 }, { "epoch": 4.601709401709401, "grad_norm": 0.15499526262283325, "learning_rate": 4.532193036255766e-05, "loss": 0.8511, "step": 4040 }, { "epoch": 4.602849002849003, "grad_norm": 0.16669511795043945, "learning_rate": 4.531921633907292e-05, "loss": 0.839, "step": 4041 }, { "epoch": 4.603988603988604, "grad_norm": 0.16203133761882782, "learning_rate": 4.531650160984593e-05, "loss": 0.702, "step": 4042 }, { "epoch": 4.605128205128205, "grad_norm": 0.19377486407756805, "learning_rate": 4.531378617497096e-05, "loss": 0.6036, "step": 4043 }, { "epoch": 4.606267806267806, "grad_norm": 0.16267363727092743, "learning_rate": 4.5311070034542334e-05, "loss": 0.8961, "step": 4044 }, { "epoch": 4.607407407407408, "grad_norm": 0.14054971933364868, "learning_rate": 4.5308353188654396e-05, "loss": 1.0982, "step": 4045 }, { "epoch": 4.608547008547008, "grad_norm": 0.14552897214889526, "learning_rate": 4.53056356374015e-05, "loss": 0.7369, "step": 4046 }, { "epoch": 4.60968660968661, "grad_norm": 0.1440308392047882, "learning_rate": 4.530291738087804e-05, "loss": 0.911, "step": 4047 }, { "epoch": 4.610826210826211, "grad_norm": 0.13124576210975647, "learning_rate": 4.5300198419178416e-05, "loss": 0.9021, "step": 4048 }, { "epoch": 4.611965811965812, "grad_norm": 0.14725372195243835, "learning_rate": 4.529747875239709e-05, "loss": 0.9907, "step": 4049 }, { "epoch": 4.613105413105413, "grad_norm": 0.1701754629611969, "learning_rate": 4.52947583806285e-05, "loss": 0.6432, "step": 4050 }, { "epoch": 4.614245014245014, "grad_norm": 0.1853622943162918, "learning_rate": 4.529203730396714e-05, "loss": 0.7776, "step": 4051 }, { "epoch": 4.615384615384615, "grad_norm": 0.1685890555381775, "learning_rate": 4.528931552250753e-05, "loss": 0.8359, "step": 4052 }, { "epoch": 4.616524216524216, "grad_norm": 0.17278628051280975, "learning_rate": 4.528659303634418e-05, "loss": 0.8433, "step": 4053 }, { "epoch": 4.617663817663818, "grad_norm": 0.16110637784004211, "learning_rate": 4.528386984557168e-05, "loss": 0.8852, "step": 4054 }, { "epoch": 4.618803418803418, "grad_norm": 0.1691066175699234, "learning_rate": 4.5281145950284596e-05, "loss": 0.7646, "step": 4055 }, { "epoch": 4.61994301994302, "grad_norm": 0.18858157098293304, "learning_rate": 4.527842135057753e-05, "loss": 0.8638, "step": 4056 }, { "epoch": 4.621082621082621, "grad_norm": 0.15929220616817474, "learning_rate": 4.527569604654514e-05, "loss": 0.9665, "step": 4057 }, { "epoch": 4.622222222222222, "grad_norm": 0.15706954896450043, "learning_rate": 4.527297003828206e-05, "loss": 0.9876, "step": 4058 }, { "epoch": 4.623361823361823, "grad_norm": 0.15249647200107574, "learning_rate": 4.527024332588298e-05, "loss": 0.8959, "step": 4059 }, { "epoch": 4.624501424501425, "grad_norm": 0.17531955242156982, "learning_rate": 4.526751590944261e-05, "loss": 0.8502, "step": 4060 }, { "epoch": 4.625641025641025, "grad_norm": 0.1532467156648636, "learning_rate": 4.526478778905566e-05, "loss": 0.7962, "step": 4061 }, { "epoch": 4.626780626780627, "grad_norm": 0.14758317172527313, "learning_rate": 4.5262058964816924e-05, "loss": 0.8561, "step": 4062 }, { "epoch": 4.627920227920228, "grad_norm": 0.14740172028541565, "learning_rate": 4.525932943682115e-05, "loss": 0.8069, "step": 4063 }, { "epoch": 4.629059829059829, "grad_norm": 0.14879418909549713, "learning_rate": 4.5256599205163144e-05, "loss": 0.8886, "step": 4064 }, { "epoch": 4.63019943019943, "grad_norm": 0.14993621408939362, "learning_rate": 4.525386826993775e-05, "loss": 0.8049, "step": 4065 }, { "epoch": 4.6313390313390315, "grad_norm": 0.15246500074863434, "learning_rate": 4.525113663123981e-05, "loss": 0.9099, "step": 4066 }, { "epoch": 4.632478632478632, "grad_norm": 0.18146242201328278, "learning_rate": 4.52484042891642e-05, "loss": 0.7294, "step": 4067 }, { "epoch": 4.633618233618233, "grad_norm": 0.17115052044391632, "learning_rate": 4.524567124380584e-05, "loss": 0.8254, "step": 4068 }, { "epoch": 4.634757834757835, "grad_norm": 0.150685653090477, "learning_rate": 4.524293749525963e-05, "loss": 0.7949, "step": 4069 }, { "epoch": 4.635897435897435, "grad_norm": 0.1943797767162323, "learning_rate": 4.524020304362053e-05, "loss": 0.7277, "step": 4070 }, { "epoch": 4.637037037037037, "grad_norm": 0.13906507194042206, "learning_rate": 4.523746788898353e-05, "loss": 0.7765, "step": 4071 }, { "epoch": 4.638176638176638, "grad_norm": 0.1925218105316162, "learning_rate": 4.523473203144361e-05, "loss": 0.7024, "step": 4072 }, { "epoch": 4.639316239316239, "grad_norm": 0.16367250680923462, "learning_rate": 4.52319954710958e-05, "loss": 0.8138, "step": 4073 }, { "epoch": 4.64045584045584, "grad_norm": 0.1505347192287445, "learning_rate": 4.522925820803515e-05, "loss": 0.7137, "step": 4074 }, { "epoch": 4.641595441595442, "grad_norm": 0.14122769236564636, "learning_rate": 4.522652024235673e-05, "loss": 0.8795, "step": 4075 }, { "epoch": 4.642735042735043, "grad_norm": 0.15575025975704193, "learning_rate": 4.5223781574155646e-05, "loss": 0.7649, "step": 4076 }, { "epoch": 4.643874643874644, "grad_norm": 0.16633592545986176, "learning_rate": 4.5221042203526995e-05, "loss": 0.8158, "step": 4077 }, { "epoch": 4.645014245014245, "grad_norm": 0.13344813883304596, "learning_rate": 4.521830213056596e-05, "loss": 0.9714, "step": 4078 }, { "epoch": 4.6461538461538465, "grad_norm": 0.1495080441236496, "learning_rate": 4.521556135536768e-05, "loss": 0.817, "step": 4079 }, { "epoch": 4.647293447293447, "grad_norm": 0.16998454928398132, "learning_rate": 4.521281987802737e-05, "loss": 0.8866, "step": 4080 }, { "epoch": 4.6484330484330485, "grad_norm": 0.18713173270225525, "learning_rate": 4.521007769864023e-05, "loss": 0.5714, "step": 4081 }, { "epoch": 4.64957264957265, "grad_norm": 0.1511099636554718, "learning_rate": 4.520733481730152e-05, "loss": 0.8277, "step": 4082 }, { "epoch": 4.6507122507122505, "grad_norm": 0.1812911033630371, "learning_rate": 4.5204591234106496e-05, "loss": 0.6794, "step": 4083 }, { "epoch": 4.651851851851852, "grad_norm": 0.14953500032424927, "learning_rate": 4.5201846949150464e-05, "loss": 0.9122, "step": 4084 }, { "epoch": 4.652991452991453, "grad_norm": 0.1843397468328476, "learning_rate": 4.5199101962528724e-05, "loss": 0.7718, "step": 4085 }, { "epoch": 4.654131054131054, "grad_norm": 0.2112533450126648, "learning_rate": 4.5196356274336624e-05, "loss": 0.5988, "step": 4086 }, { "epoch": 4.655270655270655, "grad_norm": 0.1631164699792862, "learning_rate": 4.519360988466954e-05, "loss": 0.7118, "step": 4087 }, { "epoch": 4.656410256410257, "grad_norm": 0.18719340860843658, "learning_rate": 4.519086279362285e-05, "loss": 0.8051, "step": 4088 }, { "epoch": 4.657549857549857, "grad_norm": 0.15237991511821747, "learning_rate": 4.5188115001291965e-05, "loss": 0.8052, "step": 4089 }, { "epoch": 4.658689458689459, "grad_norm": 0.17108848690986633, "learning_rate": 4.518536650777233e-05, "loss": 0.8029, "step": 4090 }, { "epoch": 4.65982905982906, "grad_norm": 0.1572313755750656, "learning_rate": 4.51826173131594e-05, "loss": 0.9313, "step": 4091 }, { "epoch": 4.660968660968661, "grad_norm": 0.16194182634353638, "learning_rate": 4.517986741754868e-05, "loss": 0.7356, "step": 4092 }, { "epoch": 4.662108262108262, "grad_norm": 0.16652844846248627, "learning_rate": 4.5177116821035665e-05, "loss": 0.8032, "step": 4093 }, { "epoch": 4.663247863247864, "grad_norm": 0.14325307309627533, "learning_rate": 4.517436552371589e-05, "loss": 0.9983, "step": 4094 }, { "epoch": 4.664387464387464, "grad_norm": 0.14562469720840454, "learning_rate": 4.517161352568493e-05, "loss": 1.0189, "step": 4095 }, { "epoch": 4.6655270655270655, "grad_norm": 0.1581488847732544, "learning_rate": 4.516886082703836e-05, "loss": 1.0678, "step": 4096 }, { "epoch": 4.666666666666667, "grad_norm": 0.14162549376487732, "learning_rate": 4.516610742787178e-05, "loss": 0.9617, "step": 4097 }, { "epoch": 4.6678062678062675, "grad_norm": 0.18066850304603577, "learning_rate": 4.516335332828083e-05, "loss": 0.832, "step": 4098 }, { "epoch": 4.668945868945869, "grad_norm": 0.14035151898860931, "learning_rate": 4.5160598528361186e-05, "loss": 0.8838, "step": 4099 }, { "epoch": 4.67008547008547, "grad_norm": 0.15560117363929749, "learning_rate": 4.51578430282085e-05, "loss": 0.831, "step": 4100 }, { "epoch": 4.671225071225071, "grad_norm": 0.15863807499408722, "learning_rate": 4.5155086827918496e-05, "loss": 0.9368, "step": 4101 }, { "epoch": 4.672364672364672, "grad_norm": 0.1644803136587143, "learning_rate": 4.51523299275869e-05, "loss": 0.8162, "step": 4102 }, { "epoch": 4.673504273504274, "grad_norm": 0.15345679223537445, "learning_rate": 4.5149572327309464e-05, "loss": 0.8704, "step": 4103 }, { "epoch": 4.674643874643874, "grad_norm": 0.13855990767478943, "learning_rate": 4.514681402718197e-05, "loss": 0.819, "step": 4104 }, { "epoch": 4.675783475783476, "grad_norm": 0.1828271448612213, "learning_rate": 4.514405502730023e-05, "loss": 0.7551, "step": 4105 }, { "epoch": 4.676923076923077, "grad_norm": 0.15497976541519165, "learning_rate": 4.514129532776005e-05, "loss": 0.8267, "step": 4106 }, { "epoch": 4.678062678062678, "grad_norm": 0.18530358374118805, "learning_rate": 4.513853492865729e-05, "loss": 0.6712, "step": 4107 }, { "epoch": 4.679202279202279, "grad_norm": 0.17878563702106476, "learning_rate": 4.5135773830087845e-05, "loss": 0.7382, "step": 4108 }, { "epoch": 4.680341880341881, "grad_norm": 0.17513959109783173, "learning_rate": 4.513301203214759e-05, "loss": 0.7848, "step": 4109 }, { "epoch": 4.681481481481481, "grad_norm": 0.15837302803993225, "learning_rate": 4.5130249534932475e-05, "loss": 0.9729, "step": 4110 }, { "epoch": 4.682621082621083, "grad_norm": 0.20252671837806702, "learning_rate": 4.512748633853842e-05, "loss": 0.6661, "step": 4111 }, { "epoch": 4.683760683760684, "grad_norm": 0.16751818358898163, "learning_rate": 4.5124722443061425e-05, "loss": 0.7302, "step": 4112 }, { "epoch": 4.6849002849002845, "grad_norm": 0.21062280237674713, "learning_rate": 4.512195784859747e-05, "loss": 0.6217, "step": 4113 }, { "epoch": 4.686039886039886, "grad_norm": 0.1648298054933548, "learning_rate": 4.5119192555242584e-05, "loss": 0.9386, "step": 4114 }, { "epoch": 4.687179487179487, "grad_norm": 0.16665370762348175, "learning_rate": 4.511642656309282e-05, "loss": 0.8003, "step": 4115 }, { "epoch": 4.688319088319088, "grad_norm": 0.13676482439041138, "learning_rate": 4.511365987224423e-05, "loss": 0.7838, "step": 4116 }, { "epoch": 4.689458689458689, "grad_norm": 0.17758703231811523, "learning_rate": 4.5110892482792924e-05, "loss": 0.7245, "step": 4117 }, { "epoch": 4.690598290598291, "grad_norm": 0.15619991719722748, "learning_rate": 4.5108124394835025e-05, "loss": 0.893, "step": 4118 }, { "epoch": 4.691737891737891, "grad_norm": 0.17483027279376984, "learning_rate": 4.510535560846666e-05, "loss": 0.7915, "step": 4119 }, { "epoch": 4.692877492877493, "grad_norm": 0.17119629681110382, "learning_rate": 4.5102586123784005e-05, "loss": 0.8279, "step": 4120 }, { "epoch": 4.694017094017094, "grad_norm": 0.14672748744487762, "learning_rate": 4.509981594088325e-05, "loss": 0.9128, "step": 4121 }, { "epoch": 4.695156695156696, "grad_norm": 0.18604962527751923, "learning_rate": 4.509704505986061e-05, "loss": 0.767, "step": 4122 }, { "epoch": 4.696296296296296, "grad_norm": 0.1733613908290863, "learning_rate": 4.5094273480812336e-05, "loss": 0.7529, "step": 4123 }, { "epoch": 4.697435897435898, "grad_norm": 0.16943800449371338, "learning_rate": 4.509150120383469e-05, "loss": 0.8212, "step": 4124 }, { "epoch": 4.698575498575499, "grad_norm": 0.1841086447238922, "learning_rate": 4.508872822902394e-05, "loss": 0.702, "step": 4125 }, { "epoch": 4.6997150997151, "grad_norm": 0.16594654321670532, "learning_rate": 4.508595455647642e-05, "loss": 0.6729, "step": 4126 }, { "epoch": 4.700854700854701, "grad_norm": 0.229533851146698, "learning_rate": 4.5083180186288465e-05, "loss": 0.6871, "step": 4127 }, { "epoch": 4.7019943019943025, "grad_norm": 0.23124736547470093, "learning_rate": 4.508040511855643e-05, "loss": 0.4359, "step": 4128 }, { "epoch": 4.703133903133903, "grad_norm": 0.1419457644224167, "learning_rate": 4.507762935337671e-05, "loss": 0.966, "step": 4129 }, { "epoch": 4.704273504273504, "grad_norm": 0.18185141682624817, "learning_rate": 4.50748528908457e-05, "loss": 0.7773, "step": 4130 }, { "epoch": 4.705413105413106, "grad_norm": 0.18339097499847412, "learning_rate": 4.507207573105985e-05, "loss": 0.7736, "step": 4131 }, { "epoch": 4.706552706552706, "grad_norm": 0.19014382362365723, "learning_rate": 4.50692978741156e-05, "loss": 0.6542, "step": 4132 }, { "epoch": 4.707692307692308, "grad_norm": 0.13202908635139465, "learning_rate": 4.5066519320109456e-05, "loss": 0.9674, "step": 4133 }, { "epoch": 4.708831908831909, "grad_norm": 0.135972797870636, "learning_rate": 4.506374006913791e-05, "loss": 0.8556, "step": 4134 }, { "epoch": 4.70997150997151, "grad_norm": 0.14986127614974976, "learning_rate": 4.5060960121297495e-05, "loss": 0.9315, "step": 4135 }, { "epoch": 4.711111111111111, "grad_norm": 0.16279983520507812, "learning_rate": 4.505817947668477e-05, "loss": 0.8828, "step": 4136 }, { "epoch": 4.712250712250713, "grad_norm": 0.13684958219528198, "learning_rate": 4.505539813539631e-05, "loss": 0.9831, "step": 4137 }, { "epoch": 4.713390313390313, "grad_norm": 0.16267244517803192, "learning_rate": 4.5052616097528715e-05, "loss": 0.7651, "step": 4138 }, { "epoch": 4.714529914529915, "grad_norm": 0.1724000871181488, "learning_rate": 4.5049833363178624e-05, "loss": 0.7811, "step": 4139 }, { "epoch": 4.715669515669516, "grad_norm": 0.14554689824581146, "learning_rate": 4.504704993244267e-05, "loss": 0.8928, "step": 4140 }, { "epoch": 4.716809116809117, "grad_norm": 0.16116657853126526, "learning_rate": 4.5044265805417564e-05, "loss": 0.9178, "step": 4141 }, { "epoch": 4.717948717948718, "grad_norm": 0.16286273300647736, "learning_rate": 4.504148098219997e-05, "loss": 0.7827, "step": 4142 }, { "epoch": 4.7190883190883195, "grad_norm": 0.197148397564888, "learning_rate": 4.503869546288664e-05, "loss": 0.7308, "step": 4143 }, { "epoch": 4.72022792022792, "grad_norm": 0.14606201648712158, "learning_rate": 4.503590924757429e-05, "loss": 0.9053, "step": 4144 }, { "epoch": 4.7213675213675215, "grad_norm": 0.186607226729393, "learning_rate": 4.503312233635973e-05, "loss": 0.7539, "step": 4145 }, { "epoch": 4.722507122507123, "grad_norm": 0.13478659093379974, "learning_rate": 4.503033472933973e-05, "loss": 0.9322, "step": 4146 }, { "epoch": 4.7236467236467234, "grad_norm": 0.16311326622962952, "learning_rate": 4.502754642661113e-05, "loss": 0.7829, "step": 4147 }, { "epoch": 4.724786324786325, "grad_norm": 0.15414252877235413, "learning_rate": 4.502475742827076e-05, "loss": 0.9992, "step": 4148 }, { "epoch": 4.725925925925926, "grad_norm": 0.1786312609910965, "learning_rate": 4.50219677344155e-05, "loss": 0.7259, "step": 4149 }, { "epoch": 4.727065527065527, "grad_norm": 0.21185603737831116, "learning_rate": 4.501917734514224e-05, "loss": 0.6223, "step": 4150 }, { "epoch": 4.728205128205128, "grad_norm": 0.18022222816944122, "learning_rate": 4.50163862605479e-05, "loss": 0.662, "step": 4151 }, { "epoch": 4.72934472934473, "grad_norm": 0.14460934698581696, "learning_rate": 4.5013594480729405e-05, "loss": 0.9006, "step": 4152 }, { "epoch": 4.73048433048433, "grad_norm": 0.14877143502235413, "learning_rate": 4.501080200578375e-05, "loss": 0.9588, "step": 4153 }, { "epoch": 4.731623931623932, "grad_norm": 0.17735327780246735, "learning_rate": 4.5008008835807906e-05, "loss": 0.8277, "step": 4154 }, { "epoch": 4.732763532763533, "grad_norm": 0.14383308589458466, "learning_rate": 4.50052149708989e-05, "loss": 0.8454, "step": 4155 }, { "epoch": 4.733903133903134, "grad_norm": 0.14807045459747314, "learning_rate": 4.500242041115376e-05, "loss": 0.7425, "step": 4156 }, { "epoch": 4.735042735042735, "grad_norm": 0.14984874427318573, "learning_rate": 4.499962515666954e-05, "loss": 0.7821, "step": 4157 }, { "epoch": 4.7361823361823365, "grad_norm": 0.1719873547554016, "learning_rate": 4.4996829207543355e-05, "loss": 0.9814, "step": 4158 }, { "epoch": 4.737321937321937, "grad_norm": 0.21059659123420715, "learning_rate": 4.49940325638723e-05, "loss": 0.6283, "step": 4159 }, { "epoch": 4.7384615384615385, "grad_norm": 0.1666153520345688, "learning_rate": 4.4991235225753504e-05, "loss": 0.9044, "step": 4160 }, { "epoch": 4.73960113960114, "grad_norm": 0.16677604615688324, "learning_rate": 4.4988437193284135e-05, "loss": 0.7473, "step": 4161 }, { "epoch": 4.7407407407407405, "grad_norm": 0.15281268954277039, "learning_rate": 4.4985638466561376e-05, "loss": 0.8094, "step": 4162 }, { "epoch": 4.741880341880342, "grad_norm": 0.1570868045091629, "learning_rate": 4.4982839045682424e-05, "loss": 0.8231, "step": 4163 }, { "epoch": 4.743019943019943, "grad_norm": 0.16397841274738312, "learning_rate": 4.4980038930744526e-05, "loss": 0.8902, "step": 4164 }, { "epoch": 4.744159544159544, "grad_norm": 0.1561761498451233, "learning_rate": 4.497723812184493e-05, "loss": 0.8723, "step": 4165 }, { "epoch": 4.745299145299145, "grad_norm": 0.1947815865278244, "learning_rate": 4.497443661908091e-05, "loss": 0.6658, "step": 4166 }, { "epoch": 4.746438746438747, "grad_norm": 0.18389423191547394, "learning_rate": 4.497163442254978e-05, "loss": 0.8111, "step": 4167 }, { "epoch": 4.747578347578347, "grad_norm": 0.16813118755817413, "learning_rate": 4.496883153234887e-05, "loss": 0.9714, "step": 4168 }, { "epoch": 4.748717948717949, "grad_norm": 0.1909494698047638, "learning_rate": 4.496602794857552e-05, "loss": 0.8016, "step": 4169 }, { "epoch": 4.74985754985755, "grad_norm": 0.15375453233718872, "learning_rate": 4.4963223671327116e-05, "loss": 0.8755, "step": 4170 }, { "epoch": 4.750997150997151, "grad_norm": 0.16358648240566254, "learning_rate": 4.496041870070106e-05, "loss": 0.7453, "step": 4171 }, { "epoch": 4.752136752136752, "grad_norm": 0.16012993454933167, "learning_rate": 4.495761303679475e-05, "loss": 0.9577, "step": 4172 }, { "epoch": 4.753276353276354, "grad_norm": 0.16110707819461823, "learning_rate": 4.4954806679705676e-05, "loss": 0.7652, "step": 4173 }, { "epoch": 4.754415954415954, "grad_norm": 0.13035143911838531, "learning_rate": 4.495199962953128e-05, "loss": 0.9248, "step": 4174 }, { "epoch": 4.7555555555555555, "grad_norm": 0.14065566658973694, "learning_rate": 4.494919188636908e-05, "loss": 0.8384, "step": 4175 }, { "epoch": 4.756695156695157, "grad_norm": 0.17955157160758972, "learning_rate": 4.4946383450316576e-05, "loss": 0.7852, "step": 4176 }, { "epoch": 4.7578347578347575, "grad_norm": 0.1546027809381485, "learning_rate": 4.494357432147133e-05, "loss": 0.9961, "step": 4177 }, { "epoch": 4.758974358974359, "grad_norm": 0.15851834416389465, "learning_rate": 4.494076449993089e-05, "loss": 0.8677, "step": 4178 }, { "epoch": 4.76011396011396, "grad_norm": 0.17817097902297974, "learning_rate": 4.493795398579287e-05, "loss": 0.705, "step": 4179 }, { "epoch": 4.761253561253561, "grad_norm": 0.1442561000585556, "learning_rate": 4.493514277915488e-05, "loss": 0.8674, "step": 4180 }, { "epoch": 4.762393162393162, "grad_norm": 0.15708814561367035, "learning_rate": 4.4932330880114556e-05, "loss": 0.8519, "step": 4181 }, { "epoch": 4.763532763532764, "grad_norm": 0.14886590838432312, "learning_rate": 4.4929518288769564e-05, "loss": 0.866, "step": 4182 }, { "epoch": 4.764672364672364, "grad_norm": 0.14264659583568573, "learning_rate": 4.49267050052176e-05, "loss": 1.0888, "step": 4183 }, { "epoch": 4.765811965811966, "grad_norm": 0.16858595609664917, "learning_rate": 4.4923891029556375e-05, "loss": 0.7541, "step": 4184 }, { "epoch": 4.766951566951567, "grad_norm": 0.1341504603624344, "learning_rate": 4.492107636188362e-05, "loss": 0.8448, "step": 4185 }, { "epoch": 4.768091168091168, "grad_norm": 0.16006603837013245, "learning_rate": 4.491826100229709e-05, "loss": 1.022, "step": 4186 }, { "epoch": 4.769230769230769, "grad_norm": 0.15668129920959473, "learning_rate": 4.491544495089459e-05, "loss": 0.8296, "step": 4187 }, { "epoch": 4.770370370370371, "grad_norm": 0.16990108788013458, "learning_rate": 4.491262820777392e-05, "loss": 0.7245, "step": 4188 }, { "epoch": 4.771509971509971, "grad_norm": 0.15561148524284363, "learning_rate": 4.490981077303291e-05, "loss": 0.7876, "step": 4189 }, { "epoch": 4.772649572649573, "grad_norm": 0.1542661041021347, "learning_rate": 4.490699264676942e-05, "loss": 0.8008, "step": 4190 }, { "epoch": 4.773789173789174, "grad_norm": 0.15137791633605957, "learning_rate": 4.490417382908133e-05, "loss": 0.8118, "step": 4191 }, { "epoch": 4.7749287749287745, "grad_norm": 0.17010462284088135, "learning_rate": 4.4901354320066555e-05, "loss": 0.9024, "step": 4192 }, { "epoch": 4.776068376068376, "grad_norm": 0.2018144428730011, "learning_rate": 4.489853411982301e-05, "loss": 0.6595, "step": 4193 }, { "epoch": 4.777207977207977, "grad_norm": 0.17283937335014343, "learning_rate": 4.489571322844865e-05, "loss": 0.833, "step": 4194 }, { "epoch": 4.778347578347578, "grad_norm": 0.14774104952812195, "learning_rate": 4.4892891646041455e-05, "loss": 0.7807, "step": 4195 }, { "epoch": 4.779487179487179, "grad_norm": 0.13404740393161774, "learning_rate": 4.489006937269943e-05, "loss": 0.7927, "step": 4196 }, { "epoch": 4.780626780626781, "grad_norm": 0.16537755727767944, "learning_rate": 4.48872464085206e-05, "loss": 0.6315, "step": 4197 }, { "epoch": 4.781766381766381, "grad_norm": 0.17076155543327332, "learning_rate": 4.488442275360301e-05, "loss": 0.7903, "step": 4198 }, { "epoch": 4.782905982905983, "grad_norm": 0.15474078059196472, "learning_rate": 4.4881598408044734e-05, "loss": 0.7381, "step": 4199 }, { "epoch": 4.784045584045584, "grad_norm": 0.16827474534511566, "learning_rate": 4.487877337194388e-05, "loss": 0.7887, "step": 4200 }, { "epoch": 4.785185185185185, "grad_norm": 0.14604023098945618, "learning_rate": 4.4875947645398554e-05, "loss": 0.8949, "step": 4201 }, { "epoch": 4.786324786324786, "grad_norm": 0.15411163866519928, "learning_rate": 4.48731212285069e-05, "loss": 0.8067, "step": 4202 }, { "epoch": 4.787464387464388, "grad_norm": 0.18294286727905273, "learning_rate": 4.48702941213671e-05, "loss": 0.6416, "step": 4203 }, { "epoch": 4.788603988603988, "grad_norm": 0.17204371094703674, "learning_rate": 4.4867466324077344e-05, "loss": 0.7774, "step": 4204 }, { "epoch": 4.78974358974359, "grad_norm": 0.14938533306121826, "learning_rate": 4.4864637836735844e-05, "loss": 0.7033, "step": 4205 }, { "epoch": 4.790883190883191, "grad_norm": 0.14529328048229218, "learning_rate": 4.486180865944084e-05, "loss": 0.8266, "step": 4206 }, { "epoch": 4.792022792022792, "grad_norm": 0.1788632571697235, "learning_rate": 4.485897879229061e-05, "loss": 0.7094, "step": 4207 }, { "epoch": 4.793162393162393, "grad_norm": 0.16603273153305054, "learning_rate": 4.485614823538343e-05, "loss": 0.7224, "step": 4208 }, { "epoch": 4.794301994301994, "grad_norm": 0.18029817938804626, "learning_rate": 4.485331698881762e-05, "loss": 0.8077, "step": 4209 }, { "epoch": 4.795441595441595, "grad_norm": 0.1685163378715515, "learning_rate": 4.4850485052691507e-05, "loss": 0.8523, "step": 4210 }, { "epoch": 4.796581196581196, "grad_norm": 0.17367805540561676, "learning_rate": 4.4847652427103465e-05, "loss": 0.7716, "step": 4211 }, { "epoch": 4.797720797720798, "grad_norm": 0.1601673662662506, "learning_rate": 4.484481911215187e-05, "loss": 0.747, "step": 4212 }, { "epoch": 4.798860398860398, "grad_norm": 0.13713626563549042, "learning_rate": 4.484198510793514e-05, "loss": 0.9217, "step": 4213 }, { "epoch": 4.8, "grad_norm": 0.17279836535453796, "learning_rate": 4.4839150414551686e-05, "loss": 0.7968, "step": 4214 }, { "epoch": 4.801139601139601, "grad_norm": 0.19132784008979797, "learning_rate": 4.48363150321e-05, "loss": 0.7535, "step": 4215 }, { "epoch": 4.802279202279202, "grad_norm": 0.16383789479732513, "learning_rate": 4.4833478960678535e-05, "loss": 0.6406, "step": 4216 }, { "epoch": 4.803418803418803, "grad_norm": 0.18167129158973694, "learning_rate": 4.4830642200385795e-05, "loss": 0.8526, "step": 4217 }, { "epoch": 4.804558404558405, "grad_norm": 0.1492447853088379, "learning_rate": 4.482780475132032e-05, "loss": 0.78, "step": 4218 }, { "epoch": 4.805698005698005, "grad_norm": 0.15305733680725098, "learning_rate": 4.4824966613580664e-05, "loss": 0.7064, "step": 4219 }, { "epoch": 4.806837606837607, "grad_norm": 0.1457163393497467, "learning_rate": 4.48221277872654e-05, "loss": 0.8307, "step": 4220 }, { "epoch": 4.807977207977208, "grad_norm": 0.1764405071735382, "learning_rate": 4.4819288272473125e-05, "loss": 0.8535, "step": 4221 }, { "epoch": 4.8091168091168095, "grad_norm": 0.1517675220966339, "learning_rate": 4.481644806930247e-05, "loss": 0.7826, "step": 4222 }, { "epoch": 4.81025641025641, "grad_norm": 0.15232150256633759, "learning_rate": 4.481360717785207e-05, "loss": 0.8085, "step": 4223 }, { "epoch": 4.8113960113960115, "grad_norm": 0.1400236189365387, "learning_rate": 4.48107655982206e-05, "loss": 0.9212, "step": 4224 }, { "epoch": 4.812535612535613, "grad_norm": 0.197129487991333, "learning_rate": 4.480792333050678e-05, "loss": 0.6547, "step": 4225 }, { "epoch": 4.8136752136752134, "grad_norm": 0.15185502171516418, "learning_rate": 4.48050803748093e-05, "loss": 0.8662, "step": 4226 }, { "epoch": 4.814814814814815, "grad_norm": 0.18135321140289307, "learning_rate": 4.480223673122691e-05, "loss": 0.7618, "step": 4227 }, { "epoch": 4.815954415954416, "grad_norm": 0.14626853168010712, "learning_rate": 4.4799392399858384e-05, "loss": 0.9952, "step": 4228 }, { "epoch": 4.817094017094017, "grad_norm": 0.1954558938741684, "learning_rate": 4.479654738080252e-05, "loss": 0.6954, "step": 4229 }, { "epoch": 4.818233618233618, "grad_norm": 0.17472834885120392, "learning_rate": 4.479370167415812e-05, "loss": 0.6557, "step": 4230 }, { "epoch": 4.81937321937322, "grad_norm": 0.17346905171871185, "learning_rate": 4.4790855280024026e-05, "loss": 0.7406, "step": 4231 }, { "epoch": 4.82051282051282, "grad_norm": 0.1571098119020462, "learning_rate": 4.4788008198499095e-05, "loss": 0.8376, "step": 4232 }, { "epoch": 4.821652421652422, "grad_norm": 0.16063807904720306, "learning_rate": 4.478516042968224e-05, "loss": 0.7809, "step": 4233 }, { "epoch": 4.822792022792023, "grad_norm": 0.19099533557891846, "learning_rate": 4.4782311973672345e-05, "loss": 0.7096, "step": 4234 }, { "epoch": 4.823931623931624, "grad_norm": 0.19397708773612976, "learning_rate": 4.4779462830568344e-05, "loss": 0.6659, "step": 4235 }, { "epoch": 4.825071225071225, "grad_norm": 0.23969055712223053, "learning_rate": 4.477661300046921e-05, "loss": 0.7448, "step": 4236 }, { "epoch": 4.8262108262108265, "grad_norm": 0.20346936583518982, "learning_rate": 4.477376248347393e-05, "loss": 0.783, "step": 4237 }, { "epoch": 4.827350427350427, "grad_norm": 0.15883132815361023, "learning_rate": 4.4770911279681496e-05, "loss": 0.9055, "step": 4238 }, { "epoch": 4.8284900284900285, "grad_norm": 0.1544889211654663, "learning_rate": 4.476805938919094e-05, "loss": 0.8677, "step": 4239 }, { "epoch": 4.82962962962963, "grad_norm": 0.16251614689826965, "learning_rate": 4.476520681210132e-05, "loss": 0.8046, "step": 4240 }, { "epoch": 4.8307692307692305, "grad_norm": 0.15391865372657776, "learning_rate": 4.476235354851172e-05, "loss": 0.7789, "step": 4241 }, { "epoch": 4.831908831908832, "grad_norm": 0.1404588669538498, "learning_rate": 4.475949959852123e-05, "loss": 0.8876, "step": 4242 }, { "epoch": 4.833048433048433, "grad_norm": 0.17338310182094574, "learning_rate": 4.475664496222898e-05, "loss": 0.8681, "step": 4243 }, { "epoch": 4.834188034188034, "grad_norm": 0.15925438702106476, "learning_rate": 4.475378963973411e-05, "loss": 0.7407, "step": 4244 }, { "epoch": 4.835327635327635, "grad_norm": 0.15909427404403687, "learning_rate": 4.475093363113582e-05, "loss": 0.912, "step": 4245 }, { "epoch": 4.836467236467237, "grad_norm": 0.16174373030662537, "learning_rate": 4.474807693653328e-05, "loss": 0.7123, "step": 4246 }, { "epoch": 4.837606837606837, "grad_norm": 0.1355389803647995, "learning_rate": 4.474521955602572e-05, "loss": 1.0165, "step": 4247 }, { "epoch": 4.838746438746439, "grad_norm": 0.16957055032253265, "learning_rate": 4.4742361489712383e-05, "loss": 0.8319, "step": 4248 }, { "epoch": 4.83988603988604, "grad_norm": 0.1739952564239502, "learning_rate": 4.473950273769255e-05, "loss": 0.8228, "step": 4249 }, { "epoch": 4.841025641025641, "grad_norm": 0.15971599519252777, "learning_rate": 4.473664330006549e-05, "loss": 0.7937, "step": 4250 }, { "epoch": 4.842165242165242, "grad_norm": 0.1937943696975708, "learning_rate": 4.473378317693054e-05, "loss": 0.7698, "step": 4251 }, { "epoch": 4.843304843304844, "grad_norm": 0.19935311377048492, "learning_rate": 4.473092236838703e-05, "loss": 0.6701, "step": 4252 }, { "epoch": 4.844444444444444, "grad_norm": 0.15304085612297058, "learning_rate": 4.4728060874534325e-05, "loss": 0.6718, "step": 4253 }, { "epoch": 4.8455840455840455, "grad_norm": 0.17695607244968414, "learning_rate": 4.472519869547182e-05, "loss": 0.7867, "step": 4254 }, { "epoch": 4.846723646723647, "grad_norm": 0.15410280227661133, "learning_rate": 4.472233583129891e-05, "loss": 0.8369, "step": 4255 }, { "epoch": 4.8478632478632475, "grad_norm": 0.4089936912059784, "learning_rate": 4.471947228211505e-05, "loss": 0.6676, "step": 4256 }, { "epoch": 4.849002849002849, "grad_norm": 0.18327905237674713, "learning_rate": 4.471660804801968e-05, "loss": 0.7837, "step": 4257 }, { "epoch": 4.85014245014245, "grad_norm": 0.16768579185009003, "learning_rate": 4.47137431291123e-05, "loss": 0.8029, "step": 4258 }, { "epoch": 4.851282051282051, "grad_norm": 0.15602938830852509, "learning_rate": 4.47108775254924e-05, "loss": 0.9681, "step": 4259 }, { "epoch": 4.852421652421652, "grad_norm": 0.17325910925865173, "learning_rate": 4.470801123725953e-05, "loss": 0.8651, "step": 4260 }, { "epoch": 4.853561253561254, "grad_norm": 0.15763314068317413, "learning_rate": 4.4705144264513224e-05, "loss": 0.8297, "step": 4261 }, { "epoch": 4.854700854700854, "grad_norm": 0.14961732923984528, "learning_rate": 4.4702276607353064e-05, "loss": 0.9468, "step": 4262 }, { "epoch": 4.855840455840456, "grad_norm": 0.15746955573558807, "learning_rate": 4.4699408265878665e-05, "loss": 0.7486, "step": 4263 }, { "epoch": 4.856980056980057, "grad_norm": 0.1406732201576233, "learning_rate": 4.469653924018964e-05, "loss": 1.0252, "step": 4264 }, { "epoch": 4.858119658119658, "grad_norm": 0.1895042359828949, "learning_rate": 4.469366953038564e-05, "loss": 0.6633, "step": 4265 }, { "epoch": 4.859259259259259, "grad_norm": 0.1835954189300537, "learning_rate": 4.4690799136566336e-05, "loss": 0.6703, "step": 4266 }, { "epoch": 4.860398860398861, "grad_norm": 0.16680637001991272, "learning_rate": 4.468792805883143e-05, "loss": 0.8152, "step": 4267 }, { "epoch": 4.861538461538462, "grad_norm": 0.1416798233985901, "learning_rate": 4.468505629728065e-05, "loss": 1.0173, "step": 4268 }, { "epoch": 4.862678062678063, "grad_norm": 0.1459040343761444, "learning_rate": 4.468218385201372e-05, "loss": 0.8913, "step": 4269 }, { "epoch": 4.863817663817664, "grad_norm": 0.1507241427898407, "learning_rate": 4.4679310723130416e-05, "loss": 0.7986, "step": 4270 }, { "epoch": 4.864957264957265, "grad_norm": 0.17497318983078003, "learning_rate": 4.4676436910730546e-05, "loss": 0.8781, "step": 4271 }, { "epoch": 4.866096866096866, "grad_norm": 0.1836574673652649, "learning_rate": 4.467356241491391e-05, "loss": 0.6725, "step": 4272 }, { "epoch": 4.867236467236467, "grad_norm": 0.13472136855125427, "learning_rate": 4.467068723578033e-05, "loss": 0.906, "step": 4273 }, { "epoch": 4.868376068376069, "grad_norm": 0.14699387550354004, "learning_rate": 4.4667811373429704e-05, "loss": 0.8387, "step": 4274 }, { "epoch": 4.869515669515669, "grad_norm": 0.1765470951795578, "learning_rate": 4.4664934827961905e-05, "loss": 0.8405, "step": 4275 }, { "epoch": 4.870655270655271, "grad_norm": 0.15825481712818146, "learning_rate": 4.466205759947683e-05, "loss": 0.8911, "step": 4276 }, { "epoch": 4.871794871794872, "grad_norm": 0.14879028499126434, "learning_rate": 4.4659179688074425e-05, "loss": 0.9072, "step": 4277 }, { "epoch": 4.872934472934473, "grad_norm": 0.16699357330799103, "learning_rate": 4.465630109385465e-05, "loss": 0.7287, "step": 4278 }, { "epoch": 4.874074074074074, "grad_norm": 0.1636863797903061, "learning_rate": 4.465342181691748e-05, "loss": 0.8981, "step": 4279 }, { "epoch": 4.875213675213676, "grad_norm": 0.1806856244802475, "learning_rate": 4.465054185736293e-05, "loss": 0.727, "step": 4280 }, { "epoch": 4.876353276353276, "grad_norm": 0.1562328040599823, "learning_rate": 4.464766121529102e-05, "loss": 0.7446, "step": 4281 }, { "epoch": 4.877492877492878, "grad_norm": 0.18651501834392548, "learning_rate": 4.46447798908018e-05, "loss": 0.7936, "step": 4282 }, { "epoch": 4.878632478632479, "grad_norm": 0.14649750292301178, "learning_rate": 4.464189788399535e-05, "loss": 0.7607, "step": 4283 }, { "epoch": 4.87977207977208, "grad_norm": 0.16038383543491364, "learning_rate": 4.463901519497178e-05, "loss": 0.9292, "step": 4284 }, { "epoch": 4.880911680911681, "grad_norm": 0.18320301175117493, "learning_rate": 4.463613182383119e-05, "loss": 0.6915, "step": 4285 }, { "epoch": 4.8820512820512825, "grad_norm": 0.15676233172416687, "learning_rate": 4.463324777067376e-05, "loss": 0.8755, "step": 4286 }, { "epoch": 4.883190883190883, "grad_norm": 0.1492704153060913, "learning_rate": 4.463036303559964e-05, "loss": 0.7828, "step": 4287 }, { "epoch": 4.8843304843304844, "grad_norm": 0.18921218812465668, "learning_rate": 4.462747761870902e-05, "loss": 0.6043, "step": 4288 }, { "epoch": 4.885470085470086, "grad_norm": 0.15510891377925873, "learning_rate": 4.4624591520102124e-05, "loss": 0.9145, "step": 4289 }, { "epoch": 4.886609686609686, "grad_norm": 0.14122672379016876, "learning_rate": 4.4621704739879204e-05, "loss": 0.757, "step": 4290 }, { "epoch": 4.887749287749288, "grad_norm": 0.17758646607398987, "learning_rate": 4.461881727814052e-05, "loss": 0.7638, "step": 4291 }, { "epoch": 4.888888888888889, "grad_norm": 0.16042566299438477, "learning_rate": 4.4615929134986356e-05, "loss": 0.8693, "step": 4292 }, { "epoch": 4.89002849002849, "grad_norm": 0.17387689650058746, "learning_rate": 4.461304031051703e-05, "loss": 0.8141, "step": 4293 }, { "epoch": 4.891168091168091, "grad_norm": 0.15727101266384125, "learning_rate": 4.461015080483287e-05, "loss": 0.8782, "step": 4294 }, { "epoch": 4.892307692307693, "grad_norm": 0.17457790672779083, "learning_rate": 4.4607260618034256e-05, "loss": 0.6134, "step": 4295 }, { "epoch": 4.893447293447293, "grad_norm": 0.15576937794685364, "learning_rate": 4.460436975022156e-05, "loss": 0.7734, "step": 4296 }, { "epoch": 4.894586894586895, "grad_norm": 0.17607951164245605, "learning_rate": 4.460147820149518e-05, "loss": 0.822, "step": 4297 }, { "epoch": 4.895726495726496, "grad_norm": 0.17070811986923218, "learning_rate": 4.459858597195557e-05, "loss": 0.802, "step": 4298 }, { "epoch": 4.896866096866097, "grad_norm": 0.18165738880634308, "learning_rate": 4.459569306170317e-05, "loss": 0.6937, "step": 4299 }, { "epoch": 4.898005698005698, "grad_norm": 0.18264617025852203, "learning_rate": 4.459279947083846e-05, "loss": 0.7236, "step": 4300 }, { "epoch": 4.8991452991452995, "grad_norm": 0.15589116513729095, "learning_rate": 4.4589905199461946e-05, "loss": 0.9915, "step": 4301 }, { "epoch": 4.9002849002849, "grad_norm": 0.1613919734954834, "learning_rate": 4.458701024767414e-05, "loss": 0.7948, "step": 4302 }, { "epoch": 4.9014245014245015, "grad_norm": 0.16464246809482574, "learning_rate": 4.4584114615575615e-05, "loss": 0.8007, "step": 4303 }, { "epoch": 4.902564102564103, "grad_norm": 0.19122007489204407, "learning_rate": 4.4581218303266935e-05, "loss": 0.7734, "step": 4304 }, { "epoch": 4.9037037037037035, "grad_norm": 0.17448489367961884, "learning_rate": 4.4578321310848695e-05, "loss": 0.815, "step": 4305 }, { "epoch": 4.904843304843305, "grad_norm": 0.17270396649837494, "learning_rate": 4.457542363842151e-05, "loss": 0.8, "step": 4306 }, { "epoch": 4.905982905982906, "grad_norm": 0.1664361208677292, "learning_rate": 4.4572525286086024e-05, "loss": 0.9305, "step": 4307 }, { "epoch": 4.907122507122507, "grad_norm": 0.1542702615261078, "learning_rate": 4.456962625394292e-05, "loss": 0.9192, "step": 4308 }, { "epoch": 4.908262108262108, "grad_norm": 0.1896369904279709, "learning_rate": 4.456672654209287e-05, "loss": 0.7259, "step": 4309 }, { "epoch": 4.90940170940171, "grad_norm": 0.14200268685817719, "learning_rate": 4.45638261506366e-05, "loss": 0.9892, "step": 4310 }, { "epoch": 4.91054131054131, "grad_norm": 0.1676367223262787, "learning_rate": 4.456092507967486e-05, "loss": 0.7585, "step": 4311 }, { "epoch": 4.911680911680912, "grad_norm": 0.174367755651474, "learning_rate": 4.455802332930839e-05, "loss": 0.868, "step": 4312 }, { "epoch": 4.912820512820513, "grad_norm": 0.1585119068622589, "learning_rate": 4.455512089963798e-05, "loss": 0.8462, "step": 4313 }, { "epoch": 4.913960113960114, "grad_norm": 0.16577641665935516, "learning_rate": 4.455221779076446e-05, "loss": 0.7259, "step": 4314 }, { "epoch": 4.915099715099715, "grad_norm": 0.15881091356277466, "learning_rate": 4.454931400278863e-05, "loss": 0.8301, "step": 4315 }, { "epoch": 4.9162393162393165, "grad_norm": 0.14386670291423798, "learning_rate": 4.4546409535811374e-05, "loss": 0.732, "step": 4316 }, { "epoch": 4.917378917378917, "grad_norm": 0.18718674778938293, "learning_rate": 4.454350438993356e-05, "loss": 0.6544, "step": 4317 }, { "epoch": 4.9185185185185185, "grad_norm": 0.15518997609615326, "learning_rate": 4.4540598565256093e-05, "loss": 0.8085, "step": 4318 }, { "epoch": 4.91965811965812, "grad_norm": 0.16699695587158203, "learning_rate": 4.45376920618799e-05, "loss": 0.7404, "step": 4319 }, { "epoch": 4.9207977207977205, "grad_norm": 0.14701253175735474, "learning_rate": 4.453478487990593e-05, "loss": 0.8182, "step": 4320 }, { "epoch": 4.921937321937322, "grad_norm": 0.16137848794460297, "learning_rate": 4.453187701943516e-05, "loss": 0.8008, "step": 4321 }, { "epoch": 4.923076923076923, "grad_norm": 0.1766296625137329, "learning_rate": 4.45289684805686e-05, "loss": 0.6583, "step": 4322 }, { "epoch": 4.924216524216524, "grad_norm": 0.18904195725917816, "learning_rate": 4.452605926340725e-05, "loss": 0.8189, "step": 4323 }, { "epoch": 4.925356125356125, "grad_norm": 0.15321408212184906, "learning_rate": 4.452314936805217e-05, "loss": 0.8261, "step": 4324 }, { "epoch": 4.926495726495727, "grad_norm": 0.16038638353347778, "learning_rate": 4.452023879460442e-05, "loss": 0.7155, "step": 4325 }, { "epoch": 4.927635327635327, "grad_norm": 0.14061199128627777, "learning_rate": 4.45173275431651e-05, "loss": 0.9511, "step": 4326 }, { "epoch": 4.928774928774929, "grad_norm": 0.19432227313518524, "learning_rate": 4.4514415613835313e-05, "loss": 0.6133, "step": 4327 }, { "epoch": 4.92991452991453, "grad_norm": 0.1387859731912613, "learning_rate": 4.4511503006716216e-05, "loss": 0.8883, "step": 4328 }, { "epoch": 4.931054131054131, "grad_norm": 0.15362222492694855, "learning_rate": 4.4508589721908955e-05, "loss": 0.843, "step": 4329 }, { "epoch": 4.932193732193732, "grad_norm": 0.15109822154045105, "learning_rate": 4.450567575951473e-05, "loss": 0.9899, "step": 4330 }, { "epoch": 4.933333333333334, "grad_norm": 0.18527065217494965, "learning_rate": 4.450276111963474e-05, "loss": 0.7677, "step": 4331 }, { "epoch": 4.934472934472934, "grad_norm": 0.13956625759601593, "learning_rate": 4.449984580237023e-05, "loss": 0.9034, "step": 4332 }, { "epoch": 4.9356125356125355, "grad_norm": 0.1639336198568344, "learning_rate": 4.449692980782244e-05, "loss": 0.8179, "step": 4333 }, { "epoch": 4.936752136752137, "grad_norm": 0.17157134413719177, "learning_rate": 4.449401313609267e-05, "loss": 0.6158, "step": 4334 }, { "epoch": 4.9378917378917375, "grad_norm": 0.14175517857074738, "learning_rate": 4.449109578728221e-05, "loss": 0.7851, "step": 4335 }, { "epoch": 4.939031339031339, "grad_norm": 0.17257359623908997, "learning_rate": 4.44881777614924e-05, "loss": 0.7213, "step": 4336 }, { "epoch": 4.94017094017094, "grad_norm": 0.18445263803005219, "learning_rate": 4.448525905882458e-05, "loss": 0.8393, "step": 4337 }, { "epoch": 4.941310541310541, "grad_norm": 0.1446041762828827, "learning_rate": 4.448233967938012e-05, "loss": 0.9169, "step": 4338 }, { "epoch": 4.942450142450142, "grad_norm": 0.15681029856204987, "learning_rate": 4.447941962326044e-05, "loss": 0.8487, "step": 4339 }, { "epoch": 4.943589743589744, "grad_norm": 0.15423312783241272, "learning_rate": 4.4476498890566946e-05, "loss": 0.7903, "step": 4340 }, { "epoch": 4.944729344729344, "grad_norm": 0.1776115745306015, "learning_rate": 4.4473577481401077e-05, "loss": 0.7179, "step": 4341 }, { "epoch": 4.945868945868946, "grad_norm": 0.1941158026456833, "learning_rate": 4.4470655395864315e-05, "loss": 0.7338, "step": 4342 }, { "epoch": 4.947008547008547, "grad_norm": 0.1917916089296341, "learning_rate": 4.446773263405814e-05, "loss": 0.6661, "step": 4343 }, { "epoch": 4.948148148148148, "grad_norm": 0.2219989150762558, "learning_rate": 4.446480919608408e-05, "loss": 0.5835, "step": 4344 }, { "epoch": 4.949287749287749, "grad_norm": 0.13458289206027985, "learning_rate": 4.4461885082043666e-05, "loss": 0.8697, "step": 4345 }, { "epoch": 4.950427350427351, "grad_norm": 0.21749819815158844, "learning_rate": 4.445896029203847e-05, "loss": 0.4783, "step": 4346 }, { "epoch": 4.951566951566951, "grad_norm": 0.1451740562915802, "learning_rate": 4.4456034826170054e-05, "loss": 0.8084, "step": 4347 }, { "epoch": 4.952706552706553, "grad_norm": 0.16358816623687744, "learning_rate": 4.4453108684540056e-05, "loss": 0.8031, "step": 4348 }, { "epoch": 4.953846153846154, "grad_norm": 0.1640550196170807, "learning_rate": 4.44501818672501e-05, "loss": 0.7792, "step": 4349 }, { "epoch": 4.9549857549857546, "grad_norm": 0.1569184958934784, "learning_rate": 4.4447254374401835e-05, "loss": 0.8447, "step": 4350 }, { "epoch": 4.956125356125356, "grad_norm": 0.1534481942653656, "learning_rate": 4.444432620609694e-05, "loss": 0.8901, "step": 4351 }, { "epoch": 4.957264957264957, "grad_norm": 0.17017418146133423, "learning_rate": 4.4441397362437134e-05, "loss": 0.7446, "step": 4352 }, { "epoch": 4.958404558404558, "grad_norm": 0.228437602519989, "learning_rate": 4.443846784352413e-05, "loss": 0.6382, "step": 4353 }, { "epoch": 4.959544159544159, "grad_norm": 0.1891854703426361, "learning_rate": 4.443553764945968e-05, "loss": 0.7518, "step": 4354 }, { "epoch": 4.960683760683761, "grad_norm": 0.16365045309066772, "learning_rate": 4.443260678034555e-05, "loss": 0.8451, "step": 4355 }, { "epoch": 4.961823361823361, "grad_norm": 0.1816239207983017, "learning_rate": 4.4429675236283565e-05, "loss": 0.6822, "step": 4356 }, { "epoch": 4.962962962962963, "grad_norm": 0.18229347467422485, "learning_rate": 4.4426743017375516e-05, "loss": 0.6843, "step": 4357 }, { "epoch": 4.964102564102564, "grad_norm": 0.15037180483341217, "learning_rate": 4.4423810123723264e-05, "loss": 0.8652, "step": 4358 }, { "epoch": 4.965242165242165, "grad_norm": 0.14027701318264008, "learning_rate": 4.442087655542867e-05, "loss": 0.8458, "step": 4359 }, { "epoch": 4.966381766381766, "grad_norm": 0.1816105842590332, "learning_rate": 4.441794231259362e-05, "loss": 0.7752, "step": 4360 }, { "epoch": 4.967521367521368, "grad_norm": 0.17853955924510956, "learning_rate": 4.441500739532004e-05, "loss": 0.7008, "step": 4361 }, { "epoch": 4.968660968660968, "grad_norm": 0.1522316336631775, "learning_rate": 4.441207180370986e-05, "loss": 0.8782, "step": 4362 }, { "epoch": 4.96980056980057, "grad_norm": 0.1653551161289215, "learning_rate": 4.4409135537865044e-05, "loss": 0.754, "step": 4363 }, { "epoch": 4.970940170940171, "grad_norm": 0.18796348571777344, "learning_rate": 4.4406198597887574e-05, "loss": 0.7707, "step": 4364 }, { "epoch": 4.972079772079772, "grad_norm": 0.1707884967327118, "learning_rate": 4.440326098387946e-05, "loss": 0.84, "step": 4365 }, { "epoch": 4.973219373219373, "grad_norm": 0.16822636127471924, "learning_rate": 4.440032269594272e-05, "loss": 0.7032, "step": 4366 }, { "epoch": 4.9743589743589745, "grad_norm": 0.20402517914772034, "learning_rate": 4.4397383734179444e-05, "loss": 0.6482, "step": 4367 }, { "epoch": 4.975498575498576, "grad_norm": 0.20499737560749054, "learning_rate": 4.4394444098691675e-05, "loss": 0.5707, "step": 4368 }, { "epoch": 4.976638176638176, "grad_norm": 0.16235454380512238, "learning_rate": 4.4391503789581526e-05, "loss": 0.8344, "step": 4369 }, { "epoch": 4.977777777777778, "grad_norm": 0.14869852364063263, "learning_rate": 4.438856280695113e-05, "loss": 0.9471, "step": 4370 }, { "epoch": 4.978917378917379, "grad_norm": 0.17793439328670502, "learning_rate": 4.438562115090262e-05, "loss": 0.7396, "step": 4371 }, { "epoch": 4.98005698005698, "grad_norm": 0.15897755324840546, "learning_rate": 4.4382678821538195e-05, "loss": 0.7443, "step": 4372 }, { "epoch": 4.981196581196581, "grad_norm": 0.1621100902557373, "learning_rate": 4.437973581896001e-05, "loss": 0.8844, "step": 4373 }, { "epoch": 4.982336182336183, "grad_norm": 0.1932118684053421, "learning_rate": 4.437679214327032e-05, "loss": 0.7382, "step": 4374 }, { "epoch": 4.983475783475783, "grad_norm": 0.16252288222312927, "learning_rate": 4.4373847794571346e-05, "loss": 0.7316, "step": 4375 }, { "epoch": 4.984615384615385, "grad_norm": 0.17581906914710999, "learning_rate": 4.437090277296536e-05, "loss": 0.7582, "step": 4376 }, { "epoch": 4.985754985754986, "grad_norm": 0.16340146958827972, "learning_rate": 4.436795707855466e-05, "loss": 0.7943, "step": 4377 }, { "epoch": 4.986894586894587, "grad_norm": 0.14689506590366364, "learning_rate": 4.436501071144153e-05, "loss": 0.8851, "step": 4378 }, { "epoch": 4.988034188034188, "grad_norm": 0.17721427977085114, "learning_rate": 4.4362063671728344e-05, "loss": 0.7815, "step": 4379 }, { "epoch": 4.9891737891737895, "grad_norm": 0.18198640644550323, "learning_rate": 4.4359115959517426e-05, "loss": 0.6887, "step": 4380 }, { "epoch": 4.99031339031339, "grad_norm": 0.17018204927444458, "learning_rate": 4.435616757491118e-05, "loss": 0.8557, "step": 4381 }, { "epoch": 4.9914529914529915, "grad_norm": 0.15465298295021057, "learning_rate": 4.435321851801201e-05, "loss": 0.8588, "step": 4382 }, { "epoch": 4.992592592592593, "grad_norm": 0.18045377731323242, "learning_rate": 4.435026878892233e-05, "loss": 0.6417, "step": 4383 }, { "epoch": 4.9937321937321935, "grad_norm": 0.16702929139137268, "learning_rate": 4.43473183877446e-05, "loss": 0.8219, "step": 4384 }, { "epoch": 4.994871794871795, "grad_norm": 0.18059112131595612, "learning_rate": 4.4344367314581306e-05, "loss": 0.8059, "step": 4385 }, { "epoch": 4.996011396011396, "grad_norm": 0.14532163739204407, "learning_rate": 4.434141556953493e-05, "loss": 0.8521, "step": 4386 }, { "epoch": 4.997150997150997, "grad_norm": 0.180996835231781, "learning_rate": 4.433846315270801e-05, "loss": 0.8699, "step": 4387 }, { "epoch": 4.998290598290598, "grad_norm": 0.19515706598758698, "learning_rate": 4.433551006420308e-05, "loss": 0.6812, "step": 4388 }, { "epoch": 4.9994301994302, "grad_norm": 0.18740420043468475, "learning_rate": 4.433255630412271e-05, "loss": 0.7503, "step": 4389 }, { "epoch": 5.0, "grad_norm": 0.2839340567588806, "learning_rate": 4.432960187256949e-05, "loss": 1.0157, "step": 4390 }, { "epoch": 5.001139601139601, "grad_norm": 0.18848787248134613, "learning_rate": 4.4326646769646055e-05, "loss": 0.7713, "step": 4391 }, { "epoch": 5.002279202279202, "grad_norm": 0.19647054374217987, "learning_rate": 4.432369099545502e-05, "loss": 0.734, "step": 4392 }, { "epoch": 5.003418803418803, "grad_norm": 0.14491033554077148, "learning_rate": 4.432073455009905e-05, "loss": 0.7236, "step": 4393 }, { "epoch": 5.004558404558405, "grad_norm": 0.16267147660255432, "learning_rate": 4.431777743368085e-05, "loss": 1.0218, "step": 4394 }, { "epoch": 5.005698005698005, "grad_norm": 0.1726360023021698, "learning_rate": 4.431481964630311e-05, "loss": 0.7848, "step": 4395 }, { "epoch": 5.006837606837607, "grad_norm": 0.16114525496959686, "learning_rate": 4.431186118806857e-05, "loss": 0.9004, "step": 4396 }, { "epoch": 5.007977207977208, "grad_norm": 0.161496102809906, "learning_rate": 4.4308902059079976e-05, "loss": 0.9281, "step": 4397 }, { "epoch": 5.009116809116809, "grad_norm": 0.16877681016921997, "learning_rate": 4.4305942259440114e-05, "loss": 0.9488, "step": 4398 }, { "epoch": 5.01025641025641, "grad_norm": 0.20223818719387054, "learning_rate": 4.4302981789251796e-05, "loss": 0.6802, "step": 4399 }, { "epoch": 5.011396011396012, "grad_norm": 0.16235707700252533, "learning_rate": 4.430002064861783e-05, "loss": 0.7991, "step": 4400 }, { "epoch": 5.012535612535612, "grad_norm": 0.18385708332061768, "learning_rate": 4.429705883764107e-05, "loss": 0.5843, "step": 4401 }, { "epoch": 5.013675213675214, "grad_norm": 0.17766490578651428, "learning_rate": 4.42940963564244e-05, "loss": 0.668, "step": 4402 }, { "epoch": 5.014814814814815, "grad_norm": 0.16907937824726105, "learning_rate": 4.429113320507069e-05, "loss": 0.7932, "step": 4403 }, { "epoch": 5.015954415954416, "grad_norm": 0.1943625956773758, "learning_rate": 4.428816938368288e-05, "loss": 0.7006, "step": 4404 }, { "epoch": 5.017094017094017, "grad_norm": 0.14672493934631348, "learning_rate": 4.4285204892363906e-05, "loss": 0.8476, "step": 4405 }, { "epoch": 5.0182336182336185, "grad_norm": 0.14297886192798615, "learning_rate": 4.428223973121673e-05, "loss": 1.0138, "step": 4406 }, { "epoch": 5.019373219373219, "grad_norm": 0.1751715987920761, "learning_rate": 4.427927390034434e-05, "loss": 0.7919, "step": 4407 }, { "epoch": 5.02051282051282, "grad_norm": 0.18038953840732574, "learning_rate": 4.427630739984975e-05, "loss": 0.802, "step": 4408 }, { "epoch": 5.021652421652422, "grad_norm": 0.1847195029258728, "learning_rate": 4.4273340229836e-05, "loss": 0.7227, "step": 4409 }, { "epoch": 5.022792022792022, "grad_norm": 0.22509942948818207, "learning_rate": 4.427037239040613e-05, "loss": 0.6507, "step": 4410 }, { "epoch": 5.023931623931624, "grad_norm": 0.1595267504453659, "learning_rate": 4.426740388166325e-05, "loss": 0.8574, "step": 4411 }, { "epoch": 5.025071225071225, "grad_norm": 0.2153182029724121, "learning_rate": 4.4264434703710435e-05, "loss": 0.6417, "step": 4412 }, { "epoch": 5.026210826210826, "grad_norm": 0.16790950298309326, "learning_rate": 4.426146485665083e-05, "loss": 0.8256, "step": 4413 }, { "epoch": 5.027350427350427, "grad_norm": 0.14109621942043304, "learning_rate": 4.425849434058758e-05, "loss": 0.8758, "step": 4414 }, { "epoch": 5.028490028490029, "grad_norm": 0.1420576125383377, "learning_rate": 4.425552315562386e-05, "loss": 0.7635, "step": 4415 }, { "epoch": 5.029629629629629, "grad_norm": 0.21221227943897247, "learning_rate": 4.425255130186287e-05, "loss": 0.6607, "step": 4416 }, { "epoch": 5.030769230769231, "grad_norm": 0.19328977167606354, "learning_rate": 4.424957877940782e-05, "loss": 0.5981, "step": 4417 }, { "epoch": 5.031908831908832, "grad_norm": 0.15180927515029907, "learning_rate": 4.4246605588361965e-05, "loss": 0.863, "step": 4418 }, { "epoch": 5.033048433048433, "grad_norm": 0.1497860550880432, "learning_rate": 4.424363172882858e-05, "loss": 0.7794, "step": 4419 }, { "epoch": 5.034188034188034, "grad_norm": 0.18027135729789734, "learning_rate": 4.424065720091094e-05, "loss": 0.7392, "step": 4420 }, { "epoch": 5.0353276353276355, "grad_norm": 0.17033274471759796, "learning_rate": 4.423768200471236e-05, "loss": 0.7714, "step": 4421 }, { "epoch": 5.036467236467236, "grad_norm": 0.16878366470336914, "learning_rate": 4.4234706140336176e-05, "loss": 0.7275, "step": 4422 }, { "epoch": 5.0376068376068375, "grad_norm": 0.13580316305160522, "learning_rate": 4.423172960788576e-05, "loss": 0.8921, "step": 4423 }, { "epoch": 5.038746438746439, "grad_norm": 0.17923130095005035, "learning_rate": 4.422875240746448e-05, "loss": 0.7384, "step": 4424 }, { "epoch": 5.0398860398860394, "grad_norm": 0.18059013783931732, "learning_rate": 4.422577453917576e-05, "loss": 0.8264, "step": 4425 }, { "epoch": 5.041025641025641, "grad_norm": 0.17888416349887848, "learning_rate": 4.422279600312301e-05, "loss": 0.6604, "step": 4426 }, { "epoch": 5.042165242165242, "grad_norm": 0.13452483713626862, "learning_rate": 4.4219816799409696e-05, "loss": 1.0043, "step": 4427 }, { "epoch": 5.043304843304844, "grad_norm": 0.17872373759746552, "learning_rate": 4.421683692813929e-05, "loss": 0.7793, "step": 4428 }, { "epoch": 5.044444444444444, "grad_norm": 0.19449970126152039, "learning_rate": 4.42138563894153e-05, "loss": 0.6775, "step": 4429 }, { "epoch": 5.045584045584046, "grad_norm": 0.17247280478477478, "learning_rate": 4.421087518334123e-05, "loss": 0.8989, "step": 4430 }, { "epoch": 5.046723646723647, "grad_norm": 0.16216397285461426, "learning_rate": 4.420789331002063e-05, "loss": 0.6729, "step": 4431 }, { "epoch": 5.047863247863248, "grad_norm": 0.17316816747188568, "learning_rate": 4.4204910769557075e-05, "loss": 0.7585, "step": 4432 }, { "epoch": 5.049002849002849, "grad_norm": 0.2130867838859558, "learning_rate": 4.4201927562054155e-05, "loss": 0.661, "step": 4433 }, { "epoch": 5.050142450142451, "grad_norm": 0.17586174607276917, "learning_rate": 4.419894368761549e-05, "loss": 0.6849, "step": 4434 }, { "epoch": 5.051282051282051, "grad_norm": 0.16294056177139282, "learning_rate": 4.419595914634471e-05, "loss": 0.7675, "step": 4435 }, { "epoch": 5.0524216524216525, "grad_norm": 0.147496297955513, "learning_rate": 4.4192973938345486e-05, "loss": 0.7813, "step": 4436 }, { "epoch": 5.053561253561254, "grad_norm": 0.15688002109527588, "learning_rate": 4.418998806372149e-05, "loss": 0.817, "step": 4437 }, { "epoch": 5.0547008547008545, "grad_norm": 0.14161467552185059, "learning_rate": 4.4187001522576447e-05, "loss": 0.8762, "step": 4438 }, { "epoch": 5.055840455840456, "grad_norm": 0.15354563295841217, "learning_rate": 4.418401431501407e-05, "loss": 0.8639, "step": 4439 }, { "epoch": 5.056980056980057, "grad_norm": 0.18183383345603943, "learning_rate": 4.418102644113811e-05, "loss": 0.7557, "step": 4440 }, { "epoch": 5.058119658119658, "grad_norm": 0.17570596933364868, "learning_rate": 4.417803790105236e-05, "loss": 0.8905, "step": 4441 }, { "epoch": 5.059259259259259, "grad_norm": 0.1544586569070816, "learning_rate": 4.417504869486061e-05, "loss": 0.7511, "step": 4442 }, { "epoch": 5.060398860398861, "grad_norm": 0.19254599511623383, "learning_rate": 4.41720588226667e-05, "loss": 0.8275, "step": 4443 }, { "epoch": 5.061538461538461, "grad_norm": 0.18347801268100739, "learning_rate": 4.416906828457446e-05, "loss": 0.6592, "step": 4444 }, { "epoch": 5.062678062678063, "grad_norm": 0.18191008269786835, "learning_rate": 4.416607708068775e-05, "loss": 0.7512, "step": 4445 }, { "epoch": 5.063817663817664, "grad_norm": 0.15872415900230408, "learning_rate": 4.416308521111049e-05, "loss": 0.9564, "step": 4446 }, { "epoch": 5.064957264957265, "grad_norm": 0.18554766476154327, "learning_rate": 4.416009267594657e-05, "loss": 0.7825, "step": 4447 }, { "epoch": 5.066096866096866, "grad_norm": 0.1563161313533783, "learning_rate": 4.415709947529995e-05, "loss": 0.795, "step": 4448 }, { "epoch": 5.067236467236468, "grad_norm": 0.14149203896522522, "learning_rate": 4.4154105609274585e-05, "loss": 0.9252, "step": 4449 }, { "epoch": 5.068376068376068, "grad_norm": 0.16730783879756927, "learning_rate": 4.415111107797445e-05, "loss": 0.6496, "step": 4450 }, { "epoch": 5.06951566951567, "grad_norm": 0.17725302278995514, "learning_rate": 4.414811588150357e-05, "loss": 0.8285, "step": 4451 }, { "epoch": 5.070655270655271, "grad_norm": 0.1912432610988617, "learning_rate": 4.414512001996596e-05, "loss": 0.7049, "step": 4452 }, { "epoch": 5.0717948717948715, "grad_norm": 0.13255245983600616, "learning_rate": 4.414212349346569e-05, "loss": 1.0307, "step": 4453 }, { "epoch": 5.072934472934473, "grad_norm": 0.18408933281898499, "learning_rate": 4.413912630210683e-05, "loss": 0.7833, "step": 4454 }, { "epoch": 5.074074074074074, "grad_norm": 0.1737062782049179, "learning_rate": 4.413612844599347e-05, "loss": 0.7166, "step": 4455 }, { "epoch": 5.075213675213675, "grad_norm": 0.1808139979839325, "learning_rate": 4.4133129925229754e-05, "loss": 0.8399, "step": 4456 }, { "epoch": 5.076353276353276, "grad_norm": 0.15159174799919128, "learning_rate": 4.413013073991982e-05, "loss": 0.9164, "step": 4457 }, { "epoch": 5.077492877492878, "grad_norm": 0.1934349536895752, "learning_rate": 4.412713089016783e-05, "loss": 0.7688, "step": 4458 }, { "epoch": 5.078632478632478, "grad_norm": 0.1845472753047943, "learning_rate": 4.412413037607799e-05, "loss": 0.7111, "step": 4459 }, { "epoch": 5.07977207977208, "grad_norm": 0.19052857160568237, "learning_rate": 4.4121129197754505e-05, "loss": 0.644, "step": 4460 }, { "epoch": 5.080911680911681, "grad_norm": 0.1481504887342453, "learning_rate": 4.4118127355301625e-05, "loss": 0.8329, "step": 4461 }, { "epoch": 5.082051282051282, "grad_norm": 0.16264614462852478, "learning_rate": 4.411512484882361e-05, "loss": 0.7063, "step": 4462 }, { "epoch": 5.083190883190883, "grad_norm": 0.15884508192539215, "learning_rate": 4.4112121678424735e-05, "loss": 0.9177, "step": 4463 }, { "epoch": 5.084330484330485, "grad_norm": 0.19420745968818665, "learning_rate": 4.410911784420931e-05, "loss": 0.7955, "step": 4464 }, { "epoch": 5.085470085470085, "grad_norm": 0.13226862251758575, "learning_rate": 4.410611334628169e-05, "loss": 1.0122, "step": 4465 }, { "epoch": 5.086609686609687, "grad_norm": 0.19133615493774414, "learning_rate": 4.4103108184746203e-05, "loss": 0.6311, "step": 4466 }, { "epoch": 5.087749287749288, "grad_norm": 0.19149358570575714, "learning_rate": 4.410010235970723e-05, "loss": 0.7162, "step": 4467 }, { "epoch": 5.088888888888889, "grad_norm": 0.17075753211975098, "learning_rate": 4.409709587126918e-05, "loss": 0.8938, "step": 4468 }, { "epoch": 5.09002849002849, "grad_norm": 0.1778605431318283, "learning_rate": 4.409408871953647e-05, "loss": 0.8742, "step": 4469 }, { "epoch": 5.091168091168091, "grad_norm": 0.14712361991405487, "learning_rate": 4.4091080904613557e-05, "loss": 0.9194, "step": 4470 }, { "epoch": 5.092307692307692, "grad_norm": 0.18350447714328766, "learning_rate": 4.40880724266049e-05, "loss": 0.7872, "step": 4471 }, { "epoch": 5.093447293447293, "grad_norm": 0.17689688503742218, "learning_rate": 4.408506328561499e-05, "loss": 0.7369, "step": 4472 }, { "epoch": 5.094586894586895, "grad_norm": 0.15608465671539307, "learning_rate": 4.4082053481748356e-05, "loss": 0.8439, "step": 4473 }, { "epoch": 5.095726495726495, "grad_norm": 0.18518562614917755, "learning_rate": 4.407904301510953e-05, "loss": 0.7294, "step": 4474 }, { "epoch": 5.096866096866097, "grad_norm": 0.1898130178451538, "learning_rate": 4.407603188580306e-05, "loss": 0.7844, "step": 4475 }, { "epoch": 5.098005698005698, "grad_norm": 0.19604942202568054, "learning_rate": 4.407302009393356e-05, "loss": 0.772, "step": 4476 }, { "epoch": 5.099145299145299, "grad_norm": 0.166913703083992, "learning_rate": 4.407000763960561e-05, "loss": 0.7377, "step": 4477 }, { "epoch": 5.1002849002849, "grad_norm": 0.1579706072807312, "learning_rate": 4.4066994522923854e-05, "loss": 0.88, "step": 4478 }, { "epoch": 5.101424501424502, "grad_norm": 0.14575885236263275, "learning_rate": 4.406398074399294e-05, "loss": 0.7705, "step": 4479 }, { "epoch": 5.102564102564102, "grad_norm": 0.1716032177209854, "learning_rate": 4.406096630291755e-05, "loss": 0.9158, "step": 4480 }, { "epoch": 5.103703703703704, "grad_norm": 0.19217745959758759, "learning_rate": 4.40579511998024e-05, "loss": 0.6437, "step": 4481 }, { "epoch": 5.104843304843305, "grad_norm": 0.1495896875858307, "learning_rate": 4.4054935434752176e-05, "loss": 0.8326, "step": 4482 }, { "epoch": 5.105982905982906, "grad_norm": 0.17360635101795197, "learning_rate": 4.405191900787164e-05, "loss": 0.7865, "step": 4483 }, { "epoch": 5.107122507122507, "grad_norm": 0.17083071172237396, "learning_rate": 4.404890191926557e-05, "loss": 0.9054, "step": 4484 }, { "epoch": 5.1082621082621085, "grad_norm": 0.14887142181396484, "learning_rate": 4.404588416903876e-05, "loss": 0.8747, "step": 4485 }, { "epoch": 5.109401709401709, "grad_norm": 0.1598469316959381, "learning_rate": 4.4042865757296016e-05, "loss": 0.7279, "step": 4486 }, { "epoch": 5.1105413105413104, "grad_norm": 0.19548645615577698, "learning_rate": 4.403984668414216e-05, "loss": 0.8635, "step": 4487 }, { "epoch": 5.111680911680912, "grad_norm": 0.1797143518924713, "learning_rate": 4.403682694968209e-05, "loss": 0.7134, "step": 4488 }, { "epoch": 5.112820512820512, "grad_norm": 0.1726599931716919, "learning_rate": 4.403380655402065e-05, "loss": 0.7458, "step": 4489 }, { "epoch": 5.113960113960114, "grad_norm": 0.18311962485313416, "learning_rate": 4.403078549726277e-05, "loss": 0.7293, "step": 4490 }, { "epoch": 5.115099715099715, "grad_norm": 0.1566498577594757, "learning_rate": 4.4027763779513374e-05, "loss": 0.7851, "step": 4491 }, { "epoch": 5.116239316239316, "grad_norm": 0.1594432145357132, "learning_rate": 4.402474140087742e-05, "loss": 0.8625, "step": 4492 }, { "epoch": 5.117378917378917, "grad_norm": 0.18681517243385315, "learning_rate": 4.402171836145989e-05, "loss": 0.7257, "step": 4493 }, { "epoch": 5.118518518518519, "grad_norm": 0.1775273233652115, "learning_rate": 4.401869466136575e-05, "loss": 0.8234, "step": 4494 }, { "epoch": 5.119658119658119, "grad_norm": 0.19038119912147522, "learning_rate": 4.4015670300700055e-05, "loss": 0.6557, "step": 4495 }, { "epoch": 5.120797720797721, "grad_norm": 0.1493513286113739, "learning_rate": 4.4012645279567835e-05, "loss": 0.8501, "step": 4496 }, { "epoch": 5.121937321937322, "grad_norm": 0.1727079302072525, "learning_rate": 4.400961959807416e-05, "loss": 0.6683, "step": 4497 }, { "epoch": 5.123076923076923, "grad_norm": 0.19639013707637787, "learning_rate": 4.400659325632411e-05, "loss": 0.6409, "step": 4498 }, { "epoch": 5.124216524216524, "grad_norm": 0.16199037432670593, "learning_rate": 4.400356625442282e-05, "loss": 0.9381, "step": 4499 }, { "epoch": 5.1253561253561255, "grad_norm": 0.14642073214054108, "learning_rate": 4.400053859247541e-05, "loss": 0.8541, "step": 4500 }, { "epoch": 5.126495726495726, "grad_norm": 0.14070969820022583, "learning_rate": 4.399751027058704e-05, "loss": 0.8146, "step": 4501 }, { "epoch": 5.1276353276353275, "grad_norm": 0.15018360316753387, "learning_rate": 4.3994481288862896e-05, "loss": 0.8614, "step": 4502 }, { "epoch": 5.128774928774929, "grad_norm": 0.16136611998081207, "learning_rate": 4.3991451647408186e-05, "loss": 0.7543, "step": 4503 }, { "epoch": 5.12991452991453, "grad_norm": 0.1738429218530655, "learning_rate": 4.3988421346328134e-05, "loss": 0.719, "step": 4504 }, { "epoch": 5.131054131054131, "grad_norm": 0.14292465150356293, "learning_rate": 4.398539038572799e-05, "loss": 0.7822, "step": 4505 }, { "epoch": 5.132193732193732, "grad_norm": 0.18575257062911987, "learning_rate": 4.398235876571302e-05, "loss": 0.5717, "step": 4506 }, { "epoch": 5.133333333333334, "grad_norm": 0.17044419050216675, "learning_rate": 4.3979326486388536e-05, "loss": 0.6839, "step": 4507 }, { "epoch": 5.134472934472934, "grad_norm": 0.15873126685619354, "learning_rate": 4.397629354785985e-05, "loss": 0.7823, "step": 4508 }, { "epoch": 5.135612535612536, "grad_norm": 0.17624619603157043, "learning_rate": 4.39732599502323e-05, "loss": 0.8858, "step": 4509 }, { "epoch": 5.136752136752137, "grad_norm": 0.20759175717830658, "learning_rate": 4.3970225693611255e-05, "loss": 0.7144, "step": 4510 }, { "epoch": 5.137891737891738, "grad_norm": 0.16185207664966583, "learning_rate": 4.3967190778102105e-05, "loss": 0.9244, "step": 4511 }, { "epoch": 5.139031339031339, "grad_norm": 0.15253710746765137, "learning_rate": 4.396415520381027e-05, "loss": 0.9152, "step": 4512 }, { "epoch": 5.140170940170941, "grad_norm": 0.17518797516822815, "learning_rate": 4.3961118970841155e-05, "loss": 0.8687, "step": 4513 }, { "epoch": 5.141310541310541, "grad_norm": 0.19038769602775574, "learning_rate": 4.395808207930025e-05, "loss": 0.8081, "step": 4514 }, { "epoch": 5.1424501424501425, "grad_norm": 0.16466586291790009, "learning_rate": 4.3955044529293e-05, "loss": 0.879, "step": 4515 }, { "epoch": 5.143589743589744, "grad_norm": 0.16392403841018677, "learning_rate": 4.3952006320924946e-05, "loss": 0.6409, "step": 4516 }, { "epoch": 5.1447293447293445, "grad_norm": 0.16581577062606812, "learning_rate": 4.394896745430158e-05, "loss": 0.9237, "step": 4517 }, { "epoch": 5.145868945868946, "grad_norm": 0.14342597126960754, "learning_rate": 4.394592792952848e-05, "loss": 0.9664, "step": 4518 }, { "epoch": 5.147008547008547, "grad_norm": 0.15031763911247253, "learning_rate": 4.394288774671119e-05, "loss": 0.7163, "step": 4519 }, { "epoch": 5.148148148148148, "grad_norm": 0.1617755889892578, "learning_rate": 4.393984690595532e-05, "loss": 0.8548, "step": 4520 }, { "epoch": 5.149287749287749, "grad_norm": 0.1638329178094864, "learning_rate": 4.393680540736648e-05, "loss": 0.9415, "step": 4521 }, { "epoch": 5.150427350427351, "grad_norm": 0.24136395752429962, "learning_rate": 4.393376325105031e-05, "loss": 0.5447, "step": 4522 }, { "epoch": 5.151566951566951, "grad_norm": 0.14918941259384155, "learning_rate": 4.3930720437112475e-05, "loss": 0.8344, "step": 4523 }, { "epoch": 5.152706552706553, "grad_norm": 0.14339643716812134, "learning_rate": 4.392767696565866e-05, "loss": 0.8365, "step": 4524 }, { "epoch": 5.153846153846154, "grad_norm": 0.14718779921531677, "learning_rate": 4.392463283679458e-05, "loss": 0.8365, "step": 4525 }, { "epoch": 5.154985754985755, "grad_norm": 0.16732938587665558, "learning_rate": 4.392158805062595e-05, "loss": 0.6836, "step": 4526 }, { "epoch": 5.156125356125356, "grad_norm": 0.18390381336212158, "learning_rate": 4.391854260725854e-05, "loss": 0.5595, "step": 4527 }, { "epoch": 5.157264957264958, "grad_norm": 0.16088280081748962, "learning_rate": 4.391549650679811e-05, "loss": 0.775, "step": 4528 }, { "epoch": 5.158404558404558, "grad_norm": 0.15926873683929443, "learning_rate": 4.391244974935047e-05, "loss": 0.7788, "step": 4529 }, { "epoch": 5.15954415954416, "grad_norm": 0.18737871944904327, "learning_rate": 4.390940233502144e-05, "loss": 0.8515, "step": 4530 }, { "epoch": 5.160683760683761, "grad_norm": 0.16173994541168213, "learning_rate": 4.390635426391688e-05, "loss": 0.929, "step": 4531 }, { "epoch": 5.1618233618233615, "grad_norm": 0.14647237956523895, "learning_rate": 4.3903305536142634e-05, "loss": 0.9413, "step": 4532 }, { "epoch": 5.162962962962963, "grad_norm": 0.19253405928611755, "learning_rate": 4.39002561518046e-05, "loss": 0.6882, "step": 4533 }, { "epoch": 5.164102564102564, "grad_norm": 0.15888677537441254, "learning_rate": 4.3897206111008696e-05, "loss": 0.8317, "step": 4534 }, { "epoch": 5.165242165242165, "grad_norm": 0.1983802765607834, "learning_rate": 4.389415541386085e-05, "loss": 0.8205, "step": 4535 }, { "epoch": 5.166381766381766, "grad_norm": 0.2104257345199585, "learning_rate": 4.3891104060467034e-05, "loss": 0.6703, "step": 4536 }, { "epoch": 5.167521367521368, "grad_norm": 0.20032526552677155, "learning_rate": 4.388805205093323e-05, "loss": 0.6861, "step": 4537 }, { "epoch": 5.168660968660968, "grad_norm": 0.1552596539258957, "learning_rate": 4.388499938536542e-05, "loss": 0.97, "step": 4538 }, { "epoch": 5.16980056980057, "grad_norm": 0.1724402755498886, "learning_rate": 4.3881946063869655e-05, "loss": 1.0131, "step": 4539 }, { "epoch": 5.170940170940171, "grad_norm": 0.15482792258262634, "learning_rate": 4.3878892086551984e-05, "loss": 0.7879, "step": 4540 }, { "epoch": 5.172079772079772, "grad_norm": 0.20186127722263336, "learning_rate": 4.387583745351847e-05, "loss": 0.5108, "step": 4541 }, { "epoch": 5.173219373219373, "grad_norm": 0.1395871937274933, "learning_rate": 4.38727821648752e-05, "loss": 0.8954, "step": 4542 }, { "epoch": 5.174358974358975, "grad_norm": 0.18268820643424988, "learning_rate": 4.386972622072832e-05, "loss": 0.7588, "step": 4543 }, { "epoch": 5.175498575498575, "grad_norm": 0.16029301285743713, "learning_rate": 4.386666962118395e-05, "loss": 0.9042, "step": 4544 }, { "epoch": 5.176638176638177, "grad_norm": 0.17887267470359802, "learning_rate": 4.386361236634826e-05, "loss": 0.9031, "step": 4545 }, { "epoch": 5.177777777777778, "grad_norm": 0.17668810486793518, "learning_rate": 4.386055445632744e-05, "loss": 0.7987, "step": 4546 }, { "epoch": 5.178917378917379, "grad_norm": 0.17249874770641327, "learning_rate": 4.38574958912277e-05, "loss": 0.7989, "step": 4547 }, { "epoch": 5.18005698005698, "grad_norm": 0.18002480268478394, "learning_rate": 4.3854436671155267e-05, "loss": 0.8006, "step": 4548 }, { "epoch": 5.181196581196581, "grad_norm": 0.16322128474712372, "learning_rate": 4.38513767962164e-05, "loss": 0.7371, "step": 4549 }, { "epoch": 5.182336182336182, "grad_norm": 0.1998603790998459, "learning_rate": 4.384831626651737e-05, "loss": 0.7077, "step": 4550 }, { "epoch": 5.183475783475783, "grad_norm": 0.18108196556568146, "learning_rate": 4.384525508216448e-05, "loss": 0.77, "step": 4551 }, { "epoch": 5.184615384615385, "grad_norm": 0.18413852155208588, "learning_rate": 4.384219324326407e-05, "loss": 0.6835, "step": 4552 }, { "epoch": 5.185754985754985, "grad_norm": 0.16383302211761475, "learning_rate": 4.3839130749922464e-05, "loss": 0.755, "step": 4553 }, { "epoch": 5.186894586894587, "grad_norm": 0.18220475316047668, "learning_rate": 4.383606760224604e-05, "loss": 0.6952, "step": 4554 }, { "epoch": 5.188034188034188, "grad_norm": 0.1568390280008316, "learning_rate": 4.383300380034119e-05, "loss": 0.8938, "step": 4555 }, { "epoch": 5.189173789173789, "grad_norm": 0.17869746685028076, "learning_rate": 4.3829939344314324e-05, "loss": 0.6754, "step": 4556 }, { "epoch": 5.19031339031339, "grad_norm": 0.15833452343940735, "learning_rate": 4.3826874234271886e-05, "loss": 0.7728, "step": 4557 }, { "epoch": 5.191452991452992, "grad_norm": 0.17817965149879456, "learning_rate": 4.382380847032032e-05, "loss": 0.7638, "step": 4558 }, { "epoch": 5.192592592592592, "grad_norm": 0.1733039915561676, "learning_rate": 4.382074205256613e-05, "loss": 0.7526, "step": 4559 }, { "epoch": 5.193732193732194, "grad_norm": 0.19883273541927338, "learning_rate": 4.381767498111582e-05, "loss": 0.768, "step": 4560 }, { "epoch": 5.194871794871795, "grad_norm": 0.20844820141792297, "learning_rate": 4.38146072560759e-05, "loss": 0.564, "step": 4561 }, { "epoch": 5.196011396011396, "grad_norm": 0.1791890263557434, "learning_rate": 4.381153887755293e-05, "loss": 0.7737, "step": 4562 }, { "epoch": 5.197150997150997, "grad_norm": 0.17866100370883942, "learning_rate": 4.3808469845653484e-05, "loss": 0.8071, "step": 4563 }, { "epoch": 5.1982905982905985, "grad_norm": 0.1694842129945755, "learning_rate": 4.3805400160484154e-05, "loss": 0.7473, "step": 4564 }, { "epoch": 5.199430199430199, "grad_norm": 0.1864929050207138, "learning_rate": 4.380232982215156e-05, "loss": 0.7087, "step": 4565 }, { "epoch": 5.2005698005698004, "grad_norm": 0.20659984648227692, "learning_rate": 4.379925883076234e-05, "loss": 0.6174, "step": 4566 }, { "epoch": 5.201709401709402, "grad_norm": 0.17501875758171082, "learning_rate": 4.379618718642317e-05, "loss": 0.7386, "step": 4567 }, { "epoch": 5.202849002849002, "grad_norm": 0.1897255778312683, "learning_rate": 4.379311488924074e-05, "loss": 0.748, "step": 4568 }, { "epoch": 5.203988603988604, "grad_norm": 0.1660880595445633, "learning_rate": 4.379004193932174e-05, "loss": 0.8363, "step": 4569 }, { "epoch": 5.205128205128205, "grad_norm": 0.16657590866088867, "learning_rate": 4.3786968336772905e-05, "loss": 0.749, "step": 4570 }, { "epoch": 5.206267806267807, "grad_norm": 0.24494710564613342, "learning_rate": 4.3783894081701e-05, "loss": 0.5043, "step": 4571 }, { "epoch": 5.207407407407407, "grad_norm": 0.1571018099784851, "learning_rate": 4.3780819174212795e-05, "loss": 0.9156, "step": 4572 }, { "epoch": 5.208547008547009, "grad_norm": 0.19126367568969727, "learning_rate": 4.37777436144151e-05, "loss": 0.7136, "step": 4573 }, { "epoch": 5.20968660968661, "grad_norm": 0.14824849367141724, "learning_rate": 4.3774667402414724e-05, "loss": 0.8578, "step": 4574 }, { "epoch": 5.210826210826211, "grad_norm": 0.16744691133499146, "learning_rate": 4.377159053831852e-05, "loss": 0.8394, "step": 4575 }, { "epoch": 5.211965811965812, "grad_norm": 0.17194165289402008, "learning_rate": 4.376851302223335e-05, "loss": 0.7374, "step": 4576 }, { "epoch": 5.2131054131054135, "grad_norm": 0.1519736796617508, "learning_rate": 4.3765434854266106e-05, "loss": 0.9542, "step": 4577 }, { "epoch": 5.214245014245014, "grad_norm": 0.20746491849422455, "learning_rate": 4.3762356034523714e-05, "loss": 0.6339, "step": 4578 }, { "epoch": 5.2153846153846155, "grad_norm": 0.17351919412612915, "learning_rate": 4.375927656311311e-05, "loss": 0.7858, "step": 4579 }, { "epoch": 5.216524216524217, "grad_norm": 0.15151087939739227, "learning_rate": 4.375619644014122e-05, "loss": 0.8794, "step": 4580 }, { "epoch": 5.2176638176638175, "grad_norm": 0.1891581267118454, "learning_rate": 4.375311566571506e-05, "loss": 0.7059, "step": 4581 }, { "epoch": 5.218803418803419, "grad_norm": 0.15533500909805298, "learning_rate": 4.375003423994162e-05, "loss": 0.9234, "step": 4582 }, { "epoch": 5.21994301994302, "grad_norm": 0.21276049315929413, "learning_rate": 4.374695216292792e-05, "loss": 0.5214, "step": 4583 }, { "epoch": 5.221082621082621, "grad_norm": 0.19438743591308594, "learning_rate": 4.374386943478103e-05, "loss": 0.7119, "step": 4584 }, { "epoch": 5.222222222222222, "grad_norm": 0.16696999967098236, "learning_rate": 4.3740786055608006e-05, "loss": 0.9642, "step": 4585 }, { "epoch": 5.223361823361824, "grad_norm": 0.17279542982578278, "learning_rate": 4.373770202551594e-05, "loss": 0.8477, "step": 4586 }, { "epoch": 5.224501424501424, "grad_norm": 0.1531403511762619, "learning_rate": 4.373461734461195e-05, "loss": 0.7827, "step": 4587 }, { "epoch": 5.225641025641026, "grad_norm": 0.1426958590745926, "learning_rate": 4.3731532013003196e-05, "loss": 0.8732, "step": 4588 }, { "epoch": 5.226780626780627, "grad_norm": 0.1578754186630249, "learning_rate": 4.372844603079681e-05, "loss": 0.9706, "step": 4589 }, { "epoch": 5.227920227920228, "grad_norm": 0.1918063908815384, "learning_rate": 4.372535939809999e-05, "loss": 0.6471, "step": 4590 }, { "epoch": 5.229059829059829, "grad_norm": 0.18620924651622772, "learning_rate": 4.372227211501995e-05, "loss": 0.7427, "step": 4591 }, { "epoch": 5.230199430199431, "grad_norm": 0.16538360714912415, "learning_rate": 4.371918418166391e-05, "loss": 0.6797, "step": 4592 }, { "epoch": 5.231339031339031, "grad_norm": 0.1841302365064621, "learning_rate": 4.3716095598139125e-05, "loss": 0.5963, "step": 4593 }, { "epoch": 5.2324786324786325, "grad_norm": 0.15923799574375153, "learning_rate": 4.3713006364552866e-05, "loss": 0.8225, "step": 4594 }, { "epoch": 5.233618233618234, "grad_norm": 0.1487402468919754, "learning_rate": 4.370991648101244e-05, "loss": 0.9186, "step": 4595 }, { "epoch": 5.2347578347578345, "grad_norm": 0.16196595132350922, "learning_rate": 4.3706825947625165e-05, "loss": 0.8662, "step": 4596 }, { "epoch": 5.235897435897436, "grad_norm": 0.14755144715309143, "learning_rate": 4.3703734764498375e-05, "loss": 0.9241, "step": 4597 }, { "epoch": 5.237037037037037, "grad_norm": 0.16342835128307343, "learning_rate": 4.370064293173944e-05, "loss": 0.8717, "step": 4598 }, { "epoch": 5.238176638176638, "grad_norm": 0.17479750514030457, "learning_rate": 4.369755044945575e-05, "loss": 0.7508, "step": 4599 }, { "epoch": 5.239316239316239, "grad_norm": 0.12696468830108643, "learning_rate": 4.3694457317754716e-05, "loss": 0.8177, "step": 4600 }, { "epoch": 5.240455840455841, "grad_norm": 0.211741104722023, "learning_rate": 4.369136353674377e-05, "loss": 0.6509, "step": 4601 }, { "epoch": 5.241595441595441, "grad_norm": 0.1574597805738449, "learning_rate": 4.3688269106530366e-05, "loss": 0.8251, "step": 4602 }, { "epoch": 5.242735042735043, "grad_norm": 0.17478220164775848, "learning_rate": 4.368517402722199e-05, "loss": 0.7645, "step": 4603 }, { "epoch": 5.243874643874644, "grad_norm": 0.16124558448791504, "learning_rate": 4.368207829892612e-05, "loss": 0.8453, "step": 4604 }, { "epoch": 5.245014245014245, "grad_norm": 0.16364283859729767, "learning_rate": 4.367898192175031e-05, "loss": 0.7686, "step": 4605 }, { "epoch": 5.246153846153846, "grad_norm": 0.1508834809064865, "learning_rate": 4.367588489580208e-05, "loss": 0.8383, "step": 4606 }, { "epoch": 5.247293447293448, "grad_norm": 0.14692118763923645, "learning_rate": 4.367278722118902e-05, "loss": 0.7528, "step": 4607 }, { "epoch": 5.248433048433048, "grad_norm": 0.18287882208824158, "learning_rate": 4.366968889801869e-05, "loss": 0.6852, "step": 4608 }, { "epoch": 5.24957264957265, "grad_norm": 0.15826521813869476, "learning_rate": 4.3666589926398745e-05, "loss": 0.6778, "step": 4609 }, { "epoch": 5.250712250712251, "grad_norm": 0.1535518318414688, "learning_rate": 4.366349030643679e-05, "loss": 0.7719, "step": 4610 }, { "epoch": 5.2518518518518515, "grad_norm": 0.16476589441299438, "learning_rate": 4.366039003824049e-05, "loss": 0.8829, "step": 4611 }, { "epoch": 5.252991452991453, "grad_norm": 0.20952336490154266, "learning_rate": 4.365728912191752e-05, "loss": 0.6691, "step": 4612 }, { "epoch": 5.254131054131054, "grad_norm": 0.13219039142131805, "learning_rate": 4.36541875575756e-05, "loss": 0.8266, "step": 4613 }, { "epoch": 5.255270655270655, "grad_norm": 0.19767548143863678, "learning_rate": 4.365108534532245e-05, "loss": 0.736, "step": 4614 }, { "epoch": 5.256410256410256, "grad_norm": 0.1959148496389389, "learning_rate": 4.3647982485265816e-05, "loss": 0.7052, "step": 4615 }, { "epoch": 5.257549857549858, "grad_norm": 0.227487713098526, "learning_rate": 4.364487897751346e-05, "loss": 0.6845, "step": 4616 }, { "epoch": 5.258689458689458, "grad_norm": 0.15235817432403564, "learning_rate": 4.364177482217319e-05, "loss": 0.7937, "step": 4617 }, { "epoch": 5.25982905982906, "grad_norm": 0.1789664924144745, "learning_rate": 4.363867001935281e-05, "loss": 0.8004, "step": 4618 }, { "epoch": 5.260968660968661, "grad_norm": 0.1804170459508896, "learning_rate": 4.3635564569160165e-05, "loss": 0.704, "step": 4619 }, { "epoch": 5.262108262108262, "grad_norm": 0.22812865674495697, "learning_rate": 4.363245847170312e-05, "loss": 0.5592, "step": 4620 }, { "epoch": 5.263247863247863, "grad_norm": 0.1686326116323471, "learning_rate": 4.362935172708954e-05, "loss": 0.8674, "step": 4621 }, { "epoch": 5.264387464387465, "grad_norm": 0.20418672263622284, "learning_rate": 4.362624433542736e-05, "loss": 0.6473, "step": 4622 }, { "epoch": 5.265527065527065, "grad_norm": 0.18240760266780853, "learning_rate": 4.3623136296824486e-05, "loss": 0.612, "step": 4623 }, { "epoch": 5.266666666666667, "grad_norm": 0.2027430385351181, "learning_rate": 4.362002761138887e-05, "loss": 0.4561, "step": 4624 }, { "epoch": 5.267806267806268, "grad_norm": 0.16104884445667267, "learning_rate": 4.361691827922849e-05, "loss": 0.853, "step": 4625 }, { "epoch": 5.268945868945869, "grad_norm": 0.19175918400287628, "learning_rate": 4.361380830045134e-05, "loss": 0.8272, "step": 4626 }, { "epoch": 5.27008547008547, "grad_norm": 0.1846851408481598, "learning_rate": 4.361069767516545e-05, "loss": 0.7665, "step": 4627 }, { "epoch": 5.2712250712250714, "grad_norm": 1.7687084674835205, "learning_rate": 4.360758640347884e-05, "loss": 0.894, "step": 4628 }, { "epoch": 5.272364672364672, "grad_norm": 0.15231172740459442, "learning_rate": 4.360447448549959e-05, "loss": 0.9358, "step": 4629 }, { "epoch": 5.273504273504273, "grad_norm": 0.16438651084899902, "learning_rate": 4.360136192133577e-05, "loss": 0.6978, "step": 4630 }, { "epoch": 5.274643874643875, "grad_norm": 0.18496210873126984, "learning_rate": 4.35982487110955e-05, "loss": 0.6708, "step": 4631 }, { "epoch": 5.275783475783475, "grad_norm": 0.18087878823280334, "learning_rate": 4.359513485488691e-05, "loss": 0.6296, "step": 4632 }, { "epoch": 5.276923076923077, "grad_norm": 0.15958784520626068, "learning_rate": 4.359202035281815e-05, "loss": 0.887, "step": 4633 }, { "epoch": 5.278062678062678, "grad_norm": 0.14426644146442413, "learning_rate": 4.35889052049974e-05, "loss": 0.7466, "step": 4634 }, { "epoch": 5.279202279202279, "grad_norm": 0.17500649392604828, "learning_rate": 4.358578941153284e-05, "loss": 0.7815, "step": 4635 }, { "epoch": 5.28034188034188, "grad_norm": 0.15551333129405975, "learning_rate": 4.358267297253271e-05, "loss": 0.8358, "step": 4636 }, { "epoch": 5.281481481481482, "grad_norm": 0.18527741730213165, "learning_rate": 4.357955588810524e-05, "loss": 0.7355, "step": 4637 }, { "epoch": 5.282621082621082, "grad_norm": 0.1866128146648407, "learning_rate": 4.357643815835871e-05, "loss": 0.8739, "step": 4638 }, { "epoch": 5.283760683760684, "grad_norm": 0.17814934253692627, "learning_rate": 4.357331978340139e-05, "loss": 0.8963, "step": 4639 }, { "epoch": 5.284900284900285, "grad_norm": 0.17876625061035156, "learning_rate": 4.35702007633416e-05, "loss": 0.7765, "step": 4640 }, { "epoch": 5.286039886039886, "grad_norm": 0.1523537039756775, "learning_rate": 4.356708109828766e-05, "loss": 0.915, "step": 4641 }, { "epoch": 5.287179487179487, "grad_norm": 0.17447234690189362, "learning_rate": 4.3563960788347946e-05, "loss": 0.775, "step": 4642 }, { "epoch": 5.2883190883190885, "grad_norm": 0.16959306597709656, "learning_rate": 4.356083983363082e-05, "loss": 0.8528, "step": 4643 }, { "epoch": 5.289458689458689, "grad_norm": 0.1531369984149933, "learning_rate": 4.355771823424468e-05, "loss": 0.832, "step": 4644 }, { "epoch": 5.2905982905982905, "grad_norm": 0.1785728633403778, "learning_rate": 4.355459599029795e-05, "loss": 0.7748, "step": 4645 }, { "epoch": 5.291737891737892, "grad_norm": 0.1715983897447586, "learning_rate": 4.3551473101899084e-05, "loss": 0.7818, "step": 4646 }, { "epoch": 5.292877492877492, "grad_norm": 0.1923236846923828, "learning_rate": 4.354834956915653e-05, "loss": 0.7372, "step": 4647 }, { "epoch": 5.294017094017094, "grad_norm": 0.13914509117603302, "learning_rate": 4.3545225392178795e-05, "loss": 0.8796, "step": 4648 }, { "epoch": 5.295156695156695, "grad_norm": 0.19309070706367493, "learning_rate": 4.354210057107438e-05, "loss": 0.7895, "step": 4649 }, { "epoch": 5.296296296296296, "grad_norm": 0.16951633989810944, "learning_rate": 4.3538975105951824e-05, "loss": 0.8277, "step": 4650 }, { "epoch": 5.297435897435897, "grad_norm": 0.1829831600189209, "learning_rate": 4.353584899691968e-05, "loss": 0.7419, "step": 4651 }, { "epoch": 5.298575498575499, "grad_norm": 0.16239981353282928, "learning_rate": 4.353272224408652e-05, "loss": 0.6628, "step": 4652 }, { "epoch": 5.2997150997151, "grad_norm": 0.16405048966407776, "learning_rate": 4.352959484756096e-05, "loss": 0.8218, "step": 4653 }, { "epoch": 5.300854700854701, "grad_norm": 0.1741565465927124, "learning_rate": 4.352646680745162e-05, "loss": 0.657, "step": 4654 }, { "epoch": 5.301994301994302, "grad_norm": 0.17908942699432373, "learning_rate": 4.352333812386712e-05, "loss": 0.6585, "step": 4655 }, { "epoch": 5.3031339031339035, "grad_norm": 0.1700315624475479, "learning_rate": 4.352020879691616e-05, "loss": 0.7889, "step": 4656 }, { "epoch": 5.304273504273504, "grad_norm": 0.15740863978862762, "learning_rate": 4.351707882670741e-05, "loss": 0.8234, "step": 4657 }, { "epoch": 5.3054131054131055, "grad_norm": 0.16824686527252197, "learning_rate": 4.3513948213349596e-05, "loss": 0.8267, "step": 4658 }, { "epoch": 5.306552706552707, "grad_norm": 0.16591288149356842, "learning_rate": 4.351081695695145e-05, "loss": 0.85, "step": 4659 }, { "epoch": 5.3076923076923075, "grad_norm": 0.15642791986465454, "learning_rate": 4.350768505762173e-05, "loss": 0.9855, "step": 4660 }, { "epoch": 5.308831908831909, "grad_norm": 0.19195495545864105, "learning_rate": 4.35045525154692e-05, "loss": 0.6258, "step": 4661 }, { "epoch": 5.30997150997151, "grad_norm": 0.1710425615310669, "learning_rate": 4.350141933060268e-05, "loss": 0.8954, "step": 4662 }, { "epoch": 5.311111111111111, "grad_norm": 0.16812683641910553, "learning_rate": 4.349828550313098e-05, "loss": 0.8735, "step": 4663 }, { "epoch": 5.312250712250712, "grad_norm": 0.1785348802804947, "learning_rate": 4.349515103316296e-05, "loss": 0.9271, "step": 4664 }, { "epoch": 5.313390313390314, "grad_norm": 0.1716974377632141, "learning_rate": 4.349201592080748e-05, "loss": 0.8202, "step": 4665 }, { "epoch": 5.314529914529914, "grad_norm": 0.17207154631614685, "learning_rate": 4.348888016617344e-05, "loss": 0.664, "step": 4666 }, { "epoch": 5.315669515669516, "grad_norm": 0.13805332779884338, "learning_rate": 4.3485743769369744e-05, "loss": 0.9138, "step": 4667 }, { "epoch": 5.316809116809117, "grad_norm": 0.17947430908679962, "learning_rate": 4.348260673050533e-05, "loss": 0.7313, "step": 4668 }, { "epoch": 5.317948717948718, "grad_norm": 0.13484305143356323, "learning_rate": 4.347946904968916e-05, "loss": 0.9164, "step": 4669 }, { "epoch": 5.319088319088319, "grad_norm": 0.15935564041137695, "learning_rate": 4.34763307270302e-05, "loss": 0.7868, "step": 4670 }, { "epoch": 5.320227920227921, "grad_norm": 0.17295128107070923, "learning_rate": 4.3473191762637476e-05, "loss": 0.8532, "step": 4671 }, { "epoch": 5.321367521367521, "grad_norm": 0.1426529437303543, "learning_rate": 4.347005215662e-05, "loss": 0.9426, "step": 4672 }, { "epoch": 5.3225071225071225, "grad_norm": 0.15112797915935516, "learning_rate": 4.346691190908682e-05, "loss": 0.7629, "step": 4673 }, { "epoch": 5.323646723646724, "grad_norm": 0.18486617505550385, "learning_rate": 4.346377102014699e-05, "loss": 0.7134, "step": 4674 }, { "epoch": 5.3247863247863245, "grad_norm": 0.15515485405921936, "learning_rate": 4.3460629489909635e-05, "loss": 0.796, "step": 4675 }, { "epoch": 5.325925925925926, "grad_norm": 0.18211156129837036, "learning_rate": 4.345748731848384e-05, "loss": 0.7756, "step": 4676 }, { "epoch": 5.327065527065527, "grad_norm": 0.18262286484241486, "learning_rate": 4.345434450597876e-05, "loss": 0.8457, "step": 4677 }, { "epoch": 5.328205128205128, "grad_norm": 0.1742621660232544, "learning_rate": 4.3451201052503545e-05, "loss": 0.6978, "step": 4678 }, { "epoch": 5.329344729344729, "grad_norm": 0.18028375506401062, "learning_rate": 4.344805695816737e-05, "loss": 0.7454, "step": 4679 }, { "epoch": 5.330484330484331, "grad_norm": 0.208678737282753, "learning_rate": 4.344491222307945e-05, "loss": 0.6213, "step": 4680 }, { "epoch": 5.331623931623931, "grad_norm": 0.1581195890903473, "learning_rate": 4.3441766847348994e-05, "loss": 0.878, "step": 4681 }, { "epoch": 5.332763532763533, "grad_norm": 0.14497709274291992, "learning_rate": 4.343862083108528e-05, "loss": 0.964, "step": 4682 }, { "epoch": 5.333903133903134, "grad_norm": 0.1877148300409317, "learning_rate": 4.343547417439754e-05, "loss": 0.7496, "step": 4683 }, { "epoch": 5.335042735042735, "grad_norm": 0.17752593755722046, "learning_rate": 4.343232687739509e-05, "loss": 0.668, "step": 4684 }, { "epoch": 5.336182336182336, "grad_norm": 0.14428646862506866, "learning_rate": 4.3429178940187246e-05, "loss": 0.9615, "step": 4685 }, { "epoch": 5.337321937321938, "grad_norm": 0.17201361060142517, "learning_rate": 4.342603036288333e-05, "loss": 0.8144, "step": 4686 }, { "epoch": 5.338461538461538, "grad_norm": 0.22257472574710846, "learning_rate": 4.342288114559271e-05, "loss": 0.5684, "step": 4687 }, { "epoch": 5.33960113960114, "grad_norm": 0.21891720592975616, "learning_rate": 4.341973128842476e-05, "loss": 0.6096, "step": 4688 }, { "epoch": 5.340740740740741, "grad_norm": 0.161695197224617, "learning_rate": 4.341658079148889e-05, "loss": 0.7581, "step": 4689 }, { "epoch": 5.3418803418803416, "grad_norm": 0.19810344278812408, "learning_rate": 4.341342965489453e-05, "loss": 0.6447, "step": 4690 }, { "epoch": 5.343019943019943, "grad_norm": 0.1555669605731964, "learning_rate": 4.3410277878751116e-05, "loss": 0.8272, "step": 4691 }, { "epoch": 5.344159544159544, "grad_norm": 0.16029733419418335, "learning_rate": 4.340712546316812e-05, "loss": 0.8415, "step": 4692 }, { "epoch": 5.345299145299145, "grad_norm": 0.25236281752586365, "learning_rate": 4.340397240825504e-05, "loss": 0.7204, "step": 4693 }, { "epoch": 5.346438746438746, "grad_norm": 0.17049390077590942, "learning_rate": 4.3400818714121384e-05, "loss": 0.9162, "step": 4694 }, { "epoch": 5.347578347578348, "grad_norm": 0.183467298746109, "learning_rate": 4.339766438087669e-05, "loss": 0.7065, "step": 4695 }, { "epoch": 5.348717948717948, "grad_norm": 0.18054646253585815, "learning_rate": 4.3394509408630524e-05, "loss": 0.8164, "step": 4696 }, { "epoch": 5.34985754985755, "grad_norm": 0.14842738211154938, "learning_rate": 4.339135379749246e-05, "loss": 0.753, "step": 4697 }, { "epoch": 5.350997150997151, "grad_norm": 0.1693161129951477, "learning_rate": 4.3388197547572103e-05, "loss": 0.8731, "step": 4698 }, { "epoch": 5.352136752136752, "grad_norm": 0.1440298855304718, "learning_rate": 4.338504065897908e-05, "loss": 0.9219, "step": 4699 }, { "epoch": 5.353276353276353, "grad_norm": 0.16139906644821167, "learning_rate": 4.338188313182303e-05, "loss": 0.8839, "step": 4700 }, { "epoch": 5.354415954415955, "grad_norm": 0.16127200424671173, "learning_rate": 4.337872496621363e-05, "loss": 0.7403, "step": 4701 }, { "epoch": 5.355555555555555, "grad_norm": 0.1390729695558548, "learning_rate": 4.337556616226057e-05, "loss": 0.9944, "step": 4702 }, { "epoch": 5.356695156695157, "grad_norm": 0.14836028218269348, "learning_rate": 4.3372406720073565e-05, "loss": 0.8955, "step": 4703 }, { "epoch": 5.357834757834758, "grad_norm": 0.16672396659851074, "learning_rate": 4.336924663976235e-05, "loss": 0.8626, "step": 4704 }, { "epoch": 5.358974358974359, "grad_norm": 0.20330262184143066, "learning_rate": 4.3366085921436684e-05, "loss": 0.7125, "step": 4705 }, { "epoch": 5.36011396011396, "grad_norm": 0.1828739047050476, "learning_rate": 4.336292456520634e-05, "loss": 0.6565, "step": 4706 }, { "epoch": 5.3612535612535615, "grad_norm": 0.16110581159591675, "learning_rate": 4.335976257118114e-05, "loss": 0.9851, "step": 4707 }, { "epoch": 5.362393162393162, "grad_norm": 0.1568196415901184, "learning_rate": 4.335659993947089e-05, "loss": 0.8132, "step": 4708 }, { "epoch": 5.363532763532763, "grad_norm": 0.14966516196727753, "learning_rate": 4.335343667018544e-05, "loss": 0.8918, "step": 4709 }, { "epoch": 5.364672364672365, "grad_norm": 0.1884099245071411, "learning_rate": 4.3350272763434665e-05, "loss": 0.7622, "step": 4710 }, { "epoch": 5.365811965811965, "grad_norm": 0.1634645015001297, "learning_rate": 4.334710821932846e-05, "loss": 0.8002, "step": 4711 }, { "epoch": 5.366951566951567, "grad_norm": 0.18431422114372253, "learning_rate": 4.334394303797672e-05, "loss": 0.674, "step": 4712 }, { "epoch": 5.368091168091168, "grad_norm": 0.17992058396339417, "learning_rate": 4.3340777219489405e-05, "loss": 0.8785, "step": 4713 }, { "epoch": 5.36923076923077, "grad_norm": 0.1728856861591339, "learning_rate": 4.333761076397646e-05, "loss": 0.8496, "step": 4714 }, { "epoch": 5.37037037037037, "grad_norm": 0.14123381674289703, "learning_rate": 4.3334443671547854e-05, "loss": 0.9924, "step": 4715 }, { "epoch": 5.371509971509972, "grad_norm": 0.1983249932527542, "learning_rate": 4.33312759423136e-05, "loss": 0.7903, "step": 4716 }, { "epoch": 5.372649572649573, "grad_norm": 0.1768140345811844, "learning_rate": 4.3328107576383724e-05, "loss": 0.8515, "step": 4717 }, { "epoch": 5.373789173789174, "grad_norm": 0.19550885260105133, "learning_rate": 4.332493857386827e-05, "loss": 0.547, "step": 4718 }, { "epoch": 5.374928774928775, "grad_norm": 0.1622675657272339, "learning_rate": 4.3321768934877306e-05, "loss": 0.7579, "step": 4719 }, { "epoch": 5.3760683760683765, "grad_norm": 0.16373273730278015, "learning_rate": 4.3318598659520924e-05, "loss": 0.6989, "step": 4720 }, { "epoch": 5.377207977207977, "grad_norm": 0.15886545181274414, "learning_rate": 4.331542774790923e-05, "loss": 0.9148, "step": 4721 }, { "epoch": 5.3783475783475785, "grad_norm": 0.16536206007003784, "learning_rate": 4.331225620015237e-05, "loss": 0.8694, "step": 4722 }, { "epoch": 5.37948717948718, "grad_norm": 0.1420564502477646, "learning_rate": 4.330908401636048e-05, "loss": 1.0271, "step": 4723 }, { "epoch": 5.3806267806267805, "grad_norm": 0.21553678810596466, "learning_rate": 4.330591119664376e-05, "loss": 0.5341, "step": 4724 }, { "epoch": 5.381766381766382, "grad_norm": 0.1756429821252823, "learning_rate": 4.33027377411124e-05, "loss": 0.7417, "step": 4725 }, { "epoch": 5.382905982905983, "grad_norm": 0.1877029687166214, "learning_rate": 4.329956364987663e-05, "loss": 0.6992, "step": 4726 }, { "epoch": 5.384045584045584, "grad_norm": 0.17877577245235443, "learning_rate": 4.329638892304669e-05, "loss": 0.8747, "step": 4727 }, { "epoch": 5.385185185185185, "grad_norm": 0.17161720991134644, "learning_rate": 4.3293213560732846e-05, "loss": 0.7654, "step": 4728 }, { "epoch": 5.386324786324787, "grad_norm": 0.1431724578142166, "learning_rate": 4.329003756304539e-05, "loss": 0.8518, "step": 4729 }, { "epoch": 5.387464387464387, "grad_norm": 0.19463858008384705, "learning_rate": 4.328686093009463e-05, "loss": 0.7453, "step": 4730 }, { "epoch": 5.388603988603989, "grad_norm": 0.12225417047739029, "learning_rate": 4.32836836619909e-05, "loss": 0.9798, "step": 4731 }, { "epoch": 5.38974358974359, "grad_norm": 0.16380740702152252, "learning_rate": 4.328050575884456e-05, "loss": 0.9735, "step": 4732 }, { "epoch": 5.390883190883191, "grad_norm": 0.1878177672624588, "learning_rate": 4.327732722076597e-05, "loss": 0.749, "step": 4733 }, { "epoch": 5.392022792022792, "grad_norm": 0.16649720072746277, "learning_rate": 4.327414804786556e-05, "loss": 0.8673, "step": 4734 }, { "epoch": 5.3931623931623935, "grad_norm": 0.17342236638069153, "learning_rate": 4.327096824025373e-05, "loss": 0.7371, "step": 4735 }, { "epoch": 5.394301994301994, "grad_norm": 0.17925062775611877, "learning_rate": 4.326778779804092e-05, "loss": 0.831, "step": 4736 }, { "epoch": 5.3954415954415955, "grad_norm": 0.17266659438610077, "learning_rate": 4.326460672133761e-05, "loss": 0.8094, "step": 4737 }, { "epoch": 5.396581196581197, "grad_norm": 0.18214453756809235, "learning_rate": 4.3261425010254276e-05, "loss": 0.6258, "step": 4738 }, { "epoch": 5.3977207977207975, "grad_norm": 0.20587857067584991, "learning_rate": 4.3258242664901436e-05, "loss": 0.691, "step": 4739 }, { "epoch": 5.398860398860399, "grad_norm": 0.15734121203422546, "learning_rate": 4.325505968538962e-05, "loss": 0.8235, "step": 4740 }, { "epoch": 5.4, "grad_norm": 0.17362411320209503, "learning_rate": 4.3251876071829376e-05, "loss": 0.782, "step": 4741 }, { "epoch": 5.401139601139601, "grad_norm": 0.1847684532403946, "learning_rate": 4.324869182433128e-05, "loss": 0.7992, "step": 4742 }, { "epoch": 5.402279202279202, "grad_norm": 0.1615762561559677, "learning_rate": 4.3245506943005935e-05, "loss": 0.8391, "step": 4743 }, { "epoch": 5.403418803418804, "grad_norm": 0.19296681880950928, "learning_rate": 4.324232142796396e-05, "loss": 0.7808, "step": 4744 }, { "epoch": 5.404558404558404, "grad_norm": 0.13754847645759583, "learning_rate": 4.3239135279316e-05, "loss": 0.9375, "step": 4745 }, { "epoch": 5.405698005698006, "grad_norm": 0.17377373576164246, "learning_rate": 4.3235948497172706e-05, "loss": 0.8375, "step": 4746 }, { "epoch": 5.406837606837607, "grad_norm": 0.1487252116203308, "learning_rate": 4.323276108164478e-05, "loss": 0.8079, "step": 4747 }, { "epoch": 5.407977207977208, "grad_norm": 0.17759546637535095, "learning_rate": 4.322957303284291e-05, "loss": 0.7903, "step": 4748 }, { "epoch": 5.409116809116809, "grad_norm": 0.1557711511850357, "learning_rate": 4.322638435087785e-05, "loss": 0.7515, "step": 4749 }, { "epoch": 5.410256410256411, "grad_norm": 0.1949656456708908, "learning_rate": 4.322319503586033e-05, "loss": 0.6748, "step": 4750 }, { "epoch": 5.411396011396011, "grad_norm": 0.15114526450634003, "learning_rate": 4.322000508790113e-05, "loss": 0.8124, "step": 4751 }, { "epoch": 5.4125356125356126, "grad_norm": 0.1899373084306717, "learning_rate": 4.321681450711105e-05, "loss": 0.6624, "step": 4752 }, { "epoch": 5.413675213675214, "grad_norm": 0.19785648584365845, "learning_rate": 4.321362329360091e-05, "loss": 0.6209, "step": 4753 }, { "epoch": 5.4148148148148145, "grad_norm": 0.1653890311717987, "learning_rate": 4.321043144748155e-05, "loss": 0.8083, "step": 4754 }, { "epoch": 5.415954415954416, "grad_norm": 0.14718776941299438, "learning_rate": 4.3207238968863816e-05, "loss": 0.9134, "step": 4755 }, { "epoch": 5.417094017094017, "grad_norm": 0.1585494726896286, "learning_rate": 4.320404585785861e-05, "loss": 0.8426, "step": 4756 }, { "epoch": 5.418233618233618, "grad_norm": 0.16712836921215057, "learning_rate": 4.320085211457682e-05, "loss": 0.6322, "step": 4757 }, { "epoch": 5.419373219373219, "grad_norm": 0.17706210911273956, "learning_rate": 4.31976577391294e-05, "loss": 0.6066, "step": 4758 }, { "epoch": 5.420512820512821, "grad_norm": 0.18222695589065552, "learning_rate": 4.319446273162727e-05, "loss": 0.77, "step": 4759 }, { "epoch": 5.421652421652421, "grad_norm": 0.1559290587902069, "learning_rate": 4.319126709218142e-05, "loss": 0.9307, "step": 4760 }, { "epoch": 5.422792022792023, "grad_norm": 0.15937025845050812, "learning_rate": 4.318807082090283e-05, "loss": 0.82, "step": 4761 }, { "epoch": 5.423931623931624, "grad_norm": 0.20229588449001312, "learning_rate": 4.3184873917902534e-05, "loss": 0.7103, "step": 4762 }, { "epoch": 5.425071225071225, "grad_norm": 0.15629231929779053, "learning_rate": 4.3181676383291544e-05, "loss": 0.8671, "step": 4763 }, { "epoch": 5.426210826210826, "grad_norm": 0.1496598720550537, "learning_rate": 4.317847821718095e-05, "loss": 0.8332, "step": 4764 }, { "epoch": 5.427350427350428, "grad_norm": 0.1945844441652298, "learning_rate": 4.31752794196818e-05, "loss": 0.6872, "step": 4765 }, { "epoch": 5.428490028490028, "grad_norm": 0.1952946037054062, "learning_rate": 4.317207999090523e-05, "loss": 0.7917, "step": 4766 }, { "epoch": 5.42962962962963, "grad_norm": 0.1743546426296234, "learning_rate": 4.3168879930962334e-05, "loss": 0.6679, "step": 4767 }, { "epoch": 5.430769230769231, "grad_norm": 0.16394762694835663, "learning_rate": 4.3165679239964287e-05, "loss": 0.9371, "step": 4768 }, { "epoch": 5.431908831908832, "grad_norm": 0.1752413809299469, "learning_rate": 4.316247791802224e-05, "loss": 0.8274, "step": 4769 }, { "epoch": 5.433048433048433, "grad_norm": 0.1843094378709793, "learning_rate": 4.315927596524738e-05, "loss": 0.6571, "step": 4770 }, { "epoch": 5.434188034188034, "grad_norm": 0.21671952307224274, "learning_rate": 4.315607338175093e-05, "loss": 0.6488, "step": 4771 }, { "epoch": 5.435327635327635, "grad_norm": 0.17073369026184082, "learning_rate": 4.315287016764413e-05, "loss": 0.8613, "step": 4772 }, { "epoch": 5.436467236467236, "grad_norm": 0.16119903326034546, "learning_rate": 4.314966632303822e-05, "loss": 0.6998, "step": 4773 }, { "epoch": 5.437606837606838, "grad_norm": 0.17918699979782104, "learning_rate": 4.314646184804449e-05, "loss": 0.6837, "step": 4774 }, { "epoch": 5.438746438746438, "grad_norm": 0.16108837723731995, "learning_rate": 4.314325674277424e-05, "loss": 0.8389, "step": 4775 }, { "epoch": 5.43988603988604, "grad_norm": 0.24914149940013885, "learning_rate": 4.314005100733879e-05, "loss": 0.5163, "step": 4776 }, { "epoch": 5.441025641025641, "grad_norm": 0.1424771100282669, "learning_rate": 4.313684464184947e-05, "loss": 1.0362, "step": 4777 }, { "epoch": 5.442165242165242, "grad_norm": 0.176498681306839, "learning_rate": 4.313363764641768e-05, "loss": 0.7503, "step": 4778 }, { "epoch": 5.443304843304843, "grad_norm": 0.194095179438591, "learning_rate": 4.313043002115477e-05, "loss": 0.6867, "step": 4779 }, { "epoch": 5.444444444444445, "grad_norm": 0.15234620869159698, "learning_rate": 4.3127221766172174e-05, "loss": 0.8719, "step": 4780 }, { "epoch": 5.445584045584045, "grad_norm": 0.14816038310527802, "learning_rate": 4.312401288158131e-05, "loss": 0.9187, "step": 4781 }, { "epoch": 5.446723646723647, "grad_norm": 0.16246698796749115, "learning_rate": 4.3120803367493644e-05, "loss": 0.8479, "step": 4782 }, { "epoch": 5.447863247863248, "grad_norm": 0.17278218269348145, "learning_rate": 4.3117593224020634e-05, "loss": 0.7367, "step": 4783 }, { "epoch": 5.449002849002849, "grad_norm": 0.1845463514328003, "learning_rate": 4.3114382451273803e-05, "loss": 0.6743, "step": 4784 }, { "epoch": 5.45014245014245, "grad_norm": 0.22713877260684967, "learning_rate": 4.311117104936465e-05, "loss": 0.5501, "step": 4785 }, { "epoch": 5.4512820512820515, "grad_norm": 0.16534022986888885, "learning_rate": 4.3107959018404714e-05, "loss": 0.7514, "step": 4786 }, { "epoch": 5.452421652421652, "grad_norm": 0.16976851224899292, "learning_rate": 4.310474635850556e-05, "loss": 0.802, "step": 4787 }, { "epoch": 5.453561253561253, "grad_norm": 0.211656391620636, "learning_rate": 4.3101533069778795e-05, "loss": 0.6399, "step": 4788 }, { "epoch": 5.454700854700855, "grad_norm": 0.17565855383872986, "learning_rate": 4.3098319152335995e-05, "loss": 0.6492, "step": 4789 }, { "epoch": 5.455840455840455, "grad_norm": 0.23472091555595398, "learning_rate": 4.309510460628881e-05, "loss": 0.7435, "step": 4790 }, { "epoch": 5.456980056980057, "grad_norm": 0.1532360315322876, "learning_rate": 4.3091889431748865e-05, "loss": 0.7947, "step": 4791 }, { "epoch": 5.458119658119658, "grad_norm": 0.17084607481956482, "learning_rate": 4.308867362882786e-05, "loss": 0.7802, "step": 4792 }, { "epoch": 5.459259259259259, "grad_norm": 0.17251650989055634, "learning_rate": 4.308545719763747e-05, "loss": 0.7557, "step": 4793 }, { "epoch": 5.46039886039886, "grad_norm": 0.17980605363845825, "learning_rate": 4.3082240138289406e-05, "loss": 0.8287, "step": 4794 }, { "epoch": 5.461538461538462, "grad_norm": 0.16261500120162964, "learning_rate": 4.307902245089544e-05, "loss": 1.0245, "step": 4795 }, { "epoch": 5.462678062678062, "grad_norm": 0.17562450468540192, "learning_rate": 4.307580413556729e-05, "loss": 0.9018, "step": 4796 }, { "epoch": 5.463817663817664, "grad_norm": 0.18925507366657257, "learning_rate": 4.307258519241675e-05, "loss": 0.8676, "step": 4797 }, { "epoch": 5.464957264957265, "grad_norm": 0.14949603378772736, "learning_rate": 4.306936562155563e-05, "loss": 0.965, "step": 4798 }, { "epoch": 5.466096866096866, "grad_norm": 0.19909444451332092, "learning_rate": 4.306614542309576e-05, "loss": 0.6622, "step": 4799 }, { "epoch": 5.467236467236467, "grad_norm": 0.16760773956775665, "learning_rate": 4.3062924597148974e-05, "loss": 0.7352, "step": 4800 }, { "epoch": 5.4683760683760685, "grad_norm": 0.17996075749397278, "learning_rate": 4.305970314382714e-05, "loss": 0.9359, "step": 4801 }, { "epoch": 5.46951566951567, "grad_norm": 0.1547965258359909, "learning_rate": 4.305648106324215e-05, "loss": 0.8512, "step": 4802 }, { "epoch": 5.4706552706552705, "grad_norm": 0.1951674520969391, "learning_rate": 4.305325835550592e-05, "loss": 0.7304, "step": 4803 }, { "epoch": 5.471794871794872, "grad_norm": 0.16296637058258057, "learning_rate": 4.3050035020730385e-05, "loss": 0.8394, "step": 4804 }, { "epoch": 5.472934472934473, "grad_norm": 0.2014588862657547, "learning_rate": 4.304681105902749e-05, "loss": 0.7037, "step": 4805 }, { "epoch": 5.474074074074074, "grad_norm": 0.20429980754852295, "learning_rate": 4.304358647050922e-05, "loss": 0.8858, "step": 4806 }, { "epoch": 5.475213675213675, "grad_norm": 0.15735960006713867, "learning_rate": 4.3040361255287574e-05, "loss": 0.7007, "step": 4807 }, { "epoch": 5.476353276353277, "grad_norm": 0.16200728714466095, "learning_rate": 4.303713541347456e-05, "loss": 0.7849, "step": 4808 }, { "epoch": 5.477492877492877, "grad_norm": 0.16980691254138947, "learning_rate": 4.303390894518224e-05, "loss": 0.7656, "step": 4809 }, { "epoch": 5.478632478632479, "grad_norm": 0.18173424899578094, "learning_rate": 4.303068185052267e-05, "loss": 0.6668, "step": 4810 }, { "epoch": 5.47977207977208, "grad_norm": 0.18690398335456848, "learning_rate": 4.3027454129607935e-05, "loss": 0.7381, "step": 4811 }, { "epoch": 5.480911680911681, "grad_norm": 0.17304465174674988, "learning_rate": 4.302422578255014e-05, "loss": 0.7926, "step": 4812 }, { "epoch": 5.482051282051282, "grad_norm": 0.15293779969215393, "learning_rate": 4.3020996809461414e-05, "loss": 0.6334, "step": 4813 }, { "epoch": 5.4831908831908835, "grad_norm": 0.1855514496564865, "learning_rate": 4.301776721045392e-05, "loss": 0.787, "step": 4814 }, { "epoch": 5.484330484330484, "grad_norm": 0.17089486122131348, "learning_rate": 4.301453698563981e-05, "loss": 0.7256, "step": 4815 }, { "epoch": 5.4854700854700855, "grad_norm": 0.18133726716041565, "learning_rate": 4.301130613513129e-05, "loss": 0.7665, "step": 4816 }, { "epoch": 5.486609686609687, "grad_norm": 0.1925647258758545, "learning_rate": 4.3008074659040586e-05, "loss": 0.5695, "step": 4817 }, { "epoch": 5.4877492877492875, "grad_norm": 0.18926025927066803, "learning_rate": 4.300484255747992e-05, "loss": 0.6418, "step": 4818 }, { "epoch": 5.488888888888889, "grad_norm": 0.18105900287628174, "learning_rate": 4.300160983056156e-05, "loss": 0.7047, "step": 4819 }, { "epoch": 5.49002849002849, "grad_norm": 0.17401130497455597, "learning_rate": 4.29983764783978e-05, "loss": 0.7611, "step": 4820 }, { "epoch": 5.491168091168091, "grad_norm": 0.1705498993396759, "learning_rate": 4.29951425011009e-05, "loss": 0.7604, "step": 4821 }, { "epoch": 5.492307692307692, "grad_norm": 0.15717458724975586, "learning_rate": 4.2991907898783235e-05, "loss": 0.9174, "step": 4822 }, { "epoch": 5.493447293447294, "grad_norm": 0.16977642476558685, "learning_rate": 4.298867267155713e-05, "loss": 0.8904, "step": 4823 }, { "epoch": 5.494586894586894, "grad_norm": 0.1568237841129303, "learning_rate": 4.298543681953495e-05, "loss": 0.7963, "step": 4824 }, { "epoch": 5.495726495726496, "grad_norm": 0.18053722381591797, "learning_rate": 4.2982200342829094e-05, "loss": 0.8133, "step": 4825 }, { "epoch": 5.496866096866097, "grad_norm": 0.20148184895515442, "learning_rate": 4.297896324155196e-05, "loss": 0.7223, "step": 4826 }, { "epoch": 5.498005698005698, "grad_norm": 0.15087102353572845, "learning_rate": 4.297572551581599e-05, "loss": 0.7705, "step": 4827 }, { "epoch": 5.499145299145299, "grad_norm": 0.1942681074142456, "learning_rate": 4.297248716573365e-05, "loss": 0.7027, "step": 4828 }, { "epoch": 5.500284900284901, "grad_norm": 0.1641755998134613, "learning_rate": 4.29692481914174e-05, "loss": 0.8382, "step": 4829 }, { "epoch": 5.501424501424501, "grad_norm": 0.16210035979747772, "learning_rate": 4.296600859297974e-05, "loss": 0.8203, "step": 4830 }, { "epoch": 5.5025641025641026, "grad_norm": 0.23534131050109863, "learning_rate": 4.29627683705332e-05, "loss": 0.5255, "step": 4831 }, { "epoch": 5.503703703703704, "grad_norm": 0.18147152662277222, "learning_rate": 4.295952752419032e-05, "loss": 0.8388, "step": 4832 }, { "epoch": 5.5048433048433045, "grad_norm": 0.1641654372215271, "learning_rate": 4.2956286054063656e-05, "loss": 0.8352, "step": 4833 }, { "epoch": 5.505982905982906, "grad_norm": 0.1740059107542038, "learning_rate": 4.2953043960265793e-05, "loss": 0.6831, "step": 4834 }, { "epoch": 5.507122507122507, "grad_norm": 0.16150124371051788, "learning_rate": 4.294980124290935e-05, "loss": 0.9212, "step": 4835 }, { "epoch": 5.508262108262108, "grad_norm": 0.17982876300811768, "learning_rate": 4.2946557902106944e-05, "loss": 0.6717, "step": 4836 }, { "epoch": 5.509401709401709, "grad_norm": 0.19507353007793427, "learning_rate": 4.294331393797123e-05, "loss": 0.6143, "step": 4837 }, { "epoch": 5.510541310541311, "grad_norm": 0.17204579710960388, "learning_rate": 4.294006935061488e-05, "loss": 0.8285, "step": 4838 }, { "epoch": 5.511680911680911, "grad_norm": 0.2067815661430359, "learning_rate": 4.293682414015059e-05, "loss": 0.6785, "step": 4839 }, { "epoch": 5.512820512820513, "grad_norm": 0.17989838123321533, "learning_rate": 4.293357830669106e-05, "loss": 0.6755, "step": 4840 }, { "epoch": 5.513960113960114, "grad_norm": 0.1755257546901703, "learning_rate": 4.293033185034904e-05, "loss": 0.7745, "step": 4841 }, { "epoch": 5.515099715099715, "grad_norm": 0.16593924164772034, "learning_rate": 4.292708477123729e-05, "loss": 0.6809, "step": 4842 }, { "epoch": 5.516239316239316, "grad_norm": 0.18866293132305145, "learning_rate": 4.2923837069468586e-05, "loss": 0.7132, "step": 4843 }, { "epoch": 5.517378917378918, "grad_norm": 0.16058719158172607, "learning_rate": 4.292058874515573e-05, "loss": 0.7929, "step": 4844 }, { "epoch": 5.518518518518518, "grad_norm": 0.18849186599254608, "learning_rate": 4.291733979841155e-05, "loss": 0.6137, "step": 4845 }, { "epoch": 5.51965811965812, "grad_norm": 0.20065782964229584, "learning_rate": 4.291409022934888e-05, "loss": 0.6236, "step": 4846 }, { "epoch": 5.520797720797721, "grad_norm": 0.22605036199092865, "learning_rate": 4.2910840038080594e-05, "loss": 0.5011, "step": 4847 }, { "epoch": 5.521937321937322, "grad_norm": 0.192116841673851, "learning_rate": 4.290758922471957e-05, "loss": 0.7773, "step": 4848 }, { "epoch": 5.523076923076923, "grad_norm": 0.17766951024532318, "learning_rate": 4.290433778937873e-05, "loss": 0.8467, "step": 4849 }, { "epoch": 5.524216524216524, "grad_norm": 0.1707712709903717, "learning_rate": 4.290108573217101e-05, "loss": 0.8411, "step": 4850 }, { "epoch": 5.525356125356125, "grad_norm": 0.15306320786476135, "learning_rate": 4.289783305320935e-05, "loss": 0.9233, "step": 4851 }, { "epoch": 5.526495726495726, "grad_norm": 0.12808679044246674, "learning_rate": 4.289457975260673e-05, "loss": 0.8705, "step": 4852 }, { "epoch": 5.527635327635328, "grad_norm": 0.18593312799930573, "learning_rate": 4.2891325830476146e-05, "loss": 0.6938, "step": 4853 }, { "epoch": 5.528774928774929, "grad_norm": 0.18112976849079132, "learning_rate": 4.288807128693061e-05, "loss": 0.7557, "step": 4854 }, { "epoch": 5.52991452991453, "grad_norm": 0.21150390803813934, "learning_rate": 4.288481612208317e-05, "loss": 0.6065, "step": 4855 }, { "epoch": 5.531054131054131, "grad_norm": 0.19397857785224915, "learning_rate": 4.2881560336046874e-05, "loss": 0.7736, "step": 4856 }, { "epoch": 5.532193732193733, "grad_norm": 0.17135579884052277, "learning_rate": 4.287830392893482e-05, "loss": 0.9113, "step": 4857 }, { "epoch": 5.533333333333333, "grad_norm": 0.24377498030662537, "learning_rate": 4.287504690086011e-05, "loss": 0.8753, "step": 4858 }, { "epoch": 5.534472934472935, "grad_norm": 0.18869635462760925, "learning_rate": 4.287178925193586e-05, "loss": 0.6228, "step": 4859 }, { "epoch": 5.535612535612536, "grad_norm": 0.18486569821834564, "learning_rate": 4.2868530982275215e-05, "loss": 0.6519, "step": 4860 }, { "epoch": 5.536752136752137, "grad_norm": 0.1529039591550827, "learning_rate": 4.286527209199136e-05, "loss": 0.8446, "step": 4861 }, { "epoch": 5.537891737891738, "grad_norm": 0.18323606252670288, "learning_rate": 4.286201258119746e-05, "loss": 0.6771, "step": 4862 }, { "epoch": 5.5390313390313395, "grad_norm": 0.17818498611450195, "learning_rate": 4.2858752450006764e-05, "loss": 0.6363, "step": 4863 }, { "epoch": 5.54017094017094, "grad_norm": 0.20588888227939606, "learning_rate": 4.285549169853247e-05, "loss": 0.6694, "step": 4864 }, { "epoch": 5.5413105413105415, "grad_norm": 0.14603447914123535, "learning_rate": 4.285223032688785e-05, "loss": 0.919, "step": 4865 }, { "epoch": 5.542450142450143, "grad_norm": 0.18710605800151825, "learning_rate": 4.284896833518618e-05, "loss": 0.7944, "step": 4866 }, { "epoch": 5.543589743589743, "grad_norm": 0.1813526749610901, "learning_rate": 4.284570572354075e-05, "loss": 0.8102, "step": 4867 }, { "epoch": 5.544729344729345, "grad_norm": 0.18585647642612457, "learning_rate": 4.284244249206488e-05, "loss": 0.6563, "step": 4868 }, { "epoch": 5.545868945868946, "grad_norm": 0.1963941752910614, "learning_rate": 4.2839178640871926e-05, "loss": 0.6104, "step": 4869 }, { "epoch": 5.547008547008547, "grad_norm": 0.17948034405708313, "learning_rate": 4.2835914170075237e-05, "loss": 0.6885, "step": 4870 }, { "epoch": 5.548148148148148, "grad_norm": 0.20369914174079895, "learning_rate": 4.28326490797882e-05, "loss": 0.5942, "step": 4871 }, { "epoch": 5.54928774928775, "grad_norm": 0.16400721669197083, "learning_rate": 4.282938337012421e-05, "loss": 0.94, "step": 4872 }, { "epoch": 5.55042735042735, "grad_norm": 0.17267654836177826, "learning_rate": 4.2826117041196714e-05, "loss": 0.8322, "step": 4873 }, { "epoch": 5.551566951566952, "grad_norm": 0.20895081758499146, "learning_rate": 4.282285009311915e-05, "loss": 0.6476, "step": 4874 }, { "epoch": 5.552706552706553, "grad_norm": 0.1930321305990219, "learning_rate": 4.2819582526005e-05, "loss": 0.7806, "step": 4875 }, { "epoch": 5.553846153846154, "grad_norm": 0.1591208130121231, "learning_rate": 4.281631433996773e-05, "loss": 0.8056, "step": 4876 }, { "epoch": 5.554985754985755, "grad_norm": 0.23983530700206757, "learning_rate": 4.281304553512087e-05, "loss": 0.7979, "step": 4877 }, { "epoch": 5.5561253561253565, "grad_norm": 0.19674810767173767, "learning_rate": 4.280977611157796e-05, "loss": 0.5482, "step": 4878 }, { "epoch": 5.557264957264957, "grad_norm": 0.17274408042430878, "learning_rate": 4.280650606945254e-05, "loss": 0.8055, "step": 4879 }, { "epoch": 5.5584045584045585, "grad_norm": 0.16266362369060516, "learning_rate": 4.28032354088582e-05, "loss": 0.7655, "step": 4880 }, { "epoch": 5.55954415954416, "grad_norm": 0.14792993664741516, "learning_rate": 4.2799964129908533e-05, "loss": 0.8576, "step": 4881 }, { "epoch": 5.5606837606837605, "grad_norm": 0.16226008534431458, "learning_rate": 4.279669223271716e-05, "loss": 0.7377, "step": 4882 }, { "epoch": 5.561823361823362, "grad_norm": 0.1945132315158844, "learning_rate": 4.2793419717397734e-05, "loss": 0.7414, "step": 4883 }, { "epoch": 5.562962962962963, "grad_norm": 0.19473624229431152, "learning_rate": 4.27901465840639e-05, "loss": 0.784, "step": 4884 }, { "epoch": 5.564102564102564, "grad_norm": 0.20556029677391052, "learning_rate": 4.278687283282936e-05, "loss": 0.6137, "step": 4885 }, { "epoch": 5.565242165242165, "grad_norm": 0.1734907478094101, "learning_rate": 4.278359846380781e-05, "loss": 0.7178, "step": 4886 }, { "epoch": 5.566381766381767, "grad_norm": 0.2000545710325241, "learning_rate": 4.278032347711297e-05, "loss": 0.5692, "step": 4887 }, { "epoch": 5.567521367521367, "grad_norm": 0.18035154044628143, "learning_rate": 4.277704787285861e-05, "loss": 0.8012, "step": 4888 }, { "epoch": 5.568660968660969, "grad_norm": 0.21270377933979034, "learning_rate": 4.277377165115849e-05, "loss": 0.664, "step": 4889 }, { "epoch": 5.56980056980057, "grad_norm": 0.1641324758529663, "learning_rate": 4.277049481212639e-05, "loss": 0.9684, "step": 4890 }, { "epoch": 5.570940170940171, "grad_norm": 0.16035698354244232, "learning_rate": 4.2767217355876155e-05, "loss": 0.7883, "step": 4891 }, { "epoch": 5.572079772079772, "grad_norm": 0.1673315018415451, "learning_rate": 4.276393928252159e-05, "loss": 0.7712, "step": 4892 }, { "epoch": 5.5732193732193736, "grad_norm": 0.1887262910604477, "learning_rate": 4.2760660592176565e-05, "loss": 0.6812, "step": 4893 }, { "epoch": 5.574358974358974, "grad_norm": 0.17907768487930298, "learning_rate": 4.2757381284954955e-05, "loss": 0.7801, "step": 4894 }, { "epoch": 5.5754985754985755, "grad_norm": 0.14310143887996674, "learning_rate": 4.275410136097067e-05, "loss": 0.91, "step": 4895 }, { "epoch": 5.576638176638177, "grad_norm": 0.16130873560905457, "learning_rate": 4.2750820820337605e-05, "loss": 0.9366, "step": 4896 }, { "epoch": 5.5777777777777775, "grad_norm": 0.18696458637714386, "learning_rate": 4.2747539663169725e-05, "loss": 0.6416, "step": 4897 }, { "epoch": 5.578917378917379, "grad_norm": 0.19012431800365448, "learning_rate": 4.274425788958098e-05, "loss": 0.9844, "step": 4898 }, { "epoch": 5.58005698005698, "grad_norm": 0.18784600496292114, "learning_rate": 4.274097549968538e-05, "loss": 0.8611, "step": 4899 }, { "epoch": 5.581196581196581, "grad_norm": 0.1906341016292572, "learning_rate": 4.273769249359689e-05, "loss": 0.7214, "step": 4900 }, { "epoch": 5.582336182336182, "grad_norm": 0.1430826038122177, "learning_rate": 4.2734408871429574e-05, "loss": 0.9122, "step": 4901 }, { "epoch": 5.583475783475784, "grad_norm": 0.2282940298318863, "learning_rate": 4.273112463329747e-05, "loss": 0.5778, "step": 4902 }, { "epoch": 5.584615384615384, "grad_norm": 0.15738672018051147, "learning_rate": 4.272783977931464e-05, "loss": 0.8408, "step": 4903 }, { "epoch": 5.585754985754986, "grad_norm": 0.1856355220079422, "learning_rate": 4.2724554309595186e-05, "loss": 0.8689, "step": 4904 }, { "epoch": 5.586894586894587, "grad_norm": 0.17705173790454865, "learning_rate": 4.272126822425322e-05, "loss": 0.7853, "step": 4905 }, { "epoch": 5.588034188034188, "grad_norm": 0.1721915900707245, "learning_rate": 4.271798152340287e-05, "loss": 0.8317, "step": 4906 }, { "epoch": 5.589173789173789, "grad_norm": 0.18221662938594818, "learning_rate": 4.27146942071583e-05, "loss": 0.7288, "step": 4907 }, { "epoch": 5.590313390313391, "grad_norm": 0.17120833694934845, "learning_rate": 4.271140627563368e-05, "loss": 0.7885, "step": 4908 }, { "epoch": 5.591452991452991, "grad_norm": 0.1811331957578659, "learning_rate": 4.270811772894322e-05, "loss": 0.7704, "step": 4909 }, { "epoch": 5.592592592592593, "grad_norm": 0.16168002784252167, "learning_rate": 4.270482856720113e-05, "loss": 0.8568, "step": 4910 }, { "epoch": 5.593732193732194, "grad_norm": 0.15206867456436157, "learning_rate": 4.2701538790521655e-05, "loss": 0.891, "step": 4911 }, { "epoch": 5.5948717948717945, "grad_norm": 0.18935824930667877, "learning_rate": 4.2698248399019056e-05, "loss": 0.7073, "step": 4912 }, { "epoch": 5.596011396011396, "grad_norm": 0.15101583302021027, "learning_rate": 4.269495739280763e-05, "loss": 0.92, "step": 4913 }, { "epoch": 5.597150997150997, "grad_norm": 0.17057983577251434, "learning_rate": 4.2691665772001665e-05, "loss": 0.7265, "step": 4914 }, { "epoch": 5.598290598290598, "grad_norm": 0.17918899655342102, "learning_rate": 4.26883735367155e-05, "loss": 0.7757, "step": 4915 }, { "epoch": 5.599430199430199, "grad_norm": 0.1816730946302414, "learning_rate": 4.268508068706347e-05, "loss": 0.6764, "step": 4916 }, { "epoch": 5.600569800569801, "grad_norm": 0.14690978825092316, "learning_rate": 4.2681787223159964e-05, "loss": 0.9026, "step": 4917 }, { "epoch": 5.601709401709401, "grad_norm": 0.18491557240486145, "learning_rate": 4.267849314511936e-05, "loss": 0.5996, "step": 4918 }, { "epoch": 5.602849002849003, "grad_norm": 0.1776413768529892, "learning_rate": 4.2675198453056065e-05, "loss": 0.7781, "step": 4919 }, { "epoch": 5.603988603988604, "grad_norm": 0.17524947226047516, "learning_rate": 4.267190314708453e-05, "loss": 0.6182, "step": 4920 }, { "epoch": 5.605128205128205, "grad_norm": 0.1926114559173584, "learning_rate": 4.2668607227319193e-05, "loss": 0.7867, "step": 4921 }, { "epoch": 5.606267806267806, "grad_norm": 0.16125115752220154, "learning_rate": 4.2665310693874544e-05, "loss": 0.8537, "step": 4922 }, { "epoch": 5.607407407407408, "grad_norm": 0.1547136753797531, "learning_rate": 4.266201354686508e-05, "loss": 0.9099, "step": 4923 }, { "epoch": 5.608547008547008, "grad_norm": 0.1847689300775528, "learning_rate": 4.2658715786405303e-05, "loss": 0.8013, "step": 4924 }, { "epoch": 5.60968660968661, "grad_norm": 0.1845116764307022, "learning_rate": 4.2655417412609764e-05, "loss": 0.7532, "step": 4925 }, { "epoch": 5.610826210826211, "grad_norm": 0.1838240772485733, "learning_rate": 4.265211842559303e-05, "loss": 0.8396, "step": 4926 }, { "epoch": 5.611965811965812, "grad_norm": 0.17016226053237915, "learning_rate": 4.2648818825469684e-05, "loss": 0.7146, "step": 4927 }, { "epoch": 5.613105413105413, "grad_norm": 0.1836037039756775, "learning_rate": 4.2645518612354324e-05, "loss": 0.5695, "step": 4928 }, { "epoch": 5.614245014245014, "grad_norm": 0.16213347017765045, "learning_rate": 4.264221778636158e-05, "loss": 0.8066, "step": 4929 }, { "epoch": 5.615384615384615, "grad_norm": 0.14991053938865662, "learning_rate": 4.263891634760609e-05, "loss": 0.9105, "step": 4930 }, { "epoch": 5.616524216524216, "grad_norm": 0.1772087812423706, "learning_rate": 4.263561429620253e-05, "loss": 0.8621, "step": 4931 }, { "epoch": 5.617663817663818, "grad_norm": 0.16358356177806854, "learning_rate": 4.263231163226559e-05, "loss": 0.8971, "step": 4932 }, { "epoch": 5.618803418803418, "grad_norm": 0.1786920428276062, "learning_rate": 4.262900835590997e-05, "loss": 0.6598, "step": 4933 }, { "epoch": 5.61994301994302, "grad_norm": 0.1441824585199356, "learning_rate": 4.2625704467250416e-05, "loss": 0.8508, "step": 4934 }, { "epoch": 5.621082621082621, "grad_norm": 0.1651386171579361, "learning_rate": 4.262239996640168e-05, "loss": 0.7861, "step": 4935 }, { "epoch": 5.622222222222222, "grad_norm": 0.19873394072055817, "learning_rate": 4.2619094853478526e-05, "loss": 0.7008, "step": 4936 }, { "epoch": 5.623361823361823, "grad_norm": 0.1520562618970871, "learning_rate": 4.261578912859575e-05, "loss": 0.8863, "step": 4937 }, { "epoch": 5.624501424501425, "grad_norm": 0.19953817129135132, "learning_rate": 4.261248279186818e-05, "loss": 0.77, "step": 4938 }, { "epoch": 5.625641025641025, "grad_norm": 0.15713857114315033, "learning_rate": 4.2609175843410645e-05, "loss": 0.8491, "step": 4939 }, { "epoch": 5.626780626780627, "grad_norm": 0.17172771692276, "learning_rate": 4.2605868283338014e-05, "loss": 0.7961, "step": 4940 }, { "epoch": 5.627920227920228, "grad_norm": 0.1584072858095169, "learning_rate": 4.260256011176515e-05, "loss": 0.8584, "step": 4941 }, { "epoch": 5.629059829059829, "grad_norm": 0.14709335565567017, "learning_rate": 4.259925132880698e-05, "loss": 0.8929, "step": 4942 }, { "epoch": 5.63019943019943, "grad_norm": 0.2108181267976761, "learning_rate": 4.2595941934578406e-05, "loss": 0.7498, "step": 4943 }, { "epoch": 5.6313390313390315, "grad_norm": 0.17729757726192474, "learning_rate": 4.259263192919438e-05, "loss": 0.7161, "step": 4944 }, { "epoch": 5.632478632478632, "grad_norm": 0.18931356072425842, "learning_rate": 4.258932131276986e-05, "loss": 0.7106, "step": 4945 }, { "epoch": 5.633618233618233, "grad_norm": 0.19616897404193878, "learning_rate": 4.2586010085419847e-05, "loss": 0.7033, "step": 4946 }, { "epoch": 5.634757834757835, "grad_norm": 0.1599295288324356, "learning_rate": 4.258269824725934e-05, "loss": 0.8281, "step": 4947 }, { "epoch": 5.635897435897435, "grad_norm": 0.2065536081790924, "learning_rate": 4.257938579840338e-05, "loss": 0.6753, "step": 4948 }, { "epoch": 5.637037037037037, "grad_norm": 0.1769915074110031, "learning_rate": 4.2576072738967e-05, "loss": 0.6098, "step": 4949 }, { "epoch": 5.638176638176638, "grad_norm": 0.18057970702648163, "learning_rate": 4.257275906906527e-05, "loss": 0.8321, "step": 4950 }, { "epoch": 5.639316239316239, "grad_norm": 0.14909537136554718, "learning_rate": 4.256944478881331e-05, "loss": 0.7847, "step": 4951 }, { "epoch": 5.64045584045584, "grad_norm": 0.18198081851005554, "learning_rate": 4.2566129898326205e-05, "loss": 0.8104, "step": 4952 }, { "epoch": 5.641595441595442, "grad_norm": 0.16259215772151947, "learning_rate": 4.2562814397719106e-05, "loss": 0.7178, "step": 4953 }, { "epoch": 5.642735042735043, "grad_norm": 0.15607447922229767, "learning_rate": 4.255949828710716e-05, "loss": 0.9134, "step": 4954 }, { "epoch": 5.643874643874644, "grad_norm": 0.15382663905620575, "learning_rate": 4.255618156660555e-05, "loss": 0.8821, "step": 4955 }, { "epoch": 5.645014245014245, "grad_norm": 0.1556910276412964, "learning_rate": 4.255286423632948e-05, "loss": 0.8285, "step": 4956 }, { "epoch": 5.6461538461538465, "grad_norm": 0.16671329736709595, "learning_rate": 4.254954629639416e-05, "loss": 0.8298, "step": 4957 }, { "epoch": 5.647293447293447, "grad_norm": 0.1555967479944229, "learning_rate": 4.254622774691484e-05, "loss": 0.8776, "step": 4958 }, { "epoch": 5.6484330484330485, "grad_norm": 0.16595129668712616, "learning_rate": 4.254290858800678e-05, "loss": 0.7158, "step": 4959 }, { "epoch": 5.64957264957265, "grad_norm": 0.1903461515903473, "learning_rate": 4.2539588819785255e-05, "loss": 0.7337, "step": 4960 }, { "epoch": 5.6507122507122505, "grad_norm": 0.16233199834823608, "learning_rate": 4.253626844236558e-05, "loss": 0.8324, "step": 4961 }, { "epoch": 5.651851851851852, "grad_norm": 0.18301106989383698, "learning_rate": 4.253294745586307e-05, "loss": 0.8412, "step": 4962 }, { "epoch": 5.652991452991453, "grad_norm": 0.1980588436126709, "learning_rate": 4.2529625860393085e-05, "loss": 0.8236, "step": 4963 }, { "epoch": 5.654131054131054, "grad_norm": 0.18101529777050018, "learning_rate": 4.252630365607099e-05, "loss": 0.7715, "step": 4964 }, { "epoch": 5.655270655270655, "grad_norm": 0.20688144862651825, "learning_rate": 4.2522980843012175e-05, "loss": 0.8297, "step": 4965 }, { "epoch": 5.656410256410257, "grad_norm": 0.179339200258255, "learning_rate": 4.251965742133203e-05, "loss": 0.6817, "step": 4966 }, { "epoch": 5.657549857549857, "grad_norm": 0.16479340195655823, "learning_rate": 4.2516333391146014e-05, "loss": 0.8987, "step": 4967 }, { "epoch": 5.658689458689459, "grad_norm": 0.19317205250263214, "learning_rate": 4.251300875256957e-05, "loss": 0.6499, "step": 4968 }, { "epoch": 5.65982905982906, "grad_norm": 0.17912079393863678, "learning_rate": 4.250968350571817e-05, "loss": 0.7337, "step": 4969 }, { "epoch": 5.660968660968661, "grad_norm": 0.19748400151729584, "learning_rate": 4.250635765070731e-05, "loss": 0.7389, "step": 4970 }, { "epoch": 5.662108262108262, "grad_norm": 0.1881670206785202, "learning_rate": 4.250303118765251e-05, "loss": 0.619, "step": 4971 }, { "epoch": 5.663247863247864, "grad_norm": 0.1812215894460678, "learning_rate": 4.2499704116669295e-05, "loss": 0.6389, "step": 4972 }, { "epoch": 5.664387464387464, "grad_norm": 0.15834550559520721, "learning_rate": 4.2496376437873245e-05, "loss": 0.7883, "step": 4973 }, { "epoch": 5.6655270655270655, "grad_norm": 0.16520103812217712, "learning_rate": 4.249304815137991e-05, "loss": 0.8922, "step": 4974 }, { "epoch": 5.666666666666667, "grad_norm": 0.1468709409236908, "learning_rate": 4.248971925730492e-05, "loss": 0.866, "step": 4975 }, { "epoch": 5.6678062678062675, "grad_norm": 0.15948764979839325, "learning_rate": 4.248638975576387e-05, "loss": 0.877, "step": 4976 }, { "epoch": 5.668945868945869, "grad_norm": 0.19492647051811218, "learning_rate": 4.248305964687242e-05, "loss": 0.8116, "step": 4977 }, { "epoch": 5.67008547008547, "grad_norm": 0.1607757806777954, "learning_rate": 4.2479728930746246e-05, "loss": 0.8762, "step": 4978 }, { "epoch": 5.671225071225071, "grad_norm": 0.1571768820285797, "learning_rate": 4.247639760750099e-05, "loss": 0.8207, "step": 4979 }, { "epoch": 5.672364672364672, "grad_norm": 0.19270595908164978, "learning_rate": 4.247306567725241e-05, "loss": 0.6118, "step": 4980 }, { "epoch": 5.673504273504274, "grad_norm": 0.16036999225616455, "learning_rate": 4.24697331401162e-05, "loss": 0.8286, "step": 4981 }, { "epoch": 5.674643874643874, "grad_norm": 0.20208951830863953, "learning_rate": 4.246639999620811e-05, "loss": 0.6747, "step": 4982 }, { "epoch": 5.675783475783476, "grad_norm": 0.1437763273715973, "learning_rate": 4.246306624564391e-05, "loss": 0.8368, "step": 4983 }, { "epoch": 5.676923076923077, "grad_norm": 0.19348663091659546, "learning_rate": 4.2459731888539406e-05, "loss": 0.7593, "step": 4984 }, { "epoch": 5.678062678062678, "grad_norm": 0.1824321150779724, "learning_rate": 4.24563969250104e-05, "loss": 0.7766, "step": 4985 }, { "epoch": 5.679202279202279, "grad_norm": 0.1617376059293747, "learning_rate": 4.245306135517272e-05, "loss": 0.8278, "step": 4986 }, { "epoch": 5.680341880341881, "grad_norm": 0.20494359731674194, "learning_rate": 4.244972517914222e-05, "loss": 0.8161, "step": 4987 }, { "epoch": 5.681481481481481, "grad_norm": 0.19541950523853302, "learning_rate": 4.244638839703478e-05, "loss": 0.5734, "step": 4988 }, { "epoch": 5.682621082621083, "grad_norm": 0.17641092836856842, "learning_rate": 4.24430510089663e-05, "loss": 0.8392, "step": 4989 }, { "epoch": 5.683760683760684, "grad_norm": 0.18785534799098969, "learning_rate": 4.243971301505268e-05, "loss": 0.7964, "step": 4990 }, { "epoch": 5.6849002849002845, "grad_norm": 0.21321099996566772, "learning_rate": 4.2436374415409875e-05, "loss": 0.669, "step": 4991 }, { "epoch": 5.686039886039886, "grad_norm": 0.1813569813966751, "learning_rate": 4.243303521015383e-05, "loss": 0.6711, "step": 4992 }, { "epoch": 5.687179487179487, "grad_norm": 0.17279620468616486, "learning_rate": 4.2429695399400535e-05, "loss": 0.827, "step": 4993 }, { "epoch": 5.688319088319088, "grad_norm": 0.1700081080198288, "learning_rate": 4.2426354983265984e-05, "loss": 0.864, "step": 4994 }, { "epoch": 5.689458689458689, "grad_norm": 0.1332874298095703, "learning_rate": 4.2423013961866206e-05, "loss": 0.8532, "step": 4995 }, { "epoch": 5.690598290598291, "grad_norm": 0.14816050231456757, "learning_rate": 4.2419672335317237e-05, "loss": 0.8706, "step": 4996 }, { "epoch": 5.691737891737891, "grad_norm": 0.1966293901205063, "learning_rate": 4.241633010373515e-05, "loss": 0.732, "step": 4997 }, { "epoch": 5.692877492877493, "grad_norm": 0.19324849545955658, "learning_rate": 4.2412987267236015e-05, "loss": 0.6829, "step": 4998 }, { "epoch": 5.694017094017094, "grad_norm": 0.15970183908939362, "learning_rate": 4.2409643825935944e-05, "loss": 0.8116, "step": 4999 }, { "epoch": 5.695156695156696, "grad_norm": 0.20089729130268097, "learning_rate": 4.2406299779951067e-05, "loss": 0.6485, "step": 5000 }, { "epoch": 5.696296296296296, "grad_norm": 0.1744430661201477, "learning_rate": 4.2402955129397534e-05, "loss": 0.8652, "step": 5001 }, { "epoch": 5.697435897435898, "grad_norm": 0.17672611773014069, "learning_rate": 4.239960987439152e-05, "loss": 0.7967, "step": 5002 }, { "epoch": 5.698575498575499, "grad_norm": 0.17788653075695038, "learning_rate": 4.2396264015049195e-05, "loss": 0.8278, "step": 5003 }, { "epoch": 5.6997150997151, "grad_norm": 0.15854409337043762, "learning_rate": 4.2392917551486786e-05, "loss": 0.884, "step": 5004 }, { "epoch": 5.700854700854701, "grad_norm": 0.18261882662773132, "learning_rate": 4.238957048382052e-05, "loss": 0.7924, "step": 5005 }, { "epoch": 5.7019943019943025, "grad_norm": 0.16455203294754028, "learning_rate": 4.238622281216664e-05, "loss": 0.916, "step": 5006 }, { "epoch": 5.703133903133903, "grad_norm": 0.17314814031124115, "learning_rate": 4.2382874536641436e-05, "loss": 0.7614, "step": 5007 }, { "epoch": 5.704273504273504, "grad_norm": 0.1945575624704361, "learning_rate": 4.2379525657361194e-05, "loss": 0.6869, "step": 5008 }, { "epoch": 5.705413105413106, "grad_norm": 0.18877752125263214, "learning_rate": 4.237617617444224e-05, "loss": 0.7701, "step": 5009 }, { "epoch": 5.706552706552706, "grad_norm": 0.18141210079193115, "learning_rate": 4.2372826088000895e-05, "loss": 0.6616, "step": 5010 }, { "epoch": 5.707692307692308, "grad_norm": 0.1711808443069458, "learning_rate": 4.236947539815353e-05, "loss": 0.7514, "step": 5011 }, { "epoch": 5.708831908831909, "grad_norm": 0.19431094825267792, "learning_rate": 4.2366124105016513e-05, "loss": 0.6112, "step": 5012 }, { "epoch": 5.70997150997151, "grad_norm": 0.17094816267490387, "learning_rate": 4.236277220870626e-05, "loss": 0.78, "step": 5013 }, { "epoch": 5.711111111111111, "grad_norm": 0.1716306060552597, "learning_rate": 4.235941970933917e-05, "loss": 0.8503, "step": 5014 }, { "epoch": 5.712250712250713, "grad_norm": 0.14870516955852509, "learning_rate": 4.235606660703169e-05, "loss": 0.8315, "step": 5015 }, { "epoch": 5.713390313390313, "grad_norm": 0.16047079861164093, "learning_rate": 4.235271290190029e-05, "loss": 0.8526, "step": 5016 }, { "epoch": 5.714529914529915, "grad_norm": 0.16539376974105835, "learning_rate": 4.234935859406146e-05, "loss": 0.7944, "step": 5017 }, { "epoch": 5.715669515669516, "grad_norm": 0.1937294900417328, "learning_rate": 4.234600368363169e-05, "loss": 0.7728, "step": 5018 }, { "epoch": 5.716809116809117, "grad_norm": 0.25908979773521423, "learning_rate": 4.234264817072751e-05, "loss": 0.6999, "step": 5019 }, { "epoch": 5.717948717948718, "grad_norm": 0.251360684633255, "learning_rate": 4.233929205546547e-05, "loss": 0.6301, "step": 5020 }, { "epoch": 5.7190883190883195, "grad_norm": 0.1455957293510437, "learning_rate": 4.233593533796213e-05, "loss": 0.7768, "step": 5021 }, { "epoch": 5.72022792022792, "grad_norm": 0.18815159797668457, "learning_rate": 4.233257801833408e-05, "loss": 0.7348, "step": 5022 }, { "epoch": 5.7213675213675215, "grad_norm": 0.18029174208641052, "learning_rate": 4.232922009669793e-05, "loss": 0.7225, "step": 5023 }, { "epoch": 5.722507122507123, "grad_norm": 0.1669107973575592, "learning_rate": 4.2325861573170316e-05, "loss": 0.9198, "step": 5024 }, { "epoch": 5.7236467236467234, "grad_norm": 0.17809349298477173, "learning_rate": 4.2322502447867884e-05, "loss": 0.792, "step": 5025 }, { "epoch": 5.724786324786325, "grad_norm": 0.17792947590351105, "learning_rate": 4.2319142720907296e-05, "loss": 0.7769, "step": 5026 }, { "epoch": 5.725925925925926, "grad_norm": 0.17586131393909454, "learning_rate": 4.2315782392405255e-05, "loss": 0.7883, "step": 5027 }, { "epoch": 5.727065527065527, "grad_norm": 0.16819684207439423, "learning_rate": 4.231242146247848e-05, "loss": 0.7843, "step": 5028 }, { "epoch": 5.728205128205128, "grad_norm": 0.19293169677257538, "learning_rate": 4.23090599312437e-05, "loss": 0.6307, "step": 5029 }, { "epoch": 5.72934472934473, "grad_norm": 0.21308547258377075, "learning_rate": 4.230569779881766e-05, "loss": 0.6307, "step": 5030 }, { "epoch": 5.73048433048433, "grad_norm": 0.1603964865207672, "learning_rate": 4.230233506531715e-05, "loss": 0.7958, "step": 5031 }, { "epoch": 5.731623931623932, "grad_norm": 0.15044721961021423, "learning_rate": 4.229897173085896e-05, "loss": 0.8002, "step": 5032 }, { "epoch": 5.732763532763533, "grad_norm": 0.18368439376354218, "learning_rate": 4.229560779555991e-05, "loss": 0.7507, "step": 5033 }, { "epoch": 5.733903133903134, "grad_norm": 0.18186402320861816, "learning_rate": 4.229224325953684e-05, "loss": 0.6836, "step": 5034 }, { "epoch": 5.735042735042735, "grad_norm": 0.15832217037677765, "learning_rate": 4.22888781229066e-05, "loss": 0.7728, "step": 5035 }, { "epoch": 5.7361823361823365, "grad_norm": 0.16162727773189545, "learning_rate": 4.228551238578609e-05, "loss": 0.8511, "step": 5036 }, { "epoch": 5.737321937321937, "grad_norm": 0.18553316593170166, "learning_rate": 4.2282146048292186e-05, "loss": 0.7614, "step": 5037 }, { "epoch": 5.7384615384615385, "grad_norm": 0.2758096754550934, "learning_rate": 4.227877911054183e-05, "loss": 0.8668, "step": 5038 }, { "epoch": 5.73960113960114, "grad_norm": 0.16694800555706024, "learning_rate": 4.227541157265196e-05, "loss": 0.7571, "step": 5039 }, { "epoch": 5.7407407407407405, "grad_norm": 0.206983745098114, "learning_rate": 4.227204343473954e-05, "loss": 0.691, "step": 5040 }, { "epoch": 5.741880341880342, "grad_norm": 0.1555628925561905, "learning_rate": 4.226867469692155e-05, "loss": 0.9326, "step": 5041 }, { "epoch": 5.743019943019943, "grad_norm": 0.16682085394859314, "learning_rate": 4.2265305359315e-05, "loss": 0.7891, "step": 5042 }, { "epoch": 5.744159544159544, "grad_norm": 0.1864742785692215, "learning_rate": 4.2261935422036916e-05, "loss": 0.6711, "step": 5043 }, { "epoch": 5.745299145299145, "grad_norm": 0.15288496017456055, "learning_rate": 4.2258564885204335e-05, "loss": 0.9792, "step": 5044 }, { "epoch": 5.746438746438747, "grad_norm": 0.15460316836833954, "learning_rate": 4.2255193748934344e-05, "loss": 0.6918, "step": 5045 }, { "epoch": 5.747578347578347, "grad_norm": 0.18567848205566406, "learning_rate": 4.225182201334402e-05, "loss": 0.9028, "step": 5046 }, { "epoch": 5.748717948717949, "grad_norm": 0.18545792996883392, "learning_rate": 4.224844967855048e-05, "loss": 0.7331, "step": 5047 }, { "epoch": 5.74985754985755, "grad_norm": 0.16540810465812683, "learning_rate": 4.224507674467084e-05, "loss": 0.7839, "step": 5048 }, { "epoch": 5.750997150997151, "grad_norm": 0.12994205951690674, "learning_rate": 4.2241703211822266e-05, "loss": 1.0731, "step": 5049 }, { "epoch": 5.752136752136752, "grad_norm": 0.17859558761119843, "learning_rate": 4.223832908012192e-05, "loss": 0.8567, "step": 5050 }, { "epoch": 5.753276353276354, "grad_norm": 0.19815339148044586, "learning_rate": 4.223495434968701e-05, "loss": 0.6952, "step": 5051 }, { "epoch": 5.754415954415954, "grad_norm": 0.16859059035778046, "learning_rate": 4.2231579020634725e-05, "loss": 0.8569, "step": 5052 }, { "epoch": 5.7555555555555555, "grad_norm": 0.16981007158756256, "learning_rate": 4.2228203093082326e-05, "loss": 0.729, "step": 5053 }, { "epoch": 5.756695156695157, "grad_norm": 0.18223218619823456, "learning_rate": 4.2224826567147045e-05, "loss": 0.9647, "step": 5054 }, { "epoch": 5.7578347578347575, "grad_norm": 0.17027547955513, "learning_rate": 4.222144944294618e-05, "loss": 0.9492, "step": 5055 }, { "epoch": 5.758974358974359, "grad_norm": 0.17369578778743744, "learning_rate": 4.2218071720597006e-05, "loss": 0.7478, "step": 5056 }, { "epoch": 5.76011396011396, "grad_norm": 0.1581421196460724, "learning_rate": 4.221469340021686e-05, "loss": 0.7778, "step": 5057 }, { "epoch": 5.761253561253561, "grad_norm": 0.1838197261095047, "learning_rate": 4.2211314481923067e-05, "loss": 0.7994, "step": 5058 }, { "epoch": 5.762393162393162, "grad_norm": 0.15999150276184082, "learning_rate": 4.2207934965833e-05, "loss": 0.8862, "step": 5059 }, { "epoch": 5.763532763532764, "grad_norm": 0.1751394122838974, "learning_rate": 4.220455485206402e-05, "loss": 0.7856, "step": 5060 }, { "epoch": 5.764672364672364, "grad_norm": 0.13892380893230438, "learning_rate": 4.2201174140733535e-05, "loss": 0.9042, "step": 5061 }, { "epoch": 5.765811965811966, "grad_norm": 0.16788841784000397, "learning_rate": 4.219779283195897e-05, "loss": 0.9338, "step": 5062 }, { "epoch": 5.766951566951567, "grad_norm": 0.22517052292823792, "learning_rate": 4.219441092585777e-05, "loss": 0.7178, "step": 5063 }, { "epoch": 5.768091168091168, "grad_norm": 0.1257663071155548, "learning_rate": 4.219102842254739e-05, "loss": 0.9102, "step": 5064 }, { "epoch": 5.769230769230769, "grad_norm": 0.1383146196603775, "learning_rate": 4.218764532214532e-05, "loss": 0.8179, "step": 5065 }, { "epoch": 5.770370370370371, "grad_norm": 0.19434872269630432, "learning_rate": 4.2184261624769064e-05, "loss": 0.6797, "step": 5066 }, { "epoch": 5.771509971509971, "grad_norm": 0.15667624771595, "learning_rate": 4.218087733053614e-05, "loss": 0.981, "step": 5067 }, { "epoch": 5.772649572649573, "grad_norm": 0.1542641520500183, "learning_rate": 4.21774924395641e-05, "loss": 0.9116, "step": 5068 }, { "epoch": 5.773789173789174, "grad_norm": 0.18515565991401672, "learning_rate": 4.217410695197051e-05, "loss": 0.8837, "step": 5069 }, { "epoch": 5.7749287749287745, "grad_norm": 0.17671780288219452, "learning_rate": 4.2170720867872954e-05, "loss": 0.854, "step": 5070 }, { "epoch": 5.776068376068376, "grad_norm": 0.18356047570705414, "learning_rate": 4.216733418738904e-05, "loss": 0.8568, "step": 5071 }, { "epoch": 5.777207977207977, "grad_norm": 0.19212129712104797, "learning_rate": 4.216394691063641e-05, "loss": 0.6283, "step": 5072 }, { "epoch": 5.778347578347578, "grad_norm": 0.17566898465156555, "learning_rate": 4.2160559037732694e-05, "loss": 0.84, "step": 5073 }, { "epoch": 5.779487179487179, "grad_norm": 0.15893234312534332, "learning_rate": 4.215717056879558e-05, "loss": 0.785, "step": 5074 }, { "epoch": 5.780626780626781, "grad_norm": 0.15502238273620605, "learning_rate": 4.2153781503942744e-05, "loss": 0.9261, "step": 5075 }, { "epoch": 5.781766381766381, "grad_norm": 0.18266864120960236, "learning_rate": 4.21503918432919e-05, "loss": 0.6755, "step": 5076 }, { "epoch": 5.782905982905983, "grad_norm": 0.17326800525188446, "learning_rate": 4.214700158696079e-05, "loss": 0.7029, "step": 5077 }, { "epoch": 5.784045584045584, "grad_norm": 0.1718059480190277, "learning_rate": 4.214361073506715e-05, "loss": 0.8196, "step": 5078 }, { "epoch": 5.785185185185185, "grad_norm": 0.18499065935611725, "learning_rate": 4.2140219287728776e-05, "loss": 0.7205, "step": 5079 }, { "epoch": 5.786324786324786, "grad_norm": 0.1749192476272583, "learning_rate": 4.2136827245063444e-05, "loss": 0.8313, "step": 5080 }, { "epoch": 5.787464387464388, "grad_norm": 0.16277176141738892, "learning_rate": 4.2133434607188973e-05, "loss": 0.7105, "step": 5081 }, { "epoch": 5.788603988603988, "grad_norm": 0.1911974400281906, "learning_rate": 4.2130041374223205e-05, "loss": 0.6628, "step": 5082 }, { "epoch": 5.78974358974359, "grad_norm": 0.2054838091135025, "learning_rate": 4.212664754628399e-05, "loss": 0.6555, "step": 5083 }, { "epoch": 5.790883190883191, "grad_norm": 0.22773410379886627, "learning_rate": 4.212325312348921e-05, "loss": 0.5956, "step": 5084 }, { "epoch": 5.792022792022792, "grad_norm": 0.19942957162857056, "learning_rate": 4.211985810595677e-05, "loss": 0.577, "step": 5085 }, { "epoch": 5.793162393162393, "grad_norm": 0.1661948412656784, "learning_rate": 4.211646249380456e-05, "loss": 0.8215, "step": 5086 }, { "epoch": 5.794301994301994, "grad_norm": 0.16536158323287964, "learning_rate": 4.211306628715054e-05, "loss": 0.6998, "step": 5087 }, { "epoch": 5.795441595441595, "grad_norm": 0.15531805157661438, "learning_rate": 4.210966948611267e-05, "loss": 0.8808, "step": 5088 }, { "epoch": 5.796581196581196, "grad_norm": 0.23157507181167603, "learning_rate": 4.210627209080893e-05, "loss": 0.6034, "step": 5089 }, { "epoch": 5.797720797720798, "grad_norm": 0.19956889748573303, "learning_rate": 4.210287410135732e-05, "loss": 0.7874, "step": 5090 }, { "epoch": 5.798860398860398, "grad_norm": 0.17820307612419128, "learning_rate": 4.209947551787585e-05, "loss": 0.7935, "step": 5091 }, { "epoch": 5.8, "grad_norm": 0.198835089802742, "learning_rate": 4.2096076340482585e-05, "loss": 0.768, "step": 5092 }, { "epoch": 5.801139601139601, "grad_norm": 0.19154450297355652, "learning_rate": 4.2092676569295566e-05, "loss": 0.641, "step": 5093 }, { "epoch": 5.802279202279202, "grad_norm": 0.1891769915819168, "learning_rate": 4.2089276204432884e-05, "loss": 0.8174, "step": 5094 }, { "epoch": 5.803418803418803, "grad_norm": 0.17773933708667755, "learning_rate": 4.2085875246012644e-05, "loss": 0.7182, "step": 5095 }, { "epoch": 5.804558404558405, "grad_norm": 0.18512803316116333, "learning_rate": 4.208247369415297e-05, "loss": 0.8497, "step": 5096 }, { "epoch": 5.805698005698005, "grad_norm": 0.15735124051570892, "learning_rate": 4.2079071548972e-05, "loss": 0.9022, "step": 5097 }, { "epoch": 5.806837606837607, "grad_norm": 0.17104484140872955, "learning_rate": 4.207566881058792e-05, "loss": 0.7536, "step": 5098 }, { "epoch": 5.807977207977208, "grad_norm": 0.15059272944927216, "learning_rate": 4.20722654791189e-05, "loss": 0.9023, "step": 5099 }, { "epoch": 5.8091168091168095, "grad_norm": 0.2025473415851593, "learning_rate": 4.206886155468316e-05, "loss": 0.639, "step": 5100 }, { "epoch": 5.81025641025641, "grad_norm": 0.17586733400821686, "learning_rate": 4.2065457037398905e-05, "loss": 0.968, "step": 5101 }, { "epoch": 5.8113960113960115, "grad_norm": 0.20486289262771606, "learning_rate": 4.206205192738441e-05, "loss": 0.639, "step": 5102 }, { "epoch": 5.812535612535613, "grad_norm": 0.1609588861465454, "learning_rate": 4.205864622475791e-05, "loss": 0.9063, "step": 5103 }, { "epoch": 5.8136752136752134, "grad_norm": 0.17861494421958923, "learning_rate": 4.205523992963773e-05, "loss": 0.7507, "step": 5104 }, { "epoch": 5.814814814814815, "grad_norm": 0.1902887225151062, "learning_rate": 4.2051833042142166e-05, "loss": 0.8143, "step": 5105 }, { "epoch": 5.815954415954416, "grad_norm": 0.19511087238788605, "learning_rate": 4.204842556238954e-05, "loss": 0.636, "step": 5106 }, { "epoch": 5.817094017094017, "grad_norm": 0.1550368368625641, "learning_rate": 4.2045017490498214e-05, "loss": 0.823, "step": 5107 }, { "epoch": 5.818233618233618, "grad_norm": 0.16333742439746857, "learning_rate": 4.2041608826586564e-05, "loss": 0.8188, "step": 5108 }, { "epoch": 5.81937321937322, "grad_norm": 0.1677020639181137, "learning_rate": 4.2038199570772966e-05, "loss": 0.6652, "step": 5109 }, { "epoch": 5.82051282051282, "grad_norm": 0.20748074352741241, "learning_rate": 4.2034789723175835e-05, "loss": 0.6676, "step": 5110 }, { "epoch": 5.821652421652422, "grad_norm": 0.1845245361328125, "learning_rate": 4.203137928391363e-05, "loss": 0.672, "step": 5111 }, { "epoch": 5.822792022792023, "grad_norm": 0.2240746170282364, "learning_rate": 4.202796825310478e-05, "loss": 0.7392, "step": 5112 }, { "epoch": 5.823931623931624, "grad_norm": 0.1537604182958603, "learning_rate": 4.202455663086776e-05, "loss": 0.9385, "step": 5113 }, { "epoch": 5.825071225071225, "grad_norm": 0.2583048343658447, "learning_rate": 4.202114441732107e-05, "loss": 0.4114, "step": 5114 }, { "epoch": 5.8262108262108265, "grad_norm": 0.15980015695095062, "learning_rate": 4.201773161258323e-05, "loss": 0.8168, "step": 5115 }, { "epoch": 5.827350427350427, "grad_norm": 0.15042923390865326, "learning_rate": 4.201431821677277e-05, "loss": 0.7829, "step": 5116 }, { "epoch": 5.8284900284900285, "grad_norm": 0.19120368361473083, "learning_rate": 4.201090423000825e-05, "loss": 0.6365, "step": 5117 }, { "epoch": 5.82962962962963, "grad_norm": 0.17045742273330688, "learning_rate": 4.2007489652408246e-05, "loss": 0.7093, "step": 5118 }, { "epoch": 5.8307692307692305, "grad_norm": 0.18401744961738586, "learning_rate": 4.200407448409135e-05, "loss": 0.7326, "step": 5119 }, { "epoch": 5.831908831908832, "grad_norm": 0.15033607184886932, "learning_rate": 4.200065872517619e-05, "loss": 0.8376, "step": 5120 }, { "epoch": 5.833048433048433, "grad_norm": 0.15364952385425568, "learning_rate": 4.1997242375781396e-05, "loss": 0.8633, "step": 5121 }, { "epoch": 5.834188034188034, "grad_norm": 0.21868816018104553, "learning_rate": 4.1993825436025645e-05, "loss": 0.6797, "step": 5122 }, { "epoch": 5.835327635327635, "grad_norm": 0.15554660558700562, "learning_rate": 4.1990407906027595e-05, "loss": 0.8344, "step": 5123 }, { "epoch": 5.836467236467237, "grad_norm": 0.16153044998645782, "learning_rate": 4.1986989785905954e-05, "loss": 1.0173, "step": 5124 }, { "epoch": 5.837606837606837, "grad_norm": 0.18046070635318756, "learning_rate": 4.1983571075779447e-05, "loss": 0.7211, "step": 5125 }, { "epoch": 5.838746438746439, "grad_norm": 0.15778635442256927, "learning_rate": 4.198015177576681e-05, "loss": 0.825, "step": 5126 }, { "epoch": 5.83988603988604, "grad_norm": 0.18100708723068237, "learning_rate": 4.19767318859868e-05, "loss": 0.7342, "step": 5127 }, { "epoch": 5.841025641025641, "grad_norm": 0.2530038058757782, "learning_rate": 4.1973311406558214e-05, "loss": 0.7057, "step": 5128 }, { "epoch": 5.842165242165242, "grad_norm": 0.1759106069803238, "learning_rate": 4.196989033759984e-05, "loss": 0.7695, "step": 5129 }, { "epoch": 5.843304843304844, "grad_norm": 0.1924852430820465, "learning_rate": 4.196646867923051e-05, "loss": 0.6453, "step": 5130 }, { "epoch": 5.844444444444444, "grad_norm": 0.18492716550827026, "learning_rate": 4.1963046431569064e-05, "loss": 0.6236, "step": 5131 }, { "epoch": 5.8455840455840455, "grad_norm": 0.18507033586502075, "learning_rate": 4.1959623594734365e-05, "loss": 0.6366, "step": 5132 }, { "epoch": 5.846723646723647, "grad_norm": 0.17751383781433105, "learning_rate": 4.195620016884531e-05, "loss": 0.8312, "step": 5133 }, { "epoch": 5.8478632478632475, "grad_norm": 0.1232931986451149, "learning_rate": 4.195277615402078e-05, "loss": 0.9088, "step": 5134 }, { "epoch": 5.849002849002849, "grad_norm": 0.15958844125270844, "learning_rate": 4.194935155037972e-05, "loss": 0.8025, "step": 5135 }, { "epoch": 5.85014245014245, "grad_norm": 0.16987775266170502, "learning_rate": 4.194592635804106e-05, "loss": 0.914, "step": 5136 }, { "epoch": 5.851282051282051, "grad_norm": 0.20572727918624878, "learning_rate": 4.194250057712379e-05, "loss": 0.6833, "step": 5137 }, { "epoch": 5.852421652421652, "grad_norm": 0.1674432009458542, "learning_rate": 4.1939074207746874e-05, "loss": 0.9532, "step": 5138 }, { "epoch": 5.853561253561254, "grad_norm": 0.17399823665618896, "learning_rate": 4.1935647250029334e-05, "loss": 0.7167, "step": 5139 }, { "epoch": 5.854700854700854, "grad_norm": 0.19129186868667603, "learning_rate": 4.193221970409019e-05, "loss": 0.8143, "step": 5140 }, { "epoch": 5.855840455840456, "grad_norm": 0.18846487998962402, "learning_rate": 4.19287915700485e-05, "loss": 0.7287, "step": 5141 }, { "epoch": 5.856980056980057, "grad_norm": 0.167376309633255, "learning_rate": 4.192536284802331e-05, "loss": 0.8228, "step": 5142 }, { "epoch": 5.858119658119658, "grad_norm": 0.1933199018239975, "learning_rate": 4.1921933538133726e-05, "loss": 0.7813, "step": 5143 }, { "epoch": 5.859259259259259, "grad_norm": 0.16732868552207947, "learning_rate": 4.191850364049886e-05, "loss": 0.7563, "step": 5144 }, { "epoch": 5.860398860398861, "grad_norm": 0.20382776856422424, "learning_rate": 4.1915073155237836e-05, "loss": 0.6792, "step": 5145 }, { "epoch": 5.861538461538462, "grad_norm": 0.23003341257572174, "learning_rate": 4.1911642082469806e-05, "loss": 0.6546, "step": 5146 }, { "epoch": 5.862678062678063, "grad_norm": 0.20285111665725708, "learning_rate": 4.190821042231393e-05, "loss": 0.6847, "step": 5147 }, { "epoch": 5.863817663817664, "grad_norm": 0.17044898867607117, "learning_rate": 4.190477817488941e-05, "loss": 0.9053, "step": 5148 }, { "epoch": 5.864957264957265, "grad_norm": 0.16001960635185242, "learning_rate": 4.190134534031547e-05, "loss": 0.7839, "step": 5149 }, { "epoch": 5.866096866096866, "grad_norm": 0.18786023557186127, "learning_rate": 4.1897911918711316e-05, "loss": 0.624, "step": 5150 }, { "epoch": 5.867236467236467, "grad_norm": 0.19194890558719635, "learning_rate": 4.1894477910196215e-05, "loss": 0.828, "step": 5151 }, { "epoch": 5.868376068376069, "grad_norm": 0.18132710456848145, "learning_rate": 4.189104331488943e-05, "loss": 0.767, "step": 5152 }, { "epoch": 5.869515669515669, "grad_norm": 0.19260728359222412, "learning_rate": 4.188760813291027e-05, "loss": 0.7264, "step": 5153 }, { "epoch": 5.870655270655271, "grad_norm": 0.18859456479549408, "learning_rate": 4.188417236437803e-05, "loss": 0.7041, "step": 5154 }, { "epoch": 5.871794871794872, "grad_norm": 0.17015713453292847, "learning_rate": 4.188073600941206e-05, "loss": 0.8557, "step": 5155 }, { "epoch": 5.872934472934473, "grad_norm": 0.15460067987442017, "learning_rate": 4.1877299068131695e-05, "loss": 0.9462, "step": 5156 }, { "epoch": 5.874074074074074, "grad_norm": 0.15997248888015747, "learning_rate": 4.187386154065633e-05, "loss": 0.9338, "step": 5157 }, { "epoch": 5.875213675213676, "grad_norm": 0.2065652459859848, "learning_rate": 4.1870423427105347e-05, "loss": 0.7578, "step": 5158 }, { "epoch": 5.876353276353276, "grad_norm": 0.12909001111984253, "learning_rate": 4.186698472759817e-05, "loss": 0.9939, "step": 5159 }, { "epoch": 5.877492877492878, "grad_norm": 0.20031005144119263, "learning_rate": 4.186354544225422e-05, "loss": 0.5834, "step": 5160 }, { "epoch": 5.878632478632479, "grad_norm": 0.1697911024093628, "learning_rate": 4.1860105571192964e-05, "loss": 0.8348, "step": 5161 }, { "epoch": 5.87977207977208, "grad_norm": 0.1760026067495346, "learning_rate": 4.1856665114533884e-05, "loss": 0.8509, "step": 5162 }, { "epoch": 5.880911680911681, "grad_norm": 0.17915961146354675, "learning_rate": 4.1853224072396455e-05, "loss": 0.7879, "step": 5163 }, { "epoch": 5.8820512820512825, "grad_norm": 0.18653781712055206, "learning_rate": 4.184978244490022e-05, "loss": 0.6107, "step": 5164 }, { "epoch": 5.883190883190883, "grad_norm": 0.1962171345949173, "learning_rate": 4.184634023216469e-05, "loss": 0.6755, "step": 5165 }, { "epoch": 5.8843304843304844, "grad_norm": 0.17595814168453217, "learning_rate": 4.184289743430945e-05, "loss": 0.6782, "step": 5166 }, { "epoch": 5.885470085470086, "grad_norm": 0.17818598449230194, "learning_rate": 4.1839454051454066e-05, "loss": 0.8382, "step": 5167 }, { "epoch": 5.886609686609686, "grad_norm": 0.17707544565200806, "learning_rate": 4.1836010083718126e-05, "loss": 0.7483, "step": 5168 }, { "epoch": 5.887749287749288, "grad_norm": 0.14160114526748657, "learning_rate": 4.1832565531221254e-05, "loss": 0.9062, "step": 5169 }, { "epoch": 5.888888888888889, "grad_norm": 0.21327587962150574, "learning_rate": 4.182912039408309e-05, "loss": 0.7015, "step": 5170 }, { "epoch": 5.89002849002849, "grad_norm": 0.16977612674236298, "learning_rate": 4.1825674672423296e-05, "loss": 0.9099, "step": 5171 }, { "epoch": 5.891168091168091, "grad_norm": 0.1685507893562317, "learning_rate": 4.182222836636156e-05, "loss": 0.8978, "step": 5172 }, { "epoch": 5.892307692307693, "grad_norm": 0.20601628720760345, "learning_rate": 4.181878147601756e-05, "loss": 0.7116, "step": 5173 }, { "epoch": 5.893447293447293, "grad_norm": 0.1586993783712387, "learning_rate": 4.1815334001511045e-05, "loss": 0.7348, "step": 5174 }, { "epoch": 5.894586894586895, "grad_norm": 0.17004381120204926, "learning_rate": 4.1811885942961716e-05, "loss": 0.8967, "step": 5175 }, { "epoch": 5.895726495726496, "grad_norm": 0.16897311806678772, "learning_rate": 4.180843730048937e-05, "loss": 0.9032, "step": 5176 }, { "epoch": 5.896866096866097, "grad_norm": 0.15307942032814026, "learning_rate": 4.1804988074213765e-05, "loss": 0.8533, "step": 5177 }, { "epoch": 5.898005698005698, "grad_norm": 0.18711508810520172, "learning_rate": 4.1801538264254717e-05, "loss": 0.7271, "step": 5178 }, { "epoch": 5.8991452991452995, "grad_norm": 0.15038523077964783, "learning_rate": 4.179808787073203e-05, "loss": 0.8249, "step": 5179 }, { "epoch": 5.9002849002849, "grad_norm": 0.16373242437839508, "learning_rate": 4.1794636893765564e-05, "loss": 0.8624, "step": 5180 }, { "epoch": 5.9014245014245015, "grad_norm": 0.2116909921169281, "learning_rate": 4.179118533347517e-05, "loss": 0.7566, "step": 5181 }, { "epoch": 5.902564102564103, "grad_norm": 0.1693403720855713, "learning_rate": 4.178773318998074e-05, "loss": 0.7506, "step": 5182 }, { "epoch": 5.9037037037037035, "grad_norm": 0.15692570805549622, "learning_rate": 4.178428046340216e-05, "loss": 0.6163, "step": 5183 }, { "epoch": 5.904843304843305, "grad_norm": 0.1439063400030136, "learning_rate": 4.178082715385937e-05, "loss": 0.9431, "step": 5184 }, { "epoch": 5.905982905982906, "grad_norm": 0.16660434007644653, "learning_rate": 4.1777373261472305e-05, "loss": 0.9886, "step": 5185 }, { "epoch": 5.907122507122507, "grad_norm": 0.17003867030143738, "learning_rate": 4.1773918786360925e-05, "loss": 0.6948, "step": 5186 }, { "epoch": 5.908262108262108, "grad_norm": 0.18516221642494202, "learning_rate": 4.1770463728645214e-05, "loss": 0.5875, "step": 5187 }, { "epoch": 5.90940170940171, "grad_norm": 0.17292432487010956, "learning_rate": 4.1767008088445185e-05, "loss": 0.9163, "step": 5188 }, { "epoch": 5.91054131054131, "grad_norm": 0.1750919222831726, "learning_rate": 4.1763551865880854e-05, "loss": 0.7027, "step": 5189 }, { "epoch": 5.911680911680912, "grad_norm": 0.16704607009887695, "learning_rate": 4.176009506107227e-05, "loss": 0.9621, "step": 5190 }, { "epoch": 5.912820512820513, "grad_norm": 0.1801830530166626, "learning_rate": 4.175663767413949e-05, "loss": 0.6906, "step": 5191 }, { "epoch": 5.913960113960114, "grad_norm": 0.26363036036491394, "learning_rate": 4.175317970520261e-05, "loss": 0.938, "step": 5192 }, { "epoch": 5.915099715099715, "grad_norm": 0.17508383095264435, "learning_rate": 4.174972115438172e-05, "loss": 0.7674, "step": 5193 }, { "epoch": 5.9162393162393165, "grad_norm": 0.16467712819576263, "learning_rate": 4.1746262021796956e-05, "loss": 0.8915, "step": 5194 }, { "epoch": 5.917378917378917, "grad_norm": 0.20937953889369965, "learning_rate": 4.174280230756845e-05, "loss": 0.7949, "step": 5195 }, { "epoch": 5.9185185185185185, "grad_norm": 0.17789477109909058, "learning_rate": 4.173934201181639e-05, "loss": 0.796, "step": 5196 }, { "epoch": 5.91965811965812, "grad_norm": 0.15541639924049377, "learning_rate": 4.173588113466094e-05, "loss": 0.8389, "step": 5197 }, { "epoch": 5.9207977207977205, "grad_norm": 0.19931167364120483, "learning_rate": 4.1732419676222315e-05, "loss": 0.761, "step": 5198 }, { "epoch": 5.921937321937322, "grad_norm": 0.17033979296684265, "learning_rate": 4.172895763662075e-05, "loss": 0.8802, "step": 5199 }, { "epoch": 5.923076923076923, "grad_norm": 0.14268510043621063, "learning_rate": 4.172549501597647e-05, "loss": 0.9945, "step": 5200 }, { "epoch": 5.924216524216524, "grad_norm": 0.21729013323783875, "learning_rate": 4.1722031814409754e-05, "loss": 0.7922, "step": 5201 }, { "epoch": 5.925356125356125, "grad_norm": 0.17441385984420776, "learning_rate": 4.1718568032040895e-05, "loss": 0.8094, "step": 5202 }, { "epoch": 5.926495726495727, "grad_norm": 0.16148298978805542, "learning_rate": 4.1715103668990184e-05, "loss": 0.9102, "step": 5203 }, { "epoch": 5.927635327635327, "grad_norm": 0.25135836005210876, "learning_rate": 4.1711638725377955e-05, "loss": 0.5235, "step": 5204 }, { "epoch": 5.928774928774929, "grad_norm": 0.17879357933998108, "learning_rate": 4.170817320132456e-05, "loss": 0.8012, "step": 5205 }, { "epoch": 5.92991452991453, "grad_norm": 0.19453158974647522, "learning_rate": 4.170470709695037e-05, "loss": 0.7937, "step": 5206 }, { "epoch": 5.931054131054131, "grad_norm": 0.1418699026107788, "learning_rate": 4.170124041237575e-05, "loss": 0.8227, "step": 5207 }, { "epoch": 5.932193732193732, "grad_norm": 0.15917539596557617, "learning_rate": 4.169777314772113e-05, "loss": 0.7265, "step": 5208 }, { "epoch": 5.933333333333334, "grad_norm": 0.15594954788684845, "learning_rate": 4.169430530310693e-05, "loss": 0.8795, "step": 5209 }, { "epoch": 5.934472934472934, "grad_norm": 0.16013088822364807, "learning_rate": 4.169083687865359e-05, "loss": 0.9405, "step": 5210 }, { "epoch": 5.9356125356125355, "grad_norm": 0.1493656039237976, "learning_rate": 4.16873678744816e-05, "loss": 0.8744, "step": 5211 }, { "epoch": 5.936752136752137, "grad_norm": 0.1798950731754303, "learning_rate": 4.168389829071142e-05, "loss": 0.6973, "step": 5212 }, { "epoch": 5.9378917378917375, "grad_norm": 0.15619346499443054, "learning_rate": 4.1680428127463576e-05, "loss": 0.9694, "step": 5213 }, { "epoch": 5.939031339031339, "grad_norm": 0.17171142995357513, "learning_rate": 4.1676957384858584e-05, "loss": 0.7721, "step": 5214 }, { "epoch": 5.94017094017094, "grad_norm": 0.21483421325683594, "learning_rate": 4.167348606301701e-05, "loss": 0.7483, "step": 5215 }, { "epoch": 5.941310541310541, "grad_norm": 0.1769595593214035, "learning_rate": 4.167001416205941e-05, "loss": 0.7624, "step": 5216 }, { "epoch": 5.942450142450142, "grad_norm": 0.17398640513420105, "learning_rate": 4.166654168210637e-05, "loss": 0.7972, "step": 5217 }, { "epoch": 5.943589743589744, "grad_norm": 0.16116030514240265, "learning_rate": 4.166306862327851e-05, "loss": 0.9679, "step": 5218 }, { "epoch": 5.944729344729344, "grad_norm": 0.16955454647541046, "learning_rate": 4.165959498569646e-05, "loss": 0.8423, "step": 5219 }, { "epoch": 5.945868945868946, "grad_norm": 0.14700596034526825, "learning_rate": 4.165612076948086e-05, "loss": 0.8902, "step": 5220 }, { "epoch": 5.947008547008547, "grad_norm": 0.2059997171163559, "learning_rate": 4.165264597475237e-05, "loss": 0.5552, "step": 5221 }, { "epoch": 5.948148148148148, "grad_norm": 0.16482126712799072, "learning_rate": 4.164917060163169e-05, "loss": 0.7523, "step": 5222 }, { "epoch": 5.949287749287749, "grad_norm": 0.17832493782043457, "learning_rate": 4.1645694650239543e-05, "loss": 0.7924, "step": 5223 }, { "epoch": 5.950427350427351, "grad_norm": 0.1778307408094406, "learning_rate": 4.1642218120696633e-05, "loss": 0.9122, "step": 5224 }, { "epoch": 5.951566951566951, "grad_norm": 0.16003210842609406, "learning_rate": 4.163874101312373e-05, "loss": 0.741, "step": 5225 }, { "epoch": 5.952706552706553, "grad_norm": 0.21680839359760284, "learning_rate": 4.163526332764159e-05, "loss": 0.688, "step": 5226 }, { "epoch": 5.953846153846154, "grad_norm": 0.1745540350675583, "learning_rate": 4.163178506437101e-05, "loss": 0.6564, "step": 5227 }, { "epoch": 5.9549857549857546, "grad_norm": 0.18622197210788727, "learning_rate": 4.1628306223432796e-05, "loss": 0.7516, "step": 5228 }, { "epoch": 5.956125356125356, "grad_norm": 0.20277757942676544, "learning_rate": 4.1624826804947776e-05, "loss": 0.6995, "step": 5229 }, { "epoch": 5.957264957264957, "grad_norm": 0.16135753691196442, "learning_rate": 4.162134680903681e-05, "loss": 0.8785, "step": 5230 }, { "epoch": 5.958404558404558, "grad_norm": 0.18638533353805542, "learning_rate": 4.161786623582075e-05, "loss": 0.7628, "step": 5231 }, { "epoch": 5.959544159544159, "grad_norm": 0.1866452842950821, "learning_rate": 4.1614385085420506e-05, "loss": 0.7243, "step": 5232 }, { "epoch": 5.960683760683761, "grad_norm": 0.1564568281173706, "learning_rate": 4.1610903357956964e-05, "loss": 0.7086, "step": 5233 }, { "epoch": 5.961823361823361, "grad_norm": 0.18204548954963684, "learning_rate": 4.1607421053551085e-05, "loss": 0.8382, "step": 5234 }, { "epoch": 5.962962962962963, "grad_norm": 0.17170333862304688, "learning_rate": 4.16039381723238e-05, "loss": 0.83, "step": 5235 }, { "epoch": 5.964102564102564, "grad_norm": 0.1687270998954773, "learning_rate": 4.160045471439607e-05, "loss": 0.7975, "step": 5236 }, { "epoch": 5.965242165242165, "grad_norm": 0.18899831175804138, "learning_rate": 4.1596970679888904e-05, "loss": 0.7623, "step": 5237 }, { "epoch": 5.966381766381766, "grad_norm": 0.2305077463388443, "learning_rate": 4.1593486068923304e-05, "loss": 0.5313, "step": 5238 }, { "epoch": 5.967521367521368, "grad_norm": 0.15524014830589294, "learning_rate": 4.15900008816203e-05, "loss": 0.889, "step": 5239 }, { "epoch": 5.968660968660968, "grad_norm": 0.17087559401988983, "learning_rate": 4.158651511810094e-05, "loss": 0.6742, "step": 5240 }, { "epoch": 5.96980056980057, "grad_norm": 0.18067264556884766, "learning_rate": 4.1583028778486306e-05, "loss": 0.831, "step": 5241 }, { "epoch": 5.970940170940171, "grad_norm": 0.19102199375629425, "learning_rate": 4.157954186289747e-05, "loss": 0.6895, "step": 5242 }, { "epoch": 5.972079772079772, "grad_norm": 0.1627579778432846, "learning_rate": 4.1576054371455556e-05, "loss": 0.781, "step": 5243 }, { "epoch": 5.973219373219373, "grad_norm": 0.2128324806690216, "learning_rate": 4.157256630428169e-05, "loss": 0.5727, "step": 5244 }, { "epoch": 5.9743589743589745, "grad_norm": 0.18273428082466125, "learning_rate": 4.156907766149701e-05, "loss": 0.8635, "step": 5245 }, { "epoch": 5.975498575498576, "grad_norm": 0.18876968324184418, "learning_rate": 4.1565588443222717e-05, "loss": 0.7594, "step": 5246 }, { "epoch": 5.976638176638176, "grad_norm": 0.17812277376651764, "learning_rate": 4.156209864957996e-05, "loss": 0.8341, "step": 5247 }, { "epoch": 5.977777777777778, "grad_norm": 0.16235648095607758, "learning_rate": 4.155860828068998e-05, "loss": 0.8527, "step": 5248 }, { "epoch": 5.978917378917379, "grad_norm": 0.16066183149814606, "learning_rate": 4.155511733667401e-05, "loss": 0.8579, "step": 5249 }, { "epoch": 5.98005698005698, "grad_norm": 0.18555085361003876, "learning_rate": 4.155162581765327e-05, "loss": 0.8633, "step": 5250 }, { "epoch": 5.981196581196581, "grad_norm": 0.17659147083759308, "learning_rate": 4.1548133723749063e-05, "loss": 0.8195, "step": 5251 }, { "epoch": 5.982336182336183, "grad_norm": 0.16259127855300903, "learning_rate": 4.1544641055082655e-05, "loss": 0.8269, "step": 5252 }, { "epoch": 5.983475783475783, "grad_norm": 0.20572814345359802, "learning_rate": 4.154114781177537e-05, "loss": 0.789, "step": 5253 }, { "epoch": 5.984615384615385, "grad_norm": 0.2040083110332489, "learning_rate": 4.1537653993948534e-05, "loss": 0.7576, "step": 5254 }, { "epoch": 5.985754985754986, "grad_norm": 0.19470487534999847, "learning_rate": 4.1534159601723485e-05, "loss": 0.7658, "step": 5255 }, { "epoch": 5.986894586894587, "grad_norm": 0.20511211454868317, "learning_rate": 4.153066463522162e-05, "loss": 0.6674, "step": 5256 }, { "epoch": 5.988034188034188, "grad_norm": 0.16894656419754028, "learning_rate": 4.15271690945643e-05, "loss": 0.7289, "step": 5257 }, { "epoch": 5.9891737891737895, "grad_norm": 0.16726945340633392, "learning_rate": 4.152367297987295e-05, "loss": 0.6524, "step": 5258 }, { "epoch": 5.99031339031339, "grad_norm": 0.2010265290737152, "learning_rate": 4.1520176291269e-05, "loss": 0.7551, "step": 5259 }, { "epoch": 5.9914529914529915, "grad_norm": 0.22712863981723785, "learning_rate": 4.15166790288739e-05, "loss": 0.6571, "step": 5260 }, { "epoch": 5.992592592592593, "grad_norm": 0.1745138317346573, "learning_rate": 4.1513181192809116e-05, "loss": 0.9201, "step": 5261 }, { "epoch": 5.9937321937321935, "grad_norm": 0.16642703115940094, "learning_rate": 4.150968278319614e-05, "loss": 0.7621, "step": 5262 }, { "epoch": 5.994871794871795, "grad_norm": 0.18345484137535095, "learning_rate": 4.150618380015647e-05, "loss": 0.7618, "step": 5263 }, { "epoch": 5.996011396011396, "grad_norm": 0.162578746676445, "learning_rate": 4.150268424381164e-05, "loss": 0.8196, "step": 5264 }, { "epoch": 5.997150997150997, "grad_norm": 0.15569950640201569, "learning_rate": 4.149918411428321e-05, "loss": 0.8533, "step": 5265 }, { "epoch": 5.998290598290598, "grad_norm": 0.1814025491476059, "learning_rate": 4.149568341169275e-05, "loss": 0.6732, "step": 5266 }, { "epoch": 5.9994301994302, "grad_norm": 0.1958899050951004, "learning_rate": 4.149218213616184e-05, "loss": 0.8781, "step": 5267 }, { "epoch": 6.0, "grad_norm": 0.3204585313796997, "learning_rate": 4.1488680287812085e-05, "loss": 0.865, "step": 5268 }, { "epoch": 6.001139601139601, "grad_norm": 0.1715724766254425, "learning_rate": 4.148517786676512e-05, "loss": 0.8186, "step": 5269 }, { "epoch": 6.002279202279202, "grad_norm": 0.1656695306301117, "learning_rate": 4.14816748731426e-05, "loss": 0.872, "step": 5270 }, { "epoch": 6.003418803418803, "grad_norm": 0.16879065334796906, "learning_rate": 4.147817130706617e-05, "loss": 0.7316, "step": 5271 }, { "epoch": 6.004558404558405, "grad_norm": 0.19011043012142181, "learning_rate": 4.1474667168657556e-05, "loss": 0.9102, "step": 5272 }, { "epoch": 6.005698005698005, "grad_norm": 0.17683662474155426, "learning_rate": 4.147116245803844e-05, "loss": 0.8387, "step": 5273 }, { "epoch": 6.006837606837607, "grad_norm": 0.14864976704120636, "learning_rate": 4.146765717533054e-05, "loss": 0.9381, "step": 5274 }, { "epoch": 6.007977207977208, "grad_norm": 0.1777380406856537, "learning_rate": 4.146415132065564e-05, "loss": 0.8416, "step": 5275 }, { "epoch": 6.009116809116809, "grad_norm": 0.17405077815055847, "learning_rate": 4.146064489413548e-05, "loss": 0.6389, "step": 5276 }, { "epoch": 6.01025641025641, "grad_norm": 0.1455533653497696, "learning_rate": 4.145713789589185e-05, "loss": 0.8916, "step": 5277 }, { "epoch": 6.011396011396012, "grad_norm": 0.15930397808551788, "learning_rate": 4.145363032604658e-05, "loss": 0.8626, "step": 5278 }, { "epoch": 6.012535612535612, "grad_norm": 0.19944411516189575, "learning_rate": 4.1450122184721465e-05, "loss": 0.6064, "step": 5279 }, { "epoch": 6.013675213675214, "grad_norm": 0.20567545294761658, "learning_rate": 4.1446613472038376e-05, "loss": 0.632, "step": 5280 }, { "epoch": 6.014814814814815, "grad_norm": 0.1783902496099472, "learning_rate": 4.144310418811917e-05, "loss": 0.8538, "step": 5281 }, { "epoch": 6.015954415954416, "grad_norm": 0.17142640054225922, "learning_rate": 4.143959433308574e-05, "loss": 0.76, "step": 5282 }, { "epoch": 6.017094017094017, "grad_norm": 0.1823304146528244, "learning_rate": 4.1436083907059985e-05, "loss": 0.8067, "step": 5283 }, { "epoch": 6.0182336182336185, "grad_norm": 0.18380099534988403, "learning_rate": 4.143257291016385e-05, "loss": 0.6778, "step": 5284 }, { "epoch": 6.019373219373219, "grad_norm": 0.1562369167804718, "learning_rate": 4.1429061342519246e-05, "loss": 0.9111, "step": 5285 }, { "epoch": 6.02051282051282, "grad_norm": 0.18521913886070251, "learning_rate": 4.142554920424818e-05, "loss": 0.7249, "step": 5286 }, { "epoch": 6.021652421652422, "grad_norm": 0.22115492820739746, "learning_rate": 4.142203649547261e-05, "loss": 0.6986, "step": 5287 }, { "epoch": 6.022792022792022, "grad_norm": 0.18572677671909332, "learning_rate": 4.1418523216314555e-05, "loss": 0.6859, "step": 5288 }, { "epoch": 6.023931623931624, "grad_norm": 0.19951315224170685, "learning_rate": 4.141500936689604e-05, "loss": 0.578, "step": 5289 }, { "epoch": 6.025071225071225, "grad_norm": 0.16582657396793365, "learning_rate": 4.14114949473391e-05, "loss": 0.7907, "step": 5290 }, { "epoch": 6.026210826210826, "grad_norm": 0.17808274924755096, "learning_rate": 4.140797995776582e-05, "loss": 0.8106, "step": 5291 }, { "epoch": 6.027350427350427, "grad_norm": 0.18221062421798706, "learning_rate": 4.1404464398298264e-05, "loss": 0.6451, "step": 5292 }, { "epoch": 6.028490028490029, "grad_norm": 0.1840764284133911, "learning_rate": 4.1400948269058555e-05, "loss": 0.7731, "step": 5293 }, { "epoch": 6.029629629629629, "grad_norm": 0.14906363189220428, "learning_rate": 4.1397431570168806e-05, "loss": 0.8789, "step": 5294 }, { "epoch": 6.030769230769231, "grad_norm": 0.21626117825508118, "learning_rate": 4.1393914301751166e-05, "loss": 0.7083, "step": 5295 }, { "epoch": 6.031908831908832, "grad_norm": 0.17459997534751892, "learning_rate": 4.13903964639278e-05, "loss": 0.8056, "step": 5296 }, { "epoch": 6.033048433048433, "grad_norm": 0.19678983092308044, "learning_rate": 4.138687805682089e-05, "loss": 0.7631, "step": 5297 }, { "epoch": 6.034188034188034, "grad_norm": 0.18847918510437012, "learning_rate": 4.138335908055264e-05, "loss": 0.6923, "step": 5298 }, { "epoch": 6.0353276353276355, "grad_norm": 0.20920641720294952, "learning_rate": 4.137983953524527e-05, "loss": 0.6162, "step": 5299 }, { "epoch": 6.036467236467236, "grad_norm": 0.17030276358127594, "learning_rate": 4.137631942102104e-05, "loss": 0.8526, "step": 5300 }, { "epoch": 6.0376068376068375, "grad_norm": 0.16370107233524323, "learning_rate": 4.137279873800219e-05, "loss": 0.6122, "step": 5301 }, { "epoch": 6.038746438746439, "grad_norm": 0.20291702449321747, "learning_rate": 4.136927748631102e-05, "loss": 0.749, "step": 5302 }, { "epoch": 6.0398860398860394, "grad_norm": 0.16677983105182648, "learning_rate": 4.136575566606982e-05, "loss": 0.9102, "step": 5303 }, { "epoch": 6.041025641025641, "grad_norm": 0.1749075949192047, "learning_rate": 4.136223327740093e-05, "loss": 0.7655, "step": 5304 }, { "epoch": 6.042165242165242, "grad_norm": 0.20569486916065216, "learning_rate": 4.135871032042668e-05, "loss": 0.6869, "step": 5305 }, { "epoch": 6.043304843304844, "grad_norm": 0.19171242415905, "learning_rate": 4.135518679526943e-05, "loss": 0.6695, "step": 5306 }, { "epoch": 6.044444444444444, "grad_norm": 0.16397686302661896, "learning_rate": 4.135166270205157e-05, "loss": 0.7385, "step": 5307 }, { "epoch": 6.045584045584046, "grad_norm": 0.21968406438827515, "learning_rate": 4.134813804089549e-05, "loss": 0.5997, "step": 5308 }, { "epoch": 6.046723646723647, "grad_norm": 0.15962359309196472, "learning_rate": 4.1344612811923625e-05, "loss": 0.926, "step": 5309 }, { "epoch": 6.047863247863248, "grad_norm": 0.1336166262626648, "learning_rate": 4.1341087015258405e-05, "loss": 1.0021, "step": 5310 }, { "epoch": 6.049002849002849, "grad_norm": 0.171193465590477, "learning_rate": 4.13375606510223e-05, "loss": 0.7357, "step": 5311 }, { "epoch": 6.050142450142451, "grad_norm": 0.1886366605758667, "learning_rate": 4.133403371933778e-05, "loss": 0.7643, "step": 5312 }, { "epoch": 6.051282051282051, "grad_norm": 0.17218226194381714, "learning_rate": 4.133050622032736e-05, "loss": 0.742, "step": 5313 }, { "epoch": 6.0524216524216525, "grad_norm": 0.15594317018985748, "learning_rate": 4.132697815411354e-05, "loss": 0.9512, "step": 5314 }, { "epoch": 6.053561253561254, "grad_norm": 0.14497053623199463, "learning_rate": 4.132344952081887e-05, "loss": 0.8102, "step": 5315 }, { "epoch": 6.0547008547008545, "grad_norm": 0.18346065282821655, "learning_rate": 4.1319920320565906e-05, "loss": 0.7741, "step": 5316 }, { "epoch": 6.055840455840456, "grad_norm": 0.15506385266780853, "learning_rate": 4.1316390553477236e-05, "loss": 0.726, "step": 5317 }, { "epoch": 6.056980056980057, "grad_norm": 0.20411111414432526, "learning_rate": 4.131286021967545e-05, "loss": 0.6767, "step": 5318 }, { "epoch": 6.058119658119658, "grad_norm": 0.16498993337154388, "learning_rate": 4.130932931928316e-05, "loss": 0.7701, "step": 5319 }, { "epoch": 6.059259259259259, "grad_norm": 0.19005899131298065, "learning_rate": 4.130579785242302e-05, "loss": 0.8537, "step": 5320 }, { "epoch": 6.060398860398861, "grad_norm": 0.15106602013111115, "learning_rate": 4.130226581921768e-05, "loss": 0.8611, "step": 5321 }, { "epoch": 6.061538461538461, "grad_norm": 0.1821715235710144, "learning_rate": 4.129873321978982e-05, "loss": 0.6789, "step": 5322 }, { "epoch": 6.062678062678063, "grad_norm": 0.15828917920589447, "learning_rate": 4.129520005426213e-05, "loss": 0.6968, "step": 5323 }, { "epoch": 6.063817663817664, "grad_norm": 0.15542666614055634, "learning_rate": 4.129166632275733e-05, "loss": 0.9701, "step": 5324 }, { "epoch": 6.064957264957265, "grad_norm": 0.19357499480247498, "learning_rate": 4.128813202539815e-05, "loss": 0.6658, "step": 5325 }, { "epoch": 6.066096866096866, "grad_norm": 0.2077610343694687, "learning_rate": 4.128459716230736e-05, "loss": 0.6631, "step": 5326 }, { "epoch": 6.067236467236468, "grad_norm": 0.14219580590724945, "learning_rate": 4.128106173360773e-05, "loss": 0.9613, "step": 5327 }, { "epoch": 6.068376068376068, "grad_norm": 0.17766830325126648, "learning_rate": 4.127752573942204e-05, "loss": 0.8159, "step": 5328 }, { "epoch": 6.06951566951567, "grad_norm": 0.16196675598621368, "learning_rate": 4.1273989179873126e-05, "loss": 0.8398, "step": 5329 }, { "epoch": 6.070655270655271, "grad_norm": 0.13799361884593964, "learning_rate": 4.127045205508382e-05, "loss": 0.9066, "step": 5330 }, { "epoch": 6.0717948717948715, "grad_norm": 0.16934914886951447, "learning_rate": 4.126691436517696e-05, "loss": 0.7317, "step": 5331 }, { "epoch": 6.072934472934473, "grad_norm": 0.16968083381652832, "learning_rate": 4.126337611027544e-05, "loss": 0.8202, "step": 5332 }, { "epoch": 6.074074074074074, "grad_norm": 0.17433682084083557, "learning_rate": 4.1259837290502135e-05, "loss": 0.7458, "step": 5333 }, { "epoch": 6.075213675213675, "grad_norm": 0.23448632657527924, "learning_rate": 4.125629790597997e-05, "loss": 0.565, "step": 5334 }, { "epoch": 6.076353276353276, "grad_norm": 0.18482531607151031, "learning_rate": 4.125275795683187e-05, "loss": 0.723, "step": 5335 }, { "epoch": 6.077492877492878, "grad_norm": 0.17934885621070862, "learning_rate": 4.1249217443180795e-05, "loss": 0.7322, "step": 5336 }, { "epoch": 6.078632478632478, "grad_norm": 0.2015325129032135, "learning_rate": 4.124567636514971e-05, "loss": 0.6252, "step": 5337 }, { "epoch": 6.07977207977208, "grad_norm": 0.2114899903535843, "learning_rate": 4.1242134722861606e-05, "loss": 0.7364, "step": 5338 }, { "epoch": 6.080911680911681, "grad_norm": 0.15407277643680573, "learning_rate": 4.1238592516439505e-05, "loss": 0.9147, "step": 5339 }, { "epoch": 6.082051282051282, "grad_norm": 0.1575423777103424, "learning_rate": 4.123504974600643e-05, "loss": 0.7913, "step": 5340 }, { "epoch": 6.083190883190883, "grad_norm": 0.1540532261133194, "learning_rate": 4.123150641168542e-05, "loss": 0.8318, "step": 5341 }, { "epoch": 6.084330484330485, "grad_norm": 0.16054439544677734, "learning_rate": 4.122796251359956e-05, "loss": 0.7438, "step": 5342 }, { "epoch": 6.085470085470085, "grad_norm": 0.1887817233800888, "learning_rate": 4.122441805187194e-05, "loss": 0.7258, "step": 5343 }, { "epoch": 6.086609686609687, "grad_norm": 0.1544388383626938, "learning_rate": 4.1220873026625664e-05, "loss": 0.9499, "step": 5344 }, { "epoch": 6.087749287749288, "grad_norm": 0.15911957621574402, "learning_rate": 4.1217327437983854e-05, "loss": 0.8015, "step": 5345 }, { "epoch": 6.088888888888889, "grad_norm": 0.19378520548343658, "learning_rate": 4.121378128606967e-05, "loss": 0.7329, "step": 5346 }, { "epoch": 6.09002849002849, "grad_norm": 0.1973607987165451, "learning_rate": 4.121023457100627e-05, "loss": 0.8194, "step": 5347 }, { "epoch": 6.091168091168091, "grad_norm": 0.2163930982351303, "learning_rate": 4.1206687292916845e-05, "loss": 0.6304, "step": 5348 }, { "epoch": 6.092307692307692, "grad_norm": 0.1876726597547531, "learning_rate": 4.12031394519246e-05, "loss": 0.6908, "step": 5349 }, { "epoch": 6.093447293447293, "grad_norm": 0.16954608261585236, "learning_rate": 4.1199591048152766e-05, "loss": 0.9084, "step": 5350 }, { "epoch": 6.094586894586895, "grad_norm": 0.14664937555789948, "learning_rate": 4.119604208172458e-05, "loss": 0.9353, "step": 5351 }, { "epoch": 6.095726495726495, "grad_norm": 0.17605088651180267, "learning_rate": 4.119249255276332e-05, "loss": 0.859, "step": 5352 }, { "epoch": 6.096866096866097, "grad_norm": 0.15377052128314972, "learning_rate": 4.1188942461392266e-05, "loss": 0.8562, "step": 5353 }, { "epoch": 6.098005698005698, "grad_norm": 0.17394904792308807, "learning_rate": 4.118539180773472e-05, "loss": 0.8247, "step": 5354 }, { "epoch": 6.099145299145299, "grad_norm": 0.16856645047664642, "learning_rate": 4.1181840591914e-05, "loss": 0.8811, "step": 5355 }, { "epoch": 6.1002849002849, "grad_norm": 0.2298177182674408, "learning_rate": 4.117828881405346e-05, "loss": 0.6079, "step": 5356 }, { "epoch": 6.101424501424502, "grad_norm": 0.21722614765167236, "learning_rate": 4.1174736474276464e-05, "loss": 0.6172, "step": 5357 }, { "epoch": 6.102564102564102, "grad_norm": 0.14236940443515778, "learning_rate": 4.117118357270638e-05, "loss": 0.9813, "step": 5358 }, { "epoch": 6.103703703703704, "grad_norm": 0.16800394654273987, "learning_rate": 4.1167630109466626e-05, "loss": 0.6162, "step": 5359 }, { "epoch": 6.104843304843305, "grad_norm": 0.1543532907962799, "learning_rate": 4.116407608468062e-05, "loss": 0.859, "step": 5360 }, { "epoch": 6.105982905982906, "grad_norm": 0.20111872255802155, "learning_rate": 4.116052149847179e-05, "loss": 0.594, "step": 5361 }, { "epoch": 6.107122507122507, "grad_norm": 0.19053106009960175, "learning_rate": 4.115696635096361e-05, "loss": 0.7708, "step": 5362 }, { "epoch": 6.1082621082621085, "grad_norm": 0.18006569147109985, "learning_rate": 4.115341064227956e-05, "loss": 0.8295, "step": 5363 }, { "epoch": 6.109401709401709, "grad_norm": 0.15343549847602844, "learning_rate": 4.114985437254314e-05, "loss": 0.8868, "step": 5364 }, { "epoch": 6.1105413105413104, "grad_norm": 0.16695021092891693, "learning_rate": 4.114629754187786e-05, "loss": 0.9424, "step": 5365 }, { "epoch": 6.111680911680912, "grad_norm": 0.19037920236587524, "learning_rate": 4.114274015040726e-05, "loss": 0.7676, "step": 5366 }, { "epoch": 6.112820512820512, "grad_norm": 0.16210995614528656, "learning_rate": 4.113918219825491e-05, "loss": 0.8741, "step": 5367 }, { "epoch": 6.113960113960114, "grad_norm": 0.20548321306705475, "learning_rate": 4.113562368554438e-05, "loss": 0.6218, "step": 5368 }, { "epoch": 6.115099715099715, "grad_norm": 0.19467325508594513, "learning_rate": 4.113206461239926e-05, "loss": 0.5885, "step": 5369 }, { "epoch": 6.116239316239316, "grad_norm": 0.2024887353181839, "learning_rate": 4.112850497894317e-05, "loss": 0.9011, "step": 5370 }, { "epoch": 6.117378917378917, "grad_norm": 0.18437287211418152, "learning_rate": 4.112494478529975e-05, "loss": 0.9799, "step": 5371 }, { "epoch": 6.118518518518519, "grad_norm": 0.17807888984680176, "learning_rate": 4.112138403159266e-05, "loss": 0.7263, "step": 5372 }, { "epoch": 6.119658119658119, "grad_norm": 0.19107209146022797, "learning_rate": 4.111782271794556e-05, "loss": 0.761, "step": 5373 }, { "epoch": 6.120797720797721, "grad_norm": 0.1876402199268341, "learning_rate": 4.111426084448217e-05, "loss": 0.7739, "step": 5374 }, { "epoch": 6.121937321937322, "grad_norm": 0.17592483758926392, "learning_rate": 4.111069841132617e-05, "loss": 0.8204, "step": 5375 }, { "epoch": 6.123076923076923, "grad_norm": 0.18379825353622437, "learning_rate": 4.110713541860132e-05, "loss": 0.7587, "step": 5376 }, { "epoch": 6.124216524216524, "grad_norm": 0.2072933465242386, "learning_rate": 4.110357186643135e-05, "loss": 0.713, "step": 5377 }, { "epoch": 6.1253561253561255, "grad_norm": 0.22593344748020172, "learning_rate": 4.110000775494005e-05, "loss": 0.74, "step": 5378 }, { "epoch": 6.126495726495726, "grad_norm": 0.156778022646904, "learning_rate": 4.109644308425121e-05, "loss": 0.8376, "step": 5379 }, { "epoch": 6.1276353276353275, "grad_norm": 0.20049187541007996, "learning_rate": 4.109287785448863e-05, "loss": 0.7331, "step": 5380 }, { "epoch": 6.128774928774929, "grad_norm": 0.1793132722377777, "learning_rate": 4.1089312065776146e-05, "loss": 0.8058, "step": 5381 }, { "epoch": 6.12991452991453, "grad_norm": 0.17604891955852509, "learning_rate": 4.108574571823761e-05, "loss": 0.633, "step": 5382 }, { "epoch": 6.131054131054131, "grad_norm": 0.16516968607902527, "learning_rate": 4.10821788119969e-05, "loss": 0.9, "step": 5383 }, { "epoch": 6.132193732193732, "grad_norm": 0.18298101425170898, "learning_rate": 4.107861134717788e-05, "loss": 0.6686, "step": 5384 }, { "epoch": 6.133333333333334, "grad_norm": 0.18403977155685425, "learning_rate": 4.107504332390447e-05, "loss": 0.6766, "step": 5385 }, { "epoch": 6.134472934472934, "grad_norm": 0.15163980424404144, "learning_rate": 4.107147474230061e-05, "loss": 0.934, "step": 5386 }, { "epoch": 6.135612535612536, "grad_norm": 0.1759641021490097, "learning_rate": 4.106790560249023e-05, "loss": 0.8518, "step": 5387 }, { "epoch": 6.136752136752137, "grad_norm": 0.15870292484760284, "learning_rate": 4.10643359045973e-05, "loss": 0.8359, "step": 5388 }, { "epoch": 6.137891737891738, "grad_norm": 0.17169325053691864, "learning_rate": 4.106076564874581e-05, "loss": 0.8743, "step": 5389 }, { "epoch": 6.139031339031339, "grad_norm": 0.19816268980503082, "learning_rate": 4.105719483505976e-05, "loss": 0.8448, "step": 5390 }, { "epoch": 6.140170940170941, "grad_norm": 0.1616421937942505, "learning_rate": 4.105362346366317e-05, "loss": 0.8303, "step": 5391 }, { "epoch": 6.141310541310541, "grad_norm": 0.1825469583272934, "learning_rate": 4.10500515346801e-05, "loss": 0.8529, "step": 5392 }, { "epoch": 6.1424501424501425, "grad_norm": 0.1738435924053192, "learning_rate": 4.10464790482346e-05, "loss": 0.7349, "step": 5393 }, { "epoch": 6.143589743589744, "grad_norm": 0.15835785865783691, "learning_rate": 4.1042906004450754e-05, "loss": 0.7235, "step": 5394 }, { "epoch": 6.1447293447293445, "grad_norm": 0.251121461391449, "learning_rate": 4.103933240345266e-05, "loss": 0.6078, "step": 5395 }, { "epoch": 6.145868945868946, "grad_norm": 0.1763714998960495, "learning_rate": 4.1035758245364455e-05, "loss": 0.6898, "step": 5396 }, { "epoch": 6.147008547008547, "grad_norm": 0.17268727719783783, "learning_rate": 4.1032183530310256e-05, "loss": 0.7949, "step": 5397 }, { "epoch": 6.148148148148148, "grad_norm": 0.17881803214550018, "learning_rate": 4.102860825841423e-05, "loss": 0.7018, "step": 5398 }, { "epoch": 6.149287749287749, "grad_norm": 0.1909571886062622, "learning_rate": 4.102503242980057e-05, "loss": 0.8175, "step": 5399 }, { "epoch": 6.150427350427351, "grad_norm": 0.1631585657596588, "learning_rate": 4.1021456044593466e-05, "loss": 0.6682, "step": 5400 }, { "epoch": 6.151566951566951, "grad_norm": 0.2057657539844513, "learning_rate": 4.101787910291713e-05, "loss": 0.8057, "step": 5401 }, { "epoch": 6.152706552706553, "grad_norm": 0.17795559763908386, "learning_rate": 4.10143016048958e-05, "loss": 0.8306, "step": 5402 }, { "epoch": 6.153846153846154, "grad_norm": 0.19012929499149323, "learning_rate": 4.101072355065374e-05, "loss": 0.7311, "step": 5403 }, { "epoch": 6.154985754985755, "grad_norm": 0.14840446412563324, "learning_rate": 4.1007144940315225e-05, "loss": 0.7818, "step": 5404 }, { "epoch": 6.156125356125356, "grad_norm": 0.19507408142089844, "learning_rate": 4.100356577400454e-05, "loss": 0.6246, "step": 5405 }, { "epoch": 6.157264957264958, "grad_norm": 0.13652437925338745, "learning_rate": 4.099998605184601e-05, "loss": 0.9509, "step": 5406 }, { "epoch": 6.158404558404558, "grad_norm": 0.1518937349319458, "learning_rate": 4.099640577396396e-05, "loss": 0.8777, "step": 5407 }, { "epoch": 6.15954415954416, "grad_norm": 0.14349329471588135, "learning_rate": 4.099282494048275e-05, "loss": 0.7823, "step": 5408 }, { "epoch": 6.160683760683761, "grad_norm": 0.1953577846288681, "learning_rate": 4.098924355152675e-05, "loss": 0.7073, "step": 5409 }, { "epoch": 6.1618233618233615, "grad_norm": 0.19672128558158875, "learning_rate": 4.098566160722035e-05, "loss": 0.7058, "step": 5410 }, { "epoch": 6.162962962962963, "grad_norm": 0.2700105905532837, "learning_rate": 4.0982079107687965e-05, "loss": 0.4254, "step": 5411 }, { "epoch": 6.164102564102564, "grad_norm": 0.22202476859092712, "learning_rate": 4.0978496053054013e-05, "loss": 0.5088, "step": 5412 }, { "epoch": 6.165242165242165, "grad_norm": 0.21494624018669128, "learning_rate": 4.097491244344296e-05, "loss": 0.6267, "step": 5413 }, { "epoch": 6.166381766381766, "grad_norm": 0.1644032895565033, "learning_rate": 4.097132827897926e-05, "loss": 0.7998, "step": 5414 }, { "epoch": 6.167521367521368, "grad_norm": 0.18602855503559113, "learning_rate": 4.0967743559787414e-05, "loss": 0.8451, "step": 5415 }, { "epoch": 6.168660968660968, "grad_norm": 0.1747872233390808, "learning_rate": 4.096415828599192e-05, "loss": 0.8646, "step": 5416 }, { "epoch": 6.16980056980057, "grad_norm": 0.18089835345745087, "learning_rate": 4.0960572457717305e-05, "loss": 0.7518, "step": 5417 }, { "epoch": 6.170940170940171, "grad_norm": 0.18630164861679077, "learning_rate": 4.0956986075088125e-05, "loss": 0.7452, "step": 5418 }, { "epoch": 6.172079772079772, "grad_norm": 0.1786157488822937, "learning_rate": 4.095339913822892e-05, "loss": 0.8762, "step": 5419 }, { "epoch": 6.173219373219373, "grad_norm": 0.17823855578899384, "learning_rate": 4.094981164726431e-05, "loss": 0.7615, "step": 5420 }, { "epoch": 6.174358974358975, "grad_norm": 0.18031442165374756, "learning_rate": 4.094622360231887e-05, "loss": 0.8003, "step": 5421 }, { "epoch": 6.175498575498575, "grad_norm": 0.19077175855636597, "learning_rate": 4.094263500351724e-05, "loss": 0.8451, "step": 5422 }, { "epoch": 6.176638176638177, "grad_norm": 0.20899420976638794, "learning_rate": 4.093904585098404e-05, "loss": 0.7411, "step": 5423 }, { "epoch": 6.177777777777778, "grad_norm": 0.1612003743648529, "learning_rate": 4.0935456144843956e-05, "loss": 0.9227, "step": 5424 }, { "epoch": 6.178917378917379, "grad_norm": 0.18165160715579987, "learning_rate": 4.0931865885221656e-05, "loss": 0.6729, "step": 5425 }, { "epoch": 6.18005698005698, "grad_norm": 0.16721685230731964, "learning_rate": 4.092827507224183e-05, "loss": 0.8923, "step": 5426 }, { "epoch": 6.181196581196581, "grad_norm": 0.18058645725250244, "learning_rate": 4.092468370602922e-05, "loss": 0.8386, "step": 5427 }, { "epoch": 6.182336182336182, "grad_norm": 0.1631706804037094, "learning_rate": 4.092109178670855e-05, "loss": 0.7912, "step": 5428 }, { "epoch": 6.183475783475783, "grad_norm": 0.20347100496292114, "learning_rate": 4.0917499314404576e-05, "loss": 0.6502, "step": 5429 }, { "epoch": 6.184615384615385, "grad_norm": 0.2409413605928421, "learning_rate": 4.091390628924208e-05, "loss": 0.6087, "step": 5430 }, { "epoch": 6.185754985754985, "grad_norm": 0.18910107016563416, "learning_rate": 4.091031271134586e-05, "loss": 0.6308, "step": 5431 }, { "epoch": 6.186894586894587, "grad_norm": 0.19836777448654175, "learning_rate": 4.0906718580840716e-05, "loss": 0.6492, "step": 5432 }, { "epoch": 6.188034188034188, "grad_norm": 0.14992153644561768, "learning_rate": 4.090312389785149e-05, "loss": 0.8728, "step": 5433 }, { "epoch": 6.189173789173789, "grad_norm": 0.17563802003860474, "learning_rate": 4.089952866250305e-05, "loss": 0.8546, "step": 5434 }, { "epoch": 6.19031339031339, "grad_norm": 0.228382870554924, "learning_rate": 4.089593287492024e-05, "loss": 0.5632, "step": 5435 }, { "epoch": 6.191452991452992, "grad_norm": 0.24549713730812073, "learning_rate": 4.089233653522798e-05, "loss": 0.8282, "step": 5436 }, { "epoch": 6.192592592592592, "grad_norm": 0.19304408133029938, "learning_rate": 4.0888739643551165e-05, "loss": 0.7496, "step": 5437 }, { "epoch": 6.193732193732194, "grad_norm": 0.19243228435516357, "learning_rate": 4.088514220001472e-05, "loss": 0.7399, "step": 5438 }, { "epoch": 6.194871794871795, "grad_norm": 0.21948397159576416, "learning_rate": 4.088154420474362e-05, "loss": 0.7033, "step": 5439 }, { "epoch": 6.196011396011396, "grad_norm": 0.19144877791404724, "learning_rate": 4.08779456578628e-05, "loss": 0.7552, "step": 5440 }, { "epoch": 6.197150997150997, "grad_norm": 0.1941777914762497, "learning_rate": 4.087434655949727e-05, "loss": 0.5746, "step": 5441 }, { "epoch": 6.1982905982905985, "grad_norm": 0.16696923971176147, "learning_rate": 4.087074690977203e-05, "loss": 0.9095, "step": 5442 }, { "epoch": 6.199430199430199, "grad_norm": 0.2027294635772705, "learning_rate": 4.08671467088121e-05, "loss": 0.5538, "step": 5443 }, { "epoch": 6.2005698005698004, "grad_norm": 0.22019366919994354, "learning_rate": 4.0863545956742534e-05, "loss": 0.5056, "step": 5444 }, { "epoch": 6.201709401709402, "grad_norm": 0.21672432124614716, "learning_rate": 4.0859944653688385e-05, "loss": 0.6106, "step": 5445 }, { "epoch": 6.202849002849002, "grad_norm": 0.1908385455608368, "learning_rate": 4.085634279977475e-05, "loss": 0.7042, "step": 5446 }, { "epoch": 6.203988603988604, "grad_norm": 0.17540811002254486, "learning_rate": 4.085274039512672e-05, "loss": 0.7663, "step": 5447 }, { "epoch": 6.205128205128205, "grad_norm": 0.22469978034496307, "learning_rate": 4.084913743986942e-05, "loss": 0.5877, "step": 5448 }, { "epoch": 6.206267806267807, "grad_norm": 0.1598920077085495, "learning_rate": 4.0845533934128e-05, "loss": 0.9132, "step": 5449 }, { "epoch": 6.207407407407407, "grad_norm": 0.18967831134796143, "learning_rate": 4.084192987802761e-05, "loss": 0.8009, "step": 5450 }, { "epoch": 6.208547008547009, "grad_norm": 0.18315912783145905, "learning_rate": 4.083832527169342e-05, "loss": 0.8127, "step": 5451 }, { "epoch": 6.20968660968661, "grad_norm": 0.15255817770957947, "learning_rate": 4.083472011525065e-05, "loss": 0.8159, "step": 5452 }, { "epoch": 6.210826210826211, "grad_norm": 0.16031622886657715, "learning_rate": 4.083111440882449e-05, "loss": 0.7793, "step": 5453 }, { "epoch": 6.211965811965812, "grad_norm": 0.1582925021648407, "learning_rate": 4.082750815254021e-05, "loss": 0.8903, "step": 5454 }, { "epoch": 6.2131054131054135, "grad_norm": 0.21032319962978363, "learning_rate": 4.082390134652303e-05, "loss": 0.8633, "step": 5455 }, { "epoch": 6.214245014245014, "grad_norm": 0.2191559225320816, "learning_rate": 4.082029399089825e-05, "loss": 0.4965, "step": 5456 }, { "epoch": 6.2153846153846155, "grad_norm": 0.1581375002861023, "learning_rate": 4.081668608579114e-05, "loss": 0.8054, "step": 5457 }, { "epoch": 6.216524216524217, "grad_norm": 0.17443591356277466, "learning_rate": 4.081307763132704e-05, "loss": 0.7814, "step": 5458 }, { "epoch": 6.2176638176638175, "grad_norm": 0.18770234286785126, "learning_rate": 4.080946862763126e-05, "loss": 0.8021, "step": 5459 }, { "epoch": 6.218803418803419, "grad_norm": 0.14728766679763794, "learning_rate": 4.0805859074829164e-05, "loss": 0.8208, "step": 5460 }, { "epoch": 6.21994301994302, "grad_norm": 0.17098896205425262, "learning_rate": 4.080224897304611e-05, "loss": 0.827, "step": 5461 }, { "epoch": 6.221082621082621, "grad_norm": 0.16605925559997559, "learning_rate": 4.07986383224075e-05, "loss": 0.777, "step": 5462 }, { "epoch": 6.222222222222222, "grad_norm": 0.17997348308563232, "learning_rate": 4.079502712303873e-05, "loss": 0.7636, "step": 5463 }, { "epoch": 6.223361823361824, "grad_norm": 0.22712866961956024, "learning_rate": 4.079141537506523e-05, "loss": 0.6999, "step": 5464 }, { "epoch": 6.224501424501424, "grad_norm": 0.22008147835731506, "learning_rate": 4.0787803078612455e-05, "loss": 0.7733, "step": 5465 }, { "epoch": 6.225641025641026, "grad_norm": 0.17507445812225342, "learning_rate": 4.0784190233805855e-05, "loss": 0.9629, "step": 5466 }, { "epoch": 6.226780626780627, "grad_norm": 0.20270459353923798, "learning_rate": 4.078057684077092e-05, "loss": 0.6832, "step": 5467 }, { "epoch": 6.227920227920228, "grad_norm": 0.1600881665945053, "learning_rate": 4.077696289963317e-05, "loss": 0.8731, "step": 5468 }, { "epoch": 6.229059829059829, "grad_norm": 0.2190249115228653, "learning_rate": 4.07733484105181e-05, "loss": 0.7833, "step": 5469 }, { "epoch": 6.230199430199431, "grad_norm": 0.16586245596408844, "learning_rate": 4.0769733373551254e-05, "loss": 0.6337, "step": 5470 }, { "epoch": 6.231339031339031, "grad_norm": 0.17984691262245178, "learning_rate": 4.076611778885822e-05, "loss": 0.6924, "step": 5471 }, { "epoch": 6.2324786324786325, "grad_norm": 0.17775210738182068, "learning_rate": 4.076250165656454e-05, "loss": 0.8526, "step": 5472 }, { "epoch": 6.233618233618234, "grad_norm": 0.17768071591854095, "learning_rate": 4.0758884976795843e-05, "loss": 0.6849, "step": 5473 }, { "epoch": 6.2347578347578345, "grad_norm": 0.17510420083999634, "learning_rate": 4.075526774967773e-05, "loss": 0.7369, "step": 5474 }, { "epoch": 6.235897435897436, "grad_norm": 0.17719455063343048, "learning_rate": 4.075164997533584e-05, "loss": 0.7602, "step": 5475 }, { "epoch": 6.237037037037037, "grad_norm": 0.1920306533575058, "learning_rate": 4.0748031653895823e-05, "loss": 0.719, "step": 5476 }, { "epoch": 6.238176638176638, "grad_norm": 0.1743188202381134, "learning_rate": 4.0744412785483375e-05, "loss": 0.7839, "step": 5477 }, { "epoch": 6.239316239316239, "grad_norm": 0.17863917350769043, "learning_rate": 4.0740793370224165e-05, "loss": 0.6877, "step": 5478 }, { "epoch": 6.240455840455841, "grad_norm": 0.18066370487213135, "learning_rate": 4.073717340824391e-05, "loss": 0.9225, "step": 5479 }, { "epoch": 6.241595441595441, "grad_norm": 0.17196287214756012, "learning_rate": 4.073355289966835e-05, "loss": 0.7665, "step": 5480 }, { "epoch": 6.242735042735043, "grad_norm": 0.19007204473018646, "learning_rate": 4.072993184462323e-05, "loss": 0.674, "step": 5481 }, { "epoch": 6.243874643874644, "grad_norm": 0.22861039638519287, "learning_rate": 4.072631024323431e-05, "loss": 0.4813, "step": 5482 }, { "epoch": 6.245014245014245, "grad_norm": 0.20271611213684082, "learning_rate": 4.07226880956274e-05, "loss": 0.5928, "step": 5483 }, { "epoch": 6.246153846153846, "grad_norm": 0.1714460551738739, "learning_rate": 4.071906540192829e-05, "loss": 0.6774, "step": 5484 }, { "epoch": 6.247293447293448, "grad_norm": 0.19361095130443573, "learning_rate": 4.071544216226281e-05, "loss": 0.4751, "step": 5485 }, { "epoch": 6.248433048433048, "grad_norm": 0.1699151247739792, "learning_rate": 4.0711818376756814e-05, "loss": 0.827, "step": 5486 }, { "epoch": 6.24957264957265, "grad_norm": 0.17237791419029236, "learning_rate": 4.0708194045536154e-05, "loss": 0.7743, "step": 5487 }, { "epoch": 6.250712250712251, "grad_norm": 0.17621074616909027, "learning_rate": 4.070456916872671e-05, "loss": 0.7688, "step": 5488 }, { "epoch": 6.2518518518518515, "grad_norm": 0.17917349934577942, "learning_rate": 4.07009437464544e-05, "loss": 0.7973, "step": 5489 }, { "epoch": 6.252991452991453, "grad_norm": 0.17991836369037628, "learning_rate": 4.069731777884512e-05, "loss": 0.8146, "step": 5490 }, { "epoch": 6.254131054131054, "grad_norm": 0.16662746667861938, "learning_rate": 4.069369126602485e-05, "loss": 0.886, "step": 5491 }, { "epoch": 6.255270655270655, "grad_norm": 0.19454513490200043, "learning_rate": 4.0690064208119505e-05, "loss": 0.7909, "step": 5492 }, { "epoch": 6.256410256410256, "grad_norm": 0.15574559569358826, "learning_rate": 4.068643660525509e-05, "loss": 0.7042, "step": 5493 }, { "epoch": 6.257549857549858, "grad_norm": 0.2107429802417755, "learning_rate": 4.0682808457557594e-05, "loss": 0.7224, "step": 5494 }, { "epoch": 6.258689458689458, "grad_norm": 0.18273282051086426, "learning_rate": 4.0679179765153035e-05, "loss": 0.9002, "step": 5495 }, { "epoch": 6.25982905982906, "grad_norm": 0.19854958355426788, "learning_rate": 4.067555052816744e-05, "loss": 0.7524, "step": 5496 }, { "epoch": 6.260968660968661, "grad_norm": 0.16223762929439545, "learning_rate": 4.0671920746726866e-05, "loss": 0.8428, "step": 5497 }, { "epoch": 6.262108262108262, "grad_norm": 0.16430439054965973, "learning_rate": 4.0668290420957395e-05, "loss": 0.8178, "step": 5498 }, { "epoch": 6.263247863247863, "grad_norm": 0.20250779390335083, "learning_rate": 4.0664659550985105e-05, "loss": 0.695, "step": 5499 }, { "epoch": 6.264387464387465, "grad_norm": 0.17243987321853638, "learning_rate": 4.066102813693611e-05, "loss": 0.7791, "step": 5500 }, { "epoch": 6.265527065527065, "grad_norm": 0.18005329370498657, "learning_rate": 4.0657396178936534e-05, "loss": 0.7831, "step": 5501 }, { "epoch": 6.266666666666667, "grad_norm": 0.20994611084461212, "learning_rate": 4.065376367711255e-05, "loss": 0.6416, "step": 5502 }, { "epoch": 6.267806267806268, "grad_norm": 0.18187794089317322, "learning_rate": 4.065013063159028e-05, "loss": 0.5668, "step": 5503 }, { "epoch": 6.268945868945869, "grad_norm": 0.20314699411392212, "learning_rate": 4.064649704249595e-05, "loss": 0.6542, "step": 5504 }, { "epoch": 6.27008547008547, "grad_norm": 0.20696061849594116, "learning_rate": 4.0642862909955745e-05, "loss": 0.6915, "step": 5505 }, { "epoch": 6.2712250712250714, "grad_norm": 0.19110968708992004, "learning_rate": 4.06392282340959e-05, "loss": 0.6842, "step": 5506 }, { "epoch": 6.272364672364672, "grad_norm": 0.2018856555223465, "learning_rate": 4.0635593015042636e-05, "loss": 0.7229, "step": 5507 }, { "epoch": 6.273504273504273, "grad_norm": 0.200718954205513, "learning_rate": 4.063195725292225e-05, "loss": 0.5362, "step": 5508 }, { "epoch": 6.274643874643875, "grad_norm": 0.18743805587291718, "learning_rate": 4.0628320947860985e-05, "loss": 0.8418, "step": 5509 }, { "epoch": 6.275783475783475, "grad_norm": 0.17090246081352234, "learning_rate": 4.062468409998515e-05, "loss": 0.7145, "step": 5510 }, { "epoch": 6.276923076923077, "grad_norm": 0.15754278004169464, "learning_rate": 4.062104670942108e-05, "loss": 0.7671, "step": 5511 }, { "epoch": 6.278062678062678, "grad_norm": 0.17991505563259125, "learning_rate": 4.06174087762951e-05, "loss": 0.776, "step": 5512 }, { "epoch": 6.279202279202279, "grad_norm": 0.18786561489105225, "learning_rate": 4.0613770300733565e-05, "loss": 0.718, "step": 5513 }, { "epoch": 6.28034188034188, "grad_norm": 0.2050205022096634, "learning_rate": 4.061013128286285e-05, "loss": 0.6767, "step": 5514 }, { "epoch": 6.281481481481482, "grad_norm": 0.17826548218727112, "learning_rate": 4.060649172280934e-05, "loss": 0.522, "step": 5515 }, { "epoch": 6.282621082621082, "grad_norm": 0.17450261116027832, "learning_rate": 4.060285162069946e-05, "loss": 0.916, "step": 5516 }, { "epoch": 6.283760683760684, "grad_norm": 0.2118103951215744, "learning_rate": 4.059921097665963e-05, "loss": 0.8111, "step": 5517 }, { "epoch": 6.284900284900285, "grad_norm": 0.16722458600997925, "learning_rate": 4.059556979081631e-05, "loss": 0.8077, "step": 5518 }, { "epoch": 6.286039886039886, "grad_norm": 0.18036098778247833, "learning_rate": 4.059192806329596e-05, "loss": 0.6988, "step": 5519 }, { "epoch": 6.287179487179487, "grad_norm": 0.20158371329307556, "learning_rate": 4.0588285794225076e-05, "loss": 0.7276, "step": 5520 }, { "epoch": 6.2883190883190885, "grad_norm": 0.15440712869167328, "learning_rate": 4.0584642983730146e-05, "loss": 0.8107, "step": 5521 }, { "epoch": 6.289458689458689, "grad_norm": 0.17782963812351227, "learning_rate": 4.058099963193772e-05, "loss": 0.84, "step": 5522 }, { "epoch": 6.2905982905982905, "grad_norm": 0.22440436482429504, "learning_rate": 4.0577355738974324e-05, "loss": 0.662, "step": 5523 }, { "epoch": 6.291737891737892, "grad_norm": 0.18663540482521057, "learning_rate": 4.057371130496652e-05, "loss": 0.7808, "step": 5524 }, { "epoch": 6.292877492877492, "grad_norm": 0.20182882249355316, "learning_rate": 4.0570066330040906e-05, "loss": 0.7635, "step": 5525 }, { "epoch": 6.294017094017094, "grad_norm": 0.16176316142082214, "learning_rate": 4.056642081432407e-05, "loss": 0.8451, "step": 5526 }, { "epoch": 6.295156695156695, "grad_norm": 0.1974044144153595, "learning_rate": 4.0562774757942624e-05, "loss": 0.6236, "step": 5527 }, { "epoch": 6.296296296296296, "grad_norm": 0.16082333028316498, "learning_rate": 4.055912816102321e-05, "loss": 0.8136, "step": 5528 }, { "epoch": 6.297435897435897, "grad_norm": 0.1818101704120636, "learning_rate": 4.055548102369249e-05, "loss": 0.7774, "step": 5529 }, { "epoch": 6.298575498575499, "grad_norm": 0.17527911067008972, "learning_rate": 4.055183334607714e-05, "loss": 0.7551, "step": 5530 }, { "epoch": 6.2997150997151, "grad_norm": 0.1772170513868332, "learning_rate": 4.054818512830385e-05, "loss": 0.8582, "step": 5531 }, { "epoch": 6.300854700854701, "grad_norm": 0.19636009633541107, "learning_rate": 4.054453637049933e-05, "loss": 0.582, "step": 5532 }, { "epoch": 6.301994301994302, "grad_norm": 0.1604292094707489, "learning_rate": 4.0540887072790326e-05, "loss": 0.8054, "step": 5533 }, { "epoch": 6.3031339031339035, "grad_norm": 0.1790705770254135, "learning_rate": 4.053723723530356e-05, "loss": 0.771, "step": 5534 }, { "epoch": 6.304273504273504, "grad_norm": 0.18854586780071259, "learning_rate": 4.0533586858165816e-05, "loss": 0.7078, "step": 5535 }, { "epoch": 6.3054131054131055, "grad_norm": 0.1725122183561325, "learning_rate": 4.0529935941503896e-05, "loss": 0.8923, "step": 5536 }, { "epoch": 6.306552706552707, "grad_norm": 0.18810155987739563, "learning_rate": 4.0526284485444585e-05, "loss": 0.8407, "step": 5537 }, { "epoch": 6.3076923076923075, "grad_norm": 0.21764864027500153, "learning_rate": 4.052263249011472e-05, "loss": 0.6224, "step": 5538 }, { "epoch": 6.308831908831909, "grad_norm": 0.20365047454833984, "learning_rate": 4.0518979955641144e-05, "loss": 0.7715, "step": 5539 }, { "epoch": 6.30997150997151, "grad_norm": 0.1990799754858017, "learning_rate": 4.051532688215071e-05, "loss": 0.6924, "step": 5540 }, { "epoch": 6.311111111111111, "grad_norm": 0.2261703908443451, "learning_rate": 4.0511673269770304e-05, "loss": 0.7812, "step": 5541 }, { "epoch": 6.312250712250712, "grad_norm": 0.1494823694229126, "learning_rate": 4.050801911862684e-05, "loss": 0.8382, "step": 5542 }, { "epoch": 6.313390313390314, "grad_norm": 0.19464197754859924, "learning_rate": 4.050436442884722e-05, "loss": 0.7306, "step": 5543 }, { "epoch": 6.314529914529914, "grad_norm": 0.20078547298908234, "learning_rate": 4.0500709200558375e-05, "loss": 0.7558, "step": 5544 }, { "epoch": 6.315669515669516, "grad_norm": 0.20504049956798553, "learning_rate": 4.049705343388729e-05, "loss": 0.578, "step": 5545 }, { "epoch": 6.316809116809117, "grad_norm": 0.2148253619670868, "learning_rate": 4.049339712896091e-05, "loss": 0.7114, "step": 5546 }, { "epoch": 6.317948717948718, "grad_norm": 0.1700192242860794, "learning_rate": 4.048974028590624e-05, "loss": 0.9176, "step": 5547 }, { "epoch": 6.319088319088319, "grad_norm": 0.18755079805850983, "learning_rate": 4.04860829048503e-05, "loss": 0.6967, "step": 5548 }, { "epoch": 6.320227920227921, "grad_norm": 0.15865486860275269, "learning_rate": 4.048242498592011e-05, "loss": 0.8662, "step": 5549 }, { "epoch": 6.321367521367521, "grad_norm": 0.17340147495269775, "learning_rate": 4.047876652924273e-05, "loss": 0.6638, "step": 5550 }, { "epoch": 6.3225071225071225, "grad_norm": 0.17811904847621918, "learning_rate": 4.047510753494521e-05, "loss": 0.681, "step": 5551 }, { "epoch": 6.323646723646724, "grad_norm": 0.16214832663536072, "learning_rate": 4.0471448003154654e-05, "loss": 0.8522, "step": 5552 }, { "epoch": 6.3247863247863245, "grad_norm": 0.192167729139328, "learning_rate": 4.046778793399816e-05, "loss": 0.7349, "step": 5553 }, { "epoch": 6.325925925925926, "grad_norm": 0.21371591091156006, "learning_rate": 4.0464127327602865e-05, "loss": 0.5996, "step": 5554 }, { "epoch": 6.327065527065527, "grad_norm": 0.231183260679245, "learning_rate": 4.046046618409589e-05, "loss": 0.6828, "step": 5555 }, { "epoch": 6.328205128205128, "grad_norm": 0.22443002462387085, "learning_rate": 4.0456804503604414e-05, "loss": 0.7126, "step": 5556 }, { "epoch": 6.329344729344729, "grad_norm": 0.19145648181438446, "learning_rate": 4.045314228625561e-05, "loss": 0.9129, "step": 5557 }, { "epoch": 6.330484330484331, "grad_norm": 0.19444787502288818, "learning_rate": 4.044947953217667e-05, "loss": 0.7147, "step": 5558 }, { "epoch": 6.331623931623931, "grad_norm": 0.16869063675403595, "learning_rate": 4.044581624149483e-05, "loss": 0.9214, "step": 5559 }, { "epoch": 6.332763532763533, "grad_norm": 0.1611240804195404, "learning_rate": 4.0442152414337305e-05, "loss": 0.8649, "step": 5560 }, { "epoch": 6.333903133903134, "grad_norm": 0.17310021817684174, "learning_rate": 4.043848805083137e-05, "loss": 0.7633, "step": 5561 }, { "epoch": 6.335042735042735, "grad_norm": 0.20856855809688568, "learning_rate": 4.043482315110429e-05, "loss": 0.707, "step": 5562 }, { "epoch": 6.336182336182336, "grad_norm": 0.2036934494972229, "learning_rate": 4.043115771528335e-05, "loss": 0.7533, "step": 5563 }, { "epoch": 6.337321937321938, "grad_norm": 0.17045928537845612, "learning_rate": 4.042749174349587e-05, "loss": 0.796, "step": 5564 }, { "epoch": 6.338461538461538, "grad_norm": 0.1955578476190567, "learning_rate": 4.0423825235869185e-05, "loss": 0.6873, "step": 5565 }, { "epoch": 6.33960113960114, "grad_norm": 0.15852148830890656, "learning_rate": 4.0420158192530624e-05, "loss": 0.8393, "step": 5566 }, { "epoch": 6.340740740740741, "grad_norm": 0.1966450959444046, "learning_rate": 4.041649061360756e-05, "loss": 0.6703, "step": 5567 }, { "epoch": 6.3418803418803416, "grad_norm": 0.16714483499526978, "learning_rate": 4.041282249922739e-05, "loss": 0.7973, "step": 5568 }, { "epoch": 6.343019943019943, "grad_norm": 0.18942955136299133, "learning_rate": 4.04091538495175e-05, "loss": 0.849, "step": 5569 }, { "epoch": 6.344159544159544, "grad_norm": 0.18266943097114563, "learning_rate": 4.040548466460533e-05, "loss": 0.7453, "step": 5570 }, { "epoch": 6.345299145299145, "grad_norm": 0.15580274164676666, "learning_rate": 4.040181494461831e-05, "loss": 0.8869, "step": 5571 }, { "epoch": 6.346438746438746, "grad_norm": 0.1870015263557434, "learning_rate": 4.0398144689683895e-05, "loss": 0.8276, "step": 5572 }, { "epoch": 6.347578347578348, "grad_norm": 0.16944514214992523, "learning_rate": 4.039447389992957e-05, "loss": 0.9026, "step": 5573 }, { "epoch": 6.348717948717948, "grad_norm": 0.19888898730278015, "learning_rate": 4.0390802575482836e-05, "loss": 0.7978, "step": 5574 }, { "epoch": 6.34985754985755, "grad_norm": 0.21368567645549774, "learning_rate": 4.038713071647121e-05, "loss": 0.6742, "step": 5575 }, { "epoch": 6.350997150997151, "grad_norm": 0.24270881712436676, "learning_rate": 4.038345832302221e-05, "loss": 0.6214, "step": 5576 }, { "epoch": 6.352136752136752, "grad_norm": 0.1544049084186554, "learning_rate": 4.03797853952634e-05, "loss": 0.9009, "step": 5577 }, { "epoch": 6.353276353276353, "grad_norm": 0.14882121980190277, "learning_rate": 4.037611193332236e-05, "loss": 0.8344, "step": 5578 }, { "epoch": 6.354415954415955, "grad_norm": 0.21202531456947327, "learning_rate": 4.0372437937326654e-05, "loss": 0.6494, "step": 5579 }, { "epoch": 6.355555555555555, "grad_norm": 0.15874454379081726, "learning_rate": 4.036876340740391e-05, "loss": 0.6908, "step": 5580 }, { "epoch": 6.356695156695157, "grad_norm": 0.19329917430877686, "learning_rate": 4.0365088343681746e-05, "loss": 0.7809, "step": 5581 }, { "epoch": 6.357834757834758, "grad_norm": 0.17928197979927063, "learning_rate": 4.0361412746287806e-05, "loss": 0.8637, "step": 5582 }, { "epoch": 6.358974358974359, "grad_norm": 0.23423759639263153, "learning_rate": 4.0357736615349765e-05, "loss": 0.5022, "step": 5583 }, { "epoch": 6.36011396011396, "grad_norm": 0.1744619607925415, "learning_rate": 4.035405995099529e-05, "loss": 0.792, "step": 5584 }, { "epoch": 6.3612535612535615, "grad_norm": 0.19806456565856934, "learning_rate": 4.035038275335209e-05, "loss": 0.6759, "step": 5585 }, { "epoch": 6.362393162393162, "grad_norm": 0.18646864593029022, "learning_rate": 4.034670502254789e-05, "loss": 0.7262, "step": 5586 }, { "epoch": 6.363532763532763, "grad_norm": 0.1802903264760971, "learning_rate": 4.0343026758710424e-05, "loss": 0.8121, "step": 5587 }, { "epoch": 6.364672364672365, "grad_norm": 0.1669279783964157, "learning_rate": 4.0339347961967434e-05, "loss": 0.7895, "step": 5588 }, { "epoch": 6.365811965811965, "grad_norm": 0.20458777248859406, "learning_rate": 4.033566863244671e-05, "loss": 0.7021, "step": 5589 }, { "epoch": 6.366951566951567, "grad_norm": 0.16863298416137695, "learning_rate": 4.033198877027604e-05, "loss": 0.8631, "step": 5590 }, { "epoch": 6.368091168091168, "grad_norm": 0.15161456167697906, "learning_rate": 4.032830837558324e-05, "loss": 0.8739, "step": 5591 }, { "epoch": 6.36923076923077, "grad_norm": 0.2144942432641983, "learning_rate": 4.032462744849613e-05, "loss": 0.811, "step": 5592 }, { "epoch": 6.37037037037037, "grad_norm": 0.19144073128700256, "learning_rate": 4.032094598914258e-05, "loss": 0.8182, "step": 5593 }, { "epoch": 6.371509971509972, "grad_norm": 0.1798471063375473, "learning_rate": 4.0317263997650435e-05, "loss": 0.8006, "step": 5594 }, { "epoch": 6.372649572649573, "grad_norm": 0.16783884167671204, "learning_rate": 4.0313581474147585e-05, "loss": 0.8669, "step": 5595 }, { "epoch": 6.373789173789174, "grad_norm": 0.26721659302711487, "learning_rate": 4.0309898418761935e-05, "loss": 0.4397, "step": 5596 }, { "epoch": 6.374928774928775, "grad_norm": 0.18910394608974457, "learning_rate": 4.030621483162142e-05, "loss": 0.6172, "step": 5597 }, { "epoch": 6.3760683760683765, "grad_norm": 0.17705880105495453, "learning_rate": 4.030253071285397e-05, "loss": 0.6707, "step": 5598 }, { "epoch": 6.377207977207977, "grad_norm": 0.17414258420467377, "learning_rate": 4.029884606258755e-05, "loss": 0.8989, "step": 5599 }, { "epoch": 6.3783475783475785, "grad_norm": 0.18619024753570557, "learning_rate": 4.029516088095012e-05, "loss": 0.8322, "step": 5600 }, { "epoch": 6.37948717948718, "grad_norm": 0.2084839940071106, "learning_rate": 4.0291475168069704e-05, "loss": 0.7855, "step": 5601 }, { "epoch": 6.3806267806267805, "grad_norm": 0.19497095048427582, "learning_rate": 4.02877889240743e-05, "loss": 0.7825, "step": 5602 }, { "epoch": 6.381766381766382, "grad_norm": 0.18872429430484772, "learning_rate": 4.028410214909194e-05, "loss": 0.823, "step": 5603 }, { "epoch": 6.382905982905983, "grad_norm": 0.16437706351280212, "learning_rate": 4.0280414843250694e-05, "loss": 0.7583, "step": 5604 }, { "epoch": 6.384045584045584, "grad_norm": 0.1967761516571045, "learning_rate": 4.0276727006678605e-05, "loss": 0.7592, "step": 5605 }, { "epoch": 6.385185185185185, "grad_norm": 0.18910476565361023, "learning_rate": 4.027303863950378e-05, "loss": 0.78, "step": 5606 }, { "epoch": 6.386324786324787, "grad_norm": 0.19769850373268127, "learning_rate": 4.026934974185432e-05, "loss": 0.6605, "step": 5607 }, { "epoch": 6.387464387464387, "grad_norm": 0.16382963955402374, "learning_rate": 4.026566031385836e-05, "loss": 0.7627, "step": 5608 }, { "epoch": 6.388603988603989, "grad_norm": 0.21894732117652893, "learning_rate": 4.0261970355644035e-05, "loss": 0.4727, "step": 5609 }, { "epoch": 6.38974358974359, "grad_norm": 0.15893937647342682, "learning_rate": 4.025827986733951e-05, "loss": 0.8326, "step": 5610 }, { "epoch": 6.390883190883191, "grad_norm": 0.18021126091480255, "learning_rate": 4.025458884907296e-05, "loss": 0.7186, "step": 5611 }, { "epoch": 6.392022792022792, "grad_norm": 0.1703164279460907, "learning_rate": 4.025089730097258e-05, "loss": 0.8183, "step": 5612 }, { "epoch": 6.3931623931623935, "grad_norm": 0.20678535103797913, "learning_rate": 4.024720522316661e-05, "loss": 0.7038, "step": 5613 }, { "epoch": 6.394301994301994, "grad_norm": 0.18387947976589203, "learning_rate": 4.024351261578328e-05, "loss": 0.8208, "step": 5614 }, { "epoch": 6.3954415954415955, "grad_norm": 0.17696422338485718, "learning_rate": 4.023981947895082e-05, "loss": 0.9805, "step": 5615 }, { "epoch": 6.396581196581197, "grad_norm": 0.1638094037771225, "learning_rate": 4.023612581279752e-05, "loss": 0.7836, "step": 5616 }, { "epoch": 6.3977207977207975, "grad_norm": 0.14453627169132233, "learning_rate": 4.023243161745168e-05, "loss": 0.8727, "step": 5617 }, { "epoch": 6.398860398860399, "grad_norm": 0.18408818542957306, "learning_rate": 4.02287368930416e-05, "loss": 0.7912, "step": 5618 }, { "epoch": 6.4, "grad_norm": 0.1662292629480362, "learning_rate": 4.0225041639695607e-05, "loss": 0.6997, "step": 5619 }, { "epoch": 6.401139601139601, "grad_norm": 0.18224625289440155, "learning_rate": 4.022134585754205e-05, "loss": 0.9088, "step": 5620 }, { "epoch": 6.402279202279202, "grad_norm": 0.19629694521427155, "learning_rate": 4.021764954670929e-05, "loss": 0.745, "step": 5621 }, { "epoch": 6.403418803418804, "grad_norm": 0.16984345018863678, "learning_rate": 4.021395270732571e-05, "loss": 0.7422, "step": 5622 }, { "epoch": 6.404558404558404, "grad_norm": 0.2379043996334076, "learning_rate": 4.021025533951972e-05, "loss": 0.4332, "step": 5623 }, { "epoch": 6.405698005698006, "grad_norm": 0.19199815392494202, "learning_rate": 4.020655744341973e-05, "loss": 0.8657, "step": 5624 }, { "epoch": 6.406837606837607, "grad_norm": 0.18495675921440125, "learning_rate": 4.020285901915418e-05, "loss": 0.7719, "step": 5625 }, { "epoch": 6.407977207977208, "grad_norm": 0.1718648225069046, "learning_rate": 4.019916006685154e-05, "loss": 0.7742, "step": 5626 }, { "epoch": 6.409116809116809, "grad_norm": 0.2106252759695053, "learning_rate": 4.019546058664026e-05, "loss": 0.6134, "step": 5627 }, { "epoch": 6.410256410256411, "grad_norm": 0.22573627531528473, "learning_rate": 4.0191760578648844e-05, "loss": 0.5967, "step": 5628 }, { "epoch": 6.411396011396011, "grad_norm": 0.2238600254058838, "learning_rate": 4.0188060043005814e-05, "loss": 0.6215, "step": 5629 }, { "epoch": 6.4125356125356126, "grad_norm": 0.15070021152496338, "learning_rate": 4.018435897983969e-05, "loss": 0.774, "step": 5630 }, { "epoch": 6.413675213675214, "grad_norm": 0.21966534852981567, "learning_rate": 4.018065738927902e-05, "loss": 0.7222, "step": 5631 }, { "epoch": 6.4148148148148145, "grad_norm": 0.2008989304304123, "learning_rate": 4.017695527145238e-05, "loss": 0.7044, "step": 5632 }, { "epoch": 6.415954415954416, "grad_norm": 0.16906078159809113, "learning_rate": 4.0173252626488335e-05, "loss": 0.7342, "step": 5633 }, { "epoch": 6.417094017094017, "grad_norm": 0.14185194671154022, "learning_rate": 4.01695494545155e-05, "loss": 0.9029, "step": 5634 }, { "epoch": 6.418233618233618, "grad_norm": 0.17673517763614655, "learning_rate": 4.0165845755662504e-05, "loss": 0.8179, "step": 5635 }, { "epoch": 6.419373219373219, "grad_norm": 0.2244737297296524, "learning_rate": 4.016214153005797e-05, "loss": 0.648, "step": 5636 }, { "epoch": 6.420512820512821, "grad_norm": 0.23522430658340454, "learning_rate": 4.015843677783057e-05, "loss": 0.5819, "step": 5637 }, { "epoch": 6.421652421652421, "grad_norm": 0.16448065638542175, "learning_rate": 4.0154731499108975e-05, "loss": 0.823, "step": 5638 }, { "epoch": 6.422792022792023, "grad_norm": 0.18005219101905823, "learning_rate": 4.0151025694021884e-05, "loss": 0.7005, "step": 5639 }, { "epoch": 6.423931623931624, "grad_norm": 0.22259432077407837, "learning_rate": 4.0147319362697997e-05, "loss": 0.5743, "step": 5640 }, { "epoch": 6.425071225071225, "grad_norm": 0.19946357607841492, "learning_rate": 4.0143612505266055e-05, "loss": 0.7333, "step": 5641 }, { "epoch": 6.426210826210826, "grad_norm": 0.2001507580280304, "learning_rate": 4.01399051218548e-05, "loss": 0.8802, "step": 5642 }, { "epoch": 6.427350427350428, "grad_norm": 0.1886977255344391, "learning_rate": 4.013619721259302e-05, "loss": 0.6688, "step": 5643 }, { "epoch": 6.428490028490028, "grad_norm": 0.19466671347618103, "learning_rate": 4.013248877760948e-05, "loss": 0.7361, "step": 5644 }, { "epoch": 6.42962962962963, "grad_norm": 0.19370336830615997, "learning_rate": 4.012877981703298e-05, "loss": 0.7927, "step": 5645 }, { "epoch": 6.430769230769231, "grad_norm": 0.17426654696464539, "learning_rate": 4.0125070330992366e-05, "loss": 0.8787, "step": 5646 }, { "epoch": 6.431908831908832, "grad_norm": 0.1852133572101593, "learning_rate": 4.0121360319616453e-05, "loss": 0.7635, "step": 5647 }, { "epoch": 6.433048433048433, "grad_norm": 0.18585824966430664, "learning_rate": 4.0117649783034124e-05, "loss": 0.8038, "step": 5648 }, { "epoch": 6.434188034188034, "grad_norm": 0.13975170254707336, "learning_rate": 4.011393872137424e-05, "loss": 0.9754, "step": 5649 }, { "epoch": 6.435327635327635, "grad_norm": 0.17845340073108673, "learning_rate": 4.01102271347657e-05, "loss": 0.6753, "step": 5650 }, { "epoch": 6.436467236467236, "grad_norm": 0.21644507348537445, "learning_rate": 4.010651502333742e-05, "loss": 0.5942, "step": 5651 }, { "epoch": 6.437606837606838, "grad_norm": 0.2078523337841034, "learning_rate": 4.0102802387218324e-05, "loss": 0.7106, "step": 5652 }, { "epoch": 6.438746438746438, "grad_norm": 0.2149255871772766, "learning_rate": 4.0099089226537376e-05, "loss": 0.7849, "step": 5653 }, { "epoch": 6.43988603988604, "grad_norm": 0.23576869070529938, "learning_rate": 4.009537554142353e-05, "loss": 0.6039, "step": 5654 }, { "epoch": 6.441025641025641, "grad_norm": 0.18726636469364166, "learning_rate": 4.009166133200578e-05, "loss": 0.9907, "step": 5655 }, { "epoch": 6.442165242165242, "grad_norm": 0.16134890913963318, "learning_rate": 4.0087946598413136e-05, "loss": 0.8894, "step": 5656 }, { "epoch": 6.443304843304843, "grad_norm": 0.17697754502296448, "learning_rate": 4.008423134077461e-05, "loss": 0.7024, "step": 5657 }, { "epoch": 6.444444444444445, "grad_norm": 0.17293867468833923, "learning_rate": 4.008051555921925e-05, "loss": 0.9143, "step": 5658 }, { "epoch": 6.445584045584045, "grad_norm": 0.15596815943717957, "learning_rate": 4.007679925387611e-05, "loss": 0.7836, "step": 5659 }, { "epoch": 6.446723646723647, "grad_norm": 0.18265388906002045, "learning_rate": 4.007308242487427e-05, "loss": 0.7222, "step": 5660 }, { "epoch": 6.447863247863248, "grad_norm": 0.17139224708080292, "learning_rate": 4.006936507234284e-05, "loss": 0.9408, "step": 5661 }, { "epoch": 6.449002849002849, "grad_norm": 0.2636291980743408, "learning_rate": 4.0065647196410894e-05, "loss": 0.5335, "step": 5662 }, { "epoch": 6.45014245014245, "grad_norm": 0.16780348122119904, "learning_rate": 4.006192879720761e-05, "loss": 0.7985, "step": 5663 }, { "epoch": 6.4512820512820515, "grad_norm": 0.17742887139320374, "learning_rate": 4.005820987486211e-05, "loss": 0.7793, "step": 5664 }, { "epoch": 6.452421652421652, "grad_norm": 0.18413466215133667, "learning_rate": 4.005449042950358e-05, "loss": 0.6578, "step": 5665 }, { "epoch": 6.453561253561253, "grad_norm": 0.19075767695903778, "learning_rate": 4.005077046126119e-05, "loss": 0.6387, "step": 5666 }, { "epoch": 6.454700854700855, "grad_norm": 0.15881739556789398, "learning_rate": 4.004704997026415e-05, "loss": 0.8274, "step": 5667 }, { "epoch": 6.455840455840455, "grad_norm": 0.15998823940753937, "learning_rate": 4.004332895664169e-05, "loss": 0.9557, "step": 5668 }, { "epoch": 6.456980056980057, "grad_norm": 0.17333857715129852, "learning_rate": 4.003960742052305e-05, "loss": 0.7169, "step": 5669 }, { "epoch": 6.458119658119658, "grad_norm": 0.23144127428531647, "learning_rate": 4.0035885362037473e-05, "loss": 0.7375, "step": 5670 }, { "epoch": 6.459259259259259, "grad_norm": 0.1870126575231552, "learning_rate": 4.003216278131425e-05, "loss": 0.6237, "step": 5671 }, { "epoch": 6.46039886039886, "grad_norm": 0.17142246663570404, "learning_rate": 4.0028439678482686e-05, "loss": 0.7533, "step": 5672 }, { "epoch": 6.461538461538462, "grad_norm": 0.24917946755886078, "learning_rate": 4.0024716053672074e-05, "loss": 0.4811, "step": 5673 }, { "epoch": 6.462678062678062, "grad_norm": 0.20272578299045563, "learning_rate": 4.002099190701176e-05, "loss": 0.8904, "step": 5674 }, { "epoch": 6.463817663817664, "grad_norm": 0.2139831781387329, "learning_rate": 4.001726723863109e-05, "loss": 0.7117, "step": 5675 }, { "epoch": 6.464957264957265, "grad_norm": 0.20929937064647675, "learning_rate": 4.001354204865943e-05, "loss": 0.6985, "step": 5676 }, { "epoch": 6.466096866096866, "grad_norm": 0.17709870636463165, "learning_rate": 4.0009816337226166e-05, "loss": 0.7949, "step": 5677 }, { "epoch": 6.467236467236467, "grad_norm": 0.19222743809223175, "learning_rate": 4.00060901044607e-05, "loss": 0.7244, "step": 5678 }, { "epoch": 6.4683760683760685, "grad_norm": 0.18339277803897858, "learning_rate": 4.000236335049247e-05, "loss": 0.5729, "step": 5679 }, { "epoch": 6.46951566951567, "grad_norm": 0.18804150819778442, "learning_rate": 3.999863607545089e-05, "loss": 0.7491, "step": 5680 }, { "epoch": 6.4706552706552705, "grad_norm": 0.16036182641983032, "learning_rate": 3.999490827946544e-05, "loss": 0.7275, "step": 5681 }, { "epoch": 6.471794871794872, "grad_norm": 0.18374961614608765, "learning_rate": 3.999117996266559e-05, "loss": 0.7085, "step": 5682 }, { "epoch": 6.472934472934473, "grad_norm": 0.1866759955883026, "learning_rate": 3.998745112518083e-05, "loss": 0.8317, "step": 5683 }, { "epoch": 6.474074074074074, "grad_norm": 0.18938614428043365, "learning_rate": 3.998372176714068e-05, "loss": 0.5777, "step": 5684 }, { "epoch": 6.475213675213675, "grad_norm": 0.2183467596769333, "learning_rate": 3.997999188867466e-05, "loss": 0.7164, "step": 5685 }, { "epoch": 6.476353276353277, "grad_norm": 0.17132146656513214, "learning_rate": 3.9976261489912336e-05, "loss": 0.8326, "step": 5686 }, { "epoch": 6.477492877492877, "grad_norm": 0.22032825648784637, "learning_rate": 3.997253057098327e-05, "loss": 0.6774, "step": 5687 }, { "epoch": 6.478632478632479, "grad_norm": 0.20137335360050201, "learning_rate": 3.996879913201703e-05, "loss": 0.7788, "step": 5688 }, { "epoch": 6.47977207977208, "grad_norm": 0.17918981611728668, "learning_rate": 3.9965067173143236e-05, "loss": 0.8275, "step": 5689 }, { "epoch": 6.480911680911681, "grad_norm": 0.2228441834449768, "learning_rate": 3.99613346944915e-05, "loss": 0.6525, "step": 5690 }, { "epoch": 6.482051282051282, "grad_norm": 0.19778205454349518, "learning_rate": 3.995760169619148e-05, "loss": 0.8469, "step": 5691 }, { "epoch": 6.4831908831908835, "grad_norm": 0.1902667135000229, "learning_rate": 3.995386817837281e-05, "loss": 0.7686, "step": 5692 }, { "epoch": 6.484330484330484, "grad_norm": 0.15250743925571442, "learning_rate": 3.995013414116517e-05, "loss": 0.9219, "step": 5693 }, { "epoch": 6.4854700854700855, "grad_norm": 0.1628999561071396, "learning_rate": 3.994639958469827e-05, "loss": 0.8197, "step": 5694 }, { "epoch": 6.486609686609687, "grad_norm": 0.20843957364559174, "learning_rate": 3.99426645091018e-05, "loss": 0.6886, "step": 5695 }, { "epoch": 6.4877492877492875, "grad_norm": 0.195245161652565, "learning_rate": 3.9938928914505506e-05, "loss": 0.7892, "step": 5696 }, { "epoch": 6.488888888888889, "grad_norm": 0.150603249669075, "learning_rate": 3.993519280103913e-05, "loss": 0.8768, "step": 5697 }, { "epoch": 6.49002849002849, "grad_norm": 0.1576722413301468, "learning_rate": 3.993145616883243e-05, "loss": 0.8517, "step": 5698 }, { "epoch": 6.491168091168091, "grad_norm": 0.24340248107910156, "learning_rate": 3.99277190180152e-05, "loss": 0.6631, "step": 5699 }, { "epoch": 6.492307692307692, "grad_norm": 0.29386258125305176, "learning_rate": 3.992398134871723e-05, "loss": 0.5602, "step": 5700 }, { "epoch": 6.493447293447294, "grad_norm": 0.20805922150611877, "learning_rate": 3.992024316106835e-05, "loss": 0.8387, "step": 5701 }, { "epoch": 6.494586894586894, "grad_norm": 0.16634730994701385, "learning_rate": 3.99165044551984e-05, "loss": 0.7965, "step": 5702 }, { "epoch": 6.495726495726496, "grad_norm": 0.17054076492786407, "learning_rate": 3.991276523123722e-05, "loss": 0.8582, "step": 5703 }, { "epoch": 6.496866096866097, "grad_norm": 0.1614423543214798, "learning_rate": 3.990902548931471e-05, "loss": 0.9786, "step": 5704 }, { "epoch": 6.498005698005698, "grad_norm": 0.2027364820241928, "learning_rate": 3.990528522956073e-05, "loss": 0.6458, "step": 5705 }, { "epoch": 6.499145299145299, "grad_norm": 0.15930069983005524, "learning_rate": 3.99015444521052e-05, "loss": 0.9637, "step": 5706 }, { "epoch": 6.500284900284901, "grad_norm": 0.17900559306144714, "learning_rate": 3.989780315707806e-05, "loss": 0.7651, "step": 5707 }, { "epoch": 6.501424501424501, "grad_norm": 0.17746704816818237, "learning_rate": 3.989406134460925e-05, "loss": 0.7902, "step": 5708 }, { "epoch": 6.5025641025641026, "grad_norm": 0.1514768898487091, "learning_rate": 3.989031901482873e-05, "loss": 0.8559, "step": 5709 }, { "epoch": 6.503703703703704, "grad_norm": 0.20717908442020416, "learning_rate": 3.988657616786649e-05, "loss": 0.8852, "step": 5710 }, { "epoch": 6.5048433048433045, "grad_norm": 0.19845721125602722, "learning_rate": 3.98828328038525e-05, "loss": 0.8086, "step": 5711 }, { "epoch": 6.505982905982906, "grad_norm": 0.1781129539012909, "learning_rate": 3.987908892291681e-05, "loss": 0.8174, "step": 5712 }, { "epoch": 6.507122507122507, "grad_norm": 0.17747661471366882, "learning_rate": 3.987534452518944e-05, "loss": 0.7402, "step": 5713 }, { "epoch": 6.508262108262108, "grad_norm": 0.21257731318473816, "learning_rate": 3.9871599610800456e-05, "loss": 0.8022, "step": 5714 }, { "epoch": 6.509401709401709, "grad_norm": 0.2018202692270279, "learning_rate": 3.9867854179879914e-05, "loss": 0.862, "step": 5715 }, { "epoch": 6.510541310541311, "grad_norm": 0.16338565945625305, "learning_rate": 3.986410823255791e-05, "loss": 0.8783, "step": 5716 }, { "epoch": 6.511680911680911, "grad_norm": 0.16927994787693024, "learning_rate": 3.986036176896455e-05, "loss": 0.7268, "step": 5717 }, { "epoch": 6.512820512820513, "grad_norm": 0.2141008824110031, "learning_rate": 3.985661478922996e-05, "loss": 0.644, "step": 5718 }, { "epoch": 6.513960113960114, "grad_norm": 0.16924583911895752, "learning_rate": 3.985286729348429e-05, "loss": 0.6518, "step": 5719 }, { "epoch": 6.515099715099715, "grad_norm": 0.21093595027923584, "learning_rate": 3.984911928185769e-05, "loss": 0.6287, "step": 5720 }, { "epoch": 6.516239316239316, "grad_norm": 0.18122833967208862, "learning_rate": 3.9845370754480335e-05, "loss": 0.8333, "step": 5721 }, { "epoch": 6.517378917378918, "grad_norm": 0.21343985199928284, "learning_rate": 3.984162171148243e-05, "loss": 0.7368, "step": 5722 }, { "epoch": 6.518518518518518, "grad_norm": 0.21879547834396362, "learning_rate": 3.983787215299419e-05, "loss": 0.5762, "step": 5723 }, { "epoch": 6.51965811965812, "grad_norm": 0.2099982500076294, "learning_rate": 3.983412207914585e-05, "loss": 0.6849, "step": 5724 }, { "epoch": 6.520797720797721, "grad_norm": 0.21218600869178772, "learning_rate": 3.9830371490067654e-05, "loss": 0.7337, "step": 5725 }, { "epoch": 6.521937321937322, "grad_norm": 0.17987145483493805, "learning_rate": 3.982662038588988e-05, "loss": 0.828, "step": 5726 }, { "epoch": 6.523076923076923, "grad_norm": 0.21735313534736633, "learning_rate": 3.9822868766742796e-05, "loss": 0.6736, "step": 5727 }, { "epoch": 6.524216524216524, "grad_norm": 0.14864061772823334, "learning_rate": 3.9819116632756715e-05, "loss": 0.8228, "step": 5728 }, { "epoch": 6.525356125356125, "grad_norm": 0.15621700882911682, "learning_rate": 3.981536398406197e-05, "loss": 0.8113, "step": 5729 }, { "epoch": 6.526495726495726, "grad_norm": 0.16754594445228577, "learning_rate": 3.981161082078888e-05, "loss": 0.9663, "step": 5730 }, { "epoch": 6.527635327635328, "grad_norm": 0.1653737723827362, "learning_rate": 3.980785714306782e-05, "loss": 0.8668, "step": 5731 }, { "epoch": 6.528774928774929, "grad_norm": 0.18010717630386353, "learning_rate": 3.9804102951029165e-05, "loss": 0.8138, "step": 5732 }, { "epoch": 6.52991452991453, "grad_norm": 0.18655574321746826, "learning_rate": 3.98003482448033e-05, "loss": 0.6944, "step": 5733 }, { "epoch": 6.531054131054131, "grad_norm": 0.2002817690372467, "learning_rate": 3.9796593024520633e-05, "loss": 0.6958, "step": 5734 }, { "epoch": 6.532193732193733, "grad_norm": 0.16353358328342438, "learning_rate": 3.97928372903116e-05, "loss": 0.9086, "step": 5735 }, { "epoch": 6.533333333333333, "grad_norm": 0.2514662444591522, "learning_rate": 3.9789081042306656e-05, "loss": 0.7088, "step": 5736 }, { "epoch": 6.534472934472935, "grad_norm": 0.16266684234142303, "learning_rate": 3.978532428063625e-05, "loss": 0.8651, "step": 5737 }, { "epoch": 6.535612535612536, "grad_norm": 0.15383143723011017, "learning_rate": 3.978156700543088e-05, "loss": 0.946, "step": 5738 }, { "epoch": 6.536752136752137, "grad_norm": 0.16969384253025055, "learning_rate": 3.977780921682103e-05, "loss": 0.8188, "step": 5739 }, { "epoch": 6.537891737891738, "grad_norm": 0.15869368612766266, "learning_rate": 3.977405091493723e-05, "loss": 0.8662, "step": 5740 }, { "epoch": 6.5390313390313395, "grad_norm": 0.18994982540607452, "learning_rate": 3.977029209991001e-05, "loss": 0.6285, "step": 5741 }, { "epoch": 6.54017094017094, "grad_norm": 0.1764412671327591, "learning_rate": 3.9766532771869934e-05, "loss": 0.7685, "step": 5742 }, { "epoch": 6.5413105413105415, "grad_norm": 0.18912284076213837, "learning_rate": 3.976277293094756e-05, "loss": 0.8679, "step": 5743 }, { "epoch": 6.542450142450143, "grad_norm": 0.2137213796377182, "learning_rate": 3.975901257727349e-05, "loss": 0.5847, "step": 5744 }, { "epoch": 6.543589743589743, "grad_norm": 0.20754949748516083, "learning_rate": 3.975525171097832e-05, "loss": 0.7142, "step": 5745 }, { "epoch": 6.544729344729345, "grad_norm": 0.172889843583107, "learning_rate": 3.975149033219269e-05, "loss": 0.8403, "step": 5746 }, { "epoch": 6.545868945868946, "grad_norm": 0.1972753405570984, "learning_rate": 3.974772844104722e-05, "loss": 0.6786, "step": 5747 }, { "epoch": 6.547008547008547, "grad_norm": 0.16647538542747498, "learning_rate": 3.9743966037672595e-05, "loss": 0.8863, "step": 5748 }, { "epoch": 6.548148148148148, "grad_norm": 0.19450418651103973, "learning_rate": 3.974020312219948e-05, "loss": 0.7308, "step": 5749 }, { "epoch": 6.54928774928775, "grad_norm": 0.17630018293857574, "learning_rate": 3.9736439694758576e-05, "loss": 0.7399, "step": 5750 }, { "epoch": 6.55042735042735, "grad_norm": 0.16004161536693573, "learning_rate": 3.9732675755480596e-05, "loss": 0.7736, "step": 5751 }, { "epoch": 6.551566951566952, "grad_norm": 0.17618541419506073, "learning_rate": 3.972891130449628e-05, "loss": 0.6743, "step": 5752 }, { "epoch": 6.552706552706553, "grad_norm": 0.1759839653968811, "learning_rate": 3.9725146341936355e-05, "loss": 0.7801, "step": 5753 }, { "epoch": 6.553846153846154, "grad_norm": 0.24746043980121613, "learning_rate": 3.972138086793161e-05, "loss": 0.4182, "step": 5754 }, { "epoch": 6.554985754985755, "grad_norm": 0.22830608487129211, "learning_rate": 3.9717614882612816e-05, "loss": 0.4794, "step": 5755 }, { "epoch": 6.5561253561253565, "grad_norm": 0.19676141440868378, "learning_rate": 3.971384838611079e-05, "loss": 0.7074, "step": 5756 }, { "epoch": 6.557264957264957, "grad_norm": 0.14778468012809753, "learning_rate": 3.971008137855635e-05, "loss": 0.8634, "step": 5757 }, { "epoch": 6.5584045584045585, "grad_norm": 0.1831897646188736, "learning_rate": 3.970631386008033e-05, "loss": 0.7055, "step": 5758 }, { "epoch": 6.55954415954416, "grad_norm": 0.1810082495212555, "learning_rate": 3.9702545830813586e-05, "loss": 0.6765, "step": 5759 }, { "epoch": 6.5606837606837605, "grad_norm": 0.224855437874794, "learning_rate": 3.969877729088699e-05, "loss": 0.7338, "step": 5760 }, { "epoch": 6.561823361823362, "grad_norm": 0.18405993282794952, "learning_rate": 3.969500824043144e-05, "loss": 0.6423, "step": 5761 }, { "epoch": 6.562962962962963, "grad_norm": 0.2224266678094864, "learning_rate": 3.969123867957784e-05, "loss": 0.7585, "step": 5762 }, { "epoch": 6.564102564102564, "grad_norm": 0.18538491427898407, "learning_rate": 3.968746860845712e-05, "loss": 0.8427, "step": 5763 }, { "epoch": 6.565242165242165, "grad_norm": 0.17075911164283752, "learning_rate": 3.968369802720023e-05, "loss": 0.6958, "step": 5764 }, { "epoch": 6.566381766381767, "grad_norm": 0.2074151635169983, "learning_rate": 3.967992693593812e-05, "loss": 0.5261, "step": 5765 }, { "epoch": 6.567521367521367, "grad_norm": 0.16936489939689636, "learning_rate": 3.967615533480178e-05, "loss": 0.8159, "step": 5766 }, { "epoch": 6.568660968660969, "grad_norm": 0.1583995372056961, "learning_rate": 3.96723832239222e-05, "loss": 0.7726, "step": 5767 }, { "epoch": 6.56980056980057, "grad_norm": 0.16948576271533966, "learning_rate": 3.9668610603430414e-05, "loss": 0.808, "step": 5768 }, { "epoch": 6.570940170940171, "grad_norm": 0.2328588217496872, "learning_rate": 3.9664837473457436e-05, "loss": 0.5682, "step": 5769 }, { "epoch": 6.572079772079772, "grad_norm": 0.20211148262023926, "learning_rate": 3.966106383413433e-05, "loss": 0.7412, "step": 5770 }, { "epoch": 6.5732193732193736, "grad_norm": 0.23642036318778992, "learning_rate": 3.965728968559215e-05, "loss": 0.5093, "step": 5771 }, { "epoch": 6.574358974358974, "grad_norm": 0.1864829957485199, "learning_rate": 3.9653515027961984e-05, "loss": 0.6585, "step": 5772 }, { "epoch": 6.5754985754985755, "grad_norm": 0.17517369985580444, "learning_rate": 3.964973986137496e-05, "loss": 0.8928, "step": 5773 }, { "epoch": 6.576638176638177, "grad_norm": 0.18355229496955872, "learning_rate": 3.964596418596218e-05, "loss": 0.6959, "step": 5774 }, { "epoch": 6.5777777777777775, "grad_norm": 0.16656942665576935, "learning_rate": 3.9642188001854784e-05, "loss": 0.7535, "step": 5775 }, { "epoch": 6.578917378917379, "grad_norm": 0.1704527884721756, "learning_rate": 3.963841130918393e-05, "loss": 0.9112, "step": 5776 }, { "epoch": 6.58005698005698, "grad_norm": 0.1782730668783188, "learning_rate": 3.96346341080808e-05, "loss": 0.8264, "step": 5777 }, { "epoch": 6.581196581196581, "grad_norm": 0.20108534395694733, "learning_rate": 3.963085639867659e-05, "loss": 0.7168, "step": 5778 }, { "epoch": 6.582336182336182, "grad_norm": 0.1494375467300415, "learning_rate": 3.962707818110249e-05, "loss": 0.8728, "step": 5779 }, { "epoch": 6.583475783475784, "grad_norm": 0.1992223560810089, "learning_rate": 3.9623299455489746e-05, "loss": 0.6526, "step": 5780 }, { "epoch": 6.584615384615384, "grad_norm": 0.17835864424705505, "learning_rate": 3.9619520221969596e-05, "loss": 0.7503, "step": 5781 }, { "epoch": 6.585754985754986, "grad_norm": 0.2087980955839157, "learning_rate": 3.9615740480673305e-05, "loss": 0.8608, "step": 5782 }, { "epoch": 6.586894586894587, "grad_norm": 0.16845928132534027, "learning_rate": 3.961196023173215e-05, "loss": 0.8565, "step": 5783 }, { "epoch": 6.588034188034188, "grad_norm": 0.2142847180366516, "learning_rate": 3.960817947527744e-05, "loss": 0.6946, "step": 5784 }, { "epoch": 6.589173789173789, "grad_norm": 0.19557322561740875, "learning_rate": 3.9604398211440475e-05, "loss": 0.7325, "step": 5785 }, { "epoch": 6.590313390313391, "grad_norm": 0.17932438850402832, "learning_rate": 3.96006164403526e-05, "loss": 0.7565, "step": 5786 }, { "epoch": 6.591452991452991, "grad_norm": 0.1822424679994583, "learning_rate": 3.9596834162145165e-05, "loss": 0.6851, "step": 5787 }, { "epoch": 6.592592592592593, "grad_norm": 0.16308601200580597, "learning_rate": 3.959305137694955e-05, "loss": 0.8898, "step": 5788 }, { "epoch": 6.593732193732194, "grad_norm": 0.23532971739768982, "learning_rate": 3.958926808489711e-05, "loss": 0.6649, "step": 5789 }, { "epoch": 6.5948717948717945, "grad_norm": 0.20048783719539642, "learning_rate": 3.9585484286119285e-05, "loss": 0.7028, "step": 5790 }, { "epoch": 6.596011396011396, "grad_norm": 0.20119497179985046, "learning_rate": 3.958169998074748e-05, "loss": 0.7255, "step": 5791 }, { "epoch": 6.597150997150997, "grad_norm": 0.18448786437511444, "learning_rate": 3.957791516891313e-05, "loss": 0.6643, "step": 5792 }, { "epoch": 6.598290598290598, "grad_norm": 0.1583247184753418, "learning_rate": 3.9574129850747685e-05, "loss": 0.8638, "step": 5793 }, { "epoch": 6.599430199430199, "grad_norm": 0.18137192726135254, "learning_rate": 3.957034402638265e-05, "loss": 0.7553, "step": 5794 }, { "epoch": 6.600569800569801, "grad_norm": 0.1952274590730667, "learning_rate": 3.9566557695949494e-05, "loss": 0.709, "step": 5795 }, { "epoch": 6.601709401709401, "grad_norm": 0.1755567044019699, "learning_rate": 3.9562770859579726e-05, "loss": 0.8634, "step": 5796 }, { "epoch": 6.602849002849003, "grad_norm": 0.19217680394649506, "learning_rate": 3.955898351740488e-05, "loss": 0.7931, "step": 5797 }, { "epoch": 6.603988603988604, "grad_norm": 0.15367969870567322, "learning_rate": 3.95551956695565e-05, "loss": 0.8867, "step": 5798 }, { "epoch": 6.605128205128205, "grad_norm": 0.19388960301876068, "learning_rate": 3.955140731616614e-05, "loss": 0.8554, "step": 5799 }, { "epoch": 6.606267806267806, "grad_norm": 0.19001491367816925, "learning_rate": 3.954761845736539e-05, "loss": 0.7809, "step": 5800 }, { "epoch": 6.607407407407408, "grad_norm": 0.19071049988269806, "learning_rate": 3.954382909328586e-05, "loss": 0.7899, "step": 5801 }, { "epoch": 6.608547008547008, "grad_norm": 0.16556024551391602, "learning_rate": 3.954003922405913e-05, "loss": 0.8841, "step": 5802 }, { "epoch": 6.60968660968661, "grad_norm": 0.2018676996231079, "learning_rate": 3.953624884981686e-05, "loss": 0.7669, "step": 5803 }, { "epoch": 6.610826210826211, "grad_norm": 0.21063284575939178, "learning_rate": 3.9532457970690695e-05, "loss": 0.628, "step": 5804 }, { "epoch": 6.611965811965812, "grad_norm": 0.15658971667289734, "learning_rate": 3.95286665868123e-05, "loss": 1.0052, "step": 5805 }, { "epoch": 6.613105413105413, "grad_norm": 0.16427630186080933, "learning_rate": 3.952487469831336e-05, "loss": 0.9534, "step": 5806 }, { "epoch": 6.614245014245014, "grad_norm": 0.21921603381633759, "learning_rate": 3.9521082305325576e-05, "loss": 0.8093, "step": 5807 }, { "epoch": 6.615384615384615, "grad_norm": 0.2193557620048523, "learning_rate": 3.9517289407980675e-05, "loss": 0.6805, "step": 5808 }, { "epoch": 6.616524216524216, "grad_norm": 0.2055230438709259, "learning_rate": 3.951349600641039e-05, "loss": 0.7105, "step": 5809 }, { "epoch": 6.617663817663818, "grad_norm": 0.1983133852481842, "learning_rate": 3.950970210074647e-05, "loss": 0.697, "step": 5810 }, { "epoch": 6.618803418803418, "grad_norm": 0.2131713628768921, "learning_rate": 3.9505907691120694e-05, "loss": 0.7308, "step": 5811 }, { "epoch": 6.61994301994302, "grad_norm": 0.15987317264080048, "learning_rate": 3.9502112777664854e-05, "loss": 0.8438, "step": 5812 }, { "epoch": 6.621082621082621, "grad_norm": 0.22608226537704468, "learning_rate": 3.949831736051076e-05, "loss": 0.6202, "step": 5813 }, { "epoch": 6.622222222222222, "grad_norm": 0.14655257761478424, "learning_rate": 3.949452143979023e-05, "loss": 0.8999, "step": 5814 }, { "epoch": 6.623361823361823, "grad_norm": 0.19745361804962158, "learning_rate": 3.949072501563512e-05, "loss": 0.5827, "step": 5815 }, { "epoch": 6.624501424501425, "grad_norm": 0.2333279848098755, "learning_rate": 3.948692808817727e-05, "loss": 0.5832, "step": 5816 }, { "epoch": 6.625641025641025, "grad_norm": 0.20285245776176453, "learning_rate": 3.948313065754857e-05, "loss": 0.6778, "step": 5817 }, { "epoch": 6.626780626780627, "grad_norm": 0.1782693713903427, "learning_rate": 3.947933272388091e-05, "loss": 0.654, "step": 5818 }, { "epoch": 6.627920227920228, "grad_norm": 0.180553138256073, "learning_rate": 3.9475534287306215e-05, "loss": 0.7623, "step": 5819 }, { "epoch": 6.629059829059829, "grad_norm": 0.19957351684570312, "learning_rate": 3.9471735347956404e-05, "loss": 0.6796, "step": 5820 }, { "epoch": 6.63019943019943, "grad_norm": 0.23662860691547394, "learning_rate": 3.9467935905963426e-05, "loss": 0.7036, "step": 5821 }, { "epoch": 6.6313390313390315, "grad_norm": 0.1856183409690857, "learning_rate": 3.946413596145925e-05, "loss": 0.6507, "step": 5822 }, { "epoch": 6.632478632478632, "grad_norm": 0.18436004221439362, "learning_rate": 3.946033551457585e-05, "loss": 0.7845, "step": 5823 }, { "epoch": 6.633618233618233, "grad_norm": 0.18803197145462036, "learning_rate": 3.945653456544524e-05, "loss": 0.7594, "step": 5824 }, { "epoch": 6.634757834757835, "grad_norm": 0.16534455120563507, "learning_rate": 3.9452733114199414e-05, "loss": 0.7276, "step": 5825 }, { "epoch": 6.635897435897435, "grad_norm": 0.1900593787431717, "learning_rate": 3.944893116097043e-05, "loss": 0.7, "step": 5826 }, { "epoch": 6.637037037037037, "grad_norm": 0.2036062628030777, "learning_rate": 3.944512870589033e-05, "loss": 0.5818, "step": 5827 }, { "epoch": 6.638176638176638, "grad_norm": 0.151173934340477, "learning_rate": 3.9441325749091185e-05, "loss": 0.7745, "step": 5828 }, { "epoch": 6.639316239316239, "grad_norm": 0.16362364590168, "learning_rate": 3.9437522290705085e-05, "loss": 0.9502, "step": 5829 }, { "epoch": 6.64045584045584, "grad_norm": 0.17408059537410736, "learning_rate": 3.943371833086414e-05, "loss": 0.7773, "step": 5830 }, { "epoch": 6.641595441595442, "grad_norm": 0.2067190259695053, "learning_rate": 3.942991386970046e-05, "loss": 0.6148, "step": 5831 }, { "epoch": 6.642735042735043, "grad_norm": 0.16280965507030487, "learning_rate": 3.942610890734618e-05, "loss": 0.8159, "step": 5832 }, { "epoch": 6.643874643874644, "grad_norm": 0.15361712872982025, "learning_rate": 3.942230344393347e-05, "loss": 0.8658, "step": 5833 }, { "epoch": 6.645014245014245, "grad_norm": 0.18002967536449432, "learning_rate": 3.941849747959451e-05, "loss": 0.9421, "step": 5834 }, { "epoch": 6.6461538461538465, "grad_norm": 0.20684634149074554, "learning_rate": 3.941469101446147e-05, "loss": 0.6003, "step": 5835 }, { "epoch": 6.647293447293447, "grad_norm": 0.19789065420627594, "learning_rate": 3.941088404866657e-05, "loss": 0.7451, "step": 5836 }, { "epoch": 6.6484330484330485, "grad_norm": 0.2085425853729248, "learning_rate": 3.940707658234205e-05, "loss": 0.5644, "step": 5837 }, { "epoch": 6.64957264957265, "grad_norm": 0.1652538925409317, "learning_rate": 3.9403268615620126e-05, "loss": 0.7978, "step": 5838 }, { "epoch": 6.6507122507122505, "grad_norm": 0.18277491629123688, "learning_rate": 3.9399460148633084e-05, "loss": 0.6377, "step": 5839 }, { "epoch": 6.651851851851852, "grad_norm": 0.22560293972492218, "learning_rate": 3.939565118151318e-05, "loss": 0.7258, "step": 5840 }, { "epoch": 6.652991452991453, "grad_norm": 0.22446361184120178, "learning_rate": 3.9391841714392734e-05, "loss": 0.6234, "step": 5841 }, { "epoch": 6.654131054131054, "grad_norm": 0.16494520008563995, "learning_rate": 3.9388031747404045e-05, "loss": 0.9295, "step": 5842 }, { "epoch": 6.655270655270655, "grad_norm": 0.15717755258083344, "learning_rate": 3.938422128067944e-05, "loss": 0.8762, "step": 5843 }, { "epoch": 6.656410256410257, "grad_norm": 0.1929350197315216, "learning_rate": 3.938041031435128e-05, "loss": 0.8247, "step": 5844 }, { "epoch": 6.657549857549857, "grad_norm": 0.17361636459827423, "learning_rate": 3.937659884855193e-05, "loss": 0.8348, "step": 5845 }, { "epoch": 6.658689458689459, "grad_norm": 0.1679273396730423, "learning_rate": 3.9372786883413756e-05, "loss": 0.8137, "step": 5846 }, { "epoch": 6.65982905982906, "grad_norm": 0.1900729537010193, "learning_rate": 3.936897441906917e-05, "loss": 0.7637, "step": 5847 }, { "epoch": 6.660968660968661, "grad_norm": 0.1886366456747055, "learning_rate": 3.936516145565059e-05, "loss": 0.5926, "step": 5848 }, { "epoch": 6.662108262108262, "grad_norm": 0.16536475718021393, "learning_rate": 3.936134799329044e-05, "loss": 0.871, "step": 5849 }, { "epoch": 6.663247863247864, "grad_norm": 0.18564140796661377, "learning_rate": 3.935753403212119e-05, "loss": 0.7842, "step": 5850 }, { "epoch": 6.664387464387464, "grad_norm": 0.2144373059272766, "learning_rate": 3.9353719572275294e-05, "loss": 0.7704, "step": 5851 }, { "epoch": 6.6655270655270655, "grad_norm": 0.16584932804107666, "learning_rate": 3.934990461388525e-05, "loss": 0.7655, "step": 5852 }, { "epoch": 6.666666666666667, "grad_norm": 0.18302355706691742, "learning_rate": 3.934608915708355e-05, "loss": 0.79, "step": 5853 }, { "epoch": 6.6678062678062675, "grad_norm": 0.22107873857021332, "learning_rate": 3.9342273202002725e-05, "loss": 0.6241, "step": 5854 }, { "epoch": 6.668945868945869, "grad_norm": 0.16592036187648773, "learning_rate": 3.933845674877531e-05, "loss": 0.9072, "step": 5855 }, { "epoch": 6.67008547008547, "grad_norm": 0.19325651228427887, "learning_rate": 3.9334639797533854e-05, "loss": 0.7101, "step": 5856 }, { "epoch": 6.671225071225071, "grad_norm": 0.13958954811096191, "learning_rate": 3.933082234841094e-05, "loss": 1.0207, "step": 5857 }, { "epoch": 6.672364672364672, "grad_norm": 0.1926376074552536, "learning_rate": 3.932700440153917e-05, "loss": 0.6568, "step": 5858 }, { "epoch": 6.673504273504274, "grad_norm": 0.15818478167057037, "learning_rate": 3.932318595705113e-05, "loss": 0.9745, "step": 5859 }, { "epoch": 6.674643874643874, "grad_norm": 0.16915123164653778, "learning_rate": 3.931936701507945e-05, "loss": 0.8881, "step": 5860 }, { "epoch": 6.675783475783476, "grad_norm": 0.18347162008285522, "learning_rate": 3.931554757575677e-05, "loss": 0.6816, "step": 5861 }, { "epoch": 6.676923076923077, "grad_norm": 0.15807324647903442, "learning_rate": 3.931172763921577e-05, "loss": 0.8644, "step": 5862 }, { "epoch": 6.678062678062678, "grad_norm": 0.20925036072731018, "learning_rate": 3.93079072055891e-05, "loss": 0.7324, "step": 5863 }, { "epoch": 6.679202279202279, "grad_norm": 0.1938028782606125, "learning_rate": 3.9304086275009464e-05, "loss": 0.6967, "step": 5864 }, { "epoch": 6.680341880341881, "grad_norm": 0.1903635412454605, "learning_rate": 3.930026484760958e-05, "loss": 0.8094, "step": 5865 }, { "epoch": 6.681481481481481, "grad_norm": 0.17423859238624573, "learning_rate": 3.929644292352217e-05, "loss": 0.8438, "step": 5866 }, { "epoch": 6.682621082621083, "grad_norm": 0.26916250586509705, "learning_rate": 3.929262050287998e-05, "loss": 0.6472, "step": 5867 }, { "epoch": 6.683760683760684, "grad_norm": 0.19918455183506012, "learning_rate": 3.9288797585815787e-05, "loss": 0.637, "step": 5868 }, { "epoch": 6.6849002849002845, "grad_norm": 0.2415318787097931, "learning_rate": 3.928497417246235e-05, "loss": 0.5483, "step": 5869 }, { "epoch": 6.686039886039886, "grad_norm": 0.22512377798557281, "learning_rate": 3.928115026295247e-05, "loss": 0.5247, "step": 5870 }, { "epoch": 6.687179487179487, "grad_norm": 0.17512984573841095, "learning_rate": 3.927732585741898e-05, "loss": 0.779, "step": 5871 }, { "epoch": 6.688319088319088, "grad_norm": 0.19650410115718842, "learning_rate": 3.927350095599469e-05, "loss": 0.6823, "step": 5872 }, { "epoch": 6.689458689458689, "grad_norm": 0.19306553900241852, "learning_rate": 3.926967555881247e-05, "loss": 0.788, "step": 5873 }, { "epoch": 6.690598290598291, "grad_norm": 0.24310457706451416, "learning_rate": 3.9265849666005174e-05, "loss": 0.9546, "step": 5874 }, { "epoch": 6.691737891737891, "grad_norm": 0.20699258148670197, "learning_rate": 3.926202327770568e-05, "loss": 0.6547, "step": 5875 }, { "epoch": 6.692877492877493, "grad_norm": 0.16633839905261993, "learning_rate": 3.925819639404691e-05, "loss": 0.878, "step": 5876 }, { "epoch": 6.694017094017094, "grad_norm": 0.2062740921974182, "learning_rate": 3.925436901516175e-05, "loss": 0.7389, "step": 5877 }, { "epoch": 6.695156695156696, "grad_norm": 0.1940494179725647, "learning_rate": 3.9250541141183166e-05, "loss": 0.668, "step": 5878 }, { "epoch": 6.696296296296296, "grad_norm": 0.1629640758037567, "learning_rate": 3.9246712772244096e-05, "loss": 0.9344, "step": 5879 }, { "epoch": 6.697435897435898, "grad_norm": 0.17727266252040863, "learning_rate": 3.924288390847752e-05, "loss": 0.7256, "step": 5880 }, { "epoch": 6.698575498575499, "grad_norm": 0.1548059582710266, "learning_rate": 3.9239054550016415e-05, "loss": 0.8553, "step": 5881 }, { "epoch": 6.6997150997151, "grad_norm": 0.2040969431400299, "learning_rate": 3.9235224696993785e-05, "loss": 0.7916, "step": 5882 }, { "epoch": 6.700854700854701, "grad_norm": 0.2270168960094452, "learning_rate": 3.9231394349542656e-05, "loss": 0.6614, "step": 5883 }, { "epoch": 6.7019943019943025, "grad_norm": 0.16240133345127106, "learning_rate": 3.9227563507796065e-05, "loss": 0.9146, "step": 5884 }, { "epoch": 6.703133903133903, "grad_norm": 0.18176966905593872, "learning_rate": 3.9223732171887064e-05, "loss": 0.8457, "step": 5885 }, { "epoch": 6.704273504273504, "grad_norm": 0.19711975753307343, "learning_rate": 3.921990034194873e-05, "loss": 0.7225, "step": 5886 }, { "epoch": 6.705413105413106, "grad_norm": 0.2124066799879074, "learning_rate": 3.9216068018114163e-05, "loss": 0.6209, "step": 5887 }, { "epoch": 6.706552706552706, "grad_norm": 0.16848540306091309, "learning_rate": 3.921223520051644e-05, "loss": 0.9434, "step": 5888 }, { "epoch": 6.707692307692308, "grad_norm": 0.19832824170589447, "learning_rate": 3.9208401889288716e-05, "loss": 0.7989, "step": 5889 }, { "epoch": 6.708831908831909, "grad_norm": 0.18089281022548676, "learning_rate": 3.920456808456412e-05, "loss": 0.8511, "step": 5890 }, { "epoch": 6.70997150997151, "grad_norm": 0.17510640621185303, "learning_rate": 3.920073378647581e-05, "loss": 0.7544, "step": 5891 }, { "epoch": 6.711111111111111, "grad_norm": 0.18664780259132385, "learning_rate": 3.9196898995156965e-05, "loss": 0.8927, "step": 5892 }, { "epoch": 6.712250712250713, "grad_norm": 0.1773071587085724, "learning_rate": 3.919306371074078e-05, "loss": 0.799, "step": 5893 }, { "epoch": 6.713390313390313, "grad_norm": 0.20973248779773712, "learning_rate": 3.918922793336045e-05, "loss": 0.6017, "step": 5894 }, { "epoch": 6.714529914529915, "grad_norm": 0.19266605377197266, "learning_rate": 3.9185391663149224e-05, "loss": 0.7711, "step": 5895 }, { "epoch": 6.715669515669516, "grad_norm": 0.19082137942314148, "learning_rate": 3.918155490024034e-05, "loss": 0.7924, "step": 5896 }, { "epoch": 6.716809116809117, "grad_norm": 0.19654573500156403, "learning_rate": 3.9177717644767045e-05, "loss": 0.7591, "step": 5897 }, { "epoch": 6.717948717948718, "grad_norm": 0.1775243729352951, "learning_rate": 3.9173879896862625e-05, "loss": 0.7881, "step": 5898 }, { "epoch": 6.7190883190883195, "grad_norm": 0.23833443224430084, "learning_rate": 3.9170041656660386e-05, "loss": 0.6271, "step": 5899 }, { "epoch": 6.72022792022792, "grad_norm": 0.15929442644119263, "learning_rate": 3.9166202924293624e-05, "loss": 0.9971, "step": 5900 }, { "epoch": 6.7213675213675215, "grad_norm": 0.19255511462688446, "learning_rate": 3.916236369989569e-05, "loss": 0.7336, "step": 5901 }, { "epoch": 6.722507122507123, "grad_norm": 0.15640977025032043, "learning_rate": 3.9158523983599904e-05, "loss": 0.8545, "step": 5902 }, { "epoch": 6.7236467236467234, "grad_norm": 0.20344340801239014, "learning_rate": 3.915468377553965e-05, "loss": 0.7012, "step": 5903 }, { "epoch": 6.724786324786325, "grad_norm": 0.1709020584821701, "learning_rate": 3.9150843075848306e-05, "loss": 0.8126, "step": 5904 }, { "epoch": 6.725925925925926, "grad_norm": 0.20648181438446045, "learning_rate": 3.914700188465926e-05, "loss": 0.7476, "step": 5905 }, { "epoch": 6.727065527065527, "grad_norm": 0.23185275495052338, "learning_rate": 3.914316020210594e-05, "loss": 0.7011, "step": 5906 }, { "epoch": 6.728205128205128, "grad_norm": 0.1782190352678299, "learning_rate": 3.913931802832177e-05, "loss": 0.8867, "step": 5907 }, { "epoch": 6.72934472934473, "grad_norm": 0.15411274135112762, "learning_rate": 3.9135475363440204e-05, "loss": 0.7985, "step": 5908 }, { "epoch": 6.73048433048433, "grad_norm": 0.19791282713413239, "learning_rate": 3.91316322075947e-05, "loss": 0.8322, "step": 5909 }, { "epoch": 6.731623931623932, "grad_norm": 0.156734436750412, "learning_rate": 3.912778856091875e-05, "loss": 0.9158, "step": 5910 }, { "epoch": 6.732763532763533, "grad_norm": 0.19053269922733307, "learning_rate": 3.9123944423545856e-05, "loss": 0.8376, "step": 5911 }, { "epoch": 6.733903133903134, "grad_norm": 0.18442152440547943, "learning_rate": 3.912009979560953e-05, "loss": 0.8331, "step": 5912 }, { "epoch": 6.735042735042735, "grad_norm": 0.16653503477573395, "learning_rate": 3.91162546772433e-05, "loss": 0.8032, "step": 5913 }, { "epoch": 6.7361823361823365, "grad_norm": 0.1338692307472229, "learning_rate": 3.9112409068580734e-05, "loss": 0.9277, "step": 5914 }, { "epoch": 6.737321937321937, "grad_norm": 0.21259275078773499, "learning_rate": 3.910856296975539e-05, "loss": 0.6737, "step": 5915 }, { "epoch": 6.7384615384615385, "grad_norm": 0.20762629806995392, "learning_rate": 3.910471638090086e-05, "loss": 0.6558, "step": 5916 }, { "epoch": 6.73960113960114, "grad_norm": 0.20877009630203247, "learning_rate": 3.910086930215073e-05, "loss": 0.6069, "step": 5917 }, { "epoch": 6.7407407407407405, "grad_norm": 0.1782436966896057, "learning_rate": 3.909702173363864e-05, "loss": 0.7485, "step": 5918 }, { "epoch": 6.741880341880342, "grad_norm": 0.22678615152835846, "learning_rate": 3.909317367549822e-05, "loss": 0.6518, "step": 5919 }, { "epoch": 6.743019943019943, "grad_norm": 0.17655235528945923, "learning_rate": 3.908932512786312e-05, "loss": 0.7872, "step": 5920 }, { "epoch": 6.744159544159544, "grad_norm": 0.2063540816307068, "learning_rate": 3.9085476090867e-05, "loss": 0.6143, "step": 5921 }, { "epoch": 6.745299145299145, "grad_norm": 0.1563992202281952, "learning_rate": 3.9081626564643574e-05, "loss": 0.7947, "step": 5922 }, { "epoch": 6.746438746438747, "grad_norm": 0.1319338083267212, "learning_rate": 3.907777654932653e-05, "loss": 0.8895, "step": 5923 }, { "epoch": 6.747578347578347, "grad_norm": 0.18121173977851868, "learning_rate": 3.907392604504959e-05, "loss": 0.7324, "step": 5924 }, { "epoch": 6.748717948717949, "grad_norm": 0.2089747190475464, "learning_rate": 3.90700750519465e-05, "loss": 0.6822, "step": 5925 }, { "epoch": 6.74985754985755, "grad_norm": 0.1886071413755417, "learning_rate": 3.9066223570151014e-05, "loss": 0.731, "step": 5926 }, { "epoch": 6.750997150997151, "grad_norm": 0.17594726383686066, "learning_rate": 3.9062371599796886e-05, "loss": 0.6334, "step": 5927 }, { "epoch": 6.752136752136752, "grad_norm": 0.16101515293121338, "learning_rate": 3.905851914101793e-05, "loss": 0.8636, "step": 5928 }, { "epoch": 6.753276353276354, "grad_norm": 0.19269603490829468, "learning_rate": 3.9054666193947944e-05, "loss": 0.792, "step": 5929 }, { "epoch": 6.754415954415954, "grad_norm": 0.1676628440618515, "learning_rate": 3.905081275872075e-05, "loss": 0.7984, "step": 5930 }, { "epoch": 6.7555555555555555, "grad_norm": 0.19541795551776886, "learning_rate": 3.904695883547019e-05, "loss": 0.8965, "step": 5931 }, { "epoch": 6.756695156695157, "grad_norm": 0.18709918856620789, "learning_rate": 3.904310442433011e-05, "loss": 0.8357, "step": 5932 }, { "epoch": 6.7578347578347575, "grad_norm": 0.1864151805639267, "learning_rate": 3.903924952543441e-05, "loss": 0.7147, "step": 5933 }, { "epoch": 6.758974358974359, "grad_norm": 0.15796662867069244, "learning_rate": 3.903539413891696e-05, "loss": 0.8757, "step": 5934 }, { "epoch": 6.76011396011396, "grad_norm": 0.19118762016296387, "learning_rate": 3.903153826491167e-05, "loss": 0.7308, "step": 5935 }, { "epoch": 6.761253561253561, "grad_norm": 0.14818577468395233, "learning_rate": 3.9027681903552475e-05, "loss": 1.0174, "step": 5936 }, { "epoch": 6.762393162393162, "grad_norm": 0.20847098529338837, "learning_rate": 3.9023825054973305e-05, "loss": 0.6998, "step": 5937 }, { "epoch": 6.763532763532764, "grad_norm": 0.23740985989570618, "learning_rate": 3.901996771930812e-05, "loss": 0.4863, "step": 5938 }, { "epoch": 6.764672364672364, "grad_norm": 0.13948723673820496, "learning_rate": 3.901610989669092e-05, "loss": 0.7495, "step": 5939 }, { "epoch": 6.765811965811966, "grad_norm": 0.1309722512960434, "learning_rate": 3.9012251587255666e-05, "loss": 0.9935, "step": 5940 }, { "epoch": 6.766951566951567, "grad_norm": 0.21944525837898254, "learning_rate": 3.9008392791136385e-05, "loss": 0.7677, "step": 5941 }, { "epoch": 6.768091168091168, "grad_norm": 0.21262919902801514, "learning_rate": 3.90045335084671e-05, "loss": 0.6654, "step": 5942 }, { "epoch": 6.769230769230769, "grad_norm": 0.21239960193634033, "learning_rate": 3.900067373938185e-05, "loss": 0.8509, "step": 5943 }, { "epoch": 6.770370370370371, "grad_norm": 0.20576876401901245, "learning_rate": 3.89968134840147e-05, "loss": 0.7516, "step": 5944 }, { "epoch": 6.771509971509971, "grad_norm": 0.1977962851524353, "learning_rate": 3.899295274249972e-05, "loss": 0.7108, "step": 5945 }, { "epoch": 6.772649572649573, "grad_norm": 0.28916770219802856, "learning_rate": 3.898909151497102e-05, "loss": 0.6947, "step": 5946 }, { "epoch": 6.773789173789174, "grad_norm": 0.22454294562339783, "learning_rate": 3.89852298015627e-05, "loss": 0.5575, "step": 5947 }, { "epoch": 6.7749287749287745, "grad_norm": 0.1765683889389038, "learning_rate": 3.898136760240889e-05, "loss": 0.8813, "step": 5948 }, { "epoch": 6.776068376068376, "grad_norm": 0.15932169556617737, "learning_rate": 3.897750491764372e-05, "loss": 0.6905, "step": 5949 }, { "epoch": 6.777207977207977, "grad_norm": 0.15663357079029083, "learning_rate": 3.897364174740139e-05, "loss": 0.8492, "step": 5950 }, { "epoch": 6.778347578347578, "grad_norm": 0.1646343618631363, "learning_rate": 3.896977809181604e-05, "loss": 0.8219, "step": 5951 }, { "epoch": 6.779487179487179, "grad_norm": 0.17472128570079803, "learning_rate": 3.896591395102188e-05, "loss": 0.8449, "step": 5952 }, { "epoch": 6.780626780626781, "grad_norm": 0.2021871656179428, "learning_rate": 3.8962049325153124e-05, "loss": 0.7462, "step": 5953 }, { "epoch": 6.781766381766381, "grad_norm": 0.17948055267333984, "learning_rate": 3.8958184214344e-05, "loss": 0.7316, "step": 5954 }, { "epoch": 6.782905982905983, "grad_norm": 0.17953024804592133, "learning_rate": 3.895431861872875e-05, "loss": 0.7895, "step": 5955 }, { "epoch": 6.784045584045584, "grad_norm": 0.15803547203540802, "learning_rate": 3.895045253844165e-05, "loss": 0.8049, "step": 5956 }, { "epoch": 6.785185185185185, "grad_norm": 0.2365812510251999, "learning_rate": 3.8946585973616956e-05, "loss": 0.4408, "step": 5957 }, { "epoch": 6.786324786324786, "grad_norm": 0.19496726989746094, "learning_rate": 3.894271892438899e-05, "loss": 0.8899, "step": 5958 }, { "epoch": 6.787464387464388, "grad_norm": 0.20378226041793823, "learning_rate": 3.893885139089204e-05, "loss": 0.6466, "step": 5959 }, { "epoch": 6.788603988603988, "grad_norm": 0.20074985921382904, "learning_rate": 3.8934983373260456e-05, "loss": 0.5904, "step": 5960 }, { "epoch": 6.78974358974359, "grad_norm": 0.17071180045604706, "learning_rate": 3.8931114871628584e-05, "loss": 1.0478, "step": 5961 }, { "epoch": 6.790883190883191, "grad_norm": 0.18325185775756836, "learning_rate": 3.892724588613077e-05, "loss": 0.8355, "step": 5962 }, { "epoch": 6.792022792022792, "grad_norm": 0.20274940133094788, "learning_rate": 3.892337641690141e-05, "loss": 0.6405, "step": 5963 }, { "epoch": 6.793162393162393, "grad_norm": 0.1966695487499237, "learning_rate": 3.891950646407491e-05, "loss": 0.7737, "step": 5964 }, { "epoch": 6.794301994301994, "grad_norm": 0.1600027233362198, "learning_rate": 3.891563602778565e-05, "loss": 0.826, "step": 5965 }, { "epoch": 6.795441595441595, "grad_norm": 0.22741127014160156, "learning_rate": 3.89117651081681e-05, "loss": 0.8079, "step": 5966 }, { "epoch": 6.796581196581196, "grad_norm": 0.18806487321853638, "learning_rate": 3.890789370535668e-05, "loss": 0.8599, "step": 5967 }, { "epoch": 6.797720797720798, "grad_norm": 0.19617880880832672, "learning_rate": 3.890402181948587e-05, "loss": 0.7257, "step": 5968 }, { "epoch": 6.798860398860398, "grad_norm": 0.17069341242313385, "learning_rate": 3.890014945069015e-05, "loss": 0.8443, "step": 5969 }, { "epoch": 6.8, "grad_norm": 0.21661441028118134, "learning_rate": 3.889627659910401e-05, "loss": 0.6373, "step": 5970 }, { "epoch": 6.801139601139601, "grad_norm": 0.1777721792459488, "learning_rate": 3.889240326486197e-05, "loss": 0.7308, "step": 5971 }, { "epoch": 6.802279202279202, "grad_norm": 0.16292446851730347, "learning_rate": 3.8888529448098554e-05, "loss": 0.9607, "step": 5972 }, { "epoch": 6.803418803418803, "grad_norm": 0.15493546426296234, "learning_rate": 3.888465514894832e-05, "loss": 0.9694, "step": 5973 }, { "epoch": 6.804558404558405, "grad_norm": 0.2350025773048401, "learning_rate": 3.888078036754583e-05, "loss": 0.4756, "step": 5974 }, { "epoch": 6.805698005698005, "grad_norm": 0.18948960304260254, "learning_rate": 3.887690510402567e-05, "loss": 0.6344, "step": 5975 }, { "epoch": 6.806837606837607, "grad_norm": 0.23959320783615112, "learning_rate": 3.887302935852243e-05, "loss": 0.7438, "step": 5976 }, { "epoch": 6.807977207977208, "grad_norm": 0.15740106999874115, "learning_rate": 3.8869153131170725e-05, "loss": 0.682, "step": 5977 }, { "epoch": 6.8091168091168095, "grad_norm": 0.20803236961364746, "learning_rate": 3.88652764221052e-05, "loss": 0.7569, "step": 5978 }, { "epoch": 6.81025641025641, "grad_norm": 0.19682174921035767, "learning_rate": 3.886139923146049e-05, "loss": 0.6669, "step": 5979 }, { "epoch": 6.8113960113960115, "grad_norm": 0.18188917636871338, "learning_rate": 3.885752155937127e-05, "loss": 0.8527, "step": 5980 }, { "epoch": 6.812535612535613, "grad_norm": 0.2449973225593567, "learning_rate": 3.885364340597222e-05, "loss": 0.6625, "step": 5981 }, { "epoch": 6.8136752136752134, "grad_norm": 0.22377589344978333, "learning_rate": 3.8849764771398025e-05, "loss": 0.6388, "step": 5982 }, { "epoch": 6.814814814814815, "grad_norm": 0.17622581124305725, "learning_rate": 3.884588565578342e-05, "loss": 0.7033, "step": 5983 }, { "epoch": 6.815954415954416, "grad_norm": 0.16889968514442444, "learning_rate": 3.884200605926312e-05, "loss": 0.9071, "step": 5984 }, { "epoch": 6.817094017094017, "grad_norm": 0.21841172873973846, "learning_rate": 3.88381259819719e-05, "loss": 0.7623, "step": 5985 }, { "epoch": 6.818233618233618, "grad_norm": 0.19975070655345917, "learning_rate": 3.8834245424044505e-05, "loss": 0.5701, "step": 5986 }, { "epoch": 6.81937321937322, "grad_norm": 0.18981358408927917, "learning_rate": 3.883036438561572e-05, "loss": 0.6684, "step": 5987 }, { "epoch": 6.82051282051282, "grad_norm": 0.19890616834163666, "learning_rate": 3.882648286682034e-05, "loss": 0.9101, "step": 5988 }, { "epoch": 6.821652421652422, "grad_norm": 0.20187322795391083, "learning_rate": 3.882260086779319e-05, "loss": 0.727, "step": 5989 }, { "epoch": 6.822792022792023, "grad_norm": 0.17234423756599426, "learning_rate": 3.881871838866911e-05, "loss": 0.7585, "step": 5990 }, { "epoch": 6.823931623931624, "grad_norm": 0.19274017214775085, "learning_rate": 3.881483542958293e-05, "loss": 0.6255, "step": 5991 }, { "epoch": 6.825071225071225, "grad_norm": 0.20389336347579956, "learning_rate": 3.8810951990669526e-05, "loss": 0.8204, "step": 5992 }, { "epoch": 6.8262108262108265, "grad_norm": 0.253685861825943, "learning_rate": 3.8807068072063777e-05, "loss": 0.7393, "step": 5993 }, { "epoch": 6.827350427350427, "grad_norm": 0.17790208756923676, "learning_rate": 3.880318367390059e-05, "loss": 0.6648, "step": 5994 }, { "epoch": 6.8284900284900285, "grad_norm": 0.22549931704998016, "learning_rate": 3.879929879631487e-05, "loss": 0.5051, "step": 5995 }, { "epoch": 6.82962962962963, "grad_norm": 0.15260136127471924, "learning_rate": 3.879541343944156e-05, "loss": 0.8469, "step": 5996 }, { "epoch": 6.8307692307692305, "grad_norm": 0.20349223911762238, "learning_rate": 3.87915276034156e-05, "loss": 0.6843, "step": 5997 }, { "epoch": 6.831908831908832, "grad_norm": 0.20144180953502655, "learning_rate": 3.8787641288371953e-05, "loss": 0.8198, "step": 5998 }, { "epoch": 6.833048433048433, "grad_norm": 0.1843477040529251, "learning_rate": 3.878375449444561e-05, "loss": 0.913, "step": 5999 }, { "epoch": 6.834188034188034, "grad_norm": 0.20599128305912018, "learning_rate": 3.877986722177158e-05, "loss": 0.7206, "step": 6000 }, { "epoch": 6.835327635327635, "grad_norm": 0.18114681541919708, "learning_rate": 3.8775979470484856e-05, "loss": 0.7812, "step": 6001 }, { "epoch": 6.836467236467237, "grad_norm": 0.1992897242307663, "learning_rate": 3.8772091240720485e-05, "loss": 0.8578, "step": 6002 }, { "epoch": 6.837606837606837, "grad_norm": 0.17705799639225006, "learning_rate": 3.876820253261351e-05, "loss": 0.7819, "step": 6003 }, { "epoch": 6.838746438746439, "grad_norm": 0.2017027884721756, "learning_rate": 3.8764313346299e-05, "loss": 0.7741, "step": 6004 }, { "epoch": 6.83988603988604, "grad_norm": 0.20565707981586456, "learning_rate": 3.876042368191203e-05, "loss": 0.6396, "step": 6005 }, { "epoch": 6.841025641025641, "grad_norm": 0.19953148066997528, "learning_rate": 3.875653353958771e-05, "loss": 0.5716, "step": 6006 }, { "epoch": 6.842165242165242, "grad_norm": 0.18805484473705292, "learning_rate": 3.875264291946115e-05, "loss": 0.828, "step": 6007 }, { "epoch": 6.843304843304844, "grad_norm": 0.1909594088792801, "learning_rate": 3.874875182166748e-05, "loss": 0.7643, "step": 6008 }, { "epoch": 6.844444444444444, "grad_norm": 0.1765531301498413, "learning_rate": 3.874486024634185e-05, "loss": 0.7989, "step": 6009 }, { "epoch": 6.8455840455840455, "grad_norm": 0.25546133518218994, "learning_rate": 3.874096819361942e-05, "loss": 0.5542, "step": 6010 }, { "epoch": 6.846723646723647, "grad_norm": 0.21030516922473907, "learning_rate": 3.873707566363538e-05, "loss": 0.5716, "step": 6011 }, { "epoch": 6.8478632478632475, "grad_norm": 0.15768931806087494, "learning_rate": 3.873318265652493e-05, "loss": 0.9758, "step": 6012 }, { "epoch": 6.849002849002849, "grad_norm": 0.24656745791435242, "learning_rate": 3.872928917242328e-05, "loss": 0.7578, "step": 6013 }, { "epoch": 6.85014245014245, "grad_norm": 0.21918340027332306, "learning_rate": 3.872539521146565e-05, "loss": 0.6557, "step": 6014 }, { "epoch": 6.851282051282051, "grad_norm": 0.2026132196187973, "learning_rate": 3.872150077378731e-05, "loss": 0.7647, "step": 6015 }, { "epoch": 6.852421652421652, "grad_norm": 0.20676188170909882, "learning_rate": 3.871760585952351e-05, "loss": 0.6727, "step": 6016 }, { "epoch": 6.853561253561254, "grad_norm": 0.17937958240509033, "learning_rate": 3.871371046880954e-05, "loss": 0.7444, "step": 6017 }, { "epoch": 6.854700854700854, "grad_norm": 0.20318861305713654, "learning_rate": 3.870981460178069e-05, "loss": 0.8459, "step": 6018 }, { "epoch": 6.855840455840456, "grad_norm": 0.16790197789669037, "learning_rate": 3.870591825857227e-05, "loss": 0.7932, "step": 6019 }, { "epoch": 6.856980056980057, "grad_norm": 0.20463784039020538, "learning_rate": 3.870202143931962e-05, "loss": 0.7491, "step": 6020 }, { "epoch": 6.858119658119658, "grad_norm": 0.1709291934967041, "learning_rate": 3.8698124144158096e-05, "loss": 0.7971, "step": 6021 }, { "epoch": 6.859259259259259, "grad_norm": 0.21086876094341278, "learning_rate": 3.869422637322304e-05, "loss": 0.7615, "step": 6022 }, { "epoch": 6.860398860398861, "grad_norm": 0.16039279103279114, "learning_rate": 3.8690328126649845e-05, "loss": 0.7821, "step": 6023 }, { "epoch": 6.861538461538462, "grad_norm": 0.17380090057849884, "learning_rate": 3.8686429404573905e-05, "loss": 0.8232, "step": 6024 }, { "epoch": 6.862678062678063, "grad_norm": 0.18670782446861267, "learning_rate": 3.868253020713063e-05, "loss": 0.6692, "step": 6025 }, { "epoch": 6.863817663817664, "grad_norm": 0.16718174517154694, "learning_rate": 3.867863053445546e-05, "loss": 0.8576, "step": 6026 }, { "epoch": 6.864957264957265, "grad_norm": 0.1585511863231659, "learning_rate": 3.8674730386683835e-05, "loss": 0.7946, "step": 6027 }, { "epoch": 6.866096866096866, "grad_norm": 0.190469428896904, "learning_rate": 3.8670829763951214e-05, "loss": 0.7718, "step": 6028 }, { "epoch": 6.867236467236467, "grad_norm": 0.19864922761917114, "learning_rate": 3.8666928666393085e-05, "loss": 0.6731, "step": 6029 }, { "epoch": 6.868376068376069, "grad_norm": 0.22138063609600067, "learning_rate": 3.866302709414494e-05, "loss": 0.6512, "step": 6030 }, { "epoch": 6.869515669515669, "grad_norm": 0.22428637742996216, "learning_rate": 3.865912504734228e-05, "loss": 0.5032, "step": 6031 }, { "epoch": 6.870655270655271, "grad_norm": 0.17769813537597656, "learning_rate": 3.8655222526120647e-05, "loss": 0.8244, "step": 6032 }, { "epoch": 6.871794871794872, "grad_norm": 0.18228816986083984, "learning_rate": 3.865131953061559e-05, "loss": 0.7863, "step": 6033 }, { "epoch": 6.872934472934473, "grad_norm": 0.16256453096866608, "learning_rate": 3.864741606096267e-05, "loss": 0.784, "step": 6034 }, { "epoch": 6.874074074074074, "grad_norm": 0.20455239713191986, "learning_rate": 3.864351211729745e-05, "loss": 0.8365, "step": 6035 }, { "epoch": 6.875213675213676, "grad_norm": 0.2072412371635437, "learning_rate": 3.863960769975554e-05, "loss": 0.7205, "step": 6036 }, { "epoch": 6.876353276353276, "grad_norm": 0.15931065380573273, "learning_rate": 3.8635702808472544e-05, "loss": 0.9042, "step": 6037 }, { "epoch": 6.877492877492878, "grad_norm": 0.15490956604480743, "learning_rate": 3.8631797443584084e-05, "loss": 0.8965, "step": 6038 }, { "epoch": 6.878632478632479, "grad_norm": 0.208894744515419, "learning_rate": 3.862789160522582e-05, "loss": 0.6424, "step": 6039 }, { "epoch": 6.87977207977208, "grad_norm": 0.20632405579090118, "learning_rate": 3.862398529353341e-05, "loss": 0.7237, "step": 6040 }, { "epoch": 6.880911680911681, "grad_norm": 0.16035480797290802, "learning_rate": 3.862007850864251e-05, "loss": 0.8734, "step": 6041 }, { "epoch": 6.8820512820512825, "grad_norm": 0.17163221538066864, "learning_rate": 3.8616171250688825e-05, "loss": 0.8483, "step": 6042 }, { "epoch": 6.883190883190883, "grad_norm": 0.18672995269298553, "learning_rate": 3.8612263519808075e-05, "loss": 0.7181, "step": 6043 }, { "epoch": 6.8843304843304844, "grad_norm": 0.16936668753623962, "learning_rate": 3.860835531613598e-05, "loss": 0.7153, "step": 6044 }, { "epoch": 6.885470085470086, "grad_norm": 0.18464472889900208, "learning_rate": 3.860444663980829e-05, "loss": 0.7194, "step": 6045 }, { "epoch": 6.886609686609686, "grad_norm": 0.1461935192346573, "learning_rate": 3.860053749096074e-05, "loss": 0.9748, "step": 6046 }, { "epoch": 6.887749287749288, "grad_norm": 0.2069118469953537, "learning_rate": 3.859662786972913e-05, "loss": 0.6863, "step": 6047 }, { "epoch": 6.888888888888889, "grad_norm": 0.15878000855445862, "learning_rate": 3.8592717776249244e-05, "loss": 0.6677, "step": 6048 }, { "epoch": 6.89002849002849, "grad_norm": 0.15306872129440308, "learning_rate": 3.858880721065689e-05, "loss": 0.8074, "step": 6049 }, { "epoch": 6.891168091168091, "grad_norm": 0.1978970170021057, "learning_rate": 3.858489617308789e-05, "loss": 0.7025, "step": 6050 }, { "epoch": 6.892307692307693, "grad_norm": 0.17932195961475372, "learning_rate": 3.858098466367809e-05, "loss": 0.8567, "step": 6051 }, { "epoch": 6.893447293447293, "grad_norm": 0.16375882923603058, "learning_rate": 3.857707268256334e-05, "loss": 0.7409, "step": 6052 }, { "epoch": 6.894586894586895, "grad_norm": 0.19845464825630188, "learning_rate": 3.857316022987952e-05, "loss": 0.6167, "step": 6053 }, { "epoch": 6.895726495726496, "grad_norm": 0.1802733838558197, "learning_rate": 3.8569247305762514e-05, "loss": 0.7946, "step": 6054 }, { "epoch": 6.896866096866097, "grad_norm": 0.19858969748020172, "learning_rate": 3.8565333910348245e-05, "loss": 0.734, "step": 6055 }, { "epoch": 6.898005698005698, "grad_norm": 0.19176946580410004, "learning_rate": 3.856142004377262e-05, "loss": 0.7769, "step": 6056 }, { "epoch": 6.8991452991452995, "grad_norm": 0.18545280396938324, "learning_rate": 3.855750570617158e-05, "loss": 0.6867, "step": 6057 }, { "epoch": 6.9002849002849, "grad_norm": 0.24399203062057495, "learning_rate": 3.855359089768108e-05, "loss": 0.9097, "step": 6058 }, { "epoch": 6.9014245014245015, "grad_norm": 0.17382477223873138, "learning_rate": 3.85496756184371e-05, "loss": 0.7393, "step": 6059 }, { "epoch": 6.902564102564103, "grad_norm": 0.1953514963388443, "learning_rate": 3.8545759868575625e-05, "loss": 0.6214, "step": 6060 }, { "epoch": 6.9037037037037035, "grad_norm": 0.14534224569797516, "learning_rate": 3.8541843648232656e-05, "loss": 0.9603, "step": 6061 }, { "epoch": 6.904843304843305, "grad_norm": 0.21155428886413574, "learning_rate": 3.8537926957544225e-05, "loss": 0.6902, "step": 6062 }, { "epoch": 6.905982905982906, "grad_norm": 0.2420935034751892, "learning_rate": 3.853400979664635e-05, "loss": 0.5605, "step": 6063 }, { "epoch": 6.907122507122507, "grad_norm": 0.20479215681552887, "learning_rate": 3.85300921656751e-05, "loss": 0.6898, "step": 6064 }, { "epoch": 6.908262108262108, "grad_norm": 0.15989598631858826, "learning_rate": 3.852617406476654e-05, "loss": 0.836, "step": 6065 }, { "epoch": 6.90940170940171, "grad_norm": 0.18351493775844574, "learning_rate": 3.852225549405677e-05, "loss": 0.6975, "step": 6066 }, { "epoch": 6.91054131054131, "grad_norm": 0.20182476937770844, "learning_rate": 3.851833645368187e-05, "loss": 0.7256, "step": 6067 }, { "epoch": 6.911680911680912, "grad_norm": 0.19139719009399414, "learning_rate": 3.851441694377796e-05, "loss": 0.7843, "step": 6068 }, { "epoch": 6.912820512820513, "grad_norm": 0.1669207364320755, "learning_rate": 3.85104969644812e-05, "loss": 0.7148, "step": 6069 }, { "epoch": 6.913960113960114, "grad_norm": 0.17730924487113953, "learning_rate": 3.8506576515927717e-05, "loss": 0.8612, "step": 6070 }, { "epoch": 6.915099715099715, "grad_norm": 0.16904395818710327, "learning_rate": 3.85026555982537e-05, "loss": 0.7853, "step": 6071 }, { "epoch": 6.9162393162393165, "grad_norm": 0.2015823870897293, "learning_rate": 3.849873421159531e-05, "loss": 0.6923, "step": 6072 }, { "epoch": 6.917378917378917, "grad_norm": 0.16778279840946198, "learning_rate": 3.849481235608877e-05, "loss": 0.9067, "step": 6073 }, { "epoch": 6.9185185185185185, "grad_norm": 0.16917239129543304, "learning_rate": 3.849089003187027e-05, "loss": 0.8165, "step": 6074 }, { "epoch": 6.91965811965812, "grad_norm": 0.23813937604427338, "learning_rate": 3.8486967239076075e-05, "loss": 0.5043, "step": 6075 }, { "epoch": 6.9207977207977205, "grad_norm": 0.24754557013511658, "learning_rate": 3.8483043977842415e-05, "loss": 0.5972, "step": 6076 }, { "epoch": 6.921937321937322, "grad_norm": 0.5422152876853943, "learning_rate": 3.847912024830556e-05, "loss": 0.9602, "step": 6077 }, { "epoch": 6.923076923076923, "grad_norm": 0.19803474843502045, "learning_rate": 3.847519605060178e-05, "loss": 0.6874, "step": 6078 }, { "epoch": 6.924216524216524, "grad_norm": 0.19350609183311462, "learning_rate": 3.847127138486739e-05, "loss": 0.8061, "step": 6079 }, { "epoch": 6.925356125356125, "grad_norm": 0.20874303579330444, "learning_rate": 3.846734625123871e-05, "loss": 0.6525, "step": 6080 }, { "epoch": 6.926495726495727, "grad_norm": 0.1934829205274582, "learning_rate": 3.846342064985204e-05, "loss": 0.8948, "step": 6081 }, { "epoch": 6.927635327635327, "grad_norm": 0.2268676608800888, "learning_rate": 3.845949458084376e-05, "loss": 0.6561, "step": 6082 }, { "epoch": 6.928774928774929, "grad_norm": 0.19096451997756958, "learning_rate": 3.845556804435021e-05, "loss": 0.7082, "step": 6083 }, { "epoch": 6.92991452991453, "grad_norm": 0.2186242789030075, "learning_rate": 3.8451641040507794e-05, "loss": 0.5949, "step": 6084 }, { "epoch": 6.931054131054131, "grad_norm": 0.1837964653968811, "learning_rate": 3.844771356945287e-05, "loss": 0.7762, "step": 6085 }, { "epoch": 6.932193732193732, "grad_norm": 0.19915224611759186, "learning_rate": 3.844378563132189e-05, "loss": 0.8012, "step": 6086 }, { "epoch": 6.933333333333334, "grad_norm": 0.19100026786327362, "learning_rate": 3.8439857226251255e-05, "loss": 0.7764, "step": 6087 }, { "epoch": 6.934472934472934, "grad_norm": 0.15966521203517914, "learning_rate": 3.843592835437743e-05, "loss": 0.7474, "step": 6088 }, { "epoch": 6.9356125356125355, "grad_norm": 0.17729613184928894, "learning_rate": 3.843199901583685e-05, "loss": 0.7772, "step": 6089 }, { "epoch": 6.936752136752137, "grad_norm": 0.21062202751636505, "learning_rate": 3.8428069210766016e-05, "loss": 0.7281, "step": 6090 }, { "epoch": 6.9378917378917375, "grad_norm": 0.1801566630601883, "learning_rate": 3.842413893930141e-05, "loss": 0.7834, "step": 6091 }, { "epoch": 6.939031339031339, "grad_norm": 0.19413518905639648, "learning_rate": 3.842020820157954e-05, "loss": 0.9733, "step": 6092 }, { "epoch": 6.94017094017094, "grad_norm": 0.21270233392715454, "learning_rate": 3.8416276997736935e-05, "loss": 0.6284, "step": 6093 }, { "epoch": 6.941310541310541, "grad_norm": 0.25936126708984375, "learning_rate": 3.841234532791013e-05, "loss": 0.65, "step": 6094 }, { "epoch": 6.942450142450142, "grad_norm": 0.14870886504650116, "learning_rate": 3.84084131922357e-05, "loss": 0.9136, "step": 6095 }, { "epoch": 6.943589743589744, "grad_norm": 0.2110133320093155, "learning_rate": 3.84044805908502e-05, "loss": 0.7237, "step": 6096 }, { "epoch": 6.944729344729344, "grad_norm": 0.17981089651584625, "learning_rate": 3.840054752389022e-05, "loss": 0.8673, "step": 6097 }, { "epoch": 6.945868945868946, "grad_norm": 0.21633680164813995, "learning_rate": 3.839661399149237e-05, "loss": 0.6492, "step": 6098 }, { "epoch": 6.947008547008547, "grad_norm": 0.1665615737438202, "learning_rate": 3.839267999379329e-05, "loss": 0.7253, "step": 6099 }, { "epoch": 6.948148148148148, "grad_norm": 0.16829712688922882, "learning_rate": 3.838874553092959e-05, "loss": 0.9606, "step": 6100 }, { "epoch": 6.949287749287749, "grad_norm": 0.2334202229976654, "learning_rate": 3.838481060303795e-05, "loss": 0.6407, "step": 6101 }, { "epoch": 6.950427350427351, "grad_norm": 0.17394769191741943, "learning_rate": 3.8380875210255026e-05, "loss": 0.7099, "step": 6102 }, { "epoch": 6.951566951566951, "grad_norm": 0.15774554014205933, "learning_rate": 3.8376939352717504e-05, "loss": 1.03, "step": 6103 }, { "epoch": 6.952706552706553, "grad_norm": 0.21563377976417542, "learning_rate": 3.8373003030562096e-05, "loss": 0.5559, "step": 6104 }, { "epoch": 6.953846153846154, "grad_norm": 0.15935485064983368, "learning_rate": 3.836906624392552e-05, "loss": 0.949, "step": 6105 }, { "epoch": 6.9549857549857546, "grad_norm": 0.18906600773334503, "learning_rate": 3.8365128992944496e-05, "loss": 0.6684, "step": 6106 }, { "epoch": 6.956125356125356, "grad_norm": 0.19462430477142334, "learning_rate": 3.83611912777558e-05, "loss": 0.7422, "step": 6107 }, { "epoch": 6.957264957264957, "grad_norm": 0.16680681705474854, "learning_rate": 3.835725309849618e-05, "loss": 0.9508, "step": 6108 }, { "epoch": 6.958404558404558, "grad_norm": 0.20158030092716217, "learning_rate": 3.835331445530243e-05, "loss": 0.7641, "step": 6109 }, { "epoch": 6.959544159544159, "grad_norm": 0.165785014629364, "learning_rate": 3.834937534831135e-05, "loss": 1.009, "step": 6110 }, { "epoch": 6.960683760683761, "grad_norm": 0.260774701833725, "learning_rate": 3.834543577765975e-05, "loss": 0.5141, "step": 6111 }, { "epoch": 6.961823361823361, "grad_norm": 0.20132353901863098, "learning_rate": 3.834149574348447e-05, "loss": 0.7545, "step": 6112 }, { "epoch": 6.962962962962963, "grad_norm": 0.18978120386600494, "learning_rate": 3.833755524592235e-05, "loss": 0.7082, "step": 6113 }, { "epoch": 6.964102564102564, "grad_norm": 0.19258476793766022, "learning_rate": 3.833361428511026e-05, "loss": 0.8663, "step": 6114 }, { "epoch": 6.965242165242165, "grad_norm": 0.19694635272026062, "learning_rate": 3.832967286118507e-05, "loss": 0.8062, "step": 6115 }, { "epoch": 6.966381766381766, "grad_norm": 0.17284518480300903, "learning_rate": 3.832573097428369e-05, "loss": 0.6405, "step": 6116 }, { "epoch": 6.967521367521368, "grad_norm": 0.196293443441391, "learning_rate": 3.832178862454303e-05, "loss": 0.7117, "step": 6117 }, { "epoch": 6.968660968660968, "grad_norm": 0.1941792070865631, "learning_rate": 3.831784581210002e-05, "loss": 0.6874, "step": 6118 }, { "epoch": 6.96980056980057, "grad_norm": 0.19943450391292572, "learning_rate": 3.831390253709159e-05, "loss": 0.6166, "step": 6119 }, { "epoch": 6.970940170940171, "grad_norm": 0.17430053651332855, "learning_rate": 3.830995879965471e-05, "loss": 0.885, "step": 6120 }, { "epoch": 6.972079772079772, "grad_norm": 0.19655632972717285, "learning_rate": 3.8306014599926366e-05, "loss": 0.7789, "step": 6121 }, { "epoch": 6.973219373219373, "grad_norm": 0.19573719799518585, "learning_rate": 3.830206993804354e-05, "loss": 0.8234, "step": 6122 }, { "epoch": 6.9743589743589745, "grad_norm": 0.1745665967464447, "learning_rate": 3.829812481414325e-05, "loss": 0.6927, "step": 6123 }, { "epoch": 6.975498575498576, "grad_norm": 0.18195831775665283, "learning_rate": 3.8294179228362515e-05, "loss": 0.7576, "step": 6124 }, { "epoch": 6.976638176638176, "grad_norm": 0.22098475694656372, "learning_rate": 3.829023318083837e-05, "loss": 0.7319, "step": 6125 }, { "epoch": 6.977777777777778, "grad_norm": 0.22530747950077057, "learning_rate": 3.828628667170788e-05, "loss": 0.7975, "step": 6126 }, { "epoch": 6.978917378917379, "grad_norm": 0.1645485907793045, "learning_rate": 3.8282339701108115e-05, "loss": 0.7984, "step": 6127 }, { "epoch": 6.98005698005698, "grad_norm": 0.16129399836063385, "learning_rate": 3.8278392269176175e-05, "loss": 0.8035, "step": 6128 }, { "epoch": 6.981196581196581, "grad_norm": 0.2013663351535797, "learning_rate": 3.827444437604915e-05, "loss": 0.6483, "step": 6129 }, { "epoch": 6.982336182336183, "grad_norm": 0.16806060075759888, "learning_rate": 3.827049602186416e-05, "loss": 0.6591, "step": 6130 }, { "epoch": 6.983475783475783, "grad_norm": 0.1734466850757599, "learning_rate": 3.826654720675835e-05, "loss": 0.8583, "step": 6131 }, { "epoch": 6.984615384615385, "grad_norm": 0.19261620938777924, "learning_rate": 3.826259793086888e-05, "loss": 0.6961, "step": 6132 }, { "epoch": 6.985754985754986, "grad_norm": 0.1568852961063385, "learning_rate": 3.825864819433291e-05, "loss": 0.8037, "step": 6133 }, { "epoch": 6.986894586894587, "grad_norm": 0.17459647357463837, "learning_rate": 3.8254697997287624e-05, "loss": 0.8462, "step": 6134 }, { "epoch": 6.988034188034188, "grad_norm": 0.16817878186702728, "learning_rate": 3.825074733987023e-05, "loss": 0.8529, "step": 6135 }, { "epoch": 6.9891737891737895, "grad_norm": 0.17553623020648956, "learning_rate": 3.824679622221794e-05, "loss": 0.8571, "step": 6136 }, { "epoch": 6.99031339031339, "grad_norm": 0.14740072190761566, "learning_rate": 3.824284464446799e-05, "loss": 0.9423, "step": 6137 }, { "epoch": 6.9914529914529915, "grad_norm": 0.17177456617355347, "learning_rate": 3.823889260675763e-05, "loss": 0.7721, "step": 6138 }, { "epoch": 6.992592592592593, "grad_norm": 0.17124785482883453, "learning_rate": 3.823494010922411e-05, "loss": 0.777, "step": 6139 }, { "epoch": 6.9937321937321935, "grad_norm": 0.19509488344192505, "learning_rate": 3.823098715200473e-05, "loss": 0.7244, "step": 6140 }, { "epoch": 6.994871794871795, "grad_norm": 0.17017509043216705, "learning_rate": 3.8227033735236784e-05, "loss": 0.9012, "step": 6141 }, { "epoch": 6.996011396011396, "grad_norm": 0.18812443315982819, "learning_rate": 3.8223079859057575e-05, "loss": 0.8421, "step": 6142 }, { "epoch": 6.997150997150997, "grad_norm": 0.1462877094745636, "learning_rate": 3.821912552360445e-05, "loss": 0.8403, "step": 6143 }, { "epoch": 6.998290598290598, "grad_norm": 0.17546626925468445, "learning_rate": 3.821517072901474e-05, "loss": 0.7151, "step": 6144 }, { "epoch": 6.9994301994302, "grad_norm": 0.2128238081932068, "learning_rate": 3.82112154754258e-05, "loss": 0.6944, "step": 6145 }, { "epoch": 7.0, "grad_norm": 0.5616998672485352, "learning_rate": 3.820725976297502e-05, "loss": 0.222, "step": 6146 }, { "epoch": 7.001139601139601, "grad_norm": 0.15877407789230347, "learning_rate": 3.820330359179979e-05, "loss": 0.7071, "step": 6147 }, { "epoch": 7.002279202279202, "grad_norm": 0.21806888282299042, "learning_rate": 3.819934696203752e-05, "loss": 0.782, "step": 6148 }, { "epoch": 7.003418803418803, "grad_norm": 0.20868085324764252, "learning_rate": 3.819538987382562e-05, "loss": 0.6292, "step": 6149 }, { "epoch": 7.004558404558405, "grad_norm": 0.22230400145053864, "learning_rate": 3.8191432327301556e-05, "loss": 0.7988, "step": 6150 }, { "epoch": 7.005698005698005, "grad_norm": 0.1857542246580124, "learning_rate": 3.818747432260276e-05, "loss": 0.7849, "step": 6151 }, { "epoch": 7.006837606837607, "grad_norm": 0.17437143623828888, "learning_rate": 3.818351585986673e-05, "loss": 0.819, "step": 6152 }, { "epoch": 7.007977207977208, "grad_norm": 0.19998261332511902, "learning_rate": 3.817955693923092e-05, "loss": 0.773, "step": 6153 }, { "epoch": 7.009116809116809, "grad_norm": 0.18506497144699097, "learning_rate": 3.817559756083287e-05, "loss": 0.7527, "step": 6154 }, { "epoch": 7.01025641025641, "grad_norm": 0.17287065088748932, "learning_rate": 3.817163772481007e-05, "loss": 0.8289, "step": 6155 }, { "epoch": 7.011396011396012, "grad_norm": 0.20515497028827667, "learning_rate": 3.816767743130008e-05, "loss": 0.6524, "step": 6156 }, { "epoch": 7.012535612535612, "grad_norm": 0.22639547288417816, "learning_rate": 3.8163716680440444e-05, "loss": 0.5337, "step": 6157 }, { "epoch": 7.013675213675214, "grad_norm": 0.23270073533058167, "learning_rate": 3.8159755472368716e-05, "loss": 0.6143, "step": 6158 }, { "epoch": 7.014814814814815, "grad_norm": 0.19774219393730164, "learning_rate": 3.815579380722249e-05, "loss": 0.7952, "step": 6159 }, { "epoch": 7.015954415954416, "grad_norm": 0.21318969130516052, "learning_rate": 3.815183168513937e-05, "loss": 0.7876, "step": 6160 }, { "epoch": 7.017094017094017, "grad_norm": 0.1774902492761612, "learning_rate": 3.814786910625698e-05, "loss": 0.7481, "step": 6161 }, { "epoch": 7.0182336182336185, "grad_norm": 0.19848689436912537, "learning_rate": 3.814390607071293e-05, "loss": 0.7195, "step": 6162 }, { "epoch": 7.019373219373219, "grad_norm": 0.19199149310588837, "learning_rate": 3.813994257864487e-05, "loss": 0.6329, "step": 6163 }, { "epoch": 7.02051282051282, "grad_norm": 0.15332777798175812, "learning_rate": 3.8135978630190475e-05, "loss": 0.8689, "step": 6164 }, { "epoch": 7.021652421652422, "grad_norm": 0.21588227152824402, "learning_rate": 3.813201422548742e-05, "loss": 0.7189, "step": 6165 }, { "epoch": 7.022792022792022, "grad_norm": 0.16512146592140198, "learning_rate": 3.812804936467339e-05, "loss": 0.7091, "step": 6166 }, { "epoch": 7.023931623931624, "grad_norm": 0.17718267440795898, "learning_rate": 3.812408404788612e-05, "loss": 0.8396, "step": 6167 }, { "epoch": 7.025071225071225, "grad_norm": 0.1793379932641983, "learning_rate": 3.81201182752633e-05, "loss": 0.7302, "step": 6168 }, { "epoch": 7.026210826210826, "grad_norm": 0.2009889781475067, "learning_rate": 3.811615204694271e-05, "loss": 0.6743, "step": 6169 }, { "epoch": 7.027350427350427, "grad_norm": 0.19245830178260803, "learning_rate": 3.811218536306208e-05, "loss": 0.7848, "step": 6170 }, { "epoch": 7.028490028490029, "grad_norm": 0.1399477869272232, "learning_rate": 3.810821822375919e-05, "loss": 0.8796, "step": 6171 }, { "epoch": 7.029629629629629, "grad_norm": 0.15103209018707275, "learning_rate": 3.810425062917184e-05, "loss": 0.8595, "step": 6172 }, { "epoch": 7.030769230769231, "grad_norm": 0.16661714017391205, "learning_rate": 3.810028257943783e-05, "loss": 0.7775, "step": 6173 }, { "epoch": 7.031908831908832, "grad_norm": 0.3529140055179596, "learning_rate": 3.809631407469497e-05, "loss": 0.8598, "step": 6174 }, { "epoch": 7.033048433048433, "grad_norm": 0.1784033477306366, "learning_rate": 3.809234511508111e-05, "loss": 0.6518, "step": 6175 }, { "epoch": 7.034188034188034, "grad_norm": 0.15606975555419922, "learning_rate": 3.808837570073411e-05, "loss": 0.9845, "step": 6176 }, { "epoch": 7.0353276353276355, "grad_norm": 0.19400037825107574, "learning_rate": 3.808440583179182e-05, "loss": 0.7577, "step": 6177 }, { "epoch": 7.036467236467236, "grad_norm": 0.16465899348258972, "learning_rate": 3.808043550839214e-05, "loss": 0.9296, "step": 6178 }, { "epoch": 7.0376068376068375, "grad_norm": 0.20099498331546783, "learning_rate": 3.807646473067296e-05, "loss": 0.7348, "step": 6179 }, { "epoch": 7.038746438746439, "grad_norm": 0.21224263310432434, "learning_rate": 3.80724934987722e-05, "loss": 0.6274, "step": 6180 }, { "epoch": 7.0398860398860394, "grad_norm": 0.18550077080726624, "learning_rate": 3.8068521812827786e-05, "loss": 0.7556, "step": 6181 }, { "epoch": 7.041025641025641, "grad_norm": 0.17198899388313293, "learning_rate": 3.806454967297768e-05, "loss": 0.8068, "step": 6182 }, { "epoch": 7.042165242165242, "grad_norm": 0.19705481827259064, "learning_rate": 3.806057707935983e-05, "loss": 0.658, "step": 6183 }, { "epoch": 7.043304843304844, "grad_norm": 0.16149069368839264, "learning_rate": 3.805660403211222e-05, "loss": 0.8061, "step": 6184 }, { "epoch": 7.044444444444444, "grad_norm": 0.22355586290359497, "learning_rate": 3.805263053137285e-05, "loss": 0.6092, "step": 6185 }, { "epoch": 7.045584045584046, "grad_norm": 0.19000141322612762, "learning_rate": 3.804865657727972e-05, "loss": 0.7292, "step": 6186 }, { "epoch": 7.046723646723647, "grad_norm": 0.1919831931591034, "learning_rate": 3.804468216997087e-05, "loss": 0.7595, "step": 6187 }, { "epoch": 7.047863247863248, "grad_norm": 0.19090311229228973, "learning_rate": 3.804070730958433e-05, "loss": 0.8146, "step": 6188 }, { "epoch": 7.049002849002849, "grad_norm": 0.15896199643611908, "learning_rate": 3.8036731996258176e-05, "loss": 0.7582, "step": 6189 }, { "epoch": 7.050142450142451, "grad_norm": 0.16467316448688507, "learning_rate": 3.8032756230130455e-05, "loss": 0.9001, "step": 6190 }, { "epoch": 7.051282051282051, "grad_norm": 0.23392154276371002, "learning_rate": 3.802878001133928e-05, "loss": 0.8469, "step": 6191 }, { "epoch": 7.0524216524216525, "grad_norm": 0.20221088826656342, "learning_rate": 3.802480334002273e-05, "loss": 0.7333, "step": 6192 }, { "epoch": 7.053561253561254, "grad_norm": 0.1554950475692749, "learning_rate": 3.802082621631896e-05, "loss": 0.8843, "step": 6193 }, { "epoch": 7.0547008547008545, "grad_norm": 0.16900935769081116, "learning_rate": 3.801684864036608e-05, "loss": 0.9239, "step": 6194 }, { "epoch": 7.055840455840456, "grad_norm": 0.1725151687860489, "learning_rate": 3.801287061230225e-05, "loss": 0.8127, "step": 6195 }, { "epoch": 7.056980056980057, "grad_norm": 0.19210198521614075, "learning_rate": 3.800889213226564e-05, "loss": 0.7209, "step": 6196 }, { "epoch": 7.058119658119658, "grad_norm": 0.1677943915128708, "learning_rate": 3.8004913200394424e-05, "loss": 0.8652, "step": 6197 }, { "epoch": 7.059259259259259, "grad_norm": 0.14735138416290283, "learning_rate": 3.800093381682682e-05, "loss": 1.0122, "step": 6198 }, { "epoch": 7.060398860398861, "grad_norm": 0.18274037539958954, "learning_rate": 3.799695398170102e-05, "loss": 0.8897, "step": 6199 }, { "epoch": 7.061538461538461, "grad_norm": 0.1837732344865799, "learning_rate": 3.799297369515528e-05, "loss": 0.8731, "step": 6200 }, { "epoch": 7.062678062678063, "grad_norm": 0.21213854849338531, "learning_rate": 3.798899295732782e-05, "loss": 0.6061, "step": 6201 }, { "epoch": 7.063817663817664, "grad_norm": 0.20559147000312805, "learning_rate": 3.798501176835693e-05, "loss": 0.7465, "step": 6202 }, { "epoch": 7.064957264957265, "grad_norm": 0.20526240766048431, "learning_rate": 3.798103012838086e-05, "loss": 0.7046, "step": 6203 }, { "epoch": 7.066096866096866, "grad_norm": 0.18055404722690582, "learning_rate": 3.7977048037537915e-05, "loss": 0.9445, "step": 6204 }, { "epoch": 7.067236467236468, "grad_norm": 0.19590269029140472, "learning_rate": 3.797306549596641e-05, "loss": 0.6855, "step": 6205 }, { "epoch": 7.068376068376068, "grad_norm": 0.18099546432495117, "learning_rate": 3.7969082503804666e-05, "loss": 0.833, "step": 6206 }, { "epoch": 7.06951566951567, "grad_norm": 0.17715154588222504, "learning_rate": 3.796509906119101e-05, "loss": 0.6798, "step": 6207 }, { "epoch": 7.070655270655271, "grad_norm": 0.1813538521528244, "learning_rate": 3.796111516826382e-05, "loss": 0.7816, "step": 6208 }, { "epoch": 7.0717948717948715, "grad_norm": 0.1964096575975418, "learning_rate": 3.795713082516145e-05, "loss": 0.7283, "step": 6209 }, { "epoch": 7.072934472934473, "grad_norm": 0.15161927044391632, "learning_rate": 3.7953146032022304e-05, "loss": 0.8643, "step": 6210 }, { "epoch": 7.074074074074074, "grad_norm": 0.17454886436462402, "learning_rate": 3.794916078898477e-05, "loss": 0.6843, "step": 6211 }, { "epoch": 7.075213675213675, "grad_norm": 0.19816745817661285, "learning_rate": 3.7945175096187275e-05, "loss": 0.7673, "step": 6212 }, { "epoch": 7.076353276353276, "grad_norm": 0.1860775649547577, "learning_rate": 3.794118895376824e-05, "loss": 0.6416, "step": 6213 }, { "epoch": 7.077492877492878, "grad_norm": 0.17332245409488678, "learning_rate": 3.793720236186613e-05, "loss": 0.7829, "step": 6214 }, { "epoch": 7.078632478632478, "grad_norm": 0.16401895880699158, "learning_rate": 3.793321532061941e-05, "loss": 0.7432, "step": 6215 }, { "epoch": 7.07977207977208, "grad_norm": 0.2050887644290924, "learning_rate": 3.7929227830166546e-05, "loss": 0.6448, "step": 6216 }, { "epoch": 7.080911680911681, "grad_norm": 0.1812848299741745, "learning_rate": 3.792523989064605e-05, "loss": 0.6495, "step": 6217 }, { "epoch": 7.082051282051282, "grad_norm": 0.17025980353355408, "learning_rate": 3.792125150219643e-05, "loss": 0.7915, "step": 6218 }, { "epoch": 7.083190883190883, "grad_norm": 0.19045335054397583, "learning_rate": 3.79172626649562e-05, "loss": 0.6843, "step": 6219 }, { "epoch": 7.084330484330485, "grad_norm": 0.21706286072731018, "learning_rate": 3.791327337906392e-05, "loss": 0.7377, "step": 6220 }, { "epoch": 7.085470085470085, "grad_norm": 0.206889346241951, "learning_rate": 3.7909283644658155e-05, "loss": 0.7257, "step": 6221 }, { "epoch": 7.086609686609687, "grad_norm": 0.15829861164093018, "learning_rate": 3.7905293461877456e-05, "loss": 0.7652, "step": 6222 }, { "epoch": 7.087749287749288, "grad_norm": 0.15279674530029297, "learning_rate": 3.790130283086043e-05, "loss": 0.9954, "step": 6223 }, { "epoch": 7.088888888888889, "grad_norm": 0.1945253312587738, "learning_rate": 3.789731175174568e-05, "loss": 0.7273, "step": 6224 }, { "epoch": 7.09002849002849, "grad_norm": 0.17092925310134888, "learning_rate": 3.7893320224671816e-05, "loss": 0.9352, "step": 6225 }, { "epoch": 7.091168091168091, "grad_norm": 0.17508654296398163, "learning_rate": 3.788932824977749e-05, "loss": 0.9059, "step": 6226 }, { "epoch": 7.092307692307692, "grad_norm": 0.17352032661437988, "learning_rate": 3.788533582720135e-05, "loss": 0.5925, "step": 6227 }, { "epoch": 7.093447293447293, "grad_norm": 0.16202287375926971, "learning_rate": 3.7881342957082064e-05, "loss": 0.8216, "step": 6228 }, { "epoch": 7.094586894586895, "grad_norm": 0.2091526836156845, "learning_rate": 3.787734963955831e-05, "loss": 0.7463, "step": 6229 }, { "epoch": 7.095726495726495, "grad_norm": 0.18009252846240997, "learning_rate": 3.787335587476879e-05, "loss": 0.8346, "step": 6230 }, { "epoch": 7.096866096866097, "grad_norm": 0.2047872245311737, "learning_rate": 3.786936166285221e-05, "loss": 0.6973, "step": 6231 }, { "epoch": 7.098005698005698, "grad_norm": 0.23231853544712067, "learning_rate": 3.7865367003947324e-05, "loss": 0.6047, "step": 6232 }, { "epoch": 7.099145299145299, "grad_norm": 0.19942378997802734, "learning_rate": 3.786137189819285e-05, "loss": 0.6073, "step": 6233 }, { "epoch": 7.1002849002849, "grad_norm": 0.18221347033977509, "learning_rate": 3.785737634572757e-05, "loss": 0.7227, "step": 6234 }, { "epoch": 7.101424501424502, "grad_norm": 0.15715356171131134, "learning_rate": 3.785338034669025e-05, "loss": 0.7622, "step": 6235 }, { "epoch": 7.102564102564102, "grad_norm": 0.16267679631710052, "learning_rate": 3.784938390121968e-05, "loss": 0.8677, "step": 6236 }, { "epoch": 7.103703703703704, "grad_norm": 0.193213552236557, "learning_rate": 3.784538700945467e-05, "loss": 0.7614, "step": 6237 }, { "epoch": 7.104843304843305, "grad_norm": 0.15954096615314484, "learning_rate": 3.784138967153405e-05, "loss": 0.8272, "step": 6238 }, { "epoch": 7.105982905982906, "grad_norm": 0.2050113081932068, "learning_rate": 3.783739188759665e-05, "loss": 0.7251, "step": 6239 }, { "epoch": 7.107122507122507, "grad_norm": 0.16363175213336945, "learning_rate": 3.783339365778132e-05, "loss": 0.914, "step": 6240 }, { "epoch": 7.1082621082621085, "grad_norm": 0.1950601041316986, "learning_rate": 3.782939498222695e-05, "loss": 0.7822, "step": 6241 }, { "epoch": 7.109401709401709, "grad_norm": 0.1943473368883133, "learning_rate": 3.78253958610724e-05, "loss": 0.7886, "step": 6242 }, { "epoch": 7.1105413105413104, "grad_norm": 0.15735872089862823, "learning_rate": 3.782139629445658e-05, "loss": 0.7819, "step": 6243 }, { "epoch": 7.111680911680912, "grad_norm": 0.18267220258712769, "learning_rate": 3.7817396282518424e-05, "loss": 0.7661, "step": 6244 }, { "epoch": 7.112820512820512, "grad_norm": 0.2050890028476715, "learning_rate": 3.781339582539684e-05, "loss": 0.6171, "step": 6245 }, { "epoch": 7.113960113960114, "grad_norm": 0.17580078542232513, "learning_rate": 3.780939492323077e-05, "loss": 1.0137, "step": 6246 }, { "epoch": 7.115099715099715, "grad_norm": 0.208582803606987, "learning_rate": 3.78053935761592e-05, "loss": 0.6083, "step": 6247 }, { "epoch": 7.116239316239316, "grad_norm": 0.16118314862251282, "learning_rate": 3.780139178432109e-05, "loss": 0.8084, "step": 6248 }, { "epoch": 7.117378917378917, "grad_norm": 0.16538970172405243, "learning_rate": 3.7797389547855443e-05, "loss": 0.8528, "step": 6249 }, { "epoch": 7.118518518518519, "grad_norm": 0.16062673926353455, "learning_rate": 3.779338686690126e-05, "loss": 0.7595, "step": 6250 }, { "epoch": 7.119658119658119, "grad_norm": 0.2156619429588318, "learning_rate": 3.778938374159758e-05, "loss": 0.6719, "step": 6251 }, { "epoch": 7.120797720797721, "grad_norm": 0.16036351025104523, "learning_rate": 3.7785380172083415e-05, "loss": 0.9017, "step": 6252 }, { "epoch": 7.121937321937322, "grad_norm": 0.20591062307357788, "learning_rate": 3.778137615849785e-05, "loss": 0.5711, "step": 6253 }, { "epoch": 7.123076923076923, "grad_norm": 0.19914428889751434, "learning_rate": 3.777737170097993e-05, "loss": 0.7247, "step": 6254 }, { "epoch": 7.124216524216524, "grad_norm": 0.19862796366214752, "learning_rate": 3.7773366799668776e-05, "loss": 0.8529, "step": 6255 }, { "epoch": 7.1253561253561255, "grad_norm": 0.1894269585609436, "learning_rate": 3.7769361454703445e-05, "loss": 0.8447, "step": 6256 }, { "epoch": 7.126495726495726, "grad_norm": 0.21392220258712769, "learning_rate": 3.7765355666223085e-05, "loss": 0.5833, "step": 6257 }, { "epoch": 7.1276353276353275, "grad_norm": 0.18810603022575378, "learning_rate": 3.7761349434366804e-05, "loss": 0.6764, "step": 6258 }, { "epoch": 7.128774928774929, "grad_norm": 0.22102965414524078, "learning_rate": 3.775734275927378e-05, "loss": 0.6572, "step": 6259 }, { "epoch": 7.12991452991453, "grad_norm": 0.18941909074783325, "learning_rate": 3.7753335641083146e-05, "loss": 0.662, "step": 6260 }, { "epoch": 7.131054131054131, "grad_norm": 0.2360292226076126, "learning_rate": 3.7749328079934104e-05, "loss": 0.4593, "step": 6261 }, { "epoch": 7.132193732193732, "grad_norm": 0.2077096402645111, "learning_rate": 3.774532007596583e-05, "loss": 0.6786, "step": 6262 }, { "epoch": 7.133333333333334, "grad_norm": 0.20154207944869995, "learning_rate": 3.774131162931754e-05, "loss": 0.767, "step": 6263 }, { "epoch": 7.134472934472934, "grad_norm": 0.20883165299892426, "learning_rate": 3.7737302740128466e-05, "loss": 0.7668, "step": 6264 }, { "epoch": 7.135612535612536, "grad_norm": 0.24615712463855743, "learning_rate": 3.7733293408537835e-05, "loss": 0.5943, "step": 6265 }, { "epoch": 7.136752136752137, "grad_norm": 0.22391575574874878, "learning_rate": 3.772928363468491e-05, "loss": 0.5849, "step": 6266 }, { "epoch": 7.137891737891738, "grad_norm": 0.19004112482070923, "learning_rate": 3.7725273418708954e-05, "loss": 0.7521, "step": 6267 }, { "epoch": 7.139031339031339, "grad_norm": 0.19732090830802917, "learning_rate": 3.772126276074926e-05, "loss": 0.7854, "step": 6268 }, { "epoch": 7.140170940170941, "grad_norm": 0.2178681492805481, "learning_rate": 3.771725166094512e-05, "loss": 0.5679, "step": 6269 }, { "epoch": 7.141310541310541, "grad_norm": 0.19348812103271484, "learning_rate": 3.771324011943587e-05, "loss": 0.6625, "step": 6270 }, { "epoch": 7.1424501424501425, "grad_norm": 0.20072202384471893, "learning_rate": 3.770922813636082e-05, "loss": 0.7176, "step": 6271 }, { "epoch": 7.143589743589744, "grad_norm": 0.17724600434303284, "learning_rate": 3.770521571185933e-05, "loss": 0.8514, "step": 6272 }, { "epoch": 7.1447293447293445, "grad_norm": 0.1965605765581131, "learning_rate": 3.770120284607076e-05, "loss": 0.772, "step": 6273 }, { "epoch": 7.145868945868946, "grad_norm": 0.19905537366867065, "learning_rate": 3.7697189539134484e-05, "loss": 0.6396, "step": 6274 }, { "epoch": 7.147008547008547, "grad_norm": 0.16375818848609924, "learning_rate": 3.769317579118989e-05, "loss": 0.7937, "step": 6275 }, { "epoch": 7.148148148148148, "grad_norm": 0.17767931520938873, "learning_rate": 3.768916160237641e-05, "loss": 0.7634, "step": 6276 }, { "epoch": 7.149287749287749, "grad_norm": 0.2176171839237213, "learning_rate": 3.768514697283345e-05, "loss": 0.6005, "step": 6277 }, { "epoch": 7.150427350427351, "grad_norm": 0.2047884464263916, "learning_rate": 3.768113190270044e-05, "loss": 0.6384, "step": 6278 }, { "epoch": 7.151566951566951, "grad_norm": 0.2172878235578537, "learning_rate": 3.767711639211685e-05, "loss": 0.6811, "step": 6279 }, { "epoch": 7.152706552706553, "grad_norm": 0.22276462614536285, "learning_rate": 3.767310044122215e-05, "loss": 0.7699, "step": 6280 }, { "epoch": 7.153846153846154, "grad_norm": 0.21287479996681213, "learning_rate": 3.766908405015582e-05, "loss": 0.557, "step": 6281 }, { "epoch": 7.154985754985755, "grad_norm": 0.16105368733406067, "learning_rate": 3.766506721905735e-05, "loss": 0.8144, "step": 6282 }, { "epoch": 7.156125356125356, "grad_norm": 0.15614740550518036, "learning_rate": 3.766104994806628e-05, "loss": 0.856, "step": 6283 }, { "epoch": 7.157264957264958, "grad_norm": 0.22421568632125854, "learning_rate": 3.765703223732212e-05, "loss": 0.7493, "step": 6284 }, { "epoch": 7.158404558404558, "grad_norm": 0.19010932743549347, "learning_rate": 3.765301408696443e-05, "loss": 0.7169, "step": 6285 }, { "epoch": 7.15954415954416, "grad_norm": 0.17612417042255402, "learning_rate": 3.7648995497132755e-05, "loss": 1.0025, "step": 6286 }, { "epoch": 7.160683760683761, "grad_norm": 0.2417812943458557, "learning_rate": 3.76449764679667e-05, "loss": 0.5729, "step": 6287 }, { "epoch": 7.1618233618233615, "grad_norm": 0.15472744405269623, "learning_rate": 3.7640956999605815e-05, "loss": 0.8218, "step": 6288 }, { "epoch": 7.162962962962963, "grad_norm": 0.1943567395210266, "learning_rate": 3.763693709218975e-05, "loss": 0.7593, "step": 6289 }, { "epoch": 7.164102564102564, "grad_norm": 0.2067941129207611, "learning_rate": 3.7632916745858095e-05, "loss": 0.5406, "step": 6290 }, { "epoch": 7.165242165242165, "grad_norm": 0.1726512312889099, "learning_rate": 3.76288959607505e-05, "loss": 0.7521, "step": 6291 }, { "epoch": 7.166381766381766, "grad_norm": 0.18153612315654755, "learning_rate": 3.7624874737006635e-05, "loss": 0.6399, "step": 6292 }, { "epoch": 7.167521367521368, "grad_norm": 0.1801934391260147, "learning_rate": 3.762085307476614e-05, "loss": 0.7035, "step": 6293 }, { "epoch": 7.168660968660968, "grad_norm": 0.2601912319660187, "learning_rate": 3.761683097416871e-05, "loss": 0.612, "step": 6294 }, { "epoch": 7.16980056980057, "grad_norm": 0.20805248618125916, "learning_rate": 3.761280843535404e-05, "loss": 0.7866, "step": 6295 }, { "epoch": 7.170940170940171, "grad_norm": 0.2429533451795578, "learning_rate": 3.760878545846186e-05, "loss": 0.5853, "step": 6296 }, { "epoch": 7.172079772079772, "grad_norm": 0.17130783200263977, "learning_rate": 3.760476204363187e-05, "loss": 0.8799, "step": 6297 }, { "epoch": 7.173219373219373, "grad_norm": 0.15848739445209503, "learning_rate": 3.760073819100384e-05, "loss": 0.9686, "step": 6298 }, { "epoch": 7.174358974358975, "grad_norm": 0.18064692616462708, "learning_rate": 3.7596713900717524e-05, "loss": 0.6252, "step": 6299 }, { "epoch": 7.175498575498575, "grad_norm": 0.17840689420700073, "learning_rate": 3.7592689172912696e-05, "loss": 0.8896, "step": 6300 }, { "epoch": 7.176638176638177, "grad_norm": 0.16646137833595276, "learning_rate": 3.758866400772913e-05, "loss": 0.9601, "step": 6301 }, { "epoch": 7.177777777777778, "grad_norm": 0.25890523195266724, "learning_rate": 3.758463840530665e-05, "loss": 0.5707, "step": 6302 }, { "epoch": 7.178917378917379, "grad_norm": 0.17094452679157257, "learning_rate": 3.758061236578507e-05, "loss": 0.7058, "step": 6303 }, { "epoch": 7.18005698005698, "grad_norm": 0.16663303971290588, "learning_rate": 3.757658588930423e-05, "loss": 0.8132, "step": 6304 }, { "epoch": 7.181196581196581, "grad_norm": 0.2043352574110031, "learning_rate": 3.757255897600397e-05, "loss": 0.7289, "step": 6305 }, { "epoch": 7.182336182336182, "grad_norm": 0.1831844449043274, "learning_rate": 3.756853162602417e-05, "loss": 0.7182, "step": 6306 }, { "epoch": 7.183475783475783, "grad_norm": 0.20970883965492249, "learning_rate": 3.756450383950469e-05, "loss": 0.6776, "step": 6307 }, { "epoch": 7.184615384615385, "grad_norm": 0.15798361599445343, "learning_rate": 3.7560475616585456e-05, "loss": 1.0042, "step": 6308 }, { "epoch": 7.185754985754985, "grad_norm": 0.19874326884746552, "learning_rate": 3.755644695740635e-05, "loss": 0.829, "step": 6309 }, { "epoch": 7.186894586894587, "grad_norm": 0.23814614117145538, "learning_rate": 3.755241786210731e-05, "loss": 0.5765, "step": 6310 }, { "epoch": 7.188034188034188, "grad_norm": 0.1938910186290741, "learning_rate": 3.7548388330828296e-05, "loss": 0.6619, "step": 6311 }, { "epoch": 7.189173789173789, "grad_norm": 0.151244655251503, "learning_rate": 3.754435836370923e-05, "loss": 0.8015, "step": 6312 }, { "epoch": 7.19031339031339, "grad_norm": 0.2609013020992279, "learning_rate": 3.7540327960890104e-05, "loss": 0.5556, "step": 6313 }, { "epoch": 7.191452991452992, "grad_norm": 0.19786864519119263, "learning_rate": 3.75362971225109e-05, "loss": 0.7076, "step": 6314 }, { "epoch": 7.192592592592592, "grad_norm": 0.20322565734386444, "learning_rate": 3.753226584871163e-05, "loss": 0.5222, "step": 6315 }, { "epoch": 7.193732193732194, "grad_norm": 0.16902421414852142, "learning_rate": 3.7528234139632294e-05, "loss": 0.8493, "step": 6316 }, { "epoch": 7.194871794871795, "grad_norm": 0.21259194612503052, "learning_rate": 3.7524201995412945e-05, "loss": 0.7068, "step": 6317 }, { "epoch": 7.196011396011396, "grad_norm": 0.22819651663303375, "learning_rate": 3.7520169416193605e-05, "loss": 0.6079, "step": 6318 }, { "epoch": 7.197150997150997, "grad_norm": 0.19091029465198517, "learning_rate": 3.751613640211435e-05, "loss": 1.0154, "step": 6319 }, { "epoch": 7.1982905982905985, "grad_norm": 0.18407659232616425, "learning_rate": 3.751210295331527e-05, "loss": 0.7085, "step": 6320 }, { "epoch": 7.199430199430199, "grad_norm": 0.24412783980369568, "learning_rate": 3.7508069069936444e-05, "loss": 0.4731, "step": 6321 }, { "epoch": 7.2005698005698004, "grad_norm": 0.16923773288726807, "learning_rate": 3.750403475211798e-05, "loss": 0.9468, "step": 6322 }, { "epoch": 7.201709401709402, "grad_norm": 0.23878052830696106, "learning_rate": 3.7500000000000003e-05, "loss": 0.7508, "step": 6323 }, { "epoch": 7.202849002849002, "grad_norm": 0.18814179301261902, "learning_rate": 3.749596481372265e-05, "loss": 0.6745, "step": 6324 }, { "epoch": 7.203988603988604, "grad_norm": 0.19777780771255493, "learning_rate": 3.749192919342607e-05, "loss": 0.8836, "step": 6325 }, { "epoch": 7.205128205128205, "grad_norm": 0.17554792761802673, "learning_rate": 3.7487893139250446e-05, "loss": 0.8289, "step": 6326 }, { "epoch": 7.206267806267807, "grad_norm": 0.19475427269935608, "learning_rate": 3.748385665133595e-05, "loss": 0.8231, "step": 6327 }, { "epoch": 7.207407407407407, "grad_norm": 0.19629493355751038, "learning_rate": 3.7479819729822776e-05, "loss": 0.7651, "step": 6328 }, { "epoch": 7.208547008547009, "grad_norm": 0.20756733417510986, "learning_rate": 3.747578237485114e-05, "loss": 0.6479, "step": 6329 }, { "epoch": 7.20968660968661, "grad_norm": 0.2086726725101471, "learning_rate": 3.7471744586561286e-05, "loss": 0.6164, "step": 6330 }, { "epoch": 7.210826210826211, "grad_norm": 0.17218539118766785, "learning_rate": 3.746770636509344e-05, "loss": 0.7861, "step": 6331 }, { "epoch": 7.211965811965812, "grad_norm": 0.20781554281711578, "learning_rate": 3.746366771058786e-05, "loss": 0.5785, "step": 6332 }, { "epoch": 7.2131054131054135, "grad_norm": 0.2016250044107437, "learning_rate": 3.7459628623184826e-05, "loss": 0.7309, "step": 6333 }, { "epoch": 7.214245014245014, "grad_norm": 0.18102596700191498, "learning_rate": 3.7455589103024635e-05, "loss": 0.8393, "step": 6334 }, { "epoch": 7.2153846153846155, "grad_norm": 0.19947540760040283, "learning_rate": 3.7451549150247564e-05, "loss": 0.591, "step": 6335 }, { "epoch": 7.216524216524217, "grad_norm": 0.1864817887544632, "learning_rate": 3.744750876499396e-05, "loss": 0.7529, "step": 6336 }, { "epoch": 7.2176638176638175, "grad_norm": 0.21457643806934357, "learning_rate": 3.7443467947404154e-05, "loss": 0.7302, "step": 6337 }, { "epoch": 7.218803418803419, "grad_norm": 0.1883746236562729, "learning_rate": 3.743942669761848e-05, "loss": 0.828, "step": 6338 }, { "epoch": 7.21994301994302, "grad_norm": 0.1932716816663742, "learning_rate": 3.7435385015777306e-05, "loss": 0.6754, "step": 6339 }, { "epoch": 7.221082621082621, "grad_norm": 0.15746288001537323, "learning_rate": 3.7431342902021015e-05, "loss": 1.0436, "step": 6340 }, { "epoch": 7.222222222222222, "grad_norm": 0.16571207344532013, "learning_rate": 3.742730035649e-05, "loss": 0.9344, "step": 6341 }, { "epoch": 7.223361823361824, "grad_norm": 0.21603375673294067, "learning_rate": 3.742325737932466e-05, "loss": 0.6403, "step": 6342 }, { "epoch": 7.224501424501424, "grad_norm": 0.1699799746274948, "learning_rate": 3.741921397066544e-05, "loss": 0.8142, "step": 6343 }, { "epoch": 7.225641025641026, "grad_norm": 0.19729934632778168, "learning_rate": 3.741517013065276e-05, "loss": 0.7122, "step": 6344 }, { "epoch": 7.226780626780627, "grad_norm": 0.22342008352279663, "learning_rate": 3.741112585942708e-05, "loss": 0.5988, "step": 6345 }, { "epoch": 7.227920227920228, "grad_norm": 0.21230460703372955, "learning_rate": 3.7407081157128865e-05, "loss": 0.6176, "step": 6346 }, { "epoch": 7.229059829059829, "grad_norm": 0.20271845161914825, "learning_rate": 3.7403036023898593e-05, "loss": 0.7986, "step": 6347 }, { "epoch": 7.230199430199431, "grad_norm": 0.16706155240535736, "learning_rate": 3.7398990459876784e-05, "loss": 0.7245, "step": 6348 }, { "epoch": 7.231339031339031, "grad_norm": 0.1747700572013855, "learning_rate": 3.739494446520394e-05, "loss": 0.9258, "step": 6349 }, { "epoch": 7.2324786324786325, "grad_norm": 0.22044582664966583, "learning_rate": 3.739089804002059e-05, "loss": 0.7384, "step": 6350 }, { "epoch": 7.233618233618234, "grad_norm": 0.20682981610298157, "learning_rate": 3.738685118446727e-05, "loss": 0.6292, "step": 6351 }, { "epoch": 7.2347578347578345, "grad_norm": 0.22375862300395966, "learning_rate": 3.738280389868455e-05, "loss": 0.6704, "step": 6352 }, { "epoch": 7.235897435897436, "grad_norm": 0.20786075294017792, "learning_rate": 3.737875618281299e-05, "loss": 0.787, "step": 6353 }, { "epoch": 7.237037037037037, "grad_norm": 0.19915176928043365, "learning_rate": 3.737470803699319e-05, "loss": 0.727, "step": 6354 }, { "epoch": 7.238176638176638, "grad_norm": 0.19247373938560486, "learning_rate": 3.737065946136575e-05, "loss": 0.7073, "step": 6355 }, { "epoch": 7.239316239316239, "grad_norm": 0.2080560028553009, "learning_rate": 3.736661045607129e-05, "loss": 0.6939, "step": 6356 }, { "epoch": 7.240455840455841, "grad_norm": 0.18483971059322357, "learning_rate": 3.736256102125043e-05, "loss": 0.731, "step": 6357 }, { "epoch": 7.241595441595441, "grad_norm": 0.1651158183813095, "learning_rate": 3.7358511157043834e-05, "loss": 0.7882, "step": 6358 }, { "epoch": 7.242735042735043, "grad_norm": 0.17980670928955078, "learning_rate": 3.7354460863592164e-05, "loss": 0.7534, "step": 6359 }, { "epoch": 7.243874643874644, "grad_norm": 0.18468749523162842, "learning_rate": 3.735041014103609e-05, "loss": 0.8794, "step": 6360 }, { "epoch": 7.245014245014245, "grad_norm": 0.17344863712787628, "learning_rate": 3.734635898951631e-05, "loss": 0.7689, "step": 6361 }, { "epoch": 7.246153846153846, "grad_norm": 0.20821402966976166, "learning_rate": 3.7342307409173525e-05, "loss": 0.6134, "step": 6362 }, { "epoch": 7.247293447293448, "grad_norm": 0.24080470204353333, "learning_rate": 3.733825540014846e-05, "loss": 0.4864, "step": 6363 }, { "epoch": 7.248433048433048, "grad_norm": 0.24453848600387573, "learning_rate": 3.7334202962581866e-05, "loss": 0.7976, "step": 6364 }, { "epoch": 7.24957264957265, "grad_norm": 0.1931018829345703, "learning_rate": 3.7330150096614476e-05, "loss": 0.6606, "step": 6365 }, { "epoch": 7.250712250712251, "grad_norm": 0.15646785497665405, "learning_rate": 3.7326096802387064e-05, "loss": 0.8076, "step": 6366 }, { "epoch": 7.2518518518518515, "grad_norm": 0.20232032239437103, "learning_rate": 3.732204308004042e-05, "loss": 0.7084, "step": 6367 }, { "epoch": 7.252991452991453, "grad_norm": 0.20485526323318481, "learning_rate": 3.731798892971533e-05, "loss": 0.7423, "step": 6368 }, { "epoch": 7.254131054131054, "grad_norm": 0.2187482714653015, "learning_rate": 3.731393435155261e-05, "loss": 0.7667, "step": 6369 }, { "epoch": 7.255270655270655, "grad_norm": 0.16875623166561127, "learning_rate": 3.730987934569308e-05, "loss": 0.7366, "step": 6370 }, { "epoch": 7.256410256410256, "grad_norm": 0.17663295567035675, "learning_rate": 3.7305823912277606e-05, "loss": 0.6995, "step": 6371 }, { "epoch": 7.257549857549858, "grad_norm": 0.2079482078552246, "learning_rate": 3.7301768051447014e-05, "loss": 0.7309, "step": 6372 }, { "epoch": 7.258689458689458, "grad_norm": 0.2204352468252182, "learning_rate": 3.7297711763342186e-05, "loss": 0.6636, "step": 6373 }, { "epoch": 7.25982905982906, "grad_norm": 0.19809284806251526, "learning_rate": 3.7293655048104006e-05, "loss": 0.6772, "step": 6374 }, { "epoch": 7.260968660968661, "grad_norm": 0.21955761313438416, "learning_rate": 3.7289597905873387e-05, "loss": 0.6774, "step": 6375 }, { "epoch": 7.262108262108262, "grad_norm": 0.2253624051809311, "learning_rate": 3.7285540336791233e-05, "loss": 0.628, "step": 6376 }, { "epoch": 7.263247863247863, "grad_norm": 0.19389750063419342, "learning_rate": 3.728148234099848e-05, "loss": 0.6518, "step": 6377 }, { "epoch": 7.264387464387465, "grad_norm": 0.19566278159618378, "learning_rate": 3.727742391863607e-05, "loss": 0.8202, "step": 6378 }, { "epoch": 7.265527065527065, "grad_norm": 0.17338863015174866, "learning_rate": 3.727336506984497e-05, "loss": 0.8103, "step": 6379 }, { "epoch": 7.266666666666667, "grad_norm": 0.1902279108762741, "learning_rate": 3.7269305794766135e-05, "loss": 0.6944, "step": 6380 }, { "epoch": 7.267806267806268, "grad_norm": 0.16620168089866638, "learning_rate": 3.726524609354058e-05, "loss": 0.9399, "step": 6381 }, { "epoch": 7.268945868945869, "grad_norm": 0.21054476499557495, "learning_rate": 3.7261185966309295e-05, "loss": 0.7023, "step": 6382 }, { "epoch": 7.27008547008547, "grad_norm": 0.22948648035526276, "learning_rate": 3.725712541321331e-05, "loss": 0.6445, "step": 6383 }, { "epoch": 7.2712250712250714, "grad_norm": 0.2025534063577652, "learning_rate": 3.7253064434393636e-05, "loss": 0.6077, "step": 6384 }, { "epoch": 7.272364672364672, "grad_norm": 0.21433599293231964, "learning_rate": 3.7249003029991346e-05, "loss": 0.7714, "step": 6385 }, { "epoch": 7.273504273504273, "grad_norm": 0.17267006635665894, "learning_rate": 3.72449412001475e-05, "loss": 0.9524, "step": 6386 }, { "epoch": 7.274643874643875, "grad_norm": 0.2087736427783966, "learning_rate": 3.7240878945003174e-05, "loss": 0.7637, "step": 6387 }, { "epoch": 7.275783475783475, "grad_norm": 0.2103361338376999, "learning_rate": 3.7236816264699456e-05, "loss": 0.609, "step": 6388 }, { "epoch": 7.276923076923077, "grad_norm": 0.20580655336380005, "learning_rate": 3.723275315937746e-05, "loss": 0.7603, "step": 6389 }, { "epoch": 7.278062678062678, "grad_norm": 0.19377201795578003, "learning_rate": 3.7228689629178304e-05, "loss": 0.9069, "step": 6390 }, { "epoch": 7.279202279202279, "grad_norm": 0.20012764632701874, "learning_rate": 3.722462567424312e-05, "loss": 0.7909, "step": 6391 }, { "epoch": 7.28034188034188, "grad_norm": 0.1709682047367096, "learning_rate": 3.722056129471309e-05, "loss": 0.8574, "step": 6392 }, { "epoch": 7.281481481481482, "grad_norm": 0.1687890887260437, "learning_rate": 3.721649649072934e-05, "loss": 0.7592, "step": 6393 }, { "epoch": 7.282621082621082, "grad_norm": 0.1789243072271347, "learning_rate": 3.721243126243308e-05, "loss": 0.916, "step": 6394 }, { "epoch": 7.283760683760684, "grad_norm": 0.2043289691209793, "learning_rate": 3.720836560996549e-05, "loss": 0.7166, "step": 6395 }, { "epoch": 7.284900284900285, "grad_norm": 0.19226306676864624, "learning_rate": 3.72042995334678e-05, "loss": 0.7854, "step": 6396 }, { "epoch": 7.286039886039886, "grad_norm": 0.1788424402475357, "learning_rate": 3.720023303308122e-05, "loss": 0.7621, "step": 6397 }, { "epoch": 7.287179487179487, "grad_norm": 0.19071969389915466, "learning_rate": 3.719616610894699e-05, "loss": 0.6767, "step": 6398 }, { "epoch": 7.2883190883190885, "grad_norm": 0.19216488301753998, "learning_rate": 3.7192098761206386e-05, "loss": 0.6823, "step": 6399 }, { "epoch": 7.289458689458689, "grad_norm": 0.1817394345998764, "learning_rate": 3.7188030990000656e-05, "loss": 0.6638, "step": 6400 }, { "epoch": 7.2905982905982905, "grad_norm": 0.20284122228622437, "learning_rate": 3.718396279547109e-05, "loss": 0.9087, "step": 6401 }, { "epoch": 7.291737891737892, "grad_norm": 0.24623697996139526, "learning_rate": 3.717989417775899e-05, "loss": 0.4837, "step": 6402 }, { "epoch": 7.292877492877492, "grad_norm": 0.19385245442390442, "learning_rate": 3.7175825137005674e-05, "loss": 0.7545, "step": 6403 }, { "epoch": 7.294017094017094, "grad_norm": 0.18305125832557678, "learning_rate": 3.7171755673352466e-05, "loss": 0.8107, "step": 6404 }, { "epoch": 7.295156695156695, "grad_norm": 0.1629340946674347, "learning_rate": 3.716768578694071e-05, "loss": 0.7998, "step": 6405 }, { "epoch": 7.296296296296296, "grad_norm": 0.19728673994541168, "learning_rate": 3.716361547791177e-05, "loss": 0.8426, "step": 6406 }, { "epoch": 7.297435897435897, "grad_norm": 0.21535879373550415, "learning_rate": 3.715954474640701e-05, "loss": 0.7026, "step": 6407 }, { "epoch": 7.298575498575499, "grad_norm": 0.24965982139110565, "learning_rate": 3.715547359256781e-05, "loss": 0.6492, "step": 6408 }, { "epoch": 7.2997150997151, "grad_norm": 0.17369042336940765, "learning_rate": 3.715140201653561e-05, "loss": 1.0365, "step": 6409 }, { "epoch": 7.300854700854701, "grad_norm": 0.23729373514652252, "learning_rate": 3.7147330018451784e-05, "loss": 0.567, "step": 6410 }, { "epoch": 7.301994301994302, "grad_norm": 0.20309162139892578, "learning_rate": 3.714325759845779e-05, "loss": 0.8097, "step": 6411 }, { "epoch": 7.3031339031339035, "grad_norm": 0.1969907432794571, "learning_rate": 3.7139184756695055e-05, "loss": 0.6219, "step": 6412 }, { "epoch": 7.304273504273504, "grad_norm": 0.21026773750782013, "learning_rate": 3.7135111493305056e-05, "loss": 0.5973, "step": 6413 }, { "epoch": 7.3054131054131055, "grad_norm": 0.17490412294864655, "learning_rate": 3.713103780842927e-05, "loss": 0.9277, "step": 6414 }, { "epoch": 7.306552706552707, "grad_norm": 0.1912621706724167, "learning_rate": 3.712696370220917e-05, "loss": 0.7399, "step": 6415 }, { "epoch": 7.3076923076923075, "grad_norm": 0.19970478117465973, "learning_rate": 3.712288917478627e-05, "loss": 0.8382, "step": 6416 }, { "epoch": 7.308831908831909, "grad_norm": 0.3479088544845581, "learning_rate": 3.7118814226302104e-05, "loss": 0.4499, "step": 6417 }, { "epoch": 7.30997150997151, "grad_norm": 0.16097833216190338, "learning_rate": 3.711473885689819e-05, "loss": 0.8032, "step": 6418 }, { "epoch": 7.311111111111111, "grad_norm": 0.19173267483711243, "learning_rate": 3.7110663066716065e-05, "loss": 0.6232, "step": 6419 }, { "epoch": 7.312250712250712, "grad_norm": 0.18430247902870178, "learning_rate": 3.7106586855897326e-05, "loss": 0.8214, "step": 6420 }, { "epoch": 7.313390313390314, "grad_norm": 0.15976233780384064, "learning_rate": 3.710251022458352e-05, "loss": 1.0789, "step": 6421 }, { "epoch": 7.314529914529914, "grad_norm": 0.18958532810211182, "learning_rate": 3.709843317291626e-05, "loss": 0.7529, "step": 6422 }, { "epoch": 7.315669515669516, "grad_norm": 0.15534301102161407, "learning_rate": 3.7094355701037134e-05, "loss": 0.8726, "step": 6423 }, { "epoch": 7.316809116809117, "grad_norm": 0.19631779193878174, "learning_rate": 3.709027780908778e-05, "loss": 0.6808, "step": 6424 }, { "epoch": 7.317948717948718, "grad_norm": 0.18666972219944, "learning_rate": 3.7086199497209825e-05, "loss": 0.7709, "step": 6425 }, { "epoch": 7.319088319088319, "grad_norm": 0.18058015406131744, "learning_rate": 3.708212076554494e-05, "loss": 0.7455, "step": 6426 }, { "epoch": 7.320227920227921, "grad_norm": 0.24110624194145203, "learning_rate": 3.707804161423476e-05, "loss": 0.611, "step": 6427 }, { "epoch": 7.321367521367521, "grad_norm": 0.20111285150051117, "learning_rate": 3.707396204342099e-05, "loss": 0.7382, "step": 6428 }, { "epoch": 7.3225071225071225, "grad_norm": 0.1679603010416031, "learning_rate": 3.706988205324531e-05, "loss": 0.722, "step": 6429 }, { "epoch": 7.323646723646724, "grad_norm": 0.2117585837841034, "learning_rate": 3.706580164384943e-05, "loss": 0.7155, "step": 6430 }, { "epoch": 7.3247863247863245, "grad_norm": 0.17562855780124664, "learning_rate": 3.706172081537509e-05, "loss": 0.7661, "step": 6431 }, { "epoch": 7.325925925925926, "grad_norm": 0.2086774706840515, "learning_rate": 3.7057639567964006e-05, "loss": 0.6875, "step": 6432 }, { "epoch": 7.327065527065527, "grad_norm": 0.21073679625988007, "learning_rate": 3.705355790175794e-05, "loss": 0.5822, "step": 6433 }, { "epoch": 7.328205128205128, "grad_norm": 0.20980389416217804, "learning_rate": 3.704947581689866e-05, "loss": 0.7232, "step": 6434 }, { "epoch": 7.329344729344729, "grad_norm": 0.2295154333114624, "learning_rate": 3.7045393313527955e-05, "loss": 0.6093, "step": 6435 }, { "epoch": 7.330484330484331, "grad_norm": 0.18639633059501648, "learning_rate": 3.704131039178761e-05, "loss": 0.6673, "step": 6436 }, { "epoch": 7.331623931623931, "grad_norm": 0.17542113363742828, "learning_rate": 3.703722705181945e-05, "loss": 0.8003, "step": 6437 }, { "epoch": 7.332763532763533, "grad_norm": 0.17164956033229828, "learning_rate": 3.7033143293765284e-05, "loss": 0.667, "step": 6438 }, { "epoch": 7.333903133903134, "grad_norm": 0.16480392217636108, "learning_rate": 3.7029059117766964e-05, "loss": 0.7339, "step": 6439 }, { "epoch": 7.335042735042735, "grad_norm": 0.22298744320869446, "learning_rate": 3.7024974523966335e-05, "loss": 0.7044, "step": 6440 }, { "epoch": 7.336182336182336, "grad_norm": 0.20570577681064606, "learning_rate": 3.7020889512505274e-05, "loss": 0.7658, "step": 6441 }, { "epoch": 7.337321937321938, "grad_norm": 0.1652759164571762, "learning_rate": 3.701680408352567e-05, "loss": 0.8559, "step": 6442 }, { "epoch": 7.338461538461538, "grad_norm": 0.17748406529426575, "learning_rate": 3.701271823716941e-05, "loss": 0.9425, "step": 6443 }, { "epoch": 7.33960113960114, "grad_norm": 0.23712830245494843, "learning_rate": 3.7008631973578404e-05, "loss": 0.6728, "step": 6444 }, { "epoch": 7.340740740740741, "grad_norm": 0.18479804694652557, "learning_rate": 3.700454529289459e-05, "loss": 0.9347, "step": 6445 }, { "epoch": 7.3418803418803416, "grad_norm": 0.23030495643615723, "learning_rate": 3.700045819525991e-05, "loss": 0.7154, "step": 6446 }, { "epoch": 7.343019943019943, "grad_norm": 0.18232811987400055, "learning_rate": 3.6996370680816306e-05, "loss": 0.6643, "step": 6447 }, { "epoch": 7.344159544159544, "grad_norm": 0.2164715677499771, "learning_rate": 3.699228274970577e-05, "loss": 0.5755, "step": 6448 }, { "epoch": 7.345299145299145, "grad_norm": 0.17794108390808105, "learning_rate": 3.6988194402070266e-05, "loss": 0.8348, "step": 6449 }, { "epoch": 7.346438746438746, "grad_norm": 0.21316228806972504, "learning_rate": 3.698410563805181e-05, "loss": 0.6755, "step": 6450 }, { "epoch": 7.347578347578348, "grad_norm": 0.21540795266628265, "learning_rate": 3.69800164577924e-05, "loss": 0.6251, "step": 6451 }, { "epoch": 7.348717948717948, "grad_norm": 0.24052973091602325, "learning_rate": 3.697592686143408e-05, "loss": 0.4998, "step": 6452 }, { "epoch": 7.34985754985755, "grad_norm": 0.17822284996509552, "learning_rate": 3.697183684911888e-05, "loss": 0.8513, "step": 6453 }, { "epoch": 7.350997150997151, "grad_norm": 0.26599130034446716, "learning_rate": 3.696774642098887e-05, "loss": 0.6429, "step": 6454 }, { "epoch": 7.352136752136752, "grad_norm": 0.15794286131858826, "learning_rate": 3.696365557718611e-05, "loss": 0.8733, "step": 6455 }, { "epoch": 7.353276353276353, "grad_norm": 0.17302434146404266, "learning_rate": 3.69595643178527e-05, "loss": 0.7269, "step": 6456 }, { "epoch": 7.354415954415955, "grad_norm": 0.1697869896888733, "learning_rate": 3.6955472643130716e-05, "loss": 0.705, "step": 6457 }, { "epoch": 7.355555555555555, "grad_norm": 0.18360832333564758, "learning_rate": 3.6951380553162305e-05, "loss": 0.8059, "step": 6458 }, { "epoch": 7.356695156695157, "grad_norm": 0.23724867403507233, "learning_rate": 3.6947288048089576e-05, "loss": 0.6416, "step": 6459 }, { "epoch": 7.357834757834758, "grad_norm": 0.2522468864917755, "learning_rate": 3.694319512805468e-05, "loss": 0.6654, "step": 6460 }, { "epoch": 7.358974358974359, "grad_norm": 0.17250117659568787, "learning_rate": 3.693910179319977e-05, "loss": 0.7498, "step": 6461 }, { "epoch": 7.36011396011396, "grad_norm": 0.22597560286521912, "learning_rate": 3.693500804366702e-05, "loss": 0.6864, "step": 6462 }, { "epoch": 7.3612535612535615, "grad_norm": 0.17453262209892273, "learning_rate": 3.6930913879598624e-05, "loss": 0.7904, "step": 6463 }, { "epoch": 7.362393162393162, "grad_norm": 0.1809937208890915, "learning_rate": 3.692681930113678e-05, "loss": 0.7803, "step": 6464 }, { "epoch": 7.363532763532763, "grad_norm": 0.17912113666534424, "learning_rate": 3.69227243084237e-05, "loss": 0.7296, "step": 6465 }, { "epoch": 7.364672364672365, "grad_norm": 0.2626039683818817, "learning_rate": 3.6918628901601614e-05, "loss": 0.4733, "step": 6466 }, { "epoch": 7.365811965811965, "grad_norm": 0.20799420773983002, "learning_rate": 3.691453308081278e-05, "loss": 0.8651, "step": 6467 }, { "epoch": 7.366951566951567, "grad_norm": 0.17873750627040863, "learning_rate": 3.691043684619943e-05, "loss": 0.8143, "step": 6468 }, { "epoch": 7.368091168091168, "grad_norm": 0.1866135448217392, "learning_rate": 3.690634019790387e-05, "loss": 0.7059, "step": 6469 }, { "epoch": 7.36923076923077, "grad_norm": 0.2021060287952423, "learning_rate": 3.690224313606837e-05, "loss": 0.8078, "step": 6470 }, { "epoch": 7.37037037037037, "grad_norm": 0.22178183495998383, "learning_rate": 3.689814566083523e-05, "loss": 0.6736, "step": 6471 }, { "epoch": 7.371509971509972, "grad_norm": 0.19032502174377441, "learning_rate": 3.689404777234677e-05, "loss": 0.5757, "step": 6472 }, { "epoch": 7.372649572649573, "grad_norm": 0.18673716485500336, "learning_rate": 3.6889949470745325e-05, "loss": 0.6637, "step": 6473 }, { "epoch": 7.373789173789174, "grad_norm": 0.19262084364891052, "learning_rate": 3.6885850756173236e-05, "loss": 0.873, "step": 6474 }, { "epoch": 7.374928774928775, "grad_norm": 0.18548183143138885, "learning_rate": 3.688175162877287e-05, "loss": 0.7487, "step": 6475 }, { "epoch": 7.3760683760683765, "grad_norm": 0.2082408368587494, "learning_rate": 3.687765208868659e-05, "loss": 0.6987, "step": 6476 }, { "epoch": 7.377207977207977, "grad_norm": 0.19280925393104553, "learning_rate": 3.687355213605679e-05, "loss": 0.633, "step": 6477 }, { "epoch": 7.3783475783475785, "grad_norm": 0.20687152445316315, "learning_rate": 3.686945177102587e-05, "loss": 0.5771, "step": 6478 }, { "epoch": 7.37948717948718, "grad_norm": 0.18748338520526886, "learning_rate": 3.6865350993736246e-05, "loss": 0.7257, "step": 6479 }, { "epoch": 7.3806267806267805, "grad_norm": 0.2174375206232071, "learning_rate": 3.6861249804330363e-05, "loss": 0.7366, "step": 6480 }, { "epoch": 7.381766381766382, "grad_norm": 0.250262051820755, "learning_rate": 3.685714820295064e-05, "loss": 0.5541, "step": 6481 }, { "epoch": 7.382905982905983, "grad_norm": 0.203110933303833, "learning_rate": 3.685304618973957e-05, "loss": 0.643, "step": 6482 }, { "epoch": 7.384045584045584, "grad_norm": 0.1578909307718277, "learning_rate": 3.68489437648396e-05, "loss": 0.9225, "step": 6483 }, { "epoch": 7.385185185185185, "grad_norm": 0.20081482827663422, "learning_rate": 3.684484092839323e-05, "loss": 0.7072, "step": 6484 }, { "epoch": 7.386324786324787, "grad_norm": 0.18147198855876923, "learning_rate": 3.684073768054296e-05, "loss": 0.8105, "step": 6485 }, { "epoch": 7.387464387464387, "grad_norm": 0.2797929346561432, "learning_rate": 3.683663402143131e-05, "loss": 0.546, "step": 6486 }, { "epoch": 7.388603988603989, "grad_norm": 0.21483547985553741, "learning_rate": 3.6832529951200804e-05, "loss": 0.7264, "step": 6487 }, { "epoch": 7.38974358974359, "grad_norm": 0.1595175713300705, "learning_rate": 3.6828425469994e-05, "loss": 0.8551, "step": 6488 }, { "epoch": 7.390883190883191, "grad_norm": 0.1573689579963684, "learning_rate": 3.682432057795345e-05, "loss": 0.8054, "step": 6489 }, { "epoch": 7.392022792022792, "grad_norm": 0.21687830984592438, "learning_rate": 3.682021527522173e-05, "loss": 0.6885, "step": 6490 }, { "epoch": 7.3931623931623935, "grad_norm": 0.22625142335891724, "learning_rate": 3.681610956194143e-05, "loss": 0.5647, "step": 6491 }, { "epoch": 7.394301994301994, "grad_norm": 0.2016458511352539, "learning_rate": 3.6812003438255146e-05, "loss": 0.82, "step": 6492 }, { "epoch": 7.3954415954415955, "grad_norm": 0.23761455714702606, "learning_rate": 3.680789690430549e-05, "loss": 0.6841, "step": 6493 }, { "epoch": 7.396581196581197, "grad_norm": 0.18108633160591125, "learning_rate": 3.6803789960235124e-05, "loss": 0.7343, "step": 6494 }, { "epoch": 7.3977207977207975, "grad_norm": 0.18732987344264984, "learning_rate": 3.679968260618666e-05, "loss": 0.8916, "step": 6495 }, { "epoch": 7.398860398860399, "grad_norm": 0.19718796014785767, "learning_rate": 3.679557484230277e-05, "loss": 0.8594, "step": 6496 }, { "epoch": 7.4, "grad_norm": 0.2020762860774994, "learning_rate": 3.6791466668726124e-05, "loss": 0.6715, "step": 6497 }, { "epoch": 7.401139601139601, "grad_norm": 0.20565477013587952, "learning_rate": 3.678735808559942e-05, "loss": 0.764, "step": 6498 }, { "epoch": 7.402279202279202, "grad_norm": 0.15999308228492737, "learning_rate": 3.678324909306536e-05, "loss": 0.8565, "step": 6499 }, { "epoch": 7.403418803418804, "grad_norm": 0.18991701304912567, "learning_rate": 3.6779139691266646e-05, "loss": 0.7792, "step": 6500 }, { "epoch": 7.404558404558404, "grad_norm": 0.18039284646511078, "learning_rate": 3.677502988034602e-05, "loss": 0.802, "step": 6501 }, { "epoch": 7.405698005698006, "grad_norm": 0.24642571806907654, "learning_rate": 3.6770919660446215e-05, "loss": 0.5139, "step": 6502 }, { "epoch": 7.406837606837607, "grad_norm": 0.19874225556850433, "learning_rate": 3.676680903171001e-05, "loss": 0.6623, "step": 6503 }, { "epoch": 7.407977207977208, "grad_norm": 0.16910535097122192, "learning_rate": 3.676269799428017e-05, "loss": 0.7572, "step": 6504 }, { "epoch": 7.409116809116809, "grad_norm": 0.17467468976974487, "learning_rate": 3.675858654829948e-05, "loss": 0.7377, "step": 6505 }, { "epoch": 7.410256410256411, "grad_norm": 0.1878518909215927, "learning_rate": 3.6754474693910747e-05, "loss": 0.7612, "step": 6506 }, { "epoch": 7.411396011396011, "grad_norm": 0.2117360681295395, "learning_rate": 3.675036243125677e-05, "loss": 0.5843, "step": 6507 }, { "epoch": 7.4125356125356126, "grad_norm": 0.22221580147743225, "learning_rate": 3.674624976048041e-05, "loss": 0.668, "step": 6508 }, { "epoch": 7.413675213675214, "grad_norm": 0.18445390462875366, "learning_rate": 3.674213668172448e-05, "loss": 0.73, "step": 6509 }, { "epoch": 7.4148148148148145, "grad_norm": 0.20327810943126678, "learning_rate": 3.673802319513186e-05, "loss": 0.719, "step": 6510 }, { "epoch": 7.415954415954416, "grad_norm": 0.196486234664917, "learning_rate": 3.673390930084541e-05, "loss": 0.6646, "step": 6511 }, { "epoch": 7.417094017094017, "grad_norm": 0.1451769769191742, "learning_rate": 3.672979499900802e-05, "loss": 0.9495, "step": 6512 }, { "epoch": 7.418233618233618, "grad_norm": 0.19748978316783905, "learning_rate": 3.672568028976259e-05, "loss": 0.7174, "step": 6513 }, { "epoch": 7.419373219373219, "grad_norm": 0.22927576303482056, "learning_rate": 3.6721565173252046e-05, "loss": 0.5826, "step": 6514 }, { "epoch": 7.420512820512821, "grad_norm": 0.19660289585590363, "learning_rate": 3.6717449649619304e-05, "loss": 0.6604, "step": 6515 }, { "epoch": 7.421652421652421, "grad_norm": 0.17920668423175812, "learning_rate": 3.671333371900732e-05, "loss": 0.9116, "step": 6516 }, { "epoch": 7.422792022792023, "grad_norm": 0.2083631008863449, "learning_rate": 3.670921738155903e-05, "loss": 0.7514, "step": 6517 }, { "epoch": 7.423931623931624, "grad_norm": 0.15553569793701172, "learning_rate": 3.6705100637417435e-05, "loss": 0.9736, "step": 6518 }, { "epoch": 7.425071225071225, "grad_norm": 0.16961699724197388, "learning_rate": 3.6700983486725494e-05, "loss": 0.6818, "step": 6519 }, { "epoch": 7.426210826210826, "grad_norm": 0.20760264992713928, "learning_rate": 3.6696865929626224e-05, "loss": 0.7052, "step": 6520 }, { "epoch": 7.427350427350428, "grad_norm": 0.23884300887584686, "learning_rate": 3.669274796626263e-05, "loss": 0.5973, "step": 6521 }, { "epoch": 7.428490028490028, "grad_norm": 0.24128636717796326, "learning_rate": 3.668862959677776e-05, "loss": 0.4992, "step": 6522 }, { "epoch": 7.42962962962963, "grad_norm": 0.18885107338428497, "learning_rate": 3.668451082131462e-05, "loss": 0.7256, "step": 6523 }, { "epoch": 7.430769230769231, "grad_norm": 0.23238390684127808, "learning_rate": 3.668039164001629e-05, "loss": 0.6884, "step": 6524 }, { "epoch": 7.431908831908832, "grad_norm": 0.24449187517166138, "learning_rate": 3.6676272053025845e-05, "loss": 0.5963, "step": 6525 }, { "epoch": 7.433048433048433, "grad_norm": 0.19439825415611267, "learning_rate": 3.667215206048636e-05, "loss": 0.8346, "step": 6526 }, { "epoch": 7.434188034188034, "grad_norm": 0.19193816184997559, "learning_rate": 3.666803166254094e-05, "loss": 0.8142, "step": 6527 }, { "epoch": 7.435327635327635, "grad_norm": 0.176726832985878, "learning_rate": 3.666391085933268e-05, "loss": 0.793, "step": 6528 }, { "epoch": 7.436467236467236, "grad_norm": 0.18357855081558228, "learning_rate": 3.665978965100472e-05, "loss": 0.8347, "step": 6529 }, { "epoch": 7.437606837606838, "grad_norm": 0.17473123967647552, "learning_rate": 3.665566803770021e-05, "loss": 0.8472, "step": 6530 }, { "epoch": 7.438746438746438, "grad_norm": 0.21925023198127747, "learning_rate": 3.665154601956229e-05, "loss": 0.5886, "step": 6531 }, { "epoch": 7.43988603988604, "grad_norm": 0.1874500811100006, "learning_rate": 3.664742359673414e-05, "loss": 0.8922, "step": 6532 }, { "epoch": 7.441025641025641, "grad_norm": 0.2333570122718811, "learning_rate": 3.664330076935894e-05, "loss": 0.6662, "step": 6533 }, { "epoch": 7.442165242165242, "grad_norm": 0.19756264984607697, "learning_rate": 3.663917753757988e-05, "loss": 0.6103, "step": 6534 }, { "epoch": 7.443304843304843, "grad_norm": 0.1659994125366211, "learning_rate": 3.663505390154018e-05, "loss": 0.6724, "step": 6535 }, { "epoch": 7.444444444444445, "grad_norm": 0.16979587078094482, "learning_rate": 3.6630929861383055e-05, "loss": 0.8911, "step": 6536 }, { "epoch": 7.445584045584045, "grad_norm": 0.1934581845998764, "learning_rate": 3.662680541725176e-05, "loss": 0.7523, "step": 6537 }, { "epoch": 7.446723646723647, "grad_norm": 0.18018688261508942, "learning_rate": 3.662268056928953e-05, "loss": 0.8586, "step": 6538 }, { "epoch": 7.447863247863248, "grad_norm": 0.19800017774105072, "learning_rate": 3.6618555317639645e-05, "loss": 0.7885, "step": 6539 }, { "epoch": 7.449002849002849, "grad_norm": 0.21024231612682343, "learning_rate": 3.6614429662445385e-05, "loss": 0.7404, "step": 6540 }, { "epoch": 7.45014245014245, "grad_norm": 0.1599353402853012, "learning_rate": 3.661030360385004e-05, "loss": 0.8066, "step": 6541 }, { "epoch": 7.4512820512820515, "grad_norm": 0.19375742971897125, "learning_rate": 3.660617714199692e-05, "loss": 0.6229, "step": 6542 }, { "epoch": 7.452421652421652, "grad_norm": 0.19025281071662903, "learning_rate": 3.660205027702935e-05, "loss": 0.8856, "step": 6543 }, { "epoch": 7.453561253561253, "grad_norm": 0.20846869051456451, "learning_rate": 3.6597923009090676e-05, "loss": 0.6475, "step": 6544 }, { "epoch": 7.454700854700855, "grad_norm": 0.23640169203281403, "learning_rate": 3.659379533832423e-05, "loss": 0.721, "step": 6545 }, { "epoch": 7.455840455840455, "grad_norm": 0.1847938895225525, "learning_rate": 3.6589667264873406e-05, "loss": 0.8238, "step": 6546 }, { "epoch": 7.456980056980057, "grad_norm": 0.17123650014400482, "learning_rate": 3.658553878888155e-05, "loss": 0.8123, "step": 6547 }, { "epoch": 7.458119658119658, "grad_norm": 0.19567157328128815, "learning_rate": 3.658140991049208e-05, "loss": 0.8075, "step": 6548 }, { "epoch": 7.459259259259259, "grad_norm": 0.19508473575115204, "learning_rate": 3.657728062984839e-05, "loss": 0.6899, "step": 6549 }, { "epoch": 7.46039886039886, "grad_norm": 0.21415705978870392, "learning_rate": 3.657315094709391e-05, "loss": 0.6703, "step": 6550 }, { "epoch": 7.461538461538462, "grad_norm": 0.2152075618505478, "learning_rate": 3.656902086237207e-05, "loss": 0.7301, "step": 6551 }, { "epoch": 7.462678062678062, "grad_norm": 0.15344300866127014, "learning_rate": 3.656489037582632e-05, "loss": 0.8336, "step": 6552 }, { "epoch": 7.463817663817664, "grad_norm": 0.19457511603832245, "learning_rate": 3.6560759487600125e-05, "loss": 0.7649, "step": 6553 }, { "epoch": 7.464957264957265, "grad_norm": 0.1852433830499649, "learning_rate": 3.6556628197836964e-05, "loss": 0.7166, "step": 6554 }, { "epoch": 7.466096866096866, "grad_norm": 0.1924922615289688, "learning_rate": 3.6552496506680325e-05, "loss": 0.7268, "step": 6555 }, { "epoch": 7.467236467236467, "grad_norm": 0.19060586392879486, "learning_rate": 3.65483644142737e-05, "loss": 0.8493, "step": 6556 }, { "epoch": 7.4683760683760685, "grad_norm": 0.2814032733440399, "learning_rate": 3.654423192076063e-05, "loss": 0.4382, "step": 6557 }, { "epoch": 7.46951566951567, "grad_norm": 0.18677476048469543, "learning_rate": 3.654009902628464e-05, "loss": 0.8313, "step": 6558 }, { "epoch": 7.4706552706552705, "grad_norm": 0.15410645306110382, "learning_rate": 3.653596573098927e-05, "loss": 0.7855, "step": 6559 }, { "epoch": 7.471794871794872, "grad_norm": 0.2007237821817398, "learning_rate": 3.65318320350181e-05, "loss": 0.8523, "step": 6560 }, { "epoch": 7.472934472934473, "grad_norm": 0.22001774609088898, "learning_rate": 3.6527697938514685e-05, "loss": 0.63, "step": 6561 }, { "epoch": 7.474074074074074, "grad_norm": 0.16158480942249298, "learning_rate": 3.65235634416226e-05, "loss": 0.9404, "step": 6562 }, { "epoch": 7.475213675213675, "grad_norm": 0.20947225391864777, "learning_rate": 3.651942854448549e-05, "loss": 0.6625, "step": 6563 }, { "epoch": 7.476353276353277, "grad_norm": 0.1522739678621292, "learning_rate": 3.651529324724693e-05, "loss": 0.8854, "step": 6564 }, { "epoch": 7.477492877492877, "grad_norm": 0.23686401546001434, "learning_rate": 3.6511157550050585e-05, "loss": 0.7503, "step": 6565 }, { "epoch": 7.478632478632479, "grad_norm": 0.20000828802585602, "learning_rate": 3.650702145304008e-05, "loss": 0.6863, "step": 6566 }, { "epoch": 7.47977207977208, "grad_norm": 0.17481471598148346, "learning_rate": 3.6502884956359065e-05, "loss": 0.8144, "step": 6567 }, { "epoch": 7.480911680911681, "grad_norm": 0.21649159491062164, "learning_rate": 3.649874806015123e-05, "loss": 0.7382, "step": 6568 }, { "epoch": 7.482051282051282, "grad_norm": 0.1939447522163391, "learning_rate": 3.649461076456025e-05, "loss": 0.7039, "step": 6569 }, { "epoch": 7.4831908831908835, "grad_norm": 0.20190556347370148, "learning_rate": 3.649047306972984e-05, "loss": 0.762, "step": 6570 }, { "epoch": 7.484330484330484, "grad_norm": 0.1808318793773651, "learning_rate": 3.648633497580368e-05, "loss": 0.8746, "step": 6571 }, { "epoch": 7.4854700854700855, "grad_norm": 0.19218935072422028, "learning_rate": 3.648219648292553e-05, "loss": 0.7296, "step": 6572 }, { "epoch": 7.486609686609687, "grad_norm": 0.14891155064105988, "learning_rate": 3.6478057591239115e-05, "loss": 0.8207, "step": 6573 }, { "epoch": 7.4877492877492875, "grad_norm": 0.18075217306613922, "learning_rate": 3.6473918300888206e-05, "loss": 0.7482, "step": 6574 }, { "epoch": 7.488888888888889, "grad_norm": 0.20154845714569092, "learning_rate": 3.6469778612016555e-05, "loss": 0.716, "step": 6575 }, { "epoch": 7.49002849002849, "grad_norm": 0.19883795082569122, "learning_rate": 3.6465638524767956e-05, "loss": 0.819, "step": 6576 }, { "epoch": 7.491168091168091, "grad_norm": 0.20036627352237701, "learning_rate": 3.646149803928619e-05, "loss": 0.7814, "step": 6577 }, { "epoch": 7.492307692307692, "grad_norm": 0.17774918675422668, "learning_rate": 3.645735715571508e-05, "loss": 0.6913, "step": 6578 }, { "epoch": 7.493447293447294, "grad_norm": 0.21232250332832336, "learning_rate": 3.6453215874198445e-05, "loss": 0.7196, "step": 6579 }, { "epoch": 7.494586894586894, "grad_norm": 0.21235182881355286, "learning_rate": 3.644907419488014e-05, "loss": 0.5707, "step": 6580 }, { "epoch": 7.495726495726496, "grad_norm": 0.17780590057373047, "learning_rate": 3.644493211790399e-05, "loss": 1.0197, "step": 6581 }, { "epoch": 7.496866096866097, "grad_norm": 0.19142213463783264, "learning_rate": 3.6440789643413886e-05, "loss": 0.8401, "step": 6582 }, { "epoch": 7.498005698005698, "grad_norm": 0.2070700228214264, "learning_rate": 3.643664677155369e-05, "loss": 0.5447, "step": 6583 }, { "epoch": 7.499145299145299, "grad_norm": 0.18078549206256866, "learning_rate": 3.643250350246729e-05, "loss": 0.8104, "step": 6584 }, { "epoch": 7.500284900284901, "grad_norm": 0.18190504610538483, "learning_rate": 3.642835983629862e-05, "loss": 0.8551, "step": 6585 }, { "epoch": 7.501424501424501, "grad_norm": 0.14183823764324188, "learning_rate": 3.642421577319157e-05, "loss": 0.9439, "step": 6586 }, { "epoch": 7.5025641025641026, "grad_norm": 0.14357706904411316, "learning_rate": 3.6420071313290105e-05, "loss": 0.7804, "step": 6587 }, { "epoch": 7.503703703703704, "grad_norm": 0.2097671926021576, "learning_rate": 3.6415926456738145e-05, "loss": 0.665, "step": 6588 }, { "epoch": 7.5048433048433045, "grad_norm": 0.22001692652702332, "learning_rate": 3.6411781203679674e-05, "loss": 0.6864, "step": 6589 }, { "epoch": 7.505982905982906, "grad_norm": 0.19312891364097595, "learning_rate": 3.6407635554258654e-05, "loss": 0.6758, "step": 6590 }, { "epoch": 7.507122507122507, "grad_norm": 0.17703326046466827, "learning_rate": 3.640348950861908e-05, "loss": 0.7875, "step": 6591 }, { "epoch": 7.508262108262108, "grad_norm": 0.2058870941400528, "learning_rate": 3.639934306690496e-05, "loss": 0.6695, "step": 6592 }, { "epoch": 7.509401709401709, "grad_norm": 0.21409137547016144, "learning_rate": 3.639519622926031e-05, "loss": 0.6427, "step": 6593 }, { "epoch": 7.510541310541311, "grad_norm": 0.2091960608959198, "learning_rate": 3.639104899582915e-05, "loss": 0.7639, "step": 6594 }, { "epoch": 7.511680911680911, "grad_norm": 0.18237628042697906, "learning_rate": 3.6386901366755534e-05, "loss": 0.6745, "step": 6595 }, { "epoch": 7.512820512820513, "grad_norm": 0.1747722327709198, "learning_rate": 3.6382753342183514e-05, "loss": 0.6726, "step": 6596 }, { "epoch": 7.513960113960114, "grad_norm": 0.18578509986400604, "learning_rate": 3.637860492225717e-05, "loss": 0.8558, "step": 6597 }, { "epoch": 7.515099715099715, "grad_norm": 0.18898378312587738, "learning_rate": 3.63744561071206e-05, "loss": 0.6919, "step": 6598 }, { "epoch": 7.516239316239316, "grad_norm": 0.1722445785999298, "learning_rate": 3.6370306896917885e-05, "loss": 0.8966, "step": 6599 }, { "epoch": 7.517378917378918, "grad_norm": 0.248495951294899, "learning_rate": 3.6366157291793126e-05, "loss": 0.5455, "step": 6600 }, { "epoch": 7.518518518518518, "grad_norm": 0.16866183280944824, "learning_rate": 3.636200729189048e-05, "loss": 0.8293, "step": 6601 }, { "epoch": 7.51965811965812, "grad_norm": 0.20386184751987457, "learning_rate": 3.635785689735408e-05, "loss": 0.7256, "step": 6602 }, { "epoch": 7.520797720797721, "grad_norm": 0.1914542019367218, "learning_rate": 3.635370610832807e-05, "loss": 0.7866, "step": 6603 }, { "epoch": 7.521937321937322, "grad_norm": 0.16765643656253815, "learning_rate": 3.634955492495663e-05, "loss": 0.8105, "step": 6604 }, { "epoch": 7.523076923076923, "grad_norm": 0.23983797430992126, "learning_rate": 3.634540334738392e-05, "loss": 0.6123, "step": 6605 }, { "epoch": 7.524216524216524, "grad_norm": 0.20749503374099731, "learning_rate": 3.634125137575416e-05, "loss": 0.7407, "step": 6606 }, { "epoch": 7.525356125356125, "grad_norm": 0.2098396122455597, "learning_rate": 3.633709901021155e-05, "loss": 0.819, "step": 6607 }, { "epoch": 7.526495726495726, "grad_norm": 0.22235573828220367, "learning_rate": 3.6332946250900325e-05, "loss": 0.7154, "step": 6608 }, { "epoch": 7.527635327635328, "grad_norm": 0.2189468890428543, "learning_rate": 3.63287930979647e-05, "loss": 0.7708, "step": 6609 }, { "epoch": 7.528774928774929, "grad_norm": 0.1647525429725647, "learning_rate": 3.632463955154895e-05, "loss": 0.8876, "step": 6610 }, { "epoch": 7.52991452991453, "grad_norm": 0.19296877086162567, "learning_rate": 3.6320485611797306e-05, "loss": 0.8159, "step": 6611 }, { "epoch": 7.531054131054131, "grad_norm": 0.15628954768180847, "learning_rate": 3.631633127885407e-05, "loss": 0.6174, "step": 6612 }, { "epoch": 7.532193732193733, "grad_norm": 0.16616255044937134, "learning_rate": 3.6312176552863536e-05, "loss": 0.9035, "step": 6613 }, { "epoch": 7.533333333333333, "grad_norm": 0.22767116129398346, "learning_rate": 3.6308021433969995e-05, "loss": 0.5334, "step": 6614 }, { "epoch": 7.534472934472935, "grad_norm": 0.1630139797925949, "learning_rate": 3.630386592231778e-05, "loss": 0.7488, "step": 6615 }, { "epoch": 7.535612535612536, "grad_norm": 0.20510128140449524, "learning_rate": 3.6299710018051215e-05, "loss": 0.7875, "step": 6616 }, { "epoch": 7.536752136752137, "grad_norm": 0.20808428525924683, "learning_rate": 3.629555372131464e-05, "loss": 0.7403, "step": 6617 }, { "epoch": 7.537891737891738, "grad_norm": 0.20908302068710327, "learning_rate": 3.629139703225242e-05, "loss": 0.5035, "step": 6618 }, { "epoch": 7.5390313390313395, "grad_norm": 0.1710822880268097, "learning_rate": 3.6287239951008946e-05, "loss": 0.6866, "step": 6619 }, { "epoch": 7.54017094017094, "grad_norm": 0.1620946079492569, "learning_rate": 3.628308247772857e-05, "loss": 0.8603, "step": 6620 }, { "epoch": 7.5413105413105415, "grad_norm": 0.21538139879703522, "learning_rate": 3.627892461255572e-05, "loss": 0.696, "step": 6621 }, { "epoch": 7.542450142450143, "grad_norm": 0.2605615258216858, "learning_rate": 3.62747663556348e-05, "loss": 0.5206, "step": 6622 }, { "epoch": 7.543589743589743, "grad_norm": 0.19554778933525085, "learning_rate": 3.627060770711024e-05, "loss": 0.5306, "step": 6623 }, { "epoch": 7.544729344729345, "grad_norm": 0.21877993643283844, "learning_rate": 3.626644866712648e-05, "loss": 0.5532, "step": 6624 }, { "epoch": 7.545868945868946, "grad_norm": 0.2135862112045288, "learning_rate": 3.626228923582797e-05, "loss": 0.7669, "step": 6625 }, { "epoch": 7.547008547008547, "grad_norm": 0.20073726773262024, "learning_rate": 3.6258129413359184e-05, "loss": 0.7541, "step": 6626 }, { "epoch": 7.548148148148148, "grad_norm": 0.19691218435764313, "learning_rate": 3.6253969199864604e-05, "loss": 0.8087, "step": 6627 }, { "epoch": 7.54928774928775, "grad_norm": 0.17147700488567352, "learning_rate": 3.624980859548873e-05, "loss": 0.7813, "step": 6628 }, { "epoch": 7.55042735042735, "grad_norm": 0.23159776628017426, "learning_rate": 3.624564760037606e-05, "loss": 0.7808, "step": 6629 }, { "epoch": 7.551566951566952, "grad_norm": 0.214361771941185, "learning_rate": 3.624148621467112e-05, "loss": 0.6672, "step": 6630 }, { "epoch": 7.552706552706553, "grad_norm": 0.21833765506744385, "learning_rate": 3.623732443851846e-05, "loss": 0.6643, "step": 6631 }, { "epoch": 7.553846153846154, "grad_norm": 0.18424160778522491, "learning_rate": 3.623316227206261e-05, "loss": 0.9242, "step": 6632 }, { "epoch": 7.554985754985755, "grad_norm": 0.17697963118553162, "learning_rate": 3.622899971544816e-05, "loss": 0.8505, "step": 6633 }, { "epoch": 7.5561253561253565, "grad_norm": 0.1748262494802475, "learning_rate": 3.622483676881965e-05, "loss": 0.7079, "step": 6634 }, { "epoch": 7.557264957264957, "grad_norm": 0.1862778216600418, "learning_rate": 3.62206734323217e-05, "loss": 0.732, "step": 6635 }, { "epoch": 7.5584045584045585, "grad_norm": 0.20817789435386658, "learning_rate": 3.621650970609891e-05, "loss": 0.8552, "step": 6636 }, { "epoch": 7.55954415954416, "grad_norm": 0.20246557891368866, "learning_rate": 3.621234559029588e-05, "loss": 0.7065, "step": 6637 }, { "epoch": 7.5606837606837605, "grad_norm": 0.20973993837833405, "learning_rate": 3.620818108505727e-05, "loss": 0.4741, "step": 6638 }, { "epoch": 7.561823361823362, "grad_norm": 0.20346422493457794, "learning_rate": 3.620401619052769e-05, "loss": 0.5761, "step": 6639 }, { "epoch": 7.562962962962963, "grad_norm": 0.20651406049728394, "learning_rate": 3.6199850906851826e-05, "loss": 0.7795, "step": 6640 }, { "epoch": 7.564102564102564, "grad_norm": 0.17555281519889832, "learning_rate": 3.6195685234174336e-05, "loss": 0.9339, "step": 6641 }, { "epoch": 7.565242165242165, "grad_norm": 0.17765425145626068, "learning_rate": 3.619151917263992e-05, "loss": 0.8671, "step": 6642 }, { "epoch": 7.566381766381767, "grad_norm": 0.19280676543712616, "learning_rate": 3.6187352722393256e-05, "loss": 0.7778, "step": 6643 }, { "epoch": 7.567521367521367, "grad_norm": 0.19583500921726227, "learning_rate": 3.6183185883579066e-05, "loss": 0.7342, "step": 6644 }, { "epoch": 7.568660968660969, "grad_norm": 0.17093996703624725, "learning_rate": 3.6179018656342076e-05, "loss": 0.867, "step": 6645 }, { "epoch": 7.56980056980057, "grad_norm": 0.17334173619747162, "learning_rate": 3.6174851040827026e-05, "loss": 0.7021, "step": 6646 }, { "epoch": 7.570940170940171, "grad_norm": 0.22137875854969025, "learning_rate": 3.6170683037178684e-05, "loss": 0.734, "step": 6647 }, { "epoch": 7.572079772079772, "grad_norm": 0.17077720165252686, "learning_rate": 3.6166514645541785e-05, "loss": 0.699, "step": 6648 }, { "epoch": 7.5732193732193736, "grad_norm": 0.1990506798028946, "learning_rate": 3.616234586606113e-05, "loss": 0.6806, "step": 6649 }, { "epoch": 7.574358974358974, "grad_norm": 0.22963400185108185, "learning_rate": 3.6158176698881494e-05, "loss": 0.7139, "step": 6650 }, { "epoch": 7.5754985754985755, "grad_norm": 0.2180236130952835, "learning_rate": 3.6154007144147714e-05, "loss": 0.7995, "step": 6651 }, { "epoch": 7.576638176638177, "grad_norm": 0.25464409589767456, "learning_rate": 3.614983720200458e-05, "loss": 0.5885, "step": 6652 }, { "epoch": 7.5777777777777775, "grad_norm": 0.15804032981395721, "learning_rate": 3.614566687259695e-05, "loss": 0.9211, "step": 6653 }, { "epoch": 7.578917378917379, "grad_norm": 0.20819330215454102, "learning_rate": 3.614149615606965e-05, "loss": 0.7807, "step": 6654 }, { "epoch": 7.58005698005698, "grad_norm": 0.20328183472156525, "learning_rate": 3.613732505256755e-05, "loss": 0.5991, "step": 6655 }, { "epoch": 7.581196581196581, "grad_norm": 0.20896422863006592, "learning_rate": 3.613315356223552e-05, "loss": 0.8131, "step": 6656 }, { "epoch": 7.582336182336182, "grad_norm": 0.22786849737167358, "learning_rate": 3.612898168521846e-05, "loss": 0.4963, "step": 6657 }, { "epoch": 7.583475783475784, "grad_norm": 0.24447943270206451, "learning_rate": 3.612480942166125e-05, "loss": 0.6984, "step": 6658 }, { "epoch": 7.584615384615384, "grad_norm": 0.2068115770816803, "learning_rate": 3.612063677170882e-05, "loss": 0.6086, "step": 6659 }, { "epoch": 7.585754985754986, "grad_norm": 0.17231090366840363, "learning_rate": 3.6116463735506095e-05, "loss": 0.7066, "step": 6660 }, { "epoch": 7.586894586894587, "grad_norm": 0.15871106088161469, "learning_rate": 3.611229031319801e-05, "loss": 0.9569, "step": 6661 }, { "epoch": 7.588034188034188, "grad_norm": 0.18566365540027618, "learning_rate": 3.610811650492953e-05, "loss": 0.8158, "step": 6662 }, { "epoch": 7.589173789173789, "grad_norm": 0.2549876570701599, "learning_rate": 3.6103942310845615e-05, "loss": 0.5759, "step": 6663 }, { "epoch": 7.590313390313391, "grad_norm": 0.20469443500041962, "learning_rate": 3.6099767731091246e-05, "loss": 0.7869, "step": 6664 }, { "epoch": 7.591452991452991, "grad_norm": 0.2003583014011383, "learning_rate": 3.609559276581142e-05, "loss": 0.7142, "step": 6665 }, { "epoch": 7.592592592592593, "grad_norm": 0.16280707716941833, "learning_rate": 3.609141741515114e-05, "loss": 0.9745, "step": 6666 }, { "epoch": 7.593732193732194, "grad_norm": 0.2296389490365982, "learning_rate": 3.608724167925543e-05, "loss": 0.8164, "step": 6667 }, { "epoch": 7.5948717948717945, "grad_norm": 0.17501582205295563, "learning_rate": 3.608306555826934e-05, "loss": 0.8154, "step": 6668 }, { "epoch": 7.596011396011396, "grad_norm": 0.22544457018375397, "learning_rate": 3.607888905233789e-05, "loss": 0.5752, "step": 6669 }, { "epoch": 7.597150997150997, "grad_norm": 0.19265951216220856, "learning_rate": 3.607471216160616e-05, "loss": 0.6945, "step": 6670 }, { "epoch": 7.598290598290598, "grad_norm": 0.21000435948371887, "learning_rate": 3.607053488621922e-05, "loss": 0.7426, "step": 6671 }, { "epoch": 7.599430199430199, "grad_norm": 0.18488295376300812, "learning_rate": 3.606635722632216e-05, "loss": 0.6778, "step": 6672 }, { "epoch": 7.600569800569801, "grad_norm": 0.21798954904079437, "learning_rate": 3.606217918206009e-05, "loss": 0.7453, "step": 6673 }, { "epoch": 7.601709401709401, "grad_norm": 0.18019331991672516, "learning_rate": 3.605800075357811e-05, "loss": 0.7475, "step": 6674 }, { "epoch": 7.602849002849003, "grad_norm": 0.1993224173784256, "learning_rate": 3.605382194102135e-05, "loss": 0.7692, "step": 6675 }, { "epoch": 7.603988603988604, "grad_norm": 0.20090512931346893, "learning_rate": 3.6049642744534964e-05, "loss": 0.6506, "step": 6676 }, { "epoch": 7.605128205128205, "grad_norm": 0.1689884513616562, "learning_rate": 3.604546316426409e-05, "loss": 0.904, "step": 6677 }, { "epoch": 7.606267806267806, "grad_norm": 0.21209757030010223, "learning_rate": 3.6041283200353904e-05, "loss": 0.6866, "step": 6678 }, { "epoch": 7.607407407407408, "grad_norm": 0.16836108267307281, "learning_rate": 3.60371028529496e-05, "loss": 0.7449, "step": 6679 }, { "epoch": 7.608547008547008, "grad_norm": 0.19372493028640747, "learning_rate": 3.603292212219635e-05, "loss": 0.855, "step": 6680 }, { "epoch": 7.60968660968661, "grad_norm": 0.2601233422756195, "learning_rate": 3.602874100823939e-05, "loss": 0.5822, "step": 6681 }, { "epoch": 7.610826210826211, "grad_norm": 0.1948636770248413, "learning_rate": 3.602455951122391e-05, "loss": 0.6676, "step": 6682 }, { "epoch": 7.611965811965812, "grad_norm": 0.38639694452285767, "learning_rate": 3.602037763129517e-05, "loss": 0.7921, "step": 6683 }, { "epoch": 7.613105413105413, "grad_norm": 0.22127041220664978, "learning_rate": 3.601619536859839e-05, "loss": 0.5744, "step": 6684 }, { "epoch": 7.614245014245014, "grad_norm": 0.20698286592960358, "learning_rate": 3.601201272327888e-05, "loss": 0.8063, "step": 6685 }, { "epoch": 7.615384615384615, "grad_norm": 0.2294767051935196, "learning_rate": 3.600782969548186e-05, "loss": 0.5447, "step": 6686 }, { "epoch": 7.616524216524216, "grad_norm": 0.22353273630142212, "learning_rate": 3.6003646285352655e-05, "loss": 0.3646, "step": 6687 }, { "epoch": 7.617663817663818, "grad_norm": 0.2009475976228714, "learning_rate": 3.599946249303655e-05, "loss": 0.7142, "step": 6688 }, { "epoch": 7.618803418803418, "grad_norm": 0.18972386419773102, "learning_rate": 3.5995278318678864e-05, "loss": 0.6538, "step": 6689 }, { "epoch": 7.61994301994302, "grad_norm": 0.1776159256696701, "learning_rate": 3.599109376242493e-05, "loss": 0.9373, "step": 6690 }, { "epoch": 7.621082621082621, "grad_norm": 0.17019517719745636, "learning_rate": 3.598690882442008e-05, "loss": 0.6456, "step": 6691 }, { "epoch": 7.622222222222222, "grad_norm": 0.20530672371387482, "learning_rate": 3.5982723504809675e-05, "loss": 0.705, "step": 6692 }, { "epoch": 7.623361823361823, "grad_norm": 0.2039433717727661, "learning_rate": 3.5978537803739076e-05, "loss": 0.8487, "step": 6693 }, { "epoch": 7.624501424501425, "grad_norm": 0.16625913977622986, "learning_rate": 3.597435172135367e-05, "loss": 0.8411, "step": 6694 }, { "epoch": 7.625641025641025, "grad_norm": 0.25948795676231384, "learning_rate": 3.597016525779885e-05, "loss": 0.4059, "step": 6695 }, { "epoch": 7.626780626780627, "grad_norm": 0.1494654268026352, "learning_rate": 3.596597841322002e-05, "loss": 0.8693, "step": 6696 }, { "epoch": 7.627920227920228, "grad_norm": 0.1634153574705124, "learning_rate": 3.596179118776261e-05, "loss": 0.9274, "step": 6697 }, { "epoch": 7.629059829059829, "grad_norm": 0.21546940505504608, "learning_rate": 3.595760358157204e-05, "loss": 0.634, "step": 6698 }, { "epoch": 7.63019943019943, "grad_norm": 0.2275114208459854, "learning_rate": 3.5953415594793774e-05, "loss": 0.6163, "step": 6699 }, { "epoch": 7.6313390313390315, "grad_norm": 0.18360714614391327, "learning_rate": 3.594922722757325e-05, "loss": 0.8058, "step": 6700 }, { "epoch": 7.632478632478632, "grad_norm": 0.1572057008743286, "learning_rate": 3.5945038480055956e-05, "loss": 0.8493, "step": 6701 }, { "epoch": 7.633618233618233, "grad_norm": 0.22970698773860931, "learning_rate": 3.594084935238738e-05, "loss": 0.5229, "step": 6702 }, { "epoch": 7.634757834757835, "grad_norm": 0.22282303869724274, "learning_rate": 3.593665984471302e-05, "loss": 0.6767, "step": 6703 }, { "epoch": 7.635897435897435, "grad_norm": 0.21896421909332275, "learning_rate": 3.5932469957178385e-05, "loss": 0.8706, "step": 6704 }, { "epoch": 7.637037037037037, "grad_norm": 0.21026067435741425, "learning_rate": 3.5928279689929e-05, "loss": 0.6977, "step": 6705 }, { "epoch": 7.638176638176638, "grad_norm": 0.24936474859714508, "learning_rate": 3.592408904311041e-05, "loss": 0.6082, "step": 6706 }, { "epoch": 7.639316239316239, "grad_norm": 0.22592933475971222, "learning_rate": 3.5919898016868176e-05, "loss": 0.6253, "step": 6707 }, { "epoch": 7.64045584045584, "grad_norm": 0.26729634404182434, "learning_rate": 3.591570661134784e-05, "loss": 0.6322, "step": 6708 }, { "epoch": 7.641595441595442, "grad_norm": 0.20441783964633942, "learning_rate": 3.5911514826695003e-05, "loss": 0.6183, "step": 6709 }, { "epoch": 7.642735042735043, "grad_norm": 0.17855162918567657, "learning_rate": 3.590732266305525e-05, "loss": 0.8315, "step": 6710 }, { "epoch": 7.643874643874644, "grad_norm": 0.22276297211647034, "learning_rate": 3.5903130120574185e-05, "loss": 0.7899, "step": 6711 }, { "epoch": 7.645014245014245, "grad_norm": 0.23653104901313782, "learning_rate": 3.589893719939743e-05, "loss": 0.5555, "step": 6712 }, { "epoch": 7.6461538461538465, "grad_norm": 0.1946984827518463, "learning_rate": 3.589474389967061e-05, "loss": 0.7975, "step": 6713 }, { "epoch": 7.647293447293447, "grad_norm": 0.23644982278347015, "learning_rate": 3.589055022153937e-05, "loss": 0.5539, "step": 6714 }, { "epoch": 7.6484330484330485, "grad_norm": 0.19905270636081696, "learning_rate": 3.5886356165149384e-05, "loss": 0.835, "step": 6715 }, { "epoch": 7.64957264957265, "grad_norm": 0.193971648812294, "learning_rate": 3.58821617306463e-05, "loss": 0.9625, "step": 6716 }, { "epoch": 7.6507122507122505, "grad_norm": 0.23056793212890625, "learning_rate": 3.587796691817581e-05, "loss": 0.6457, "step": 6717 }, { "epoch": 7.651851851851852, "grad_norm": 0.1596032828092575, "learning_rate": 3.587377172788362e-05, "loss": 0.8046, "step": 6718 }, { "epoch": 7.652991452991453, "grad_norm": 0.19464947283267975, "learning_rate": 3.586957615991544e-05, "loss": 0.7655, "step": 6719 }, { "epoch": 7.654131054131054, "grad_norm": 0.13628152012825012, "learning_rate": 3.5865380214416974e-05, "loss": 0.9186, "step": 6720 }, { "epoch": 7.655270655270655, "grad_norm": 0.17702051997184753, "learning_rate": 3.5861183891533986e-05, "loss": 0.8825, "step": 6721 }, { "epoch": 7.656410256410257, "grad_norm": 0.23871000111103058, "learning_rate": 3.5856987191412204e-05, "loss": 0.5539, "step": 6722 }, { "epoch": 7.657549857549857, "grad_norm": 0.20753921568393707, "learning_rate": 3.585279011419741e-05, "loss": 0.723, "step": 6723 }, { "epoch": 7.658689458689459, "grad_norm": 0.21139591932296753, "learning_rate": 3.5848592660035366e-05, "loss": 0.7481, "step": 6724 }, { "epoch": 7.65982905982906, "grad_norm": 0.1549995392560959, "learning_rate": 3.5844394829071867e-05, "loss": 0.7512, "step": 6725 }, { "epoch": 7.660968660968661, "grad_norm": 0.15958407521247864, "learning_rate": 3.584019662145272e-05, "loss": 0.8533, "step": 6726 }, { "epoch": 7.662108262108262, "grad_norm": 0.22194387018680573, "learning_rate": 3.5835998037323716e-05, "loss": 0.6888, "step": 6727 }, { "epoch": 7.663247863247864, "grad_norm": 0.20602399110794067, "learning_rate": 3.583179907683071e-05, "loss": 0.7573, "step": 6728 }, { "epoch": 7.664387464387464, "grad_norm": 0.1635296642780304, "learning_rate": 3.582759974011953e-05, "loss": 0.9509, "step": 6729 }, { "epoch": 7.6655270655270655, "grad_norm": 0.2121957242488861, "learning_rate": 3.5823400027336044e-05, "loss": 0.5779, "step": 6730 }, { "epoch": 7.666666666666667, "grad_norm": 0.22562338411808014, "learning_rate": 3.5819199938626104e-05, "loss": 0.7486, "step": 6731 }, { "epoch": 7.6678062678062675, "grad_norm": 0.1744430661201477, "learning_rate": 3.58149994741356e-05, "loss": 0.7707, "step": 6732 }, { "epoch": 7.668945868945869, "grad_norm": 0.22118063271045685, "learning_rate": 3.5810798634010416e-05, "loss": 0.6427, "step": 6733 }, { "epoch": 7.67008547008547, "grad_norm": 0.2152554988861084, "learning_rate": 3.580659741839647e-05, "loss": 0.5835, "step": 6734 }, { "epoch": 7.671225071225071, "grad_norm": 0.1917773187160492, "learning_rate": 3.580239582743968e-05, "loss": 0.8254, "step": 6735 }, { "epoch": 7.672364672364672, "grad_norm": 0.18175730109214783, "learning_rate": 3.579819386128598e-05, "loss": 0.7686, "step": 6736 }, { "epoch": 7.673504273504274, "grad_norm": 0.2351260781288147, "learning_rate": 3.5793991520081305e-05, "loss": 0.5168, "step": 6737 }, { "epoch": 7.674643874643874, "grad_norm": 0.18073241412639618, "learning_rate": 3.578978880397162e-05, "loss": 0.8385, "step": 6738 }, { "epoch": 7.675783475783476, "grad_norm": 0.181781604886055, "learning_rate": 3.578558571310291e-05, "loss": 0.7385, "step": 6739 }, { "epoch": 7.676923076923077, "grad_norm": 0.26611897349357605, "learning_rate": 3.578138224762113e-05, "loss": 0.4995, "step": 6740 }, { "epoch": 7.678062678062678, "grad_norm": 0.2237926423549652, "learning_rate": 3.577717840767231e-05, "loss": 0.5046, "step": 6741 }, { "epoch": 7.679202279202279, "grad_norm": 0.1818995326757431, "learning_rate": 3.5772974193402443e-05, "loss": 0.7741, "step": 6742 }, { "epoch": 7.680341880341881, "grad_norm": 0.20140786468982697, "learning_rate": 3.5768769604957555e-05, "loss": 0.686, "step": 6743 }, { "epoch": 7.681481481481481, "grad_norm": 0.2358056604862213, "learning_rate": 3.576456464248368e-05, "loss": 0.7, "step": 6744 }, { "epoch": 7.682621082621083, "grad_norm": 0.16571885347366333, "learning_rate": 3.576035930612688e-05, "loss": 0.7005, "step": 6745 }, { "epoch": 7.683760683760684, "grad_norm": 0.18278618156909943, "learning_rate": 3.57561535960332e-05, "loss": 0.6127, "step": 6746 }, { "epoch": 7.6849002849002845, "grad_norm": 0.18129862844944, "learning_rate": 3.5751947512348736e-05, "loss": 0.7352, "step": 6747 }, { "epoch": 7.686039886039886, "grad_norm": 0.21680477261543274, "learning_rate": 3.5747741055219564e-05, "loss": 0.6337, "step": 6748 }, { "epoch": 7.687179487179487, "grad_norm": 0.18856161832809448, "learning_rate": 3.5743534224791784e-05, "loss": 0.7527, "step": 6749 }, { "epoch": 7.688319088319088, "grad_norm": 0.2246232032775879, "learning_rate": 3.573932702121151e-05, "loss": 0.7238, "step": 6750 }, { "epoch": 7.689458689458689, "grad_norm": 0.24309638142585754, "learning_rate": 3.5735119444624895e-05, "loss": 0.6889, "step": 6751 }, { "epoch": 7.690598290598291, "grad_norm": 0.19355084002017975, "learning_rate": 3.5730911495178047e-05, "loss": 0.8058, "step": 6752 }, { "epoch": 7.691737891737891, "grad_norm": 0.19573912024497986, "learning_rate": 3.572670317301714e-05, "loss": 0.7122, "step": 6753 }, { "epoch": 7.692877492877493, "grad_norm": 0.19996854662895203, "learning_rate": 3.572249447828833e-05, "loss": 0.6179, "step": 6754 }, { "epoch": 7.694017094017094, "grad_norm": 0.17480404675006866, "learning_rate": 3.571828541113779e-05, "loss": 0.8216, "step": 6755 }, { "epoch": 7.695156695156696, "grad_norm": 0.24706783890724182, "learning_rate": 3.5714075971711734e-05, "loss": 0.546, "step": 6756 }, { "epoch": 7.696296296296296, "grad_norm": 0.19907280802726746, "learning_rate": 3.5709866160156355e-05, "loss": 0.7825, "step": 6757 }, { "epoch": 7.697435897435898, "grad_norm": 0.2604585587978363, "learning_rate": 3.570565597661787e-05, "loss": 0.4379, "step": 6758 }, { "epoch": 7.698575498575499, "grad_norm": 0.16438202559947968, "learning_rate": 3.5701445421242515e-05, "loss": 0.852, "step": 6759 }, { "epoch": 7.6997150997151, "grad_norm": 0.18762624263763428, "learning_rate": 3.569723449417653e-05, "loss": 0.8304, "step": 6760 }, { "epoch": 7.700854700854701, "grad_norm": 0.21662066876888275, "learning_rate": 3.569302319556616e-05, "loss": 0.775, "step": 6761 }, { "epoch": 7.7019943019943025, "grad_norm": 0.1819138079881668, "learning_rate": 3.5688811525557706e-05, "loss": 0.6319, "step": 6762 }, { "epoch": 7.703133903133903, "grad_norm": 0.20978693664073944, "learning_rate": 3.568459948429743e-05, "loss": 0.5899, "step": 6763 }, { "epoch": 7.704273504273504, "grad_norm": 0.1701655089855194, "learning_rate": 3.568038707193163e-05, "loss": 0.7628, "step": 6764 }, { "epoch": 7.705413105413106, "grad_norm": 0.28052636981010437, "learning_rate": 3.5676174288606613e-05, "loss": 0.3683, "step": 6765 }, { "epoch": 7.706552706552706, "grad_norm": 0.1874684989452362, "learning_rate": 3.56719611344687e-05, "loss": 0.8015, "step": 6766 }, { "epoch": 7.707692307692308, "grad_norm": 0.17393150925636292, "learning_rate": 3.5667747609664235e-05, "loss": 0.8852, "step": 6767 }, { "epoch": 7.708831908831909, "grad_norm": 0.19138458371162415, "learning_rate": 3.5663533714339556e-05, "loss": 0.6181, "step": 6768 }, { "epoch": 7.70997150997151, "grad_norm": 0.21075037121772766, "learning_rate": 3.565931944864103e-05, "loss": 0.7478, "step": 6769 }, { "epoch": 7.711111111111111, "grad_norm": 0.18601825833320618, "learning_rate": 3.565510481271502e-05, "loss": 0.5923, "step": 6770 }, { "epoch": 7.712250712250713, "grad_norm": 0.23541691899299622, "learning_rate": 3.565088980670793e-05, "loss": 0.8809, "step": 6771 }, { "epoch": 7.713390313390313, "grad_norm": 0.16794425249099731, "learning_rate": 3.5646674430766135e-05, "loss": 0.7799, "step": 6772 }, { "epoch": 7.714529914529915, "grad_norm": 0.28001055121421814, "learning_rate": 3.564245868503607e-05, "loss": 0.4284, "step": 6773 }, { "epoch": 7.715669515669516, "grad_norm": 0.1725984662771225, "learning_rate": 3.563824256966414e-05, "loss": 0.8952, "step": 6774 }, { "epoch": 7.716809116809117, "grad_norm": 0.1624051332473755, "learning_rate": 3.563402608479679e-05, "loss": 0.7974, "step": 6775 }, { "epoch": 7.717948717948718, "grad_norm": 0.19853821396827698, "learning_rate": 3.562980923058047e-05, "loss": 0.8336, "step": 6776 }, { "epoch": 7.7190883190883195, "grad_norm": 0.23455852270126343, "learning_rate": 3.562559200716165e-05, "loss": 0.5866, "step": 6777 }, { "epoch": 7.72022792022792, "grad_norm": 0.24911242723464966, "learning_rate": 3.5621374414686785e-05, "loss": 0.6417, "step": 6778 }, { "epoch": 7.7213675213675215, "grad_norm": 0.23466742038726807, "learning_rate": 3.561715645330239e-05, "loss": 0.6499, "step": 6779 }, { "epoch": 7.722507122507123, "grad_norm": 0.18805652856826782, "learning_rate": 3.561293812315495e-05, "loss": 0.8668, "step": 6780 }, { "epoch": 7.7236467236467234, "grad_norm": 0.2405278980731964, "learning_rate": 3.560871942439099e-05, "loss": 0.5377, "step": 6781 }, { "epoch": 7.724786324786325, "grad_norm": 0.1959921419620514, "learning_rate": 3.560450035715702e-05, "loss": 0.7965, "step": 6782 }, { "epoch": 7.725925925925926, "grad_norm": 0.1588277518749237, "learning_rate": 3.560028092159959e-05, "loss": 0.8128, "step": 6783 }, { "epoch": 7.727065527065527, "grad_norm": 0.20416897535324097, "learning_rate": 3.559606111786527e-05, "loss": 0.5679, "step": 6784 }, { "epoch": 7.728205128205128, "grad_norm": 0.1544797122478485, "learning_rate": 3.559184094610058e-05, "loss": 0.8786, "step": 6785 }, { "epoch": 7.72934472934473, "grad_norm": 0.22586064040660858, "learning_rate": 3.558762040645215e-05, "loss": 0.7454, "step": 6786 }, { "epoch": 7.73048433048433, "grad_norm": 0.16883732378482819, "learning_rate": 3.558339949906654e-05, "loss": 0.9011, "step": 6787 }, { "epoch": 7.731623931623932, "grad_norm": 0.17685134708881378, "learning_rate": 3.557917822409036e-05, "loss": 0.7891, "step": 6788 }, { "epoch": 7.732763532763533, "grad_norm": 0.2044299691915512, "learning_rate": 3.557495658167022e-05, "loss": 0.6773, "step": 6789 }, { "epoch": 7.733903133903134, "grad_norm": 0.2049073725938797, "learning_rate": 3.557073457195277e-05, "loss": 0.7226, "step": 6790 }, { "epoch": 7.735042735042735, "grad_norm": 0.20711560547351837, "learning_rate": 3.5566512195084636e-05, "loss": 0.8539, "step": 6791 }, { "epoch": 7.7361823361823365, "grad_norm": 0.1768498718738556, "learning_rate": 3.556228945121247e-05, "loss": 0.7954, "step": 6792 }, { "epoch": 7.737321937321937, "grad_norm": 0.1852618157863617, "learning_rate": 3.555806634048294e-05, "loss": 0.8245, "step": 6793 }, { "epoch": 7.7384615384615385, "grad_norm": 0.19067004323005676, "learning_rate": 3.5553842863042744e-05, "loss": 0.7274, "step": 6794 }, { "epoch": 7.73960113960114, "grad_norm": 0.24505653977394104, "learning_rate": 3.554961901903855e-05, "loss": 0.6032, "step": 6795 }, { "epoch": 7.7407407407407405, "grad_norm": 0.1807478666305542, "learning_rate": 3.5545394808617086e-05, "loss": 0.9577, "step": 6796 }, { "epoch": 7.741880341880342, "grad_norm": 0.2040424644947052, "learning_rate": 3.554117023192506e-05, "loss": 0.6683, "step": 6797 }, { "epoch": 7.743019943019943, "grad_norm": 0.1729096621274948, "learning_rate": 3.55369452891092e-05, "loss": 0.6779, "step": 6798 }, { "epoch": 7.744159544159544, "grad_norm": 0.19839952886104584, "learning_rate": 3.5532719980316256e-05, "loss": 0.7325, "step": 6799 }, { "epoch": 7.745299145299145, "grad_norm": 0.22343094646930695, "learning_rate": 3.552849430569297e-05, "loss": 0.4867, "step": 6800 }, { "epoch": 7.746438746438747, "grad_norm": 0.18718844652175903, "learning_rate": 3.552426826538614e-05, "loss": 0.676, "step": 6801 }, { "epoch": 7.747578347578347, "grad_norm": 0.2233506739139557, "learning_rate": 3.552004185954252e-05, "loss": 0.5602, "step": 6802 }, { "epoch": 7.748717948717949, "grad_norm": 0.16290506720542908, "learning_rate": 3.551581508830892e-05, "loss": 0.8802, "step": 6803 }, { "epoch": 7.74985754985755, "grad_norm": 0.17028826475143433, "learning_rate": 3.551158795183215e-05, "loss": 0.7727, "step": 6804 }, { "epoch": 7.750997150997151, "grad_norm": 0.23910178244113922, "learning_rate": 3.5507360450259014e-05, "loss": 0.6331, "step": 6805 }, { "epoch": 7.752136752136752, "grad_norm": 0.1908128261566162, "learning_rate": 3.550313258373636e-05, "loss": 0.7729, "step": 6806 }, { "epoch": 7.753276353276354, "grad_norm": 0.15797142684459686, "learning_rate": 3.549890435241102e-05, "loss": 0.8337, "step": 6807 }, { "epoch": 7.754415954415954, "grad_norm": 0.2094789445400238, "learning_rate": 3.549467575642986e-05, "loss": 0.608, "step": 6808 }, { "epoch": 7.7555555555555555, "grad_norm": 0.226154163479805, "learning_rate": 3.5490446795939765e-05, "loss": 0.5552, "step": 6809 }, { "epoch": 7.756695156695157, "grad_norm": 0.19495464861392975, "learning_rate": 3.5486217471087584e-05, "loss": 0.6993, "step": 6810 }, { "epoch": 7.7578347578347575, "grad_norm": 0.22222615778446198, "learning_rate": 3.548198778202025e-05, "loss": 0.8459, "step": 6811 }, { "epoch": 7.758974358974359, "grad_norm": 0.1648394614458084, "learning_rate": 3.547775772888464e-05, "loss": 0.8511, "step": 6812 }, { "epoch": 7.76011396011396, "grad_norm": 0.20041419565677643, "learning_rate": 3.5473527311827704e-05, "loss": 0.695, "step": 6813 }, { "epoch": 7.761253561253561, "grad_norm": 0.18546149134635925, "learning_rate": 3.5469296530996354e-05, "loss": 0.7463, "step": 6814 }, { "epoch": 7.762393162393162, "grad_norm": 0.19315482676029205, "learning_rate": 3.546506538653755e-05, "loss": 0.8367, "step": 6815 }, { "epoch": 7.763532763532764, "grad_norm": 0.21007438004016876, "learning_rate": 3.546083387859824e-05, "loss": 0.5947, "step": 6816 }, { "epoch": 7.764672364672364, "grad_norm": 0.15181663632392883, "learning_rate": 3.5456602007325406e-05, "loss": 0.8136, "step": 6817 }, { "epoch": 7.765811965811966, "grad_norm": 0.1625537872314453, "learning_rate": 3.5452369772866026e-05, "loss": 0.926, "step": 6818 }, { "epoch": 7.766951566951567, "grad_norm": 0.1997203528881073, "learning_rate": 3.5448137175367104e-05, "loss": 0.8008, "step": 6819 }, { "epoch": 7.768091168091168, "grad_norm": 0.4424345791339874, "learning_rate": 3.544390421497564e-05, "loss": 0.6434, "step": 6820 }, { "epoch": 7.769230769230769, "grad_norm": 0.17481213808059692, "learning_rate": 3.543967089183867e-05, "loss": 0.7499, "step": 6821 }, { "epoch": 7.770370370370371, "grad_norm": 0.19890199601650238, "learning_rate": 3.543543720610321e-05, "loss": 0.7431, "step": 6822 }, { "epoch": 7.771509971509971, "grad_norm": 0.1992911547422409, "learning_rate": 3.543120315791632e-05, "loss": 0.5817, "step": 6823 }, { "epoch": 7.772649572649573, "grad_norm": 0.1869911253452301, "learning_rate": 3.542696874742507e-05, "loss": 0.7625, "step": 6824 }, { "epoch": 7.773789173789174, "grad_norm": 0.1710990071296692, "learning_rate": 3.5422733974776506e-05, "loss": 0.9135, "step": 6825 }, { "epoch": 7.7749287749287745, "grad_norm": 0.21241231262683868, "learning_rate": 3.5418498840117744e-05, "loss": 0.8583, "step": 6826 }, { "epoch": 7.776068376068376, "grad_norm": 0.20025236904621124, "learning_rate": 3.541426334359584e-05, "loss": 0.7737, "step": 6827 }, { "epoch": 7.777207977207977, "grad_norm": 0.21202895045280457, "learning_rate": 3.541002748535796e-05, "loss": 0.6545, "step": 6828 }, { "epoch": 7.778347578347578, "grad_norm": 0.22664658725261688, "learning_rate": 3.540579126555118e-05, "loss": 0.6179, "step": 6829 }, { "epoch": 7.779487179487179, "grad_norm": 0.17499423027038574, "learning_rate": 3.5401554684322666e-05, "loss": 0.7105, "step": 6830 }, { "epoch": 7.780626780626781, "grad_norm": 0.18434879183769226, "learning_rate": 3.539731774181955e-05, "loss": 0.752, "step": 6831 }, { "epoch": 7.781766381766381, "grad_norm": 0.17901693284511566, "learning_rate": 3.539308043818899e-05, "loss": 0.7124, "step": 6832 }, { "epoch": 7.782905982905983, "grad_norm": 0.23148606717586517, "learning_rate": 3.5388842773578166e-05, "loss": 0.6997, "step": 6833 }, { "epoch": 7.784045584045584, "grad_norm": 0.19548821449279785, "learning_rate": 3.5384604748134266e-05, "loss": 0.6375, "step": 6834 }, { "epoch": 7.785185185185185, "grad_norm": 0.15549170970916748, "learning_rate": 3.5380366362004485e-05, "loss": 0.8166, "step": 6835 }, { "epoch": 7.786324786324786, "grad_norm": 0.2538563311100006, "learning_rate": 3.537612761533603e-05, "loss": 0.4698, "step": 6836 }, { "epoch": 7.787464387464388, "grad_norm": 0.2002454251050949, "learning_rate": 3.537188850827614e-05, "loss": 0.6412, "step": 6837 }, { "epoch": 7.788603988603988, "grad_norm": 0.178439661860466, "learning_rate": 3.5367649040972025e-05, "loss": 0.8598, "step": 6838 }, { "epoch": 7.78974358974359, "grad_norm": 0.15145058929920197, "learning_rate": 3.5363409213570954e-05, "loss": 0.9443, "step": 6839 }, { "epoch": 7.790883190883191, "grad_norm": 0.17305675148963928, "learning_rate": 3.5359169026220176e-05, "loss": 0.8753, "step": 6840 }, { "epoch": 7.792022792022792, "grad_norm": 0.1700587123632431, "learning_rate": 3.535492847906699e-05, "loss": 0.8682, "step": 6841 }, { "epoch": 7.793162393162393, "grad_norm": 0.1825442910194397, "learning_rate": 3.535068757225864e-05, "loss": 0.7587, "step": 6842 }, { "epoch": 7.794301994301994, "grad_norm": 0.1636766493320465, "learning_rate": 3.5346446305942456e-05, "loss": 0.8107, "step": 6843 }, { "epoch": 7.795441595441595, "grad_norm": 0.207448810338974, "learning_rate": 3.5342204680265725e-05, "loss": 0.7229, "step": 6844 }, { "epoch": 7.796581196581196, "grad_norm": 0.2476516216993332, "learning_rate": 3.533796269537581e-05, "loss": 0.5741, "step": 6845 }, { "epoch": 7.797720797720798, "grad_norm": 0.16115030646324158, "learning_rate": 3.5333720351419995e-05, "loss": 0.871, "step": 6846 }, { "epoch": 7.798860398860398, "grad_norm": 0.17233332991600037, "learning_rate": 3.5329477648545675e-05, "loss": 0.8569, "step": 6847 }, { "epoch": 7.8, "grad_norm": 0.19796954095363617, "learning_rate": 3.532523458690018e-05, "loss": 0.6368, "step": 6848 }, { "epoch": 7.801139601139601, "grad_norm": 0.22311918437480927, "learning_rate": 3.532099116663089e-05, "loss": 0.934, "step": 6849 }, { "epoch": 7.802279202279202, "grad_norm": 0.16385604441165924, "learning_rate": 3.53167473878852e-05, "loss": 0.8634, "step": 6850 }, { "epoch": 7.803418803418803, "grad_norm": 0.17855627834796906, "learning_rate": 3.53125032508105e-05, "loss": 0.7305, "step": 6851 }, { "epoch": 7.804558404558405, "grad_norm": 0.21436013281345367, "learning_rate": 3.530825875555421e-05, "loss": 0.7803, "step": 6852 }, { "epoch": 7.805698005698005, "grad_norm": 0.21189452707767487, "learning_rate": 3.530401390226373e-05, "loss": 0.9677, "step": 6853 }, { "epoch": 7.806837606837607, "grad_norm": 0.19591599702835083, "learning_rate": 3.529976869108653e-05, "loss": 0.7769, "step": 6854 }, { "epoch": 7.807977207977208, "grad_norm": 0.23710474371910095, "learning_rate": 3.529552312217001e-05, "loss": 0.6208, "step": 6855 }, { "epoch": 7.8091168091168095, "grad_norm": 0.15839490294456482, "learning_rate": 3.5291277195661686e-05, "loss": 0.8444, "step": 6856 }, { "epoch": 7.81025641025641, "grad_norm": 0.20124119520187378, "learning_rate": 3.528703091170899e-05, "loss": 0.712, "step": 6857 }, { "epoch": 7.8113960113960115, "grad_norm": 0.21655111014842987, "learning_rate": 3.528278427045943e-05, "loss": 0.5914, "step": 6858 }, { "epoch": 7.812535612535613, "grad_norm": 0.1968693733215332, "learning_rate": 3.52785372720605e-05, "loss": 0.8061, "step": 6859 }, { "epoch": 7.8136752136752134, "grad_norm": 0.20255804061889648, "learning_rate": 3.5274289916659684e-05, "loss": 0.6286, "step": 6860 }, { "epoch": 7.814814814814815, "grad_norm": 0.2363758683204651, "learning_rate": 3.527004220440455e-05, "loss": 0.6499, "step": 6861 }, { "epoch": 7.815954415954416, "grad_norm": 0.19995512068271637, "learning_rate": 3.526579413544259e-05, "loss": 0.7763, "step": 6862 }, { "epoch": 7.817094017094017, "grad_norm": 0.159713476896286, "learning_rate": 3.526154570992137e-05, "loss": 0.7089, "step": 6863 }, { "epoch": 7.818233618233618, "grad_norm": 0.16131985187530518, "learning_rate": 3.5257296927988454e-05, "loss": 0.8548, "step": 6864 }, { "epoch": 7.81937321937322, "grad_norm": 0.16453804075717926, "learning_rate": 3.525304778979141e-05, "loss": 0.8073, "step": 6865 }, { "epoch": 7.82051282051282, "grad_norm": 0.16888372600078583, "learning_rate": 3.52487982954778e-05, "loss": 0.8918, "step": 6866 }, { "epoch": 7.821652421652422, "grad_norm": 0.3027987480163574, "learning_rate": 3.524454844519526e-05, "loss": 0.7031, "step": 6867 }, { "epoch": 7.822792022792023, "grad_norm": 0.16794198751449585, "learning_rate": 3.524029823909138e-05, "loss": 0.7765, "step": 6868 }, { "epoch": 7.823931623931624, "grad_norm": 0.18534980714321136, "learning_rate": 3.523604767731378e-05, "loss": 0.7402, "step": 6869 }, { "epoch": 7.825071225071225, "grad_norm": 0.18222463130950928, "learning_rate": 3.523179676001009e-05, "loss": 0.6913, "step": 6870 }, { "epoch": 7.8262108262108265, "grad_norm": 0.20649908483028412, "learning_rate": 3.5227545487327956e-05, "loss": 0.7535, "step": 6871 }, { "epoch": 7.827350427350427, "grad_norm": 0.20010706782341003, "learning_rate": 3.5223293859415044e-05, "loss": 0.7566, "step": 6872 }, { "epoch": 7.8284900284900285, "grad_norm": 0.23376654088497162, "learning_rate": 3.5219041876419036e-05, "loss": 0.8216, "step": 6873 }, { "epoch": 7.82962962962963, "grad_norm": 0.20192305743694305, "learning_rate": 3.5214789538487586e-05, "loss": 0.7339, "step": 6874 }, { "epoch": 7.8307692307692305, "grad_norm": 0.19668714702129364, "learning_rate": 3.521053684576842e-05, "loss": 0.6646, "step": 6875 }, { "epoch": 7.831908831908832, "grad_norm": 0.2026573270559311, "learning_rate": 3.520628379840921e-05, "loss": 0.802, "step": 6876 }, { "epoch": 7.833048433048433, "grad_norm": 0.1732260137796402, "learning_rate": 3.520203039655771e-05, "loss": 0.839, "step": 6877 }, { "epoch": 7.834188034188034, "grad_norm": 0.19205796718597412, "learning_rate": 3.5197776640361636e-05, "loss": 0.7313, "step": 6878 }, { "epoch": 7.835327635327635, "grad_norm": 0.24481089413166046, "learning_rate": 3.519352252996873e-05, "loss": 0.5561, "step": 6879 }, { "epoch": 7.836467236467237, "grad_norm": 0.143026664853096, "learning_rate": 3.5189268065526765e-05, "loss": 0.8714, "step": 6880 }, { "epoch": 7.837606837606837, "grad_norm": 0.16897006332874298, "learning_rate": 3.518501324718349e-05, "loss": 0.8478, "step": 6881 }, { "epoch": 7.838746438746439, "grad_norm": 0.21014583110809326, "learning_rate": 3.51807580750867e-05, "loss": 0.6813, "step": 6882 }, { "epoch": 7.83988603988604, "grad_norm": 0.1924782395362854, "learning_rate": 3.5176502549384184e-05, "loss": 0.8273, "step": 6883 }, { "epoch": 7.841025641025641, "grad_norm": 0.17792312800884247, "learning_rate": 3.517224667022375e-05, "loss": 0.8583, "step": 6884 }, { "epoch": 7.842165242165242, "grad_norm": 0.2235907018184662, "learning_rate": 3.516799043775321e-05, "loss": 0.5991, "step": 6885 }, { "epoch": 7.843304843304844, "grad_norm": 0.19309446215629578, "learning_rate": 3.5163733852120404e-05, "loss": 0.6939, "step": 6886 }, { "epoch": 7.844444444444444, "grad_norm": 0.21489350497722626, "learning_rate": 3.515947691347318e-05, "loss": 0.7796, "step": 6887 }, { "epoch": 7.8455840455840455, "grad_norm": 0.19922323524951935, "learning_rate": 3.515521962195937e-05, "loss": 0.6368, "step": 6888 }, { "epoch": 7.846723646723647, "grad_norm": 0.23364771902561188, "learning_rate": 3.515096197772686e-05, "loss": 0.5944, "step": 6889 }, { "epoch": 7.8478632478632475, "grad_norm": 0.21053661406040192, "learning_rate": 3.514670398092353e-05, "loss": 0.681, "step": 6890 }, { "epoch": 7.849002849002849, "grad_norm": 0.1727704405784607, "learning_rate": 3.5142445631697264e-05, "loss": 0.748, "step": 6891 }, { "epoch": 7.85014245014245, "grad_norm": 0.16221095621585846, "learning_rate": 3.5138186930195974e-05, "loss": 0.8736, "step": 6892 }, { "epoch": 7.851282051282051, "grad_norm": 0.19573287665843964, "learning_rate": 3.5133927876567556e-05, "loss": 0.9099, "step": 6893 }, { "epoch": 7.852421652421652, "grad_norm": 0.19836091995239258, "learning_rate": 3.5129668470959964e-05, "loss": 0.5698, "step": 6894 }, { "epoch": 7.853561253561254, "grad_norm": 0.1833997517824173, "learning_rate": 3.512540871352114e-05, "loss": 0.8006, "step": 6895 }, { "epoch": 7.854700854700854, "grad_norm": 0.1837073266506195, "learning_rate": 3.5121148604399014e-05, "loss": 0.7439, "step": 6896 }, { "epoch": 7.855840455840456, "grad_norm": 0.21929405629634857, "learning_rate": 3.511688814374157e-05, "loss": 0.6605, "step": 6897 }, { "epoch": 7.856980056980057, "grad_norm": 0.2653679847717285, "learning_rate": 3.511262733169677e-05, "loss": 0.5247, "step": 6898 }, { "epoch": 7.858119658119658, "grad_norm": 0.14746218919754028, "learning_rate": 3.510836616841262e-05, "loss": 0.9149, "step": 6899 }, { "epoch": 7.859259259259259, "grad_norm": 0.23637035489082336, "learning_rate": 3.5104104654037106e-05, "loss": 0.6419, "step": 6900 }, { "epoch": 7.860398860398861, "grad_norm": 0.21475614607334137, "learning_rate": 3.509984278871826e-05, "loss": 0.7055, "step": 6901 }, { "epoch": 7.861538461538462, "grad_norm": 0.20276354253292084, "learning_rate": 3.50955805726041e-05, "loss": 0.7792, "step": 6902 }, { "epoch": 7.862678062678063, "grad_norm": 0.2001870721578598, "learning_rate": 3.509131800584267e-05, "loss": 0.7039, "step": 6903 }, { "epoch": 7.863817663817664, "grad_norm": 0.27086666226387024, "learning_rate": 3.5087055088581996e-05, "loss": 0.5863, "step": 6904 }, { "epoch": 7.864957264957265, "grad_norm": 0.18609215319156647, "learning_rate": 3.5082791820970175e-05, "loss": 0.766, "step": 6905 }, { "epoch": 7.866096866096866, "grad_norm": 0.20789191126823425, "learning_rate": 3.507852820315525e-05, "loss": 0.7687, "step": 6906 }, { "epoch": 7.867236467236467, "grad_norm": 0.20047178864479065, "learning_rate": 3.507426423528534e-05, "loss": 0.6216, "step": 6907 }, { "epoch": 7.868376068376069, "grad_norm": 0.16607266664505005, "learning_rate": 3.506999991750852e-05, "loss": 0.7783, "step": 6908 }, { "epoch": 7.869515669515669, "grad_norm": 0.17001698911190033, "learning_rate": 3.506573524997292e-05, "loss": 0.7621, "step": 6909 }, { "epoch": 7.870655270655271, "grad_norm": 0.23012277483940125, "learning_rate": 3.506147023282665e-05, "loss": 0.6566, "step": 6910 }, { "epoch": 7.871794871794872, "grad_norm": 0.20534779131412506, "learning_rate": 3.505720486621785e-05, "loss": 0.7052, "step": 6911 }, { "epoch": 7.872934472934473, "grad_norm": 0.15706349909305573, "learning_rate": 3.5052939150294664e-05, "loss": 0.9746, "step": 6912 }, { "epoch": 7.874074074074074, "grad_norm": 0.17365200817584991, "learning_rate": 3.5048673085205255e-05, "loss": 0.8463, "step": 6913 }, { "epoch": 7.875213675213676, "grad_norm": 0.18620894849300385, "learning_rate": 3.50444066710978e-05, "loss": 0.7948, "step": 6914 }, { "epoch": 7.876353276353276, "grad_norm": 0.1837530881166458, "learning_rate": 3.5040139908120466e-05, "loss": 0.6109, "step": 6915 }, { "epoch": 7.877492877492878, "grad_norm": 0.1721877157688141, "learning_rate": 3.503587279642148e-05, "loss": 0.9464, "step": 6916 }, { "epoch": 7.878632478632479, "grad_norm": 0.19873687624931335, "learning_rate": 3.503160533614903e-05, "loss": 0.7423, "step": 6917 }, { "epoch": 7.87977207977208, "grad_norm": 0.2068408578634262, "learning_rate": 3.502733752745133e-05, "loss": 0.6031, "step": 6918 }, { "epoch": 7.880911680911681, "grad_norm": 0.21217504143714905, "learning_rate": 3.502306937047663e-05, "loss": 0.7438, "step": 6919 }, { "epoch": 7.8820512820512825, "grad_norm": 0.17609016597270966, "learning_rate": 3.5018800865373166e-05, "loss": 0.7646, "step": 6920 }, { "epoch": 7.883190883190883, "grad_norm": 0.2181202918291092, "learning_rate": 3.50145320122892e-05, "loss": 0.81, "step": 6921 }, { "epoch": 7.8843304843304844, "grad_norm": 0.2040448784828186, "learning_rate": 3.5010262811372984e-05, "loss": 0.8014, "step": 6922 }, { "epoch": 7.885470085470086, "grad_norm": 0.21069930493831635, "learning_rate": 3.5005993262772826e-05, "loss": 0.7258, "step": 6923 }, { "epoch": 7.886609686609686, "grad_norm": 0.2277589589357376, "learning_rate": 3.5001723366637e-05, "loss": 0.5516, "step": 6924 }, { "epoch": 7.887749287749288, "grad_norm": 0.24380171298980713, "learning_rate": 3.4997453123113816e-05, "loss": 0.6724, "step": 6925 }, { "epoch": 7.888888888888889, "grad_norm": 0.19442526996135712, "learning_rate": 3.499318253235159e-05, "loss": 0.7346, "step": 6926 }, { "epoch": 7.89002849002849, "grad_norm": 0.16563531756401062, "learning_rate": 3.4988911594498656e-05, "loss": 0.874, "step": 6927 }, { "epoch": 7.891168091168091, "grad_norm": 0.16393303871154785, "learning_rate": 3.498464030970335e-05, "loss": 0.7987, "step": 6928 }, { "epoch": 7.892307692307693, "grad_norm": 0.19510045647621155, "learning_rate": 3.498036867811404e-05, "loss": 0.8063, "step": 6929 }, { "epoch": 7.893447293447293, "grad_norm": 0.16404707729816437, "learning_rate": 3.4976096699879066e-05, "loss": 0.7595, "step": 6930 }, { "epoch": 7.894586894586895, "grad_norm": 0.21610978245735168, "learning_rate": 3.497182437514683e-05, "loss": 0.7916, "step": 6931 }, { "epoch": 7.895726495726496, "grad_norm": 0.22689557075500488, "learning_rate": 3.49675517040657e-05, "loss": 0.6256, "step": 6932 }, { "epoch": 7.896866096866097, "grad_norm": 0.1903616040945053, "learning_rate": 3.4963278686784096e-05, "loss": 0.6609, "step": 6933 }, { "epoch": 7.898005698005698, "grad_norm": 0.1872972548007965, "learning_rate": 3.4959005323450416e-05, "loss": 0.8125, "step": 6934 }, { "epoch": 7.8991452991452995, "grad_norm": 0.20764897763729095, "learning_rate": 3.4954731614213105e-05, "loss": 0.7985, "step": 6935 }, { "epoch": 7.9002849002849, "grad_norm": 0.17833660542964935, "learning_rate": 3.495045755922058e-05, "loss": 0.78, "step": 6936 }, { "epoch": 7.9014245014245015, "grad_norm": 0.30074813961982727, "learning_rate": 3.4946183158621306e-05, "loss": 0.777, "step": 6937 }, { "epoch": 7.902564102564103, "grad_norm": 0.21130868792533875, "learning_rate": 3.4941908412563735e-05, "loss": 0.6284, "step": 6938 }, { "epoch": 7.9037037037037035, "grad_norm": 0.19078730046749115, "learning_rate": 3.493763332119634e-05, "loss": 0.7542, "step": 6939 }, { "epoch": 7.904843304843305, "grad_norm": 0.22114410996437073, "learning_rate": 3.4933357884667626e-05, "loss": 0.593, "step": 6940 }, { "epoch": 7.905982905982906, "grad_norm": 0.27360519766807556, "learning_rate": 3.4929082103126066e-05, "loss": 0.5625, "step": 6941 }, { "epoch": 7.907122507122507, "grad_norm": 0.21668018400669098, "learning_rate": 3.4924805976720185e-05, "loss": 0.6908, "step": 6942 }, { "epoch": 7.908262108262108, "grad_norm": 0.1980413794517517, "learning_rate": 3.4920529505598484e-05, "loss": 0.6576, "step": 6943 }, { "epoch": 7.90940170940171, "grad_norm": 0.17020384967327118, "learning_rate": 3.4916252689909527e-05, "loss": 0.886, "step": 6944 }, { "epoch": 7.91054131054131, "grad_norm": 0.22337353229522705, "learning_rate": 3.491197552980184e-05, "loss": 0.7651, "step": 6945 }, { "epoch": 7.911680911680912, "grad_norm": 0.24993619322776794, "learning_rate": 3.4907698025423984e-05, "loss": 0.5218, "step": 6946 }, { "epoch": 7.912820512820513, "grad_norm": 0.18221551179885864, "learning_rate": 3.4903420176924525e-05, "loss": 0.8951, "step": 6947 }, { "epoch": 7.913960113960114, "grad_norm": 0.24022223055362701, "learning_rate": 3.489914198445206e-05, "loss": 0.5555, "step": 6948 }, { "epoch": 7.915099715099715, "grad_norm": 0.21565523743629456, "learning_rate": 3.489486344815516e-05, "loss": 0.7623, "step": 6949 }, { "epoch": 7.9162393162393165, "grad_norm": 0.19092397391796112, "learning_rate": 3.489058456818244e-05, "loss": 0.7738, "step": 6950 }, { "epoch": 7.917378917378917, "grad_norm": 0.2190237194299698, "learning_rate": 3.4886305344682526e-05, "loss": 0.7363, "step": 6951 }, { "epoch": 7.9185185185185185, "grad_norm": 0.19466805458068848, "learning_rate": 3.488202577780405e-05, "loss": 0.7224, "step": 6952 }, { "epoch": 7.91965811965812, "grad_norm": 0.2089778035879135, "learning_rate": 3.487774586769562e-05, "loss": 0.6605, "step": 6953 }, { "epoch": 7.9207977207977205, "grad_norm": 0.20725427567958832, "learning_rate": 3.487346561450593e-05, "loss": 0.7964, "step": 6954 }, { "epoch": 7.921937321937322, "grad_norm": 0.16331282258033752, "learning_rate": 3.4869185018383624e-05, "loss": 0.9115, "step": 6955 }, { "epoch": 7.923076923076923, "grad_norm": 0.18405237793922424, "learning_rate": 3.4864904079477376e-05, "loss": 0.8216, "step": 6956 }, { "epoch": 7.924216524216524, "grad_norm": 0.17434020340442657, "learning_rate": 3.4860622797935885e-05, "loss": 0.7459, "step": 6957 }, { "epoch": 7.925356125356125, "grad_norm": 0.2535775899887085, "learning_rate": 3.4856341173907844e-05, "loss": 0.4788, "step": 6958 }, { "epoch": 7.926495726495727, "grad_norm": 0.2321566939353943, "learning_rate": 3.485205920754198e-05, "loss": 0.5672, "step": 6959 }, { "epoch": 7.927635327635327, "grad_norm": 0.21712049841880798, "learning_rate": 3.484777689898698e-05, "loss": 0.5446, "step": 6960 }, { "epoch": 7.928774928774929, "grad_norm": 0.210310697555542, "learning_rate": 3.484349424839163e-05, "loss": 0.7157, "step": 6961 }, { "epoch": 7.92991452991453, "grad_norm": 0.18413089215755463, "learning_rate": 3.4839211255904646e-05, "loss": 0.8803, "step": 6962 }, { "epoch": 7.931054131054131, "grad_norm": 0.23209485411643982, "learning_rate": 3.4834927921674807e-05, "loss": 0.5997, "step": 6963 }, { "epoch": 7.932193732193732, "grad_norm": 0.16725648939609528, "learning_rate": 3.4830644245850864e-05, "loss": 0.943, "step": 6964 }, { "epoch": 7.933333333333334, "grad_norm": 0.23912696540355682, "learning_rate": 3.482636022858162e-05, "loss": 0.602, "step": 6965 }, { "epoch": 7.934472934472934, "grad_norm": 0.1723431497812271, "learning_rate": 3.482207587001585e-05, "loss": 0.8787, "step": 6966 }, { "epoch": 7.9356125356125355, "grad_norm": 0.20418599247932434, "learning_rate": 3.48177911703024e-05, "loss": 0.6305, "step": 6967 }, { "epoch": 7.936752136752137, "grad_norm": 0.19222919642925262, "learning_rate": 3.4813506129590035e-05, "loss": 0.8004, "step": 6968 }, { "epoch": 7.9378917378917375, "grad_norm": 0.1631591022014618, "learning_rate": 3.480922074802763e-05, "loss": 0.8736, "step": 6969 }, { "epoch": 7.939031339031339, "grad_norm": 0.20801420509815216, "learning_rate": 3.480493502576401e-05, "loss": 0.7844, "step": 6970 }, { "epoch": 7.94017094017094, "grad_norm": 0.18752451241016388, "learning_rate": 3.480064896294803e-05, "loss": 0.8168, "step": 6971 }, { "epoch": 7.941310541310541, "grad_norm": 0.20247039198875427, "learning_rate": 3.479636255972857e-05, "loss": 0.7284, "step": 6972 }, { "epoch": 7.942450142450142, "grad_norm": 0.16914136707782745, "learning_rate": 3.479207581625448e-05, "loss": 0.9514, "step": 6973 }, { "epoch": 7.943589743589744, "grad_norm": 0.19120515882968903, "learning_rate": 3.478778873267468e-05, "loss": 0.7901, "step": 6974 }, { "epoch": 7.944729344729344, "grad_norm": 0.17753508687019348, "learning_rate": 3.478350130913807e-05, "loss": 0.9008, "step": 6975 }, { "epoch": 7.945868945868946, "grad_norm": 0.14952565729618073, "learning_rate": 3.4779213545793536e-05, "loss": 0.872, "step": 6976 }, { "epoch": 7.947008547008547, "grad_norm": 0.22527769207954407, "learning_rate": 3.477492544279003e-05, "loss": 0.5966, "step": 6977 }, { "epoch": 7.948148148148148, "grad_norm": 0.20993876457214355, "learning_rate": 3.477063700027648e-05, "loss": 0.6283, "step": 6978 }, { "epoch": 7.949287749287749, "grad_norm": 0.19296513497829437, "learning_rate": 3.4766348218401846e-05, "loss": 0.791, "step": 6979 }, { "epoch": 7.950427350427351, "grad_norm": 0.18066520988941193, "learning_rate": 3.476205909731508e-05, "loss": 0.7611, "step": 6980 }, { "epoch": 7.951566951566951, "grad_norm": 0.2059728056192398, "learning_rate": 3.475776963716515e-05, "loss": 0.7608, "step": 6981 }, { "epoch": 7.952706552706553, "grad_norm": 0.18269841372966766, "learning_rate": 3.475347983810105e-05, "loss": 0.7013, "step": 6982 }, { "epoch": 7.953846153846154, "grad_norm": 0.2518905997276306, "learning_rate": 3.474918970027177e-05, "loss": 1.0275, "step": 6983 }, { "epoch": 7.9549857549857546, "grad_norm": 0.2201099395751953, "learning_rate": 3.474489922382632e-05, "loss": 0.7612, "step": 6984 }, { "epoch": 7.956125356125356, "grad_norm": 0.21568866074085236, "learning_rate": 3.474060840891372e-05, "loss": 0.7361, "step": 6985 }, { "epoch": 7.957264957264957, "grad_norm": 0.22379179298877716, "learning_rate": 3.4736317255683015e-05, "loss": 0.656, "step": 6986 }, { "epoch": 7.958404558404558, "grad_norm": 0.1808573603630066, "learning_rate": 3.473202576428323e-05, "loss": 0.7515, "step": 6987 }, { "epoch": 7.959544159544159, "grad_norm": 0.18532869219779968, "learning_rate": 3.472773393486342e-05, "loss": 0.7191, "step": 6988 }, { "epoch": 7.960683760683761, "grad_norm": 0.18520545959472656, "learning_rate": 3.472344176757267e-05, "loss": 0.9918, "step": 6989 }, { "epoch": 7.961823361823361, "grad_norm": 0.19397109746932983, "learning_rate": 3.471914926256004e-05, "loss": 0.758, "step": 6990 }, { "epoch": 7.962962962962963, "grad_norm": 0.24348177015781403, "learning_rate": 3.471485641997463e-05, "loss": 0.6164, "step": 6991 }, { "epoch": 7.964102564102564, "grad_norm": 0.41862666606903076, "learning_rate": 3.471056323996554e-05, "loss": 0.8977, "step": 6992 }, { "epoch": 7.965242165242165, "grad_norm": 0.18165546655654907, "learning_rate": 3.470626972268189e-05, "loss": 0.7355, "step": 6993 }, { "epoch": 7.966381766381766, "grad_norm": 0.19281134009361267, "learning_rate": 3.47019758682728e-05, "loss": 0.7676, "step": 6994 }, { "epoch": 7.967521367521368, "grad_norm": 0.21593524515628815, "learning_rate": 3.46976816768874e-05, "loss": 0.762, "step": 6995 }, { "epoch": 7.968660968660968, "grad_norm": 0.230185404419899, "learning_rate": 3.469338714867485e-05, "loss": 0.6806, "step": 6996 }, { "epoch": 7.96980056980057, "grad_norm": 0.17793694138526917, "learning_rate": 3.468909228378431e-05, "loss": 0.9328, "step": 6997 }, { "epoch": 7.970940170940171, "grad_norm": 0.17860686779022217, "learning_rate": 3.468479708236495e-05, "loss": 0.6889, "step": 6998 }, { "epoch": 7.972079772079772, "grad_norm": 0.2190643548965454, "learning_rate": 3.468050154456595e-05, "loss": 0.6826, "step": 6999 }, { "epoch": 7.973219373219373, "grad_norm": 0.19970053434371948, "learning_rate": 3.467620567053651e-05, "loss": 0.6507, "step": 7000 }, { "epoch": 7.9743589743589745, "grad_norm": 0.2116745263338089, "learning_rate": 3.467190946042584e-05, "loss": 0.8031, "step": 7001 }, { "epoch": 7.975498575498576, "grad_norm": 0.18867363035678864, "learning_rate": 3.466761291438315e-05, "loss": 0.7361, "step": 7002 }, { "epoch": 7.976638176638176, "grad_norm": 0.19578492641448975, "learning_rate": 3.466331603255769e-05, "loss": 0.8706, "step": 7003 }, { "epoch": 7.977777777777778, "grad_norm": 0.23488479852676392, "learning_rate": 3.4659018815098684e-05, "loss": 0.5953, "step": 7004 }, { "epoch": 7.978917378917379, "grad_norm": 0.20616556704044342, "learning_rate": 3.4654721262155385e-05, "loss": 0.6429, "step": 7005 }, { "epoch": 7.98005698005698, "grad_norm": 0.1781628131866455, "learning_rate": 3.4650423373877076e-05, "loss": 0.8953, "step": 7006 }, { "epoch": 7.981196581196581, "grad_norm": 0.17765744030475616, "learning_rate": 3.464612515041302e-05, "loss": 0.7049, "step": 7007 }, { "epoch": 7.982336182336183, "grad_norm": 0.2767314016819, "learning_rate": 3.464182659191252e-05, "loss": 0.4096, "step": 7008 }, { "epoch": 7.983475783475783, "grad_norm": 0.18152180314064026, "learning_rate": 3.463752769852485e-05, "loss": 0.8218, "step": 7009 }, { "epoch": 7.984615384615385, "grad_norm": 0.2066626101732254, "learning_rate": 3.463322847039935e-05, "loss": 0.7268, "step": 7010 }, { "epoch": 7.985754985754986, "grad_norm": 0.2450648546218872, "learning_rate": 3.462892890768533e-05, "loss": 0.5793, "step": 7011 }, { "epoch": 7.986894586894587, "grad_norm": 0.2684401571750641, "learning_rate": 3.4624629010532136e-05, "loss": 0.7406, "step": 7012 }, { "epoch": 7.988034188034188, "grad_norm": 0.25210240483283997, "learning_rate": 3.46203287790891e-05, "loss": 0.6941, "step": 7013 }, { "epoch": 7.9891737891737895, "grad_norm": 0.17891636490821838, "learning_rate": 3.46160282135056e-05, "loss": 0.8529, "step": 7014 }, { "epoch": 7.99031339031339, "grad_norm": 0.16356289386749268, "learning_rate": 3.461172731393098e-05, "loss": 0.9962, "step": 7015 }, { "epoch": 7.9914529914529915, "grad_norm": 0.23235754668712616, "learning_rate": 3.460742608051466e-05, "loss": 0.6886, "step": 7016 }, { "epoch": 7.992592592592593, "grad_norm": 0.15596100687980652, "learning_rate": 3.4603124513405995e-05, "loss": 1.047, "step": 7017 }, { "epoch": 7.9937321937321935, "grad_norm": 0.23271232843399048, "learning_rate": 3.4598822612754413e-05, "loss": 0.8109, "step": 7018 }, { "epoch": 7.994871794871795, "grad_norm": 0.15891754627227783, "learning_rate": 3.459452037870933e-05, "loss": 0.8591, "step": 7019 }, { "epoch": 7.996011396011396, "grad_norm": 0.23841074109077454, "learning_rate": 3.459021781142015e-05, "loss": 0.6763, "step": 7020 }, { "epoch": 7.997150997150997, "grad_norm": 0.25842007994651794, "learning_rate": 3.458591491103635e-05, "loss": 0.7704, "step": 7021 }, { "epoch": 7.998290598290598, "grad_norm": 0.16829439997673035, "learning_rate": 3.458161167770735e-05, "loss": 0.8967, "step": 7022 }, { "epoch": 7.9994301994302, "grad_norm": 0.19539670646190643, "learning_rate": 3.457730811158264e-05, "loss": 0.7824, "step": 7023 }, { "epoch": 8.0, "grad_norm": 0.36949995160102844, "learning_rate": 3.4573004212811675e-05, "loss": 0.6208, "step": 7024 }, { "epoch": 8.0011396011396, "grad_norm": 0.16737952828407288, "learning_rate": 3.456869998154396e-05, "loss": 0.789, "step": 7025 }, { "epoch": 8.002279202279203, "grad_norm": 0.1565866768360138, "learning_rate": 3.456439541792896e-05, "loss": 0.8853, "step": 7026 }, { "epoch": 8.003418803418803, "grad_norm": 0.17611169815063477, "learning_rate": 3.456009052211622e-05, "loss": 0.8464, "step": 7027 }, { "epoch": 8.004558404558404, "grad_norm": 0.18413347005844116, "learning_rate": 3.455578529425524e-05, "loss": 0.8049, "step": 7028 }, { "epoch": 8.005698005698006, "grad_norm": 0.1993255913257599, "learning_rate": 3.455147973449556e-05, "loss": 0.6136, "step": 7029 }, { "epoch": 8.006837606837607, "grad_norm": 0.18642333149909973, "learning_rate": 3.454717384298672e-05, "loss": 0.81, "step": 7030 }, { "epoch": 8.007977207977207, "grad_norm": 0.21787570416927338, "learning_rate": 3.4542867619878275e-05, "loss": 0.7353, "step": 7031 }, { "epoch": 8.00911680911681, "grad_norm": 0.1952671855688095, "learning_rate": 3.45385610653198e-05, "loss": 0.807, "step": 7032 }, { "epoch": 8.01025641025641, "grad_norm": 0.2039884477853775, "learning_rate": 3.453425417946087e-05, "loss": 0.7051, "step": 7033 }, { "epoch": 8.01139601139601, "grad_norm": 0.1795361191034317, "learning_rate": 3.452994696245107e-05, "loss": 0.7775, "step": 7034 }, { "epoch": 8.012535612535613, "grad_norm": 0.1974959373474121, "learning_rate": 3.452563941444e-05, "loss": 0.6406, "step": 7035 }, { "epoch": 8.013675213675214, "grad_norm": 0.21458816528320312, "learning_rate": 3.452133153557729e-05, "loss": 0.6946, "step": 7036 }, { "epoch": 8.014814814814814, "grad_norm": 0.23039047420024872, "learning_rate": 3.451702332601254e-05, "loss": 0.4753, "step": 7037 }, { "epoch": 8.015954415954416, "grad_norm": 0.17048563063144684, "learning_rate": 3.45127147858954e-05, "loss": 0.6948, "step": 7038 }, { "epoch": 8.017094017094017, "grad_norm": 0.2207256555557251, "learning_rate": 3.450840591537552e-05, "loss": 0.5992, "step": 7039 }, { "epoch": 8.018233618233618, "grad_norm": 0.22948038578033447, "learning_rate": 3.4504096714602554e-05, "loss": 0.5309, "step": 7040 }, { "epoch": 8.01937321937322, "grad_norm": 0.17594662308692932, "learning_rate": 3.4499787183726165e-05, "loss": 0.6524, "step": 7041 }, { "epoch": 8.02051282051282, "grad_norm": 0.15962903201580048, "learning_rate": 3.4495477322896047e-05, "loss": 0.8802, "step": 7042 }, { "epoch": 8.021652421652421, "grad_norm": 0.17640933394432068, "learning_rate": 3.4491167132261886e-05, "loss": 0.7508, "step": 7043 }, { "epoch": 8.022792022792023, "grad_norm": 0.18207409977912903, "learning_rate": 3.44868566119734e-05, "loss": 0.7672, "step": 7044 }, { "epoch": 8.023931623931624, "grad_norm": 0.20971670746803284, "learning_rate": 3.448254576218029e-05, "loss": 0.6199, "step": 7045 }, { "epoch": 8.025071225071224, "grad_norm": 0.20471307635307312, "learning_rate": 3.4478234583032284e-05, "loss": 0.6351, "step": 7046 }, { "epoch": 8.026210826210827, "grad_norm": 0.18834306299686432, "learning_rate": 3.4473923074679126e-05, "loss": 0.6741, "step": 7047 }, { "epoch": 8.027350427350427, "grad_norm": 0.1725883185863495, "learning_rate": 3.4469611237270574e-05, "loss": 0.8075, "step": 7048 }, { "epoch": 8.028490028490028, "grad_norm": 0.16943272948265076, "learning_rate": 3.446529907095638e-05, "loss": 0.667, "step": 7049 }, { "epoch": 8.02962962962963, "grad_norm": 0.1960059255361557, "learning_rate": 3.446098657588631e-05, "loss": 0.6117, "step": 7050 }, { "epoch": 8.03076923076923, "grad_norm": 0.20933693647384644, "learning_rate": 3.445667375221017e-05, "loss": 0.6904, "step": 7051 }, { "epoch": 8.031908831908831, "grad_norm": 0.1454627364873886, "learning_rate": 3.445236060007774e-05, "loss": 0.9105, "step": 7052 }, { "epoch": 8.033048433048434, "grad_norm": 0.20399250090122223, "learning_rate": 3.4448047119638824e-05, "loss": 0.8021, "step": 7053 }, { "epoch": 8.034188034188034, "grad_norm": 0.19561836123466492, "learning_rate": 3.444373331104326e-05, "loss": 0.8751, "step": 7054 }, { "epoch": 8.035327635327635, "grad_norm": 0.17703506350517273, "learning_rate": 3.4439419174440865e-05, "loss": 0.7238, "step": 7055 }, { "epoch": 8.036467236467237, "grad_norm": 0.1614612489938736, "learning_rate": 3.443510470998149e-05, "loss": 0.8676, "step": 7056 }, { "epoch": 8.037606837606837, "grad_norm": 0.20640437304973602, "learning_rate": 3.443078991781498e-05, "loss": 0.4616, "step": 7057 }, { "epoch": 8.038746438746438, "grad_norm": 0.21130231022834778, "learning_rate": 3.442647479809119e-05, "loss": 0.7574, "step": 7058 }, { "epoch": 8.03988603988604, "grad_norm": 0.18179568648338318, "learning_rate": 3.442215935096001e-05, "loss": 0.7957, "step": 7059 }, { "epoch": 8.04102564102564, "grad_norm": 0.21558696031570435, "learning_rate": 3.441784357657133e-05, "loss": 0.6683, "step": 7060 }, { "epoch": 8.042165242165241, "grad_norm": 0.18749386072158813, "learning_rate": 3.441352747507504e-05, "loss": 0.7306, "step": 7061 }, { "epoch": 8.043304843304844, "grad_norm": 0.19328436255455017, "learning_rate": 3.440921104662106e-05, "loss": 0.6622, "step": 7062 }, { "epoch": 8.044444444444444, "grad_norm": 0.14162495732307434, "learning_rate": 3.44048942913593e-05, "loss": 0.9996, "step": 7063 }, { "epoch": 8.045584045584045, "grad_norm": 0.19856049120426178, "learning_rate": 3.44005772094397e-05, "loss": 0.678, "step": 7064 }, { "epoch": 8.046723646723647, "grad_norm": 0.21557380259037018, "learning_rate": 3.43962598010122e-05, "loss": 0.8354, "step": 7065 }, { "epoch": 8.047863247863248, "grad_norm": 0.21577595174312592, "learning_rate": 3.4391942066226755e-05, "loss": 0.8101, "step": 7066 }, { "epoch": 8.049002849002848, "grad_norm": 0.18127447366714478, "learning_rate": 3.438762400523334e-05, "loss": 0.6539, "step": 7067 }, { "epoch": 8.05014245014245, "grad_norm": 0.17225967347621918, "learning_rate": 3.4383305618181925e-05, "loss": 0.7079, "step": 7068 }, { "epoch": 8.051282051282051, "grad_norm": 0.2191513627767563, "learning_rate": 3.43789869052225e-05, "loss": 0.7383, "step": 7069 }, { "epoch": 8.052421652421652, "grad_norm": 0.19804395735263824, "learning_rate": 3.437466786650506e-05, "loss": 0.8199, "step": 7070 }, { "epoch": 8.053561253561254, "grad_norm": 0.17152638733386993, "learning_rate": 3.4370348502179634e-05, "loss": 0.9667, "step": 7071 }, { "epoch": 8.054700854700855, "grad_norm": 0.21188904345035553, "learning_rate": 3.436602881239624e-05, "loss": 0.7232, "step": 7072 }, { "epoch": 8.055840455840455, "grad_norm": 0.5748328566551208, "learning_rate": 3.43617087973049e-05, "loss": 0.5792, "step": 7073 }, { "epoch": 8.056980056980057, "grad_norm": 0.13886001706123352, "learning_rate": 3.435738845705567e-05, "loss": 0.9684, "step": 7074 }, { "epoch": 8.058119658119658, "grad_norm": 0.36263221502304077, "learning_rate": 3.435306779179861e-05, "loss": 0.7062, "step": 7075 }, { "epoch": 8.059259259259258, "grad_norm": 0.17102155089378357, "learning_rate": 3.4348746801683784e-05, "loss": 0.7513, "step": 7076 }, { "epoch": 8.06039886039886, "grad_norm": 0.2088032364845276, "learning_rate": 3.434442548686127e-05, "loss": 0.6356, "step": 7077 }, { "epoch": 8.061538461538461, "grad_norm": 0.26760900020599365, "learning_rate": 3.4340103847481165e-05, "loss": 0.661, "step": 7078 }, { "epoch": 8.062678062678062, "grad_norm": 0.2030276656150818, "learning_rate": 3.433578188369356e-05, "loss": 0.6599, "step": 7079 }, { "epoch": 8.063817663817664, "grad_norm": 0.21676896512508392, "learning_rate": 3.433145959564859e-05, "loss": 0.7473, "step": 7080 }, { "epoch": 8.064957264957265, "grad_norm": 0.17342697083950043, "learning_rate": 3.4327136983496356e-05, "loss": 0.9241, "step": 7081 }, { "epoch": 8.066096866096865, "grad_norm": 0.22797280550003052, "learning_rate": 3.4322814047387e-05, "loss": 0.5714, "step": 7082 }, { "epoch": 8.067236467236468, "grad_norm": 0.24386514723300934, "learning_rate": 3.431849078747069e-05, "loss": 0.6692, "step": 7083 }, { "epoch": 8.068376068376068, "grad_norm": 0.24640591442584991, "learning_rate": 3.4314167203897555e-05, "loss": 0.6181, "step": 7084 }, { "epoch": 8.069515669515669, "grad_norm": 0.2500792443752289, "learning_rate": 3.430984329681779e-05, "loss": 0.653, "step": 7085 }, { "epoch": 8.070655270655271, "grad_norm": 0.2008410394191742, "learning_rate": 3.4305519066381556e-05, "loss": 0.6139, "step": 7086 }, { "epoch": 8.071794871794872, "grad_norm": 0.2510213255882263, "learning_rate": 3.4301194512739055e-05, "loss": 0.4839, "step": 7087 }, { "epoch": 8.072934472934472, "grad_norm": 0.2027340531349182, "learning_rate": 3.42968696360405e-05, "loss": 0.6899, "step": 7088 }, { "epoch": 8.074074074074074, "grad_norm": 0.1756913959980011, "learning_rate": 3.429254443643609e-05, "loss": 0.8085, "step": 7089 }, { "epoch": 8.075213675213675, "grad_norm": 0.20395086705684662, "learning_rate": 3.4288218914076056e-05, "loss": 0.7696, "step": 7090 }, { "epoch": 8.076353276353275, "grad_norm": 0.1912129819393158, "learning_rate": 3.428389306911064e-05, "loss": 0.844, "step": 7091 }, { "epoch": 8.077492877492878, "grad_norm": 0.2174018770456314, "learning_rate": 3.427956690169008e-05, "loss": 0.6293, "step": 7092 }, { "epoch": 8.078632478632478, "grad_norm": 0.18037767708301544, "learning_rate": 3.427524041196464e-05, "loss": 0.9507, "step": 7093 }, { "epoch": 8.079772079772079, "grad_norm": 0.1751590073108673, "learning_rate": 3.427091360008461e-05, "loss": 0.7677, "step": 7094 }, { "epoch": 8.080911680911681, "grad_norm": 0.19041262567043304, "learning_rate": 3.426658646620024e-05, "loss": 0.7292, "step": 7095 }, { "epoch": 8.082051282051282, "grad_norm": 0.20226050913333893, "learning_rate": 3.426225901046185e-05, "loss": 0.7366, "step": 7096 }, { "epoch": 8.083190883190884, "grad_norm": 0.18322516977787018, "learning_rate": 3.425793123301973e-05, "loss": 0.6936, "step": 7097 }, { "epoch": 8.084330484330485, "grad_norm": 0.2583000659942627, "learning_rate": 3.4253603134024195e-05, "loss": 0.5171, "step": 7098 }, { "epoch": 8.085470085470085, "grad_norm": 0.19763217866420746, "learning_rate": 3.424927471362557e-05, "loss": 0.697, "step": 7099 }, { "epoch": 8.086609686609687, "grad_norm": 0.16299405694007874, "learning_rate": 3.424494597197421e-05, "loss": 0.8714, "step": 7100 }, { "epoch": 8.087749287749288, "grad_norm": 0.17452651262283325, "learning_rate": 3.424061690922045e-05, "loss": 0.8544, "step": 7101 }, { "epoch": 8.088888888888889, "grad_norm": 0.2546103596687317, "learning_rate": 3.423628752551466e-05, "loss": 0.5386, "step": 7102 }, { "epoch": 8.090028490028491, "grad_norm": 0.16574569046497345, "learning_rate": 3.4231957821007185e-05, "loss": 0.8548, "step": 7103 }, { "epoch": 8.091168091168091, "grad_norm": 0.15770331025123596, "learning_rate": 3.422762779584844e-05, "loss": 0.9192, "step": 7104 }, { "epoch": 8.092307692307692, "grad_norm": 0.1630685031414032, "learning_rate": 3.4223297450188796e-05, "loss": 0.772, "step": 7105 }, { "epoch": 8.093447293447294, "grad_norm": 0.20278406143188477, "learning_rate": 3.421896678417868e-05, "loss": 0.6406, "step": 7106 }, { "epoch": 8.094586894586895, "grad_norm": 0.2626460790634155, "learning_rate": 3.421463579796849e-05, "loss": 0.56, "step": 7107 }, { "epoch": 8.095726495726495, "grad_norm": 0.24302344024181366, "learning_rate": 3.421030449170866e-05, "loss": 0.5718, "step": 7108 }, { "epoch": 8.096866096866098, "grad_norm": 0.20670919120311737, "learning_rate": 3.4205972865549624e-05, "loss": 0.6991, "step": 7109 }, { "epoch": 8.098005698005698, "grad_norm": 0.2587011754512787, "learning_rate": 3.420164091964183e-05, "loss": 0.6819, "step": 7110 }, { "epoch": 8.099145299145299, "grad_norm": 0.20296776294708252, "learning_rate": 3.419730865413575e-05, "loss": 0.7154, "step": 7111 }, { "epoch": 8.100284900284901, "grad_norm": 0.2368420660495758, "learning_rate": 3.419297606918185e-05, "loss": 0.7073, "step": 7112 }, { "epoch": 8.101424501424502, "grad_norm": 0.1704382449388504, "learning_rate": 3.418864316493061e-05, "loss": 0.6907, "step": 7113 }, { "epoch": 8.102564102564102, "grad_norm": 0.2472260594367981, "learning_rate": 3.418430994153251e-05, "loss": 0.5698, "step": 7114 }, { "epoch": 8.103703703703705, "grad_norm": 0.19436900317668915, "learning_rate": 3.417997639913808e-05, "loss": 0.8179, "step": 7115 }, { "epoch": 8.104843304843305, "grad_norm": 0.20320023596286774, "learning_rate": 3.4175642537897824e-05, "loss": 0.7338, "step": 7116 }, { "epoch": 8.105982905982906, "grad_norm": 0.19544652104377747, "learning_rate": 3.417130835796227e-05, "loss": 0.6937, "step": 7117 }, { "epoch": 8.107122507122508, "grad_norm": 0.22964812815189362, "learning_rate": 3.4166973859481946e-05, "loss": 0.6568, "step": 7118 }, { "epoch": 8.108262108262108, "grad_norm": 0.19748233258724213, "learning_rate": 3.416263904260743e-05, "loss": 0.6063, "step": 7119 }, { "epoch": 8.109401709401709, "grad_norm": 0.26273640990257263, "learning_rate": 3.415830390748924e-05, "loss": 0.5052, "step": 7120 }, { "epoch": 8.110541310541311, "grad_norm": 0.1507258266210556, "learning_rate": 3.4153968454277984e-05, "loss": 0.7803, "step": 7121 }, { "epoch": 8.111680911680912, "grad_norm": 0.18942983448505402, "learning_rate": 3.414963268312422e-05, "loss": 0.7394, "step": 7122 }, { "epoch": 8.112820512820512, "grad_norm": 0.23636314272880554, "learning_rate": 3.4145296594178565e-05, "loss": 0.5983, "step": 7123 }, { "epoch": 8.113960113960115, "grad_norm": 0.20422495901584625, "learning_rate": 3.41409601875916e-05, "loss": 0.7172, "step": 7124 }, { "epoch": 8.115099715099715, "grad_norm": 0.14250652492046356, "learning_rate": 3.413662346351395e-05, "loss": 0.8622, "step": 7125 }, { "epoch": 8.116239316239316, "grad_norm": 0.2178293615579605, "learning_rate": 3.413228642209625e-05, "loss": 0.6593, "step": 7126 }, { "epoch": 8.117378917378918, "grad_norm": 0.2105027437210083, "learning_rate": 3.412794906348912e-05, "loss": 0.7712, "step": 7127 }, { "epoch": 8.118518518518519, "grad_norm": 0.1878097504377365, "learning_rate": 3.4123611387843223e-05, "loss": 0.7691, "step": 7128 }, { "epoch": 8.11965811965812, "grad_norm": 0.22846786677837372, "learning_rate": 3.411927339530921e-05, "loss": 0.6266, "step": 7129 }, { "epoch": 8.120797720797722, "grad_norm": 0.1520662158727646, "learning_rate": 3.411493508603776e-05, "loss": 0.8268, "step": 7130 }, { "epoch": 8.121937321937322, "grad_norm": 0.28886398673057556, "learning_rate": 3.4110596460179537e-05, "loss": 0.5266, "step": 7131 }, { "epoch": 8.123076923076923, "grad_norm": 0.25251293182373047, "learning_rate": 3.4106257517885246e-05, "loss": 0.435, "step": 7132 }, { "epoch": 8.124216524216525, "grad_norm": 0.18864810466766357, "learning_rate": 3.4101918259305586e-05, "loss": 0.6886, "step": 7133 }, { "epoch": 8.125356125356126, "grad_norm": 0.2172015905380249, "learning_rate": 3.409757868459129e-05, "loss": 0.5421, "step": 7134 }, { "epoch": 8.126495726495726, "grad_norm": 0.1778659224510193, "learning_rate": 3.409323879389306e-05, "loss": 0.6555, "step": 7135 }, { "epoch": 8.127635327635328, "grad_norm": 0.2387818694114685, "learning_rate": 3.4088898587361634e-05, "loss": 0.8124, "step": 7136 }, { "epoch": 8.128774928774929, "grad_norm": 0.21289221942424774, "learning_rate": 3.408455806514777e-05, "loss": 0.6962, "step": 7137 }, { "epoch": 8.12991452991453, "grad_norm": 0.20071299374103546, "learning_rate": 3.408021722740222e-05, "loss": 0.6872, "step": 7138 }, { "epoch": 8.131054131054132, "grad_norm": 0.20584726333618164, "learning_rate": 3.407587607427576e-05, "loss": 0.6627, "step": 7139 }, { "epoch": 8.132193732193732, "grad_norm": 0.14742788672447205, "learning_rate": 3.4071534605919164e-05, "loss": 0.9505, "step": 7140 }, { "epoch": 8.133333333333333, "grad_norm": 0.2064577341079712, "learning_rate": 3.406719282248322e-05, "loss": 0.7713, "step": 7141 }, { "epoch": 8.134472934472935, "grad_norm": 0.22255748510360718, "learning_rate": 3.4062850724118735e-05, "loss": 0.7487, "step": 7142 }, { "epoch": 8.135612535612536, "grad_norm": 0.20696209371089935, "learning_rate": 3.4058508310976526e-05, "loss": 0.7442, "step": 7143 }, { "epoch": 8.136752136752136, "grad_norm": 0.22416678071022034, "learning_rate": 3.4054165583207406e-05, "loss": 0.6652, "step": 7144 }, { "epoch": 8.137891737891739, "grad_norm": 0.20389822125434875, "learning_rate": 3.404982254096223e-05, "loss": 0.9133, "step": 7145 }, { "epoch": 8.13903133903134, "grad_norm": 0.15410453081130981, "learning_rate": 3.4045479184391816e-05, "loss": 0.8412, "step": 7146 }, { "epoch": 8.14017094017094, "grad_norm": 0.1810719519853592, "learning_rate": 3.404113551364704e-05, "loss": 0.7553, "step": 7147 }, { "epoch": 8.141310541310542, "grad_norm": 0.15530860424041748, "learning_rate": 3.403679152887876e-05, "loss": 0.8006, "step": 7148 }, { "epoch": 8.142450142450143, "grad_norm": 0.19188271462917328, "learning_rate": 3.4032447230237865e-05, "loss": 0.7097, "step": 7149 }, { "epoch": 8.143589743589743, "grad_norm": 0.19926056265830994, "learning_rate": 3.4028102617875237e-05, "loss": 0.7841, "step": 7150 }, { "epoch": 8.144729344729345, "grad_norm": 0.17562030255794525, "learning_rate": 3.402375769194177e-05, "loss": 0.7486, "step": 7151 }, { "epoch": 8.145868945868946, "grad_norm": 0.20137180387973785, "learning_rate": 3.401941245258839e-05, "loss": 0.6086, "step": 7152 }, { "epoch": 8.147008547008546, "grad_norm": 0.20159313082695007, "learning_rate": 3.401506689996601e-05, "loss": 0.6215, "step": 7153 }, { "epoch": 8.148148148148149, "grad_norm": 0.21637333929538727, "learning_rate": 3.4010721034225566e-05, "loss": 0.723, "step": 7154 }, { "epoch": 8.14928774928775, "grad_norm": 0.174230694770813, "learning_rate": 3.4006374855518004e-05, "loss": 0.7396, "step": 7155 }, { "epoch": 8.15042735042735, "grad_norm": 0.17579050362110138, "learning_rate": 3.4002028363994276e-05, "loss": 0.8874, "step": 7156 }, { "epoch": 8.151566951566952, "grad_norm": 0.18470925092697144, "learning_rate": 3.399768155980534e-05, "loss": 0.6751, "step": 7157 }, { "epoch": 8.152706552706553, "grad_norm": 0.1878298968076706, "learning_rate": 3.3993334443102184e-05, "loss": 0.7811, "step": 7158 }, { "epoch": 8.153846153846153, "grad_norm": 0.16194337606430054, "learning_rate": 3.398898701403579e-05, "loss": 0.9992, "step": 7159 }, { "epoch": 8.154985754985756, "grad_norm": 0.25477665662765503, "learning_rate": 3.398463927275716e-05, "loss": 0.5095, "step": 7160 }, { "epoch": 8.156125356125356, "grad_norm": 0.21325922012329102, "learning_rate": 3.39802912194173e-05, "loss": 0.6449, "step": 7161 }, { "epoch": 8.157264957264957, "grad_norm": 0.20713818073272705, "learning_rate": 3.397594285416723e-05, "loss": 0.7, "step": 7162 }, { "epoch": 8.158404558404559, "grad_norm": 0.18533176183700562, "learning_rate": 3.397159417715797e-05, "loss": 0.7912, "step": 7163 }, { "epoch": 8.15954415954416, "grad_norm": 0.18421204388141632, "learning_rate": 3.396724518854059e-05, "loss": 0.8206, "step": 7164 }, { "epoch": 8.16068376068376, "grad_norm": 0.19567228853702545, "learning_rate": 3.3962895888466105e-05, "loss": 0.6666, "step": 7165 }, { "epoch": 8.161823361823362, "grad_norm": 0.18482913076877594, "learning_rate": 3.3958546277085615e-05, "loss": 0.7562, "step": 7166 }, { "epoch": 8.162962962962963, "grad_norm": 0.24270056188106537, "learning_rate": 3.3954196354550174e-05, "loss": 0.4988, "step": 7167 }, { "epoch": 8.164102564102564, "grad_norm": 0.23909823596477509, "learning_rate": 3.3949846121010865e-05, "loss": 0.6893, "step": 7168 }, { "epoch": 8.165242165242166, "grad_norm": 0.16294808685779572, "learning_rate": 3.3945495576618785e-05, "loss": 0.8472, "step": 7169 }, { "epoch": 8.166381766381766, "grad_norm": 0.19857490062713623, "learning_rate": 3.3941144721525046e-05, "loss": 0.8641, "step": 7170 }, { "epoch": 8.167521367521367, "grad_norm": 0.19892112910747528, "learning_rate": 3.393679355588077e-05, "loss": 0.7258, "step": 7171 }, { "epoch": 8.16866096866097, "grad_norm": 0.23370476067066193, "learning_rate": 3.3932442079837066e-05, "loss": 0.6543, "step": 7172 }, { "epoch": 8.16980056980057, "grad_norm": 0.23885028064250946, "learning_rate": 3.392809029354509e-05, "loss": 0.7123, "step": 7173 }, { "epoch": 8.17094017094017, "grad_norm": 0.2139512449502945, "learning_rate": 3.3923738197155984e-05, "loss": 0.7092, "step": 7174 }, { "epoch": 8.172079772079773, "grad_norm": 0.2665398120880127, "learning_rate": 3.391938579082091e-05, "loss": 0.6516, "step": 7175 }, { "epoch": 8.173219373219373, "grad_norm": 0.1842581331729889, "learning_rate": 3.391503307469104e-05, "loss": 0.8499, "step": 7176 }, { "epoch": 8.174358974358974, "grad_norm": 0.21950329840183258, "learning_rate": 3.391068004891756e-05, "loss": 0.6333, "step": 7177 }, { "epoch": 8.175498575498576, "grad_norm": 0.19671662151813507, "learning_rate": 3.390632671365165e-05, "loss": 0.5673, "step": 7178 }, { "epoch": 8.176638176638177, "grad_norm": 0.16526201367378235, "learning_rate": 3.390197306904453e-05, "loss": 0.8273, "step": 7179 }, { "epoch": 8.177777777777777, "grad_norm": 0.19709545373916626, "learning_rate": 3.3897619115247395e-05, "loss": 0.6213, "step": 7180 }, { "epoch": 8.17891737891738, "grad_norm": 0.20775169134140015, "learning_rate": 3.3893264852411475e-05, "loss": 0.6329, "step": 7181 }, { "epoch": 8.18005698005698, "grad_norm": 0.24375586211681366, "learning_rate": 3.3888910280688024e-05, "loss": 0.494, "step": 7182 }, { "epoch": 8.18119658119658, "grad_norm": 0.22048993408679962, "learning_rate": 3.3884555400228266e-05, "loss": 0.7548, "step": 7183 }, { "epoch": 8.182336182336183, "grad_norm": 0.20597326755523682, "learning_rate": 3.388020021118347e-05, "loss": 0.7958, "step": 7184 }, { "epoch": 8.183475783475783, "grad_norm": 0.2033444046974182, "learning_rate": 3.3875844713704905e-05, "loss": 0.7194, "step": 7185 }, { "epoch": 8.184615384615384, "grad_norm": 0.23415708541870117, "learning_rate": 3.3871488907943835e-05, "loss": 0.709, "step": 7186 }, { "epoch": 8.185754985754986, "grad_norm": 0.1721237301826477, "learning_rate": 3.386713279405156e-05, "loss": 0.8547, "step": 7187 }, { "epoch": 8.186894586894587, "grad_norm": 0.19792774319648743, "learning_rate": 3.3862776372179384e-05, "loss": 0.7481, "step": 7188 }, { "epoch": 8.188034188034187, "grad_norm": 0.19219958782196045, "learning_rate": 3.38584196424786e-05, "loss": 0.6374, "step": 7189 }, { "epoch": 8.18917378917379, "grad_norm": 0.19490176439285278, "learning_rate": 3.385406260510056e-05, "loss": 0.7085, "step": 7190 }, { "epoch": 8.19031339031339, "grad_norm": 0.22433261573314667, "learning_rate": 3.384970526019656e-05, "loss": 0.64, "step": 7191 }, { "epoch": 8.19145299145299, "grad_norm": 0.2058522254228592, "learning_rate": 3.384534760791797e-05, "loss": 0.7725, "step": 7192 }, { "epoch": 8.192592592592593, "grad_norm": 0.18015679717063904, "learning_rate": 3.384098964841612e-05, "loss": 0.6959, "step": 7193 }, { "epoch": 8.193732193732194, "grad_norm": 0.19807380437850952, "learning_rate": 3.38366313818424e-05, "loss": 0.7339, "step": 7194 }, { "epoch": 8.194871794871794, "grad_norm": 0.2094758152961731, "learning_rate": 3.383227280834816e-05, "loss": 0.7301, "step": 7195 }, { "epoch": 8.196011396011396, "grad_norm": 0.26335787773132324, "learning_rate": 3.382791392808481e-05, "loss": 0.7256, "step": 7196 }, { "epoch": 8.197150997150997, "grad_norm": 0.2601282000541687, "learning_rate": 3.3823554741203716e-05, "loss": 0.4856, "step": 7197 }, { "epoch": 8.198290598290598, "grad_norm": 0.21603776514530182, "learning_rate": 3.381919524785631e-05, "loss": 0.7389, "step": 7198 }, { "epoch": 8.1994301994302, "grad_norm": 0.24947494268417358, "learning_rate": 3.3814835448194e-05, "loss": 0.4968, "step": 7199 }, { "epoch": 8.2005698005698, "grad_norm": 0.23176923394203186, "learning_rate": 3.381047534236822e-05, "loss": 0.6624, "step": 7200 }, { "epoch": 8.201709401709401, "grad_norm": 0.20691458880901337, "learning_rate": 3.380611493053039e-05, "loss": 0.8128, "step": 7201 }, { "epoch": 8.202849002849003, "grad_norm": 0.17805428802967072, "learning_rate": 3.380175421283198e-05, "loss": 0.8402, "step": 7202 }, { "epoch": 8.203988603988604, "grad_norm": 0.16130395233631134, "learning_rate": 3.379739318942444e-05, "loss": 1.0689, "step": 7203 }, { "epoch": 8.205128205128204, "grad_norm": 0.20997899770736694, "learning_rate": 3.379303186045924e-05, "loss": 0.6264, "step": 7204 }, { "epoch": 8.206267806267807, "grad_norm": 0.18589375913143158, "learning_rate": 3.378867022608787e-05, "loss": 0.813, "step": 7205 }, { "epoch": 8.207407407407407, "grad_norm": 0.1978245973587036, "learning_rate": 3.378430828646181e-05, "loss": 0.7567, "step": 7206 }, { "epoch": 8.208547008547008, "grad_norm": 0.19161473214626312, "learning_rate": 3.3779946041732565e-05, "loss": 0.712, "step": 7207 }, { "epoch": 8.20968660968661, "grad_norm": 0.20370537042617798, "learning_rate": 3.377558349205164e-05, "loss": 0.6954, "step": 7208 }, { "epoch": 8.21082621082621, "grad_norm": 0.23119372129440308, "learning_rate": 3.377122063757058e-05, "loss": 0.4729, "step": 7209 }, { "epoch": 8.211965811965811, "grad_norm": 0.19260242581367493, "learning_rate": 3.3766857478440895e-05, "loss": 0.757, "step": 7210 }, { "epoch": 8.213105413105414, "grad_norm": 0.19637152552604675, "learning_rate": 3.376249401481415e-05, "loss": 0.6828, "step": 7211 }, { "epoch": 8.214245014245014, "grad_norm": 0.17000946402549744, "learning_rate": 3.375813024684189e-05, "loss": 0.8371, "step": 7212 }, { "epoch": 8.215384615384615, "grad_norm": 0.1960098147392273, "learning_rate": 3.3753766174675686e-05, "loss": 0.7028, "step": 7213 }, { "epoch": 8.216524216524217, "grad_norm": 0.21466651558876038, "learning_rate": 3.37494017984671e-05, "loss": 0.7695, "step": 7214 }, { "epoch": 8.217663817663817, "grad_norm": 0.21413464844226837, "learning_rate": 3.374503711836773e-05, "loss": 0.589, "step": 7215 }, { "epoch": 8.218803418803418, "grad_norm": 0.20334623754024506, "learning_rate": 3.374067213452918e-05, "loss": 0.8125, "step": 7216 }, { "epoch": 8.21994301994302, "grad_norm": 0.20716072618961334, "learning_rate": 3.373630684710304e-05, "loss": 0.777, "step": 7217 }, { "epoch": 8.221082621082621, "grad_norm": 0.20598304271697998, "learning_rate": 3.373194125624094e-05, "loss": 0.8426, "step": 7218 }, { "epoch": 8.222222222222221, "grad_norm": 0.20386014878749847, "learning_rate": 3.3727575362094513e-05, "loss": 0.5724, "step": 7219 }, { "epoch": 8.223361823361824, "grad_norm": 0.25801876187324524, "learning_rate": 3.372320916481539e-05, "loss": 0.6839, "step": 7220 }, { "epoch": 8.224501424501424, "grad_norm": 0.22828209400177002, "learning_rate": 3.371884266455522e-05, "loss": 0.601, "step": 7221 }, { "epoch": 8.225641025641025, "grad_norm": 0.25332388281822205, "learning_rate": 3.371447586146567e-05, "loss": 0.7478, "step": 7222 }, { "epoch": 8.226780626780627, "grad_norm": 0.16740012168884277, "learning_rate": 3.371010875569841e-05, "loss": 0.776, "step": 7223 }, { "epoch": 8.227920227920228, "grad_norm": 0.2657046914100647, "learning_rate": 3.3705741347405126e-05, "loss": 0.4394, "step": 7224 }, { "epoch": 8.229059829059828, "grad_norm": 0.22548681497573853, "learning_rate": 3.370137363673749e-05, "loss": 0.711, "step": 7225 }, { "epoch": 8.23019943019943, "grad_norm": 0.20890066027641296, "learning_rate": 3.3697005623847226e-05, "loss": 0.7601, "step": 7226 }, { "epoch": 8.231339031339031, "grad_norm": 0.21798698604106903, "learning_rate": 3.369263730888604e-05, "loss": 0.5204, "step": 7227 }, { "epoch": 8.232478632478632, "grad_norm": 0.22593146562576294, "learning_rate": 3.368826869200566e-05, "loss": 0.7164, "step": 7228 }, { "epoch": 8.233618233618234, "grad_norm": 0.1523062139749527, "learning_rate": 3.3683899773357806e-05, "loss": 0.7875, "step": 7229 }, { "epoch": 8.234757834757835, "grad_norm": 0.18687289953231812, "learning_rate": 3.3679530553094236e-05, "loss": 0.7488, "step": 7230 }, { "epoch": 8.235897435897435, "grad_norm": 0.20674464106559753, "learning_rate": 3.3675161031366706e-05, "loss": 0.6053, "step": 7231 }, { "epoch": 8.237037037037037, "grad_norm": 0.22084195911884308, "learning_rate": 3.3670791208326966e-05, "loss": 0.5489, "step": 7232 }, { "epoch": 8.238176638176638, "grad_norm": 0.21917662024497986, "learning_rate": 3.3666421084126815e-05, "loss": 0.6746, "step": 7233 }, { "epoch": 8.239316239316238, "grad_norm": 0.17784634232521057, "learning_rate": 3.366205065891802e-05, "loss": 0.8069, "step": 7234 }, { "epoch": 8.24045584045584, "grad_norm": 0.19141167402267456, "learning_rate": 3.3657679932852385e-05, "loss": 0.7902, "step": 7235 }, { "epoch": 8.241595441595441, "grad_norm": 0.17095151543617249, "learning_rate": 3.3653308906081715e-05, "loss": 0.8679, "step": 7236 }, { "epoch": 8.242735042735042, "grad_norm": 0.22071368992328644, "learning_rate": 3.364893757875783e-05, "loss": 0.7874, "step": 7237 }, { "epoch": 8.243874643874644, "grad_norm": 0.1756248027086258, "learning_rate": 3.3644565951032565e-05, "loss": 0.7982, "step": 7238 }, { "epoch": 8.245014245014245, "grad_norm": 0.18324190378189087, "learning_rate": 3.364019402305775e-05, "loss": 0.8962, "step": 7239 }, { "epoch": 8.246153846153845, "grad_norm": 0.1766497790813446, "learning_rate": 3.363582179498523e-05, "loss": 0.711, "step": 7240 }, { "epoch": 8.247293447293448, "grad_norm": 0.14802196621894836, "learning_rate": 3.3631449266966874e-05, "loss": 0.9253, "step": 7241 }, { "epoch": 8.248433048433048, "grad_norm": 0.20839270949363708, "learning_rate": 3.362707643915455e-05, "loss": 0.6343, "step": 7242 }, { "epoch": 8.249572649572649, "grad_norm": 0.2486768364906311, "learning_rate": 3.3622703311700136e-05, "loss": 0.5471, "step": 7243 }, { "epoch": 8.250712250712251, "grad_norm": 0.1962577998638153, "learning_rate": 3.361832988475552e-05, "loss": 0.7123, "step": 7244 }, { "epoch": 8.251851851851852, "grad_norm": 0.2022298127412796, "learning_rate": 3.3613956158472614e-05, "loss": 0.7409, "step": 7245 }, { "epoch": 8.252991452991452, "grad_norm": 0.23249484598636627, "learning_rate": 3.360958213300332e-05, "loss": 0.6927, "step": 7246 }, { "epoch": 8.254131054131054, "grad_norm": 0.167454332113266, "learning_rate": 3.3605207808499556e-05, "loss": 0.8609, "step": 7247 }, { "epoch": 8.255270655270655, "grad_norm": 0.20528429746627808, "learning_rate": 3.3600833185113266e-05, "loss": 0.7459, "step": 7248 }, { "epoch": 8.256410256410255, "grad_norm": 0.20554734766483307, "learning_rate": 3.3596458262996386e-05, "loss": 0.6844, "step": 7249 }, { "epoch": 8.257549857549858, "grad_norm": 0.21883270144462585, "learning_rate": 3.359208304230087e-05, "loss": 0.6415, "step": 7250 }, { "epoch": 8.258689458689458, "grad_norm": 0.1936005800962448, "learning_rate": 3.358770752317868e-05, "loss": 0.7177, "step": 7251 }, { "epoch": 8.25982905982906, "grad_norm": 0.243613600730896, "learning_rate": 3.358333170578179e-05, "loss": 0.571, "step": 7252 }, { "epoch": 8.260968660968661, "grad_norm": 0.16592811048030853, "learning_rate": 3.357895559026219e-05, "loss": 0.853, "step": 7253 }, { "epoch": 8.262108262108262, "grad_norm": 0.18197090923786163, "learning_rate": 3.357457917677187e-05, "loss": 0.8275, "step": 7254 }, { "epoch": 8.263247863247864, "grad_norm": 0.16797739267349243, "learning_rate": 3.357020246546283e-05, "loss": 0.7768, "step": 7255 }, { "epoch": 8.264387464387465, "grad_norm": 0.23813243210315704, "learning_rate": 3.35658254564871e-05, "loss": 0.5604, "step": 7256 }, { "epoch": 8.265527065527065, "grad_norm": 0.20278388261795044, "learning_rate": 3.3561448149996685e-05, "loss": 0.7999, "step": 7257 }, { "epoch": 8.266666666666667, "grad_norm": 0.2195715755224228, "learning_rate": 3.355707054614364e-05, "loss": 0.5943, "step": 7258 }, { "epoch": 8.267806267806268, "grad_norm": 0.19100749492645264, "learning_rate": 3.355269264507999e-05, "loss": 0.837, "step": 7259 }, { "epoch": 8.268945868945869, "grad_norm": 0.1974109411239624, "learning_rate": 3.3548314446957816e-05, "loss": 0.6738, "step": 7260 }, { "epoch": 8.270085470085471, "grad_norm": 0.20671726763248444, "learning_rate": 3.3543935951929174e-05, "loss": 0.834, "step": 7261 }, { "epoch": 8.271225071225071, "grad_norm": 0.20759086310863495, "learning_rate": 3.353955716014614e-05, "loss": 0.675, "step": 7262 }, { "epoch": 8.272364672364672, "grad_norm": 0.20911797881126404, "learning_rate": 3.35351780717608e-05, "loss": 0.7591, "step": 7263 }, { "epoch": 8.273504273504274, "grad_norm": 0.21806155145168304, "learning_rate": 3.3530798686925243e-05, "loss": 0.6563, "step": 7264 }, { "epoch": 8.274643874643875, "grad_norm": 0.17803005874156952, "learning_rate": 3.35264190057916e-05, "loss": 0.8413, "step": 7265 }, { "epoch": 8.275783475783475, "grad_norm": 0.19721558690071106, "learning_rate": 3.352203902851198e-05, "loss": 0.8028, "step": 7266 }, { "epoch": 8.276923076923078, "grad_norm": 0.2705168128013611, "learning_rate": 3.351765875523851e-05, "loss": 0.7717, "step": 7267 }, { "epoch": 8.278062678062678, "grad_norm": 0.20172752439975739, "learning_rate": 3.3513278186123316e-05, "loss": 0.9156, "step": 7268 }, { "epoch": 8.279202279202279, "grad_norm": 0.1877267211675644, "learning_rate": 3.350889732131856e-05, "loss": 0.7332, "step": 7269 }, { "epoch": 8.280341880341881, "grad_norm": 0.17579896748065948, "learning_rate": 3.3504516160976404e-05, "loss": 0.7554, "step": 7270 }, { "epoch": 8.281481481481482, "grad_norm": 0.176714226603508, "learning_rate": 3.350013470524902e-05, "loss": 0.7036, "step": 7271 }, { "epoch": 8.282621082621082, "grad_norm": 0.2160552591085434, "learning_rate": 3.349575295428857e-05, "loss": 0.7961, "step": 7272 }, { "epoch": 8.283760683760685, "grad_norm": 0.24364876747131348, "learning_rate": 3.3491370908247273e-05, "loss": 0.5536, "step": 7273 }, { "epoch": 8.284900284900285, "grad_norm": 0.21509499847888947, "learning_rate": 3.34869885672773e-05, "loss": 0.8383, "step": 7274 }, { "epoch": 8.286039886039886, "grad_norm": 0.17860279977321625, "learning_rate": 3.348260593153088e-05, "loss": 0.8919, "step": 7275 }, { "epoch": 8.287179487179488, "grad_norm": 0.20313961803913116, "learning_rate": 3.347822300116022e-05, "loss": 0.6938, "step": 7276 }, { "epoch": 8.288319088319088, "grad_norm": 0.21612028777599335, "learning_rate": 3.347383977631757e-05, "loss": 0.7654, "step": 7277 }, { "epoch": 8.289458689458689, "grad_norm": 0.22881169617176056, "learning_rate": 3.3469456257155166e-05, "loss": 0.6896, "step": 7278 }, { "epoch": 8.290598290598291, "grad_norm": 0.1889042854309082, "learning_rate": 3.346507244382524e-05, "loss": 0.7589, "step": 7279 }, { "epoch": 8.291737891737892, "grad_norm": 0.2020287662744522, "learning_rate": 3.346068833648008e-05, "loss": 0.5483, "step": 7280 }, { "epoch": 8.292877492877492, "grad_norm": 0.1734069287776947, "learning_rate": 3.345630393527194e-05, "loss": 0.6652, "step": 7281 }, { "epoch": 8.294017094017095, "grad_norm": 0.1785465031862259, "learning_rate": 3.345191924035312e-05, "loss": 0.928, "step": 7282 }, { "epoch": 8.295156695156695, "grad_norm": 0.17669369280338287, "learning_rate": 3.344753425187589e-05, "loss": 0.6255, "step": 7283 }, { "epoch": 8.296296296296296, "grad_norm": 0.23857755959033966, "learning_rate": 3.3443148969992576e-05, "loss": 0.7075, "step": 7284 }, { "epoch": 8.297435897435898, "grad_norm": 0.18792074918746948, "learning_rate": 3.343876339485547e-05, "loss": 0.6417, "step": 7285 }, { "epoch": 8.298575498575499, "grad_norm": 0.20096182823181152, "learning_rate": 3.3434377526616914e-05, "loss": 0.7125, "step": 7286 }, { "epoch": 8.2997150997151, "grad_norm": 0.2250138372182846, "learning_rate": 3.342999136542922e-05, "loss": 0.7328, "step": 7287 }, { "epoch": 8.300854700854702, "grad_norm": 0.21230454742908478, "learning_rate": 3.342560491144475e-05, "loss": 0.7452, "step": 7288 }, { "epoch": 8.301994301994302, "grad_norm": 0.20976322889328003, "learning_rate": 3.342121816481585e-05, "loss": 0.7153, "step": 7289 }, { "epoch": 8.303133903133903, "grad_norm": 0.17850695550441742, "learning_rate": 3.341683112569489e-05, "loss": 0.7857, "step": 7290 }, { "epoch": 8.304273504273505, "grad_norm": 0.19860774278640747, "learning_rate": 3.341244379423423e-05, "loss": 0.9077, "step": 7291 }, { "epoch": 8.305413105413106, "grad_norm": 0.20130538940429688, "learning_rate": 3.3408056170586265e-05, "loss": 0.862, "step": 7292 }, { "epoch": 8.306552706552706, "grad_norm": 0.21216948330402374, "learning_rate": 3.340366825490339e-05, "loss": 0.8028, "step": 7293 }, { "epoch": 8.307692307692308, "grad_norm": 0.2430482804775238, "learning_rate": 3.3399280047338004e-05, "loss": 0.5628, "step": 7294 }, { "epoch": 8.308831908831909, "grad_norm": 0.22877097129821777, "learning_rate": 3.339489154804253e-05, "loss": 0.8129, "step": 7295 }, { "epoch": 8.30997150997151, "grad_norm": 0.21312668919563293, "learning_rate": 3.3390502757169376e-05, "loss": 0.6534, "step": 7296 }, { "epoch": 8.311111111111112, "grad_norm": 0.18086498975753784, "learning_rate": 3.338611367487099e-05, "loss": 0.7918, "step": 7297 }, { "epoch": 8.312250712250712, "grad_norm": 0.23419511318206787, "learning_rate": 3.3381724301299816e-05, "loss": 0.6764, "step": 7298 }, { "epoch": 8.313390313390313, "grad_norm": 0.20369236171245575, "learning_rate": 3.337733463660831e-05, "loss": 0.5918, "step": 7299 }, { "epoch": 8.314529914529915, "grad_norm": 0.23764194548130035, "learning_rate": 3.337294468094893e-05, "loss": 0.7771, "step": 7300 }, { "epoch": 8.315669515669516, "grad_norm": 0.16571687161922455, "learning_rate": 3.336855443447416e-05, "loss": 0.8492, "step": 7301 }, { "epoch": 8.316809116809116, "grad_norm": 0.19056515395641327, "learning_rate": 3.336416389733647e-05, "loss": 0.6287, "step": 7302 }, { "epoch": 8.317948717948719, "grad_norm": 0.24364341795444489, "learning_rate": 3.335977306968838e-05, "loss": 0.541, "step": 7303 }, { "epoch": 8.31908831908832, "grad_norm": 0.2063123881816864, "learning_rate": 3.3355381951682374e-05, "loss": 0.6076, "step": 7304 }, { "epoch": 8.32022792022792, "grad_norm": 0.23121146857738495, "learning_rate": 3.335099054347098e-05, "loss": 0.4948, "step": 7305 }, { "epoch": 8.321367521367522, "grad_norm": 0.23306216299533844, "learning_rate": 3.3346598845206715e-05, "loss": 0.6466, "step": 7306 }, { "epoch": 8.322507122507123, "grad_norm": 0.19110971689224243, "learning_rate": 3.334220685704212e-05, "loss": 0.6224, "step": 7307 }, { "epoch": 8.323646723646723, "grad_norm": 0.19357438385486603, "learning_rate": 3.3337814579129744e-05, "loss": 0.8785, "step": 7308 }, { "epoch": 8.324786324786325, "grad_norm": 0.22389012575149536, "learning_rate": 3.3333422011622134e-05, "loss": 0.7222, "step": 7309 }, { "epoch": 8.325925925925926, "grad_norm": 0.2533615827560425, "learning_rate": 3.332902915467187e-05, "loss": 0.6152, "step": 7310 }, { "epoch": 8.327065527065526, "grad_norm": 0.2572626769542694, "learning_rate": 3.33246360084315e-05, "loss": 0.5672, "step": 7311 }, { "epoch": 8.328205128205129, "grad_norm": 0.19206684827804565, "learning_rate": 3.3320242573053644e-05, "loss": 0.7731, "step": 7312 }, { "epoch": 8.32934472934473, "grad_norm": 0.14389660954475403, "learning_rate": 3.331584884869087e-05, "loss": 0.8121, "step": 7313 }, { "epoch": 8.33048433048433, "grad_norm": 0.306020051240921, "learning_rate": 3.331145483549581e-05, "loss": 0.6047, "step": 7314 }, { "epoch": 8.331623931623932, "grad_norm": 0.23067918419837952, "learning_rate": 3.330706053362106e-05, "loss": 0.6743, "step": 7315 }, { "epoch": 8.332763532763533, "grad_norm": 0.18673858046531677, "learning_rate": 3.330266594321926e-05, "loss": 0.7463, "step": 7316 }, { "epoch": 8.333903133903133, "grad_norm": 0.23222613334655762, "learning_rate": 3.3298271064443026e-05, "loss": 0.6596, "step": 7317 }, { "epoch": 8.335042735042736, "grad_norm": 0.2181825190782547, "learning_rate": 3.329387589744503e-05, "loss": 0.6956, "step": 7318 }, { "epoch": 8.336182336182336, "grad_norm": 0.2011530101299286, "learning_rate": 3.3289480442377904e-05, "loss": 0.6112, "step": 7319 }, { "epoch": 8.337321937321937, "grad_norm": 0.2408788502216339, "learning_rate": 3.328508469939434e-05, "loss": 0.5947, "step": 7320 }, { "epoch": 8.338461538461539, "grad_norm": 0.22859489917755127, "learning_rate": 3.3280688668646984e-05, "loss": 0.6697, "step": 7321 }, { "epoch": 8.33960113960114, "grad_norm": 0.23684445023536682, "learning_rate": 3.3276292350288554e-05, "loss": 0.6499, "step": 7322 }, { "epoch": 8.34074074074074, "grad_norm": 0.260758638381958, "learning_rate": 3.327189574447173e-05, "loss": 0.6182, "step": 7323 }, { "epoch": 8.341880341880342, "grad_norm": 0.22596196830272675, "learning_rate": 3.3267498851349204e-05, "loss": 0.6605, "step": 7324 }, { "epoch": 8.343019943019943, "grad_norm": 0.14734920859336853, "learning_rate": 3.3263101671073724e-05, "loss": 0.865, "step": 7325 }, { "epoch": 8.344159544159544, "grad_norm": 0.21761855483055115, "learning_rate": 3.325870420379799e-05, "loss": 0.484, "step": 7326 }, { "epoch": 8.345299145299146, "grad_norm": 0.47291189432144165, "learning_rate": 3.3254306449674756e-05, "loss": 0.814, "step": 7327 }, { "epoch": 8.346438746438746, "grad_norm": 0.2643078565597534, "learning_rate": 3.324990840885674e-05, "loss": 0.4888, "step": 7328 }, { "epoch": 8.347578347578347, "grad_norm": 0.23896785080432892, "learning_rate": 3.3245510081496746e-05, "loss": 0.626, "step": 7329 }, { "epoch": 8.34871794871795, "grad_norm": 0.1692821979522705, "learning_rate": 3.324111146774749e-05, "loss": 0.8557, "step": 7330 }, { "epoch": 8.34985754985755, "grad_norm": 0.18174251914024353, "learning_rate": 3.323671256776177e-05, "loss": 0.4771, "step": 7331 }, { "epoch": 8.35099715099715, "grad_norm": 0.25295665860176086, "learning_rate": 3.3232313381692385e-05, "loss": 0.7679, "step": 7332 }, { "epoch": 8.352136752136753, "grad_norm": 0.18513503670692444, "learning_rate": 3.322791390969211e-05, "loss": 0.871, "step": 7333 }, { "epoch": 8.353276353276353, "grad_norm": 0.17371828854084015, "learning_rate": 3.3223514151913756e-05, "loss": 0.8518, "step": 7334 }, { "epoch": 8.354415954415954, "grad_norm": 0.23687179386615753, "learning_rate": 3.321911410851014e-05, "loss": 0.6116, "step": 7335 }, { "epoch": 8.355555555555556, "grad_norm": 0.1863275021314621, "learning_rate": 3.321471377963409e-05, "loss": 0.6996, "step": 7336 }, { "epoch": 8.356695156695157, "grad_norm": 0.1586081087589264, "learning_rate": 3.321031316543844e-05, "loss": 0.8639, "step": 7337 }, { "epoch": 8.357834757834757, "grad_norm": 0.24024544656276703, "learning_rate": 3.320591226607604e-05, "loss": 0.6041, "step": 7338 }, { "epoch": 8.35897435897436, "grad_norm": 0.2112145870923996, "learning_rate": 3.320151108169974e-05, "loss": 0.7068, "step": 7339 }, { "epoch": 8.36011396011396, "grad_norm": 0.18478870391845703, "learning_rate": 3.3197109612462405e-05, "loss": 0.7975, "step": 7340 }, { "epoch": 8.36125356125356, "grad_norm": 0.20976874232292175, "learning_rate": 3.31927078585169e-05, "loss": 0.6678, "step": 7341 }, { "epoch": 8.362393162393163, "grad_norm": 0.2051706314086914, "learning_rate": 3.3188305820016136e-05, "loss": 0.8632, "step": 7342 }, { "epoch": 8.363532763532763, "grad_norm": 0.19248652458190918, "learning_rate": 3.3183903497112985e-05, "loss": 0.9468, "step": 7343 }, { "epoch": 8.364672364672364, "grad_norm": 0.23227335512638092, "learning_rate": 3.317950088996036e-05, "loss": 0.5203, "step": 7344 }, { "epoch": 8.365811965811966, "grad_norm": 0.24318364262580872, "learning_rate": 3.317509799871118e-05, "loss": 0.6219, "step": 7345 }, { "epoch": 8.366951566951567, "grad_norm": 0.1984577178955078, "learning_rate": 3.317069482351836e-05, "loss": 0.6433, "step": 7346 }, { "epoch": 8.368091168091167, "grad_norm": 0.20170587301254272, "learning_rate": 3.316629136453484e-05, "loss": 0.7974, "step": 7347 }, { "epoch": 8.36923076923077, "grad_norm": 0.2340664565563202, "learning_rate": 3.316188762191357e-05, "loss": 0.5978, "step": 7348 }, { "epoch": 8.37037037037037, "grad_norm": 0.25876733660697937, "learning_rate": 3.3157483595807484e-05, "loss": 0.5988, "step": 7349 }, { "epoch": 8.37150997150997, "grad_norm": 0.17292191088199615, "learning_rate": 3.3153079286369576e-05, "loss": 0.6582, "step": 7350 }, { "epoch": 8.372649572649573, "grad_norm": 0.19964800775051117, "learning_rate": 3.314867469375279e-05, "loss": 0.6532, "step": 7351 }, { "epoch": 8.373789173789174, "grad_norm": 0.21056808531284332, "learning_rate": 3.314426981811013e-05, "loss": 0.6819, "step": 7352 }, { "epoch": 8.374928774928774, "grad_norm": 0.1966729611158371, "learning_rate": 3.313986465959458e-05, "loss": 0.8039, "step": 7353 }, { "epoch": 8.376068376068377, "grad_norm": 0.19657807052135468, "learning_rate": 3.313545921835915e-05, "loss": 0.7674, "step": 7354 }, { "epoch": 8.377207977207977, "grad_norm": 0.21132008731365204, "learning_rate": 3.313105349455684e-05, "loss": 0.7759, "step": 7355 }, { "epoch": 8.378347578347578, "grad_norm": 0.1679302603006363, "learning_rate": 3.312664748834069e-05, "loss": 0.6613, "step": 7356 }, { "epoch": 8.37948717948718, "grad_norm": 0.2551022469997406, "learning_rate": 3.312224119986372e-05, "loss": 0.5962, "step": 7357 }, { "epoch": 8.38062678062678, "grad_norm": 0.2117428183555603, "learning_rate": 3.3117834629278974e-05, "loss": 0.8595, "step": 7358 }, { "epoch": 8.381766381766381, "grad_norm": 0.20957203209400177, "learning_rate": 3.3113427776739514e-05, "loss": 0.6074, "step": 7359 }, { "epoch": 8.382905982905983, "grad_norm": 0.1663791537284851, "learning_rate": 3.310902064239839e-05, "loss": 0.8607, "step": 7360 }, { "epoch": 8.384045584045584, "grad_norm": 0.2344769686460495, "learning_rate": 3.3104613226408685e-05, "loss": 0.6274, "step": 7361 }, { "epoch": 8.385185185185184, "grad_norm": 0.21692374348640442, "learning_rate": 3.3100205528923475e-05, "loss": 0.6482, "step": 7362 }, { "epoch": 8.386324786324787, "grad_norm": 0.19572094082832336, "learning_rate": 3.3095797550095845e-05, "loss": 0.6797, "step": 7363 }, { "epoch": 8.387464387464387, "grad_norm": 0.2012271285057068, "learning_rate": 3.30913892900789e-05, "loss": 0.6168, "step": 7364 }, { "epoch": 8.388603988603988, "grad_norm": 0.19752828776836395, "learning_rate": 3.308698074902576e-05, "loss": 0.7932, "step": 7365 }, { "epoch": 8.38974358974359, "grad_norm": 0.20255888998508453, "learning_rate": 3.308257192708954e-05, "loss": 0.6429, "step": 7366 }, { "epoch": 8.39088319088319, "grad_norm": 0.21768487989902496, "learning_rate": 3.307816282442337e-05, "loss": 0.7006, "step": 7367 }, { "epoch": 8.392022792022791, "grad_norm": 0.1909329742193222, "learning_rate": 3.307375344118039e-05, "loss": 0.7744, "step": 7368 }, { "epoch": 8.393162393162394, "grad_norm": 0.23957112431526184, "learning_rate": 3.3069343777513744e-05, "loss": 0.5901, "step": 7369 }, { "epoch": 8.394301994301994, "grad_norm": 0.2202073335647583, "learning_rate": 3.306493383357661e-05, "loss": 0.7047, "step": 7370 }, { "epoch": 8.395441595441595, "grad_norm": 0.20845544338226318, "learning_rate": 3.306052360952213e-05, "loss": 0.6855, "step": 7371 }, { "epoch": 8.396581196581197, "grad_norm": 0.24946178495883942, "learning_rate": 3.305611310550351e-05, "loss": 0.595, "step": 7372 }, { "epoch": 8.397720797720797, "grad_norm": 0.1769694685935974, "learning_rate": 3.305170232167392e-05, "loss": 0.9465, "step": 7373 }, { "epoch": 8.398860398860398, "grad_norm": 0.20594260096549988, "learning_rate": 3.304729125818657e-05, "loss": 0.7854, "step": 7374 }, { "epoch": 8.4, "grad_norm": 0.362347275018692, "learning_rate": 3.3042879915194655e-05, "loss": 0.7602, "step": 7375 }, { "epoch": 8.401139601139601, "grad_norm": 0.23845791816711426, "learning_rate": 3.303846829285142e-05, "loss": 0.6781, "step": 7376 }, { "epoch": 8.402279202279201, "grad_norm": 0.3973340690135956, "learning_rate": 3.303405639131007e-05, "loss": 0.6324, "step": 7377 }, { "epoch": 8.403418803418804, "grad_norm": 0.2270396649837494, "learning_rate": 3.302964421072384e-05, "loss": 0.6361, "step": 7378 }, { "epoch": 8.404558404558404, "grad_norm": 0.20940916240215302, "learning_rate": 3.302523175124599e-05, "loss": 0.7596, "step": 7379 }, { "epoch": 8.405698005698005, "grad_norm": 0.1925322413444519, "learning_rate": 3.302081901302977e-05, "loss": 0.7875, "step": 7380 }, { "epoch": 8.406837606837607, "grad_norm": 0.19672545790672302, "learning_rate": 3.301640599622845e-05, "loss": 0.7438, "step": 7381 }, { "epoch": 8.407977207977208, "grad_norm": 0.23943732678890228, "learning_rate": 3.3011992700995305e-05, "loss": 0.59, "step": 7382 }, { "epoch": 8.40911680911681, "grad_norm": 0.20913273096084595, "learning_rate": 3.300757912748362e-05, "loss": 0.7543, "step": 7383 }, { "epoch": 8.41025641025641, "grad_norm": 0.20598195493221283, "learning_rate": 3.3003165275846695e-05, "loss": 0.6334, "step": 7384 }, { "epoch": 8.411396011396011, "grad_norm": 0.19529956579208374, "learning_rate": 3.2998751146237825e-05, "loss": 0.7716, "step": 7385 }, { "epoch": 8.412535612535613, "grad_norm": 0.25812625885009766, "learning_rate": 3.299433673881033e-05, "loss": 0.5798, "step": 7386 }, { "epoch": 8.413675213675214, "grad_norm": 0.2058606594800949, "learning_rate": 3.2989922053717536e-05, "loss": 0.6541, "step": 7387 }, { "epoch": 8.414814814814815, "grad_norm": 0.1793244630098343, "learning_rate": 3.2985507091112776e-05, "loss": 0.8379, "step": 7388 }, { "epoch": 8.415954415954417, "grad_norm": 0.20750270783901215, "learning_rate": 3.29810918511494e-05, "loss": 0.7989, "step": 7389 }, { "epoch": 8.417094017094017, "grad_norm": 0.22443367540836334, "learning_rate": 3.297667633398075e-05, "loss": 0.6884, "step": 7390 }, { "epoch": 8.418233618233618, "grad_norm": 0.22191601991653442, "learning_rate": 3.2972260539760206e-05, "loss": 0.6384, "step": 7391 }, { "epoch": 8.41937321937322, "grad_norm": 0.1811549812555313, "learning_rate": 3.296784446864112e-05, "loss": 0.769, "step": 7392 }, { "epoch": 8.42051282051282, "grad_norm": 0.2108343541622162, "learning_rate": 3.296342812077688e-05, "loss": 0.6417, "step": 7393 }, { "epoch": 8.421652421652421, "grad_norm": 0.1670847237110138, "learning_rate": 3.295901149632089e-05, "loss": 0.747, "step": 7394 }, { "epoch": 8.422792022792024, "grad_norm": 0.21345795691013336, "learning_rate": 3.295459459542655e-05, "loss": 0.7172, "step": 7395 }, { "epoch": 8.423931623931624, "grad_norm": 0.2507505416870117, "learning_rate": 3.295017741824725e-05, "loss": 0.5596, "step": 7396 }, { "epoch": 8.425071225071225, "grad_norm": 0.2168746143579483, "learning_rate": 3.294575996493643e-05, "loss": 0.5596, "step": 7397 }, { "epoch": 8.426210826210827, "grad_norm": 0.21462614834308624, "learning_rate": 3.294134223564752e-05, "loss": 0.6587, "step": 7398 }, { "epoch": 8.427350427350428, "grad_norm": 0.20166388154029846, "learning_rate": 3.293692423053395e-05, "loss": 0.7495, "step": 7399 }, { "epoch": 8.428490028490028, "grad_norm": 0.17037194967269897, "learning_rate": 3.2932505949749174e-05, "loss": 0.8331, "step": 7400 }, { "epoch": 8.42962962962963, "grad_norm": 0.1669473648071289, "learning_rate": 3.292808739344665e-05, "loss": 0.7935, "step": 7401 }, { "epoch": 8.430769230769231, "grad_norm": 0.18680305778980255, "learning_rate": 3.292366856177986e-05, "loss": 0.8855, "step": 7402 }, { "epoch": 8.431908831908832, "grad_norm": 0.20334556698799133, "learning_rate": 3.2919249454902265e-05, "loss": 0.7106, "step": 7403 }, { "epoch": 8.433048433048434, "grad_norm": 0.18944790959358215, "learning_rate": 3.2914830072967356e-05, "loss": 0.7304, "step": 7404 }, { "epoch": 8.434188034188034, "grad_norm": 0.2841229736804962, "learning_rate": 3.2910410416128635e-05, "loss": 0.8535, "step": 7405 }, { "epoch": 8.435327635327635, "grad_norm": 0.21714118123054504, "learning_rate": 3.2905990484539606e-05, "loss": 0.7142, "step": 7406 }, { "epoch": 8.436467236467237, "grad_norm": 0.22880962491035461, "learning_rate": 3.290157027835378e-05, "loss": 0.5942, "step": 7407 }, { "epoch": 8.437606837606838, "grad_norm": 0.18921370804309845, "learning_rate": 3.2897149797724694e-05, "loss": 0.8467, "step": 7408 }, { "epoch": 8.438746438746438, "grad_norm": 0.1684616357088089, "learning_rate": 3.289272904280588e-05, "loss": 0.8504, "step": 7409 }, { "epoch": 8.43988603988604, "grad_norm": 0.19537535309791565, "learning_rate": 3.288830801375088e-05, "loss": 0.7657, "step": 7410 }, { "epoch": 8.441025641025641, "grad_norm": 0.20097994804382324, "learning_rate": 3.2883886710713254e-05, "loss": 0.584, "step": 7411 }, { "epoch": 8.442165242165242, "grad_norm": 0.6534616947174072, "learning_rate": 3.2879465133846554e-05, "loss": 0.6286, "step": 7412 }, { "epoch": 8.443304843304844, "grad_norm": 0.15233008563518524, "learning_rate": 3.2875043283304356e-05, "loss": 0.9783, "step": 7413 }, { "epoch": 8.444444444444445, "grad_norm": 0.23752959072589874, "learning_rate": 3.2870621159240264e-05, "loss": 0.5869, "step": 7414 }, { "epoch": 8.445584045584045, "grad_norm": 0.27003517746925354, "learning_rate": 3.286619876180784e-05, "loss": 0.6678, "step": 7415 }, { "epoch": 8.446723646723648, "grad_norm": 0.22215615212917328, "learning_rate": 3.286177609116072e-05, "loss": 0.5475, "step": 7416 }, { "epoch": 8.447863247863248, "grad_norm": 0.17038565874099731, "learning_rate": 3.285735314745248e-05, "loss": 0.7558, "step": 7417 }, { "epoch": 8.449002849002849, "grad_norm": 0.18483445048332214, "learning_rate": 3.2852929930836765e-05, "loss": 0.8252, "step": 7418 }, { "epoch": 8.450142450142451, "grad_norm": 0.1767086684703827, "learning_rate": 3.2848506441467194e-05, "loss": 0.7139, "step": 7419 }, { "epoch": 8.451282051282051, "grad_norm": 0.16779440641403198, "learning_rate": 3.284408267949741e-05, "loss": 0.9694, "step": 7420 }, { "epoch": 8.452421652421652, "grad_norm": 0.18901821970939636, "learning_rate": 3.283965864508107e-05, "loss": 0.6774, "step": 7421 }, { "epoch": 8.453561253561254, "grad_norm": 0.18769466876983643, "learning_rate": 3.283523433837182e-05, "loss": 0.7856, "step": 7422 }, { "epoch": 8.454700854700855, "grad_norm": 0.19077813625335693, "learning_rate": 3.283080975952333e-05, "loss": 0.6924, "step": 7423 }, { "epoch": 8.455840455840455, "grad_norm": 0.18337106704711914, "learning_rate": 3.2826384908689286e-05, "loss": 0.8401, "step": 7424 }, { "epoch": 8.456980056980058, "grad_norm": 0.24050772190093994, "learning_rate": 3.282195978602338e-05, "loss": 0.667, "step": 7425 }, { "epoch": 8.458119658119658, "grad_norm": 0.21716605126857758, "learning_rate": 3.281753439167929e-05, "loss": 0.6343, "step": 7426 }, { "epoch": 8.459259259259259, "grad_norm": 0.2014734148979187, "learning_rate": 3.2813108725810736e-05, "loss": 0.7172, "step": 7427 }, { "epoch": 8.460398860398861, "grad_norm": 0.19228309392929077, "learning_rate": 3.280868278857143e-05, "loss": 0.823, "step": 7428 }, { "epoch": 8.461538461538462, "grad_norm": 0.17936232686042786, "learning_rate": 3.280425658011509e-05, "loss": 0.7971, "step": 7429 }, { "epoch": 8.462678062678062, "grad_norm": 0.20183010399341583, "learning_rate": 3.2799830100595475e-05, "loss": 0.7879, "step": 7430 }, { "epoch": 8.463817663817665, "grad_norm": 0.19867466390132904, "learning_rate": 3.279540335016629e-05, "loss": 0.7338, "step": 7431 }, { "epoch": 8.464957264957265, "grad_norm": 0.1901450902223587, "learning_rate": 3.279097632898132e-05, "loss": 0.8318, "step": 7432 }, { "epoch": 8.466096866096866, "grad_norm": 0.19821587204933167, "learning_rate": 3.278654903719432e-05, "loss": 0.8632, "step": 7433 }, { "epoch": 8.467236467236468, "grad_norm": 0.1778682917356491, "learning_rate": 3.278212147495905e-05, "loss": 0.8468, "step": 7434 }, { "epoch": 8.468376068376068, "grad_norm": 0.2030886709690094, "learning_rate": 3.277769364242931e-05, "loss": 0.8225, "step": 7435 }, { "epoch": 8.469515669515669, "grad_norm": 0.2139289379119873, "learning_rate": 3.2773265539758875e-05, "loss": 0.7398, "step": 7436 }, { "epoch": 8.470655270655271, "grad_norm": 0.1902332752943039, "learning_rate": 3.2768837167101554e-05, "loss": 0.8473, "step": 7437 }, { "epoch": 8.471794871794872, "grad_norm": 0.2748855650424957, "learning_rate": 3.276440852461115e-05, "loss": 0.5862, "step": 7438 }, { "epoch": 8.472934472934472, "grad_norm": 0.2746782898902893, "learning_rate": 3.275997961244149e-05, "loss": 0.5615, "step": 7439 }, { "epoch": 8.474074074074075, "grad_norm": 0.2350367307662964, "learning_rate": 3.275555043074639e-05, "loss": 0.7274, "step": 7440 }, { "epoch": 8.475213675213675, "grad_norm": 0.230951726436615, "learning_rate": 3.27511209796797e-05, "loss": 0.5736, "step": 7441 }, { "epoch": 8.476353276353276, "grad_norm": 0.26116323471069336, "learning_rate": 3.2746691259395266e-05, "loss": 0.4428, "step": 7442 }, { "epoch": 8.477492877492878, "grad_norm": 0.1632407307624817, "learning_rate": 3.274226127004694e-05, "loss": 0.8882, "step": 7443 }, { "epoch": 8.478632478632479, "grad_norm": 0.16980884969234467, "learning_rate": 3.2737831011788586e-05, "loss": 0.6736, "step": 7444 }, { "epoch": 8.47977207977208, "grad_norm": 0.18194320797920227, "learning_rate": 3.273340048477409e-05, "loss": 0.8031, "step": 7445 }, { "epoch": 8.480911680911682, "grad_norm": 0.2755073606967926, "learning_rate": 3.2728969689157315e-05, "loss": 0.6776, "step": 7446 }, { "epoch": 8.482051282051282, "grad_norm": 0.2624169588088989, "learning_rate": 3.272453862509218e-05, "loss": 0.5836, "step": 7447 }, { "epoch": 8.483190883190883, "grad_norm": 0.16591161489486694, "learning_rate": 3.272010729273257e-05, "loss": 0.9367, "step": 7448 }, { "epoch": 8.484330484330485, "grad_norm": 0.18204365670681, "learning_rate": 3.2715675692232405e-05, "loss": 0.6631, "step": 7449 }, { "epoch": 8.485470085470086, "grad_norm": 0.185991570353508, "learning_rate": 3.27112438237456e-05, "loss": 0.7504, "step": 7450 }, { "epoch": 8.486609686609686, "grad_norm": 0.22235769033432007, "learning_rate": 3.2706811687426095e-05, "loss": 0.6969, "step": 7451 }, { "epoch": 8.487749287749288, "grad_norm": 0.22267098724842072, "learning_rate": 3.2702379283427825e-05, "loss": 0.746, "step": 7452 }, { "epoch": 8.488888888888889, "grad_norm": 0.2239670753479004, "learning_rate": 3.269794661190475e-05, "loss": 0.5705, "step": 7453 }, { "epoch": 8.49002849002849, "grad_norm": 0.17975512146949768, "learning_rate": 3.2693513673010804e-05, "loss": 0.7621, "step": 7454 }, { "epoch": 8.491168091168092, "grad_norm": 0.21485914289951324, "learning_rate": 3.268908046689999e-05, "loss": 0.7921, "step": 7455 }, { "epoch": 8.492307692307692, "grad_norm": 0.19152890145778656, "learning_rate": 3.268464699372625e-05, "loss": 0.8554, "step": 7456 }, { "epoch": 8.493447293447293, "grad_norm": 0.1772022396326065, "learning_rate": 3.268021325364359e-05, "loss": 0.886, "step": 7457 }, { "epoch": 8.494586894586895, "grad_norm": 0.17583614587783813, "learning_rate": 3.2675779246806006e-05, "loss": 0.8151, "step": 7458 }, { "epoch": 8.495726495726496, "grad_norm": 0.1722690612077713, "learning_rate": 3.26713449733675e-05, "loss": 0.8749, "step": 7459 }, { "epoch": 8.496866096866096, "grad_norm": 0.25349435210227966, "learning_rate": 3.2666910433482085e-05, "loss": 0.5816, "step": 7460 }, { "epoch": 8.498005698005699, "grad_norm": 0.18909041583538055, "learning_rate": 3.266247562730379e-05, "loss": 0.7358, "step": 7461 }, { "epoch": 8.4991452991453, "grad_norm": 0.19632674753665924, "learning_rate": 3.265804055498664e-05, "loss": 0.6451, "step": 7462 }, { "epoch": 8.5002849002849, "grad_norm": 0.20935028791427612, "learning_rate": 3.2653605216684684e-05, "loss": 0.9886, "step": 7463 }, { "epoch": 8.501424501424502, "grad_norm": 0.2283162921667099, "learning_rate": 3.2649169612551975e-05, "loss": 0.5153, "step": 7464 }, { "epoch": 8.502564102564103, "grad_norm": 0.20793019235134125, "learning_rate": 3.2644733742742564e-05, "loss": 0.7073, "step": 7465 }, { "epoch": 8.503703703703703, "grad_norm": 0.18664056062698364, "learning_rate": 3.264029760741054e-05, "loss": 0.6648, "step": 7466 }, { "epoch": 8.504843304843305, "grad_norm": 0.17454460263252258, "learning_rate": 3.263586120670995e-05, "loss": 0.8426, "step": 7467 }, { "epoch": 8.505982905982906, "grad_norm": 0.1813623309135437, "learning_rate": 3.2631424540794914e-05, "loss": 0.7656, "step": 7468 }, { "epoch": 8.507122507122507, "grad_norm": 0.2675894498825073, "learning_rate": 3.262698760981951e-05, "loss": 0.5332, "step": 7469 }, { "epoch": 8.508262108262109, "grad_norm": 0.2021193504333496, "learning_rate": 3.262255041393786e-05, "loss": 0.7238, "step": 7470 }, { "epoch": 8.50940170940171, "grad_norm": 0.2289065718650818, "learning_rate": 3.2618112953304064e-05, "loss": 0.7733, "step": 7471 }, { "epoch": 8.51054131054131, "grad_norm": 0.2523181438446045, "learning_rate": 3.261367522807227e-05, "loss": 0.7155, "step": 7472 }, { "epoch": 8.511680911680912, "grad_norm": 0.16519233584403992, "learning_rate": 3.260923723839657e-05, "loss": 0.8448, "step": 7473 }, { "epoch": 8.512820512820513, "grad_norm": 0.21784242987632751, "learning_rate": 3.260479898443116e-05, "loss": 0.8034, "step": 7474 }, { "epoch": 8.513960113960113, "grad_norm": 0.20125596225261688, "learning_rate": 3.260036046633016e-05, "loss": 0.8475, "step": 7475 }, { "epoch": 8.515099715099716, "grad_norm": 0.2057802826166153, "learning_rate": 3.2595921684247745e-05, "loss": 0.7236, "step": 7476 }, { "epoch": 8.516239316239316, "grad_norm": 0.18940041959285736, "learning_rate": 3.259148263833807e-05, "loss": 0.7352, "step": 7477 }, { "epoch": 8.517378917378917, "grad_norm": 0.18929541110992432, "learning_rate": 3.2587043328755336e-05, "loss": 0.7664, "step": 7478 }, { "epoch": 8.518518518518519, "grad_norm": 0.19956086575984955, "learning_rate": 3.2582603755653715e-05, "loss": 0.7915, "step": 7479 }, { "epoch": 8.51965811965812, "grad_norm": 0.2583984136581421, "learning_rate": 3.257816391918741e-05, "loss": 0.5429, "step": 7480 }, { "epoch": 8.52079772079772, "grad_norm": 0.2227642834186554, "learning_rate": 3.257372381951065e-05, "loss": 0.7769, "step": 7481 }, { "epoch": 8.521937321937322, "grad_norm": 0.1856517642736435, "learning_rate": 3.256928345677762e-05, "loss": 0.9346, "step": 7482 }, { "epoch": 8.523076923076923, "grad_norm": 0.22680893540382385, "learning_rate": 3.256484283114257e-05, "loss": 0.8209, "step": 7483 }, { "epoch": 8.524216524216524, "grad_norm": 0.16011473536491394, "learning_rate": 3.256040194275971e-05, "loss": 0.934, "step": 7484 }, { "epoch": 8.525356125356126, "grad_norm": 0.19772249460220337, "learning_rate": 3.2555960791783314e-05, "loss": 0.7956, "step": 7485 }, { "epoch": 8.526495726495726, "grad_norm": 0.2261231690645218, "learning_rate": 3.255151937836761e-05, "loss": 0.619, "step": 7486 }, { "epoch": 8.527635327635327, "grad_norm": 0.1840931624174118, "learning_rate": 3.254707770266688e-05, "loss": 0.8156, "step": 7487 }, { "epoch": 8.52877492877493, "grad_norm": 0.1985095739364624, "learning_rate": 3.254263576483538e-05, "loss": 0.8716, "step": 7488 }, { "epoch": 8.52991452991453, "grad_norm": 0.2081703543663025, "learning_rate": 3.25381935650274e-05, "loss": 0.7213, "step": 7489 }, { "epoch": 8.53105413105413, "grad_norm": 0.23321858048439026, "learning_rate": 3.2533751103397227e-05, "loss": 0.5409, "step": 7490 }, { "epoch": 8.532193732193733, "grad_norm": 0.21747148036956787, "learning_rate": 3.252930838009916e-05, "loss": 0.6494, "step": 7491 }, { "epoch": 8.533333333333333, "grad_norm": 0.19605937600135803, "learning_rate": 3.252486539528751e-05, "loss": 0.701, "step": 7492 }, { "epoch": 8.534472934472934, "grad_norm": 0.2508218288421631, "learning_rate": 3.2520422149116594e-05, "loss": 0.5526, "step": 7493 }, { "epoch": 8.535612535612536, "grad_norm": 0.20444120466709137, "learning_rate": 3.251597864174072e-05, "loss": 0.6714, "step": 7494 }, { "epoch": 8.536752136752137, "grad_norm": 0.2642122805118561, "learning_rate": 3.2511534873314254e-05, "loss": 0.4758, "step": 7495 }, { "epoch": 8.537891737891737, "grad_norm": 0.15745115280151367, "learning_rate": 3.250709084399152e-05, "loss": 0.8293, "step": 7496 }, { "epoch": 8.53903133903134, "grad_norm": 0.17655222117900848, "learning_rate": 3.2502646553926874e-05, "loss": 0.8498, "step": 7497 }, { "epoch": 8.54017094017094, "grad_norm": 0.16540151834487915, "learning_rate": 3.249820200327469e-05, "loss": 0.8846, "step": 7498 }, { "epoch": 8.54131054131054, "grad_norm": 0.22341862320899963, "learning_rate": 3.249375719218932e-05, "loss": 0.634, "step": 7499 }, { "epoch": 8.542450142450143, "grad_norm": 0.20056979358196259, "learning_rate": 3.2489312120825164e-05, "loss": 0.7172, "step": 7500 }, { "epoch": 8.543589743589743, "grad_norm": 0.21974578499794006, "learning_rate": 3.2484866789336585e-05, "loss": 0.751, "step": 7501 }, { "epoch": 8.544729344729344, "grad_norm": 0.1837639957666397, "learning_rate": 3.248042119787802e-05, "loss": 0.7221, "step": 7502 }, { "epoch": 8.545868945868946, "grad_norm": 0.19959641993045807, "learning_rate": 3.2475975346603846e-05, "loss": 0.8158, "step": 7503 }, { "epoch": 8.547008547008547, "grad_norm": 0.2758103311061859, "learning_rate": 3.2471529235668495e-05, "loss": 0.486, "step": 7504 }, { "epoch": 8.548148148148147, "grad_norm": 0.19508454203605652, "learning_rate": 3.246708286522638e-05, "loss": 0.7394, "step": 7505 }, { "epoch": 8.54928774928775, "grad_norm": 0.1672787368297577, "learning_rate": 3.246263623543194e-05, "loss": 0.8951, "step": 7506 }, { "epoch": 8.55042735042735, "grad_norm": 0.2056223452091217, "learning_rate": 3.245818934643963e-05, "loss": 0.6276, "step": 7507 }, { "epoch": 8.55156695156695, "grad_norm": 0.1978432983160019, "learning_rate": 3.245374219840389e-05, "loss": 0.6644, "step": 7508 }, { "epoch": 8.552706552706553, "grad_norm": 0.1940750628709793, "learning_rate": 3.2449294791479185e-05, "loss": 0.6731, "step": 7509 }, { "epoch": 8.553846153846154, "grad_norm": 0.19845548272132874, "learning_rate": 3.244484712581999e-05, "loss": 0.659, "step": 7510 }, { "epoch": 8.554985754985754, "grad_norm": 0.18966366350650787, "learning_rate": 3.2440399201580776e-05, "loss": 0.6791, "step": 7511 }, { "epoch": 8.556125356125357, "grad_norm": 0.19679704308509827, "learning_rate": 3.243595101891603e-05, "loss": 0.7417, "step": 7512 }, { "epoch": 8.557264957264957, "grad_norm": 0.21071788668632507, "learning_rate": 3.2431502577980265e-05, "loss": 0.7277, "step": 7513 }, { "epoch": 8.558404558404558, "grad_norm": 0.19645434617996216, "learning_rate": 3.2427053878927984e-05, "loss": 0.8119, "step": 7514 }, { "epoch": 8.55954415954416, "grad_norm": 0.2379760444164276, "learning_rate": 3.242260492191369e-05, "loss": 0.6499, "step": 7515 }, { "epoch": 8.56068376068376, "grad_norm": 0.18639816343784332, "learning_rate": 3.2418155707091914e-05, "loss": 0.9022, "step": 7516 }, { "epoch": 8.561823361823361, "grad_norm": 0.17497022449970245, "learning_rate": 3.241370623461719e-05, "loss": 0.8861, "step": 7517 }, { "epoch": 8.562962962962963, "grad_norm": 0.19553431868553162, "learning_rate": 3.240925650464406e-05, "loss": 0.8825, "step": 7518 }, { "epoch": 8.564102564102564, "grad_norm": 0.21569235622882843, "learning_rate": 3.240480651732709e-05, "loss": 0.6814, "step": 7519 }, { "epoch": 8.565242165242164, "grad_norm": 0.18715718388557434, "learning_rate": 3.240035627282081e-05, "loss": 0.7876, "step": 7520 }, { "epoch": 8.566381766381767, "grad_norm": 0.2536460757255554, "learning_rate": 3.239590577127981e-05, "loss": 0.5207, "step": 7521 }, { "epoch": 8.567521367521367, "grad_norm": 0.22352391481399536, "learning_rate": 3.2391455012858665e-05, "loss": 0.6525, "step": 7522 }, { "epoch": 8.568660968660968, "grad_norm": 0.21493937075138092, "learning_rate": 3.238700399771196e-05, "loss": 0.6143, "step": 7523 }, { "epoch": 8.56980056980057, "grad_norm": 0.2501967251300812, "learning_rate": 3.238255272599429e-05, "loss": 0.4957, "step": 7524 }, { "epoch": 8.57094017094017, "grad_norm": 0.1957802176475525, "learning_rate": 3.2378101197860266e-05, "loss": 0.9877, "step": 7525 }, { "epoch": 8.572079772079771, "grad_norm": 0.21185512840747833, "learning_rate": 3.23736494134645e-05, "loss": 0.8848, "step": 7526 }, { "epoch": 8.573219373219374, "grad_norm": 0.2178097814321518, "learning_rate": 3.2369197372961604e-05, "loss": 0.7869, "step": 7527 }, { "epoch": 8.574358974358974, "grad_norm": 0.1684758961200714, "learning_rate": 3.236474507650622e-05, "loss": 0.8135, "step": 7528 }, { "epoch": 8.575498575498575, "grad_norm": 0.22879929840564728, "learning_rate": 3.236029252425299e-05, "loss": 0.694, "step": 7529 }, { "epoch": 8.576638176638177, "grad_norm": 0.21084371209144592, "learning_rate": 3.235583971635656e-05, "loss": 0.5505, "step": 7530 }, { "epoch": 8.577777777777778, "grad_norm": 0.2515728175640106, "learning_rate": 3.2351386652971585e-05, "loss": 0.7559, "step": 7531 }, { "epoch": 8.578917378917378, "grad_norm": 0.1792079657316208, "learning_rate": 3.234693333425274e-05, "loss": 0.8642, "step": 7532 }, { "epoch": 8.58005698005698, "grad_norm": 0.20028193295001984, "learning_rate": 3.234247976035469e-05, "loss": 0.6371, "step": 7533 }, { "epoch": 8.581196581196581, "grad_norm": 0.20154264569282532, "learning_rate": 3.2338025931432126e-05, "loss": 0.6983, "step": 7534 }, { "epoch": 8.582336182336181, "grad_norm": 0.14242465794086456, "learning_rate": 3.233357184763974e-05, "loss": 0.8349, "step": 7535 }, { "epoch": 8.583475783475784, "grad_norm": 0.20334072411060333, "learning_rate": 3.232911750913225e-05, "loss": 0.7664, "step": 7536 }, { "epoch": 8.584615384615384, "grad_norm": 0.15908706188201904, "learning_rate": 3.232466291606434e-05, "loss": 0.7391, "step": 7537 }, { "epoch": 8.585754985754985, "grad_norm": 0.21249696612358093, "learning_rate": 3.232020806859075e-05, "loss": 0.8167, "step": 7538 }, { "epoch": 8.586894586894587, "grad_norm": 0.19840359687805176, "learning_rate": 3.23157529668662e-05, "loss": 0.7975, "step": 7539 }, { "epoch": 8.588034188034188, "grad_norm": 0.1614355593919754, "learning_rate": 3.231129761104543e-05, "loss": 0.8875, "step": 7540 }, { "epoch": 8.589173789173788, "grad_norm": 0.30287957191467285, "learning_rate": 3.230684200128319e-05, "loss": 0.3547, "step": 7541 }, { "epoch": 8.59031339031339, "grad_norm": 0.1973702758550644, "learning_rate": 3.230238613773423e-05, "loss": 0.582, "step": 7542 }, { "epoch": 8.591452991452991, "grad_norm": 0.16713880002498627, "learning_rate": 3.2297930020553326e-05, "loss": 0.8109, "step": 7543 }, { "epoch": 8.592592592592592, "grad_norm": 0.19080480933189392, "learning_rate": 3.2293473649895236e-05, "loss": 0.6886, "step": 7544 }, { "epoch": 8.593732193732194, "grad_norm": 0.21673078835010529, "learning_rate": 3.2289017025914745e-05, "loss": 0.5763, "step": 7545 }, { "epoch": 8.594871794871795, "grad_norm": 0.20066715776920319, "learning_rate": 3.2284560148766656e-05, "loss": 0.7703, "step": 7546 }, { "epoch": 8.596011396011395, "grad_norm": 0.21993280947208405, "learning_rate": 3.228010301860576e-05, "loss": 0.7767, "step": 7547 }, { "epoch": 8.597150997150997, "grad_norm": 0.18429525196552277, "learning_rate": 3.227564563558686e-05, "loss": 0.6522, "step": 7548 }, { "epoch": 8.598290598290598, "grad_norm": 0.2343692183494568, "learning_rate": 3.227118799986479e-05, "loss": 0.8281, "step": 7549 }, { "epoch": 8.5994301994302, "grad_norm": 0.20530085265636444, "learning_rate": 3.226673011159436e-05, "loss": 0.714, "step": 7550 }, { "epoch": 8.6005698005698, "grad_norm": 0.21249805390834808, "learning_rate": 3.22622719709304e-05, "loss": 0.6511, "step": 7551 }, { "epoch": 8.601709401709401, "grad_norm": 0.23579922318458557, "learning_rate": 3.2257813578027776e-05, "loss": 0.6051, "step": 7552 }, { "epoch": 8.602849002849004, "grad_norm": 0.18509957194328308, "learning_rate": 3.225335493304132e-05, "loss": 0.9183, "step": 7553 }, { "epoch": 8.603988603988604, "grad_norm": 0.23925815522670746, "learning_rate": 3.2248896036125906e-05, "loss": 0.7304, "step": 7554 }, { "epoch": 8.605128205128205, "grad_norm": 0.20431527495384216, "learning_rate": 3.2244436887436396e-05, "loss": 0.6057, "step": 7555 }, { "epoch": 8.606267806267807, "grad_norm": 0.19780471920967102, "learning_rate": 3.223997748712767e-05, "loss": 0.8452, "step": 7556 }, { "epoch": 8.607407407407408, "grad_norm": 0.23321953415870667, "learning_rate": 3.223551783535461e-05, "loss": 0.6708, "step": 7557 }, { "epoch": 8.608547008547008, "grad_norm": 0.19315187633037567, "learning_rate": 3.223105793227212e-05, "loss": 0.8238, "step": 7558 }, { "epoch": 8.60968660968661, "grad_norm": 0.16965371370315552, "learning_rate": 3.222659777803511e-05, "loss": 0.8877, "step": 7559 }, { "epoch": 8.610826210826211, "grad_norm": 0.20588497817516327, "learning_rate": 3.222213737279849e-05, "loss": 0.6075, "step": 7560 }, { "epoch": 8.611965811965812, "grad_norm": 0.18448422849178314, "learning_rate": 3.221767671671717e-05, "loss": 0.7569, "step": 7561 }, { "epoch": 8.613105413105414, "grad_norm": 0.1948234587907791, "learning_rate": 3.221321580994608e-05, "loss": 0.81, "step": 7562 }, { "epoch": 8.614245014245014, "grad_norm": 0.2320779412984848, "learning_rate": 3.2208754652640177e-05, "loss": 0.8917, "step": 7563 }, { "epoch": 8.615384615384615, "grad_norm": 0.23803263902664185, "learning_rate": 3.22042932449544e-05, "loss": 0.5832, "step": 7564 }, { "epoch": 8.616524216524217, "grad_norm": 0.20834219455718994, "learning_rate": 3.2199831587043714e-05, "loss": 0.754, "step": 7565 }, { "epoch": 8.617663817663818, "grad_norm": 0.2905752658843994, "learning_rate": 3.219536967906307e-05, "loss": 0.4483, "step": 7566 }, { "epoch": 8.618803418803418, "grad_norm": 0.21069851517677307, "learning_rate": 3.219090752116745e-05, "loss": 0.6427, "step": 7567 }, { "epoch": 8.61994301994302, "grad_norm": 0.21779058873653412, "learning_rate": 3.218644511351183e-05, "loss": 0.7916, "step": 7568 }, { "epoch": 8.621082621082621, "grad_norm": 0.2853034734725952, "learning_rate": 3.2181982456251216e-05, "loss": 0.6385, "step": 7569 }, { "epoch": 8.622222222222222, "grad_norm": 0.1741340309381485, "learning_rate": 3.2177519549540604e-05, "loss": 0.8664, "step": 7570 }, { "epoch": 8.623361823361824, "grad_norm": 0.2566923201084137, "learning_rate": 3.2173056393534995e-05, "loss": 0.5023, "step": 7571 }, { "epoch": 8.624501424501425, "grad_norm": 0.2065931111574173, "learning_rate": 3.216859298838942e-05, "loss": 0.6617, "step": 7572 }, { "epoch": 8.625641025641025, "grad_norm": 0.1904585063457489, "learning_rate": 3.2164129334258885e-05, "loss": 0.8297, "step": 7573 }, { "epoch": 8.626780626780628, "grad_norm": 0.17974013090133667, "learning_rate": 3.2159665431298444e-05, "loss": 0.5732, "step": 7574 }, { "epoch": 8.627920227920228, "grad_norm": 0.2057327926158905, "learning_rate": 3.215520127966313e-05, "loss": 0.7775, "step": 7575 }, { "epoch": 8.629059829059829, "grad_norm": 0.23213617503643036, "learning_rate": 3.2150736879507996e-05, "loss": 0.6528, "step": 7576 }, { "epoch": 8.630199430199431, "grad_norm": 0.20942586660385132, "learning_rate": 3.214627223098812e-05, "loss": 0.7394, "step": 7577 }, { "epoch": 8.631339031339031, "grad_norm": 0.2412552684545517, "learning_rate": 3.214180733425853e-05, "loss": 0.7084, "step": 7578 }, { "epoch": 8.632478632478632, "grad_norm": 0.20640644431114197, "learning_rate": 3.213734218947435e-05, "loss": 0.793, "step": 7579 }, { "epoch": 8.633618233618234, "grad_norm": 0.2071792483329773, "learning_rate": 3.2132876796790646e-05, "loss": 0.7439, "step": 7580 }, { "epoch": 8.634757834757835, "grad_norm": 0.21695175766944885, "learning_rate": 3.212841115636252e-05, "loss": 0.6131, "step": 7581 }, { "epoch": 8.635897435897435, "grad_norm": 0.20960822701454163, "learning_rate": 3.212394526834506e-05, "loss": 0.7419, "step": 7582 }, { "epoch": 8.637037037037038, "grad_norm": 0.21692641079425812, "learning_rate": 3.2119479132893404e-05, "loss": 0.7871, "step": 7583 }, { "epoch": 8.638176638176638, "grad_norm": 0.1722017377614975, "learning_rate": 3.211501275016265e-05, "loss": 0.7754, "step": 7584 }, { "epoch": 8.639316239316239, "grad_norm": 0.18513859808444977, "learning_rate": 3.2110546120307934e-05, "loss": 0.7636, "step": 7585 }, { "epoch": 8.640455840455841, "grad_norm": 0.18878234922885895, "learning_rate": 3.210607924348441e-05, "loss": 0.7652, "step": 7586 }, { "epoch": 8.641595441595442, "grad_norm": 0.17475619912147522, "learning_rate": 3.21016121198472e-05, "loss": 0.7942, "step": 7587 }, { "epoch": 8.642735042735042, "grad_norm": 0.24528071284294128, "learning_rate": 3.209714474955147e-05, "loss": 0.7041, "step": 7588 }, { "epoch": 8.643874643874645, "grad_norm": 0.18839310109615326, "learning_rate": 3.2092677132752386e-05, "loss": 0.6241, "step": 7589 }, { "epoch": 8.645014245014245, "grad_norm": 0.2187696397304535, "learning_rate": 3.208820926960513e-05, "loss": 0.5604, "step": 7590 }, { "epoch": 8.646153846153846, "grad_norm": 0.17568111419677734, "learning_rate": 3.2083741160264866e-05, "loss": 0.7785, "step": 7591 }, { "epoch": 8.647293447293448, "grad_norm": 0.22295598685741425, "learning_rate": 3.2079272804886797e-05, "loss": 0.6559, "step": 7592 }, { "epoch": 8.648433048433048, "grad_norm": 0.20748206973075867, "learning_rate": 3.207480420362611e-05, "loss": 0.6732, "step": 7593 }, { "epoch": 8.649572649572649, "grad_norm": 0.20264337956905365, "learning_rate": 3.207033535663802e-05, "loss": 0.8176, "step": 7594 }, { "epoch": 8.650712250712251, "grad_norm": 0.17401979863643646, "learning_rate": 3.2065866264077735e-05, "loss": 0.9071, "step": 7595 }, { "epoch": 8.651851851851852, "grad_norm": 0.241608664393425, "learning_rate": 3.2061396926100485e-05, "loss": 0.775, "step": 7596 }, { "epoch": 8.652991452991452, "grad_norm": 0.17840832471847534, "learning_rate": 3.2056927342861504e-05, "loss": 0.8594, "step": 7597 }, { "epoch": 8.654131054131055, "grad_norm": 0.20939922332763672, "learning_rate": 3.205245751451603e-05, "loss": 0.8088, "step": 7598 }, { "epoch": 8.655270655270655, "grad_norm": 0.21536515653133392, "learning_rate": 3.2047987441219306e-05, "loss": 0.6038, "step": 7599 }, { "epoch": 8.656410256410256, "grad_norm": 0.2172224521636963, "learning_rate": 3.2043517123126605e-05, "loss": 0.6961, "step": 7600 }, { "epoch": 8.657549857549858, "grad_norm": 0.21221227943897247, "learning_rate": 3.2039046560393185e-05, "loss": 0.6486, "step": 7601 }, { "epoch": 8.658689458689459, "grad_norm": 0.2361478954553604, "learning_rate": 3.203457575317431e-05, "loss": 0.5372, "step": 7602 }, { "epoch": 8.65982905982906, "grad_norm": 0.17305105924606323, "learning_rate": 3.203010470162528e-05, "loss": 0.899, "step": 7603 }, { "epoch": 8.660968660968662, "grad_norm": 0.15480591356754303, "learning_rate": 3.202563340590138e-05, "loss": 0.8264, "step": 7604 }, { "epoch": 8.662108262108262, "grad_norm": 0.18095020949840546, "learning_rate": 3.202116186615792e-05, "loss": 0.8745, "step": 7605 }, { "epoch": 8.663247863247863, "grad_norm": 0.15004213154315948, "learning_rate": 3.2016690082550185e-05, "loss": 0.8771, "step": 7606 }, { "epoch": 8.664387464387465, "grad_norm": 0.21371202170848846, "learning_rate": 3.201221805523351e-05, "loss": 0.8396, "step": 7607 }, { "epoch": 8.665527065527066, "grad_norm": 0.1737605780363083, "learning_rate": 3.200774578436323e-05, "loss": 0.8326, "step": 7608 }, { "epoch": 8.666666666666666, "grad_norm": 0.22488968074321747, "learning_rate": 3.2003273270094655e-05, "loss": 0.5997, "step": 7609 }, { "epoch": 8.667806267806268, "grad_norm": 0.21885648369789124, "learning_rate": 3.199880051258315e-05, "loss": 0.6382, "step": 7610 }, { "epoch": 8.668945868945869, "grad_norm": 0.18303780257701874, "learning_rate": 3.1994327511984044e-05, "loss": 0.7313, "step": 7611 }, { "epoch": 8.67008547008547, "grad_norm": 0.18519094586372375, "learning_rate": 3.198985426845272e-05, "loss": 0.6757, "step": 7612 }, { "epoch": 8.671225071225072, "grad_norm": 0.23257476091384888, "learning_rate": 3.198538078214454e-05, "loss": 0.7306, "step": 7613 }, { "epoch": 8.672364672364672, "grad_norm": 0.22287617623806, "learning_rate": 3.198090705321486e-05, "loss": 0.8281, "step": 7614 }, { "epoch": 8.673504273504273, "grad_norm": 0.21014879643917084, "learning_rate": 3.197643308181909e-05, "loss": 0.8487, "step": 7615 }, { "epoch": 8.674643874643875, "grad_norm": 0.21236072480678558, "learning_rate": 3.197195886811261e-05, "loss": 0.6114, "step": 7616 }, { "epoch": 8.675783475783476, "grad_norm": 0.192336767911911, "learning_rate": 3.1967484412250825e-05, "loss": 0.6567, "step": 7617 }, { "epoch": 8.676923076923076, "grad_norm": 0.18832515180110931, "learning_rate": 3.196300971438915e-05, "loss": 0.7274, "step": 7618 }, { "epoch": 8.678062678062679, "grad_norm": 0.20374321937561035, "learning_rate": 3.1958534774683e-05, "loss": 0.6818, "step": 7619 }, { "epoch": 8.67920227920228, "grad_norm": 0.3274502754211426, "learning_rate": 3.1954059593287796e-05, "loss": 0.7898, "step": 7620 }, { "epoch": 8.68034188034188, "grad_norm": 0.2472456693649292, "learning_rate": 3.194958417035898e-05, "loss": 0.7302, "step": 7621 }, { "epoch": 8.681481481481482, "grad_norm": 0.17255574464797974, "learning_rate": 3.1945108506051996e-05, "loss": 0.7454, "step": 7622 }, { "epoch": 8.682621082621083, "grad_norm": 0.2142171859741211, "learning_rate": 3.194063260052229e-05, "loss": 0.6772, "step": 7623 }, { "epoch": 8.683760683760683, "grad_norm": 0.27398771047592163, "learning_rate": 3.1936156453925336e-05, "loss": 0.521, "step": 7624 }, { "epoch": 8.684900284900285, "grad_norm": 0.21053948998451233, "learning_rate": 3.193168006641659e-05, "loss": 0.7986, "step": 7625 }, { "epoch": 8.686039886039886, "grad_norm": 0.1836550235748291, "learning_rate": 3.192720343815153e-05, "loss": 0.7332, "step": 7626 }, { "epoch": 8.687179487179487, "grad_norm": 0.2423364818096161, "learning_rate": 3.192272656928565e-05, "loss": 0.5273, "step": 7627 }, { "epoch": 8.688319088319089, "grad_norm": 0.18115922808647156, "learning_rate": 3.191824945997443e-05, "loss": 0.7619, "step": 7628 }, { "epoch": 8.68945868945869, "grad_norm": 0.20010307431221008, "learning_rate": 3.191377211037338e-05, "loss": 0.7154, "step": 7629 }, { "epoch": 8.69059829059829, "grad_norm": 0.21479089558124542, "learning_rate": 3.190929452063803e-05, "loss": 0.6882, "step": 7630 }, { "epoch": 8.691737891737892, "grad_norm": 0.18326161801815033, "learning_rate": 3.1904816690923864e-05, "loss": 0.7618, "step": 7631 }, { "epoch": 8.692877492877493, "grad_norm": 0.1974235326051712, "learning_rate": 3.1900338621386434e-05, "loss": 0.8034, "step": 7632 }, { "epoch": 8.694017094017093, "grad_norm": 0.23021548986434937, "learning_rate": 3.189586031218126e-05, "loss": 0.5779, "step": 7633 }, { "epoch": 8.695156695156696, "grad_norm": 0.21015411615371704, "learning_rate": 3.18913817634639e-05, "loss": 0.7211, "step": 7634 }, { "epoch": 8.696296296296296, "grad_norm": 0.20431192219257355, "learning_rate": 3.18869029753899e-05, "loss": 0.7245, "step": 7635 }, { "epoch": 8.697435897435897, "grad_norm": 0.20619742572307587, "learning_rate": 3.188242394811482e-05, "loss": 0.767, "step": 7636 }, { "epoch": 8.698575498575499, "grad_norm": 0.23843063414096832, "learning_rate": 3.187794468179422e-05, "loss": 0.5424, "step": 7637 }, { "epoch": 8.6997150997151, "grad_norm": 0.17725051939487457, "learning_rate": 3.18734651765837e-05, "loss": 0.7511, "step": 7638 }, { "epoch": 8.7008547008547, "grad_norm": 0.23317287862300873, "learning_rate": 3.1868985432638824e-05, "loss": 0.6057, "step": 7639 }, { "epoch": 8.701994301994302, "grad_norm": 0.2045641988515854, "learning_rate": 3.18645054501152e-05, "loss": 0.8055, "step": 7640 }, { "epoch": 8.703133903133903, "grad_norm": 0.19148777425289154, "learning_rate": 3.186002522916842e-05, "loss": 0.791, "step": 7641 }, { "epoch": 8.704273504273504, "grad_norm": 0.17628014087677002, "learning_rate": 3.18555447699541e-05, "loss": 0.8129, "step": 7642 }, { "epoch": 8.705413105413106, "grad_norm": 0.21798065304756165, "learning_rate": 3.185106407262786e-05, "loss": 0.7275, "step": 7643 }, { "epoch": 8.706552706552706, "grad_norm": 0.18521204590797424, "learning_rate": 3.184658313734532e-05, "loss": 0.9083, "step": 7644 }, { "epoch": 8.707692307692307, "grad_norm": 0.16972415149211884, "learning_rate": 3.1842101964262125e-05, "loss": 0.7585, "step": 7645 }, { "epoch": 8.70883190883191, "grad_norm": 0.22746361792087555, "learning_rate": 3.1837620553533906e-05, "loss": 0.6105, "step": 7646 }, { "epoch": 8.70997150997151, "grad_norm": 0.22049470245838165, "learning_rate": 3.183313890531633e-05, "loss": 0.7619, "step": 7647 }, { "epoch": 8.71111111111111, "grad_norm": 0.20580513775348663, "learning_rate": 3.182865701976504e-05, "loss": 0.5789, "step": 7648 }, { "epoch": 8.712250712250713, "grad_norm": 0.20522210001945496, "learning_rate": 3.1824174897035716e-05, "loss": 0.8254, "step": 7649 }, { "epoch": 8.713390313390313, "grad_norm": 0.16379280388355255, "learning_rate": 3.181969253728403e-05, "loss": 0.8075, "step": 7650 }, { "epoch": 8.714529914529914, "grad_norm": 0.22116778790950775, "learning_rate": 3.181520994066567e-05, "loss": 0.6458, "step": 7651 }, { "epoch": 8.715669515669516, "grad_norm": 0.19953124225139618, "learning_rate": 3.1810727107336324e-05, "loss": 0.6728, "step": 7652 }, { "epoch": 8.716809116809117, "grad_norm": 0.3285248279571533, "learning_rate": 3.18062440374517e-05, "loss": 0.8631, "step": 7653 }, { "epoch": 8.717948717948717, "grad_norm": 0.17777201533317566, "learning_rate": 3.180176073116751e-05, "loss": 0.7471, "step": 7654 }, { "epoch": 8.71908831908832, "grad_norm": 0.16862934827804565, "learning_rate": 3.1797277188639453e-05, "loss": 0.8936, "step": 7655 }, { "epoch": 8.72022792022792, "grad_norm": 0.1947425752878189, "learning_rate": 3.1792793410023265e-05, "loss": 0.7341, "step": 7656 }, { "epoch": 8.72136752136752, "grad_norm": 0.24071785807609558, "learning_rate": 3.178830939547469e-05, "loss": 0.5337, "step": 7657 }, { "epoch": 8.722507122507123, "grad_norm": 0.23296836018562317, "learning_rate": 3.178382514514946e-05, "loss": 0.7678, "step": 7658 }, { "epoch": 8.723646723646723, "grad_norm": 0.2113717645406723, "learning_rate": 3.177934065920332e-05, "loss": 0.6235, "step": 7659 }, { "epoch": 8.724786324786324, "grad_norm": 0.1431703269481659, "learning_rate": 3.177485593779205e-05, "loss": 1.0219, "step": 7660 }, { "epoch": 8.725925925925926, "grad_norm": 0.2029425948858261, "learning_rate": 3.1770370981071386e-05, "loss": 0.7498, "step": 7661 }, { "epoch": 8.727065527065527, "grad_norm": 0.18088236451148987, "learning_rate": 3.1765885789197123e-05, "loss": 0.6215, "step": 7662 }, { "epoch": 8.728205128205127, "grad_norm": 0.1828458309173584, "learning_rate": 3.176140036232505e-05, "loss": 0.6518, "step": 7663 }, { "epoch": 8.72934472934473, "grad_norm": 0.24527403712272644, "learning_rate": 3.175691470061094e-05, "loss": 0.7904, "step": 7664 }, { "epoch": 8.73048433048433, "grad_norm": 0.2324470430612564, "learning_rate": 3.175242880421061e-05, "loss": 0.5974, "step": 7665 }, { "epoch": 8.73162393162393, "grad_norm": 0.19904866814613342, "learning_rate": 3.174794267327985e-05, "loss": 0.8869, "step": 7666 }, { "epoch": 8.732763532763533, "grad_norm": 0.17857752740383148, "learning_rate": 3.174345630797448e-05, "loss": 0.899, "step": 7667 }, { "epoch": 8.733903133903134, "grad_norm": 0.1803186535835266, "learning_rate": 3.173896970845034e-05, "loss": 0.9189, "step": 7668 }, { "epoch": 8.735042735042736, "grad_norm": 0.18548636138439178, "learning_rate": 3.173448287486324e-05, "loss": 0.7402, "step": 7669 }, { "epoch": 8.736182336182337, "grad_norm": 0.18063399195671082, "learning_rate": 3.172999580736903e-05, "loss": 0.7928, "step": 7670 }, { "epoch": 8.737321937321937, "grad_norm": 0.20401537418365479, "learning_rate": 3.172550850612357e-05, "loss": 0.8598, "step": 7671 }, { "epoch": 8.73846153846154, "grad_norm": 0.23399047553539276, "learning_rate": 3.172102097128269e-05, "loss": 0.6711, "step": 7672 }, { "epoch": 8.73960113960114, "grad_norm": 0.18393062055110931, "learning_rate": 3.171653320300227e-05, "loss": 0.6996, "step": 7673 }, { "epoch": 8.74074074074074, "grad_norm": 0.20093819499015808, "learning_rate": 3.171204520143819e-05, "loss": 0.8209, "step": 7674 }, { "epoch": 8.741880341880343, "grad_norm": 0.21068376302719116, "learning_rate": 3.1707556966746324e-05, "loss": 0.8712, "step": 7675 }, { "epoch": 8.743019943019943, "grad_norm": 0.24260488152503967, "learning_rate": 3.170306849908255e-05, "loss": 0.5486, "step": 7676 }, { "epoch": 8.744159544159544, "grad_norm": 0.22668501734733582, "learning_rate": 3.169857979860278e-05, "loss": 0.7032, "step": 7677 }, { "epoch": 8.745299145299146, "grad_norm": 0.1814589947462082, "learning_rate": 3.1694090865462905e-05, "loss": 0.7434, "step": 7678 }, { "epoch": 8.746438746438747, "grad_norm": 0.2279900759458542, "learning_rate": 3.168960169981885e-05, "loss": 0.751, "step": 7679 }, { "epoch": 8.747578347578347, "grad_norm": 0.20198272168636322, "learning_rate": 3.168511230182653e-05, "loss": 0.6161, "step": 7680 }, { "epoch": 8.74871794871795, "grad_norm": 0.17433913052082062, "learning_rate": 3.168062267164188e-05, "loss": 0.834, "step": 7681 }, { "epoch": 8.74985754985755, "grad_norm": 0.20520195364952087, "learning_rate": 3.167613280942083e-05, "loss": 0.6117, "step": 7682 }, { "epoch": 8.75099715099715, "grad_norm": 0.2644828259944916, "learning_rate": 3.1671642715319316e-05, "loss": 0.6147, "step": 7683 }, { "epoch": 8.752136752136753, "grad_norm": 0.18779976665973663, "learning_rate": 3.166715238949332e-05, "loss": 0.7695, "step": 7684 }, { "epoch": 8.753276353276354, "grad_norm": 0.292783260345459, "learning_rate": 3.166266183209878e-05, "loss": 0.7338, "step": 7685 }, { "epoch": 8.754415954415954, "grad_norm": 0.1825760006904602, "learning_rate": 3.165817104329167e-05, "loss": 0.9071, "step": 7686 }, { "epoch": 8.755555555555556, "grad_norm": 0.16865390539169312, "learning_rate": 3.165368002322798e-05, "loss": 0.7664, "step": 7687 }, { "epoch": 8.756695156695157, "grad_norm": 0.17671336233615875, "learning_rate": 3.1649188772063676e-05, "loss": 0.8101, "step": 7688 }, { "epoch": 8.757834757834758, "grad_norm": 0.20125947892665863, "learning_rate": 3.1644697289954764e-05, "loss": 0.7404, "step": 7689 }, { "epoch": 8.75897435897436, "grad_norm": 0.24956966936588287, "learning_rate": 3.164020557705724e-05, "loss": 0.8366, "step": 7690 }, { "epoch": 8.76011396011396, "grad_norm": 0.2270399034023285, "learning_rate": 3.1635713633527114e-05, "loss": 0.5605, "step": 7691 }, { "epoch": 8.761253561253561, "grad_norm": 0.19510602951049805, "learning_rate": 3.163122145952041e-05, "loss": 0.8299, "step": 7692 }, { "epoch": 8.762393162393163, "grad_norm": 0.21669277548789978, "learning_rate": 3.162672905519315e-05, "loss": 0.7112, "step": 7693 }, { "epoch": 8.763532763532764, "grad_norm": 0.15557679533958435, "learning_rate": 3.162223642070136e-05, "loss": 0.7787, "step": 7694 }, { "epoch": 8.764672364672364, "grad_norm": 0.22524762153625488, "learning_rate": 3.161774355620109e-05, "loss": 0.6749, "step": 7695 }, { "epoch": 8.765811965811967, "grad_norm": 0.18174321949481964, "learning_rate": 3.161325046184839e-05, "loss": 0.6749, "step": 7696 }, { "epoch": 8.766951566951567, "grad_norm": 0.21741217374801636, "learning_rate": 3.1608757137799315e-05, "loss": 0.6055, "step": 7697 }, { "epoch": 8.768091168091168, "grad_norm": 0.20289987325668335, "learning_rate": 3.1604263584209924e-05, "loss": 0.8313, "step": 7698 }, { "epoch": 8.76923076923077, "grad_norm": 0.1633116751909256, "learning_rate": 3.1599769801236306e-05, "loss": 0.9437, "step": 7699 }, { "epoch": 8.77037037037037, "grad_norm": 0.20629127323627472, "learning_rate": 3.1595275789034525e-05, "loss": 0.7266, "step": 7700 }, { "epoch": 8.771509971509971, "grad_norm": 0.4131268858909607, "learning_rate": 3.1590781547760686e-05, "loss": 0.7076, "step": 7701 }, { "epoch": 8.772649572649573, "grad_norm": 0.2436034232378006, "learning_rate": 3.158628707757087e-05, "loss": 0.6973, "step": 7702 }, { "epoch": 8.773789173789174, "grad_norm": 0.18831466138362885, "learning_rate": 3.15817923786212e-05, "loss": 0.7632, "step": 7703 }, { "epoch": 8.774928774928775, "grad_norm": 0.19230034947395325, "learning_rate": 3.157729745106778e-05, "loss": 0.6297, "step": 7704 }, { "epoch": 8.776068376068377, "grad_norm": 0.17475441098213196, "learning_rate": 3.157280229506673e-05, "loss": 0.9326, "step": 7705 }, { "epoch": 8.777207977207977, "grad_norm": 0.20874318480491638, "learning_rate": 3.156830691077417e-05, "loss": 0.7311, "step": 7706 }, { "epoch": 8.778347578347578, "grad_norm": 0.21333029866218567, "learning_rate": 3.156381129834627e-05, "loss": 0.5903, "step": 7707 }, { "epoch": 8.77948717948718, "grad_norm": 0.24406632781028748, "learning_rate": 3.1559315457939136e-05, "loss": 0.4627, "step": 7708 }, { "epoch": 8.78062678062678, "grad_norm": 0.1977035254240036, "learning_rate": 3.155481938970895e-05, "loss": 0.9271, "step": 7709 }, { "epoch": 8.781766381766381, "grad_norm": 0.23286591470241547, "learning_rate": 3.155032309381185e-05, "loss": 0.6175, "step": 7710 }, { "epoch": 8.782905982905984, "grad_norm": 0.17138858139514923, "learning_rate": 3.154582657040402e-05, "loss": 0.8525, "step": 7711 }, { "epoch": 8.784045584045584, "grad_norm": 0.2692497968673706, "learning_rate": 3.154132981964164e-05, "loss": 0.4682, "step": 7712 }, { "epoch": 8.785185185185185, "grad_norm": 0.1906704306602478, "learning_rate": 3.153683284168087e-05, "loss": 0.7989, "step": 7713 }, { "epoch": 8.786324786324787, "grad_norm": 0.2293115258216858, "learning_rate": 3.153233563667793e-05, "loss": 0.7226, "step": 7714 }, { "epoch": 8.787464387464388, "grad_norm": 0.19892051815986633, "learning_rate": 3.152783820478901e-05, "loss": 1.0022, "step": 7715 }, { "epoch": 8.788603988603988, "grad_norm": 0.2179240733385086, "learning_rate": 3.1523340546170313e-05, "loss": 0.4818, "step": 7716 }, { "epoch": 8.78974358974359, "grad_norm": 0.20819567143917084, "learning_rate": 3.151884266097806e-05, "loss": 0.5795, "step": 7717 }, { "epoch": 8.790883190883191, "grad_norm": 0.2115866243839264, "learning_rate": 3.151434454936848e-05, "loss": 0.8428, "step": 7718 }, { "epoch": 8.792022792022792, "grad_norm": 0.21958622336387634, "learning_rate": 3.15098462114978e-05, "loss": 0.7726, "step": 7719 }, { "epoch": 8.793162393162394, "grad_norm": 0.22615550458431244, "learning_rate": 3.150534764752225e-05, "loss": 0.4887, "step": 7720 }, { "epoch": 8.794301994301994, "grad_norm": 0.21324467658996582, "learning_rate": 3.1500848857598084e-05, "loss": 0.6675, "step": 7721 }, { "epoch": 8.795441595441595, "grad_norm": 0.23308824002742767, "learning_rate": 3.149634984188157e-05, "loss": 0.7363, "step": 7722 }, { "epoch": 8.796581196581197, "grad_norm": 0.2446025162935257, "learning_rate": 3.149185060052896e-05, "loss": 0.6422, "step": 7723 }, { "epoch": 8.797720797720798, "grad_norm": 0.2245902121067047, "learning_rate": 3.148735113369652e-05, "loss": 0.7391, "step": 7724 }, { "epoch": 8.798860398860398, "grad_norm": 0.22236482799053192, "learning_rate": 3.148285144154054e-05, "loss": 0.8429, "step": 7725 }, { "epoch": 8.8, "grad_norm": 0.21395176649093628, "learning_rate": 3.14783515242173e-05, "loss": 0.6981, "step": 7726 }, { "epoch": 8.801139601139601, "grad_norm": 0.1498282104730606, "learning_rate": 3.147385138188309e-05, "loss": 0.8744, "step": 7727 }, { "epoch": 8.802279202279202, "grad_norm": 0.21710364520549774, "learning_rate": 3.1469351014694226e-05, "loss": 0.6589, "step": 7728 }, { "epoch": 8.803418803418804, "grad_norm": 0.23486445844173431, "learning_rate": 3.146485042280701e-05, "loss": 0.6693, "step": 7729 }, { "epoch": 8.804558404558405, "grad_norm": 0.23334114253520966, "learning_rate": 3.1460349606377756e-05, "loss": 0.4895, "step": 7730 }, { "epoch": 8.805698005698005, "grad_norm": 0.20501215755939484, "learning_rate": 3.1455848565562804e-05, "loss": 0.7128, "step": 7731 }, { "epoch": 8.806837606837608, "grad_norm": 0.20447050034999847, "learning_rate": 3.145134730051847e-05, "loss": 0.7555, "step": 7732 }, { "epoch": 8.807977207977208, "grad_norm": 0.2115122377872467, "learning_rate": 3.14468458114011e-05, "loss": 0.6423, "step": 7733 }, { "epoch": 8.809116809116809, "grad_norm": 0.18811063468456268, "learning_rate": 3.144234409836705e-05, "loss": 0.7499, "step": 7734 }, { "epoch": 8.810256410256411, "grad_norm": 0.1993071287870407, "learning_rate": 3.1437842161572674e-05, "loss": 0.6218, "step": 7735 }, { "epoch": 8.811396011396011, "grad_norm": 0.21417857706546783, "learning_rate": 3.143334000117433e-05, "loss": 0.764, "step": 7736 }, { "epoch": 8.812535612535612, "grad_norm": 0.21320945024490356, "learning_rate": 3.142883761732841e-05, "loss": 0.6304, "step": 7737 }, { "epoch": 8.813675213675214, "grad_norm": 0.21412084996700287, "learning_rate": 3.142433501019126e-05, "loss": 0.7014, "step": 7738 }, { "epoch": 8.814814814814815, "grad_norm": 0.23887000977993011, "learning_rate": 3.14198321799193e-05, "loss": 0.6606, "step": 7739 }, { "epoch": 8.815954415954415, "grad_norm": 0.22859087586402893, "learning_rate": 3.141532912666892e-05, "loss": 0.6729, "step": 7740 }, { "epoch": 8.817094017094018, "grad_norm": 0.20311769843101501, "learning_rate": 3.1410825850596506e-05, "loss": 0.7882, "step": 7741 }, { "epoch": 8.818233618233618, "grad_norm": 0.22371307015419006, "learning_rate": 3.140632235185849e-05, "loss": 0.608, "step": 7742 }, { "epoch": 8.819373219373219, "grad_norm": 0.21212604641914368, "learning_rate": 3.140181863061127e-05, "loss": 0.6504, "step": 7743 }, { "epoch": 8.820512820512821, "grad_norm": 0.1994614452123642, "learning_rate": 3.139731468701129e-05, "loss": 0.65, "step": 7744 }, { "epoch": 8.821652421652422, "grad_norm": 0.18662983179092407, "learning_rate": 3.139281052121498e-05, "loss": 0.746, "step": 7745 }, { "epoch": 8.822792022792022, "grad_norm": 0.2502181828022003, "learning_rate": 3.138830613337878e-05, "loss": 0.4778, "step": 7746 }, { "epoch": 8.823931623931625, "grad_norm": 0.22858481109142303, "learning_rate": 3.138380152365913e-05, "loss": 0.6457, "step": 7747 }, { "epoch": 8.825071225071225, "grad_norm": 0.18484479188919067, "learning_rate": 3.137929669221251e-05, "loss": 0.6432, "step": 7748 }, { "epoch": 8.826210826210826, "grad_norm": 0.27624914050102234, "learning_rate": 3.137479163919536e-05, "loss": 0.5642, "step": 7749 }, { "epoch": 8.827350427350428, "grad_norm": 0.22071190178394318, "learning_rate": 3.1370286364764176e-05, "loss": 0.729, "step": 7750 }, { "epoch": 8.828490028490029, "grad_norm": 0.1727389544248581, "learning_rate": 3.1365780869075424e-05, "loss": 0.7548, "step": 7751 }, { "epoch": 8.829629629629629, "grad_norm": 0.19292603433132172, "learning_rate": 3.13612751522856e-05, "loss": 0.855, "step": 7752 }, { "epoch": 8.830769230769231, "grad_norm": 0.2081027776002884, "learning_rate": 3.1356769214551195e-05, "loss": 0.806, "step": 7753 }, { "epoch": 8.831908831908832, "grad_norm": 0.22061720490455627, "learning_rate": 3.1352263056028716e-05, "loss": 0.6008, "step": 7754 }, { "epoch": 8.833048433048432, "grad_norm": 0.19643181562423706, "learning_rate": 3.134775667687467e-05, "loss": 0.873, "step": 7755 }, { "epoch": 8.834188034188035, "grad_norm": 0.18146218359470367, "learning_rate": 3.134325007724558e-05, "loss": 0.772, "step": 7756 }, { "epoch": 8.835327635327635, "grad_norm": 0.20587703585624695, "learning_rate": 3.133874325729797e-05, "loss": 0.7325, "step": 7757 }, { "epoch": 8.836467236467236, "grad_norm": 0.19565923511981964, "learning_rate": 3.1334236217188376e-05, "loss": 0.8558, "step": 7758 }, { "epoch": 8.837606837606838, "grad_norm": 0.2094258964061737, "learning_rate": 3.132972895707333e-05, "loss": 0.6833, "step": 7759 }, { "epoch": 8.838746438746439, "grad_norm": 0.21240152418613434, "learning_rate": 3.1325221477109394e-05, "loss": 0.645, "step": 7760 }, { "epoch": 8.83988603988604, "grad_norm": 0.2169884890317917, "learning_rate": 3.132071377745313e-05, "loss": 0.7009, "step": 7761 }, { "epoch": 8.841025641025642, "grad_norm": 0.2079538106918335, "learning_rate": 3.131620585826109e-05, "loss": 0.7524, "step": 7762 }, { "epoch": 8.842165242165242, "grad_norm": 0.1845642775297165, "learning_rate": 3.131169771968985e-05, "loss": 0.808, "step": 7763 }, { "epoch": 8.843304843304843, "grad_norm": 0.19623424112796783, "learning_rate": 3.130718936189599e-05, "loss": 0.76, "step": 7764 }, { "epoch": 8.844444444444445, "grad_norm": 0.20893262326717377, "learning_rate": 3.130268078503611e-05, "loss": 0.6806, "step": 7765 }, { "epoch": 8.845584045584046, "grad_norm": 0.23447632789611816, "learning_rate": 3.129817198926677e-05, "loss": 0.7176, "step": 7766 }, { "epoch": 8.846723646723646, "grad_norm": 0.21263551712036133, "learning_rate": 3.129366297474462e-05, "loss": 0.6646, "step": 7767 }, { "epoch": 8.847863247863248, "grad_norm": 0.16097824275493622, "learning_rate": 3.128915374162623e-05, "loss": 0.8573, "step": 7768 }, { "epoch": 8.849002849002849, "grad_norm": 0.20370391011238098, "learning_rate": 3.128464429006826e-05, "loss": 0.6426, "step": 7769 }, { "epoch": 8.85014245014245, "grad_norm": 0.21010948717594147, "learning_rate": 3.1280134620227284e-05, "loss": 0.6076, "step": 7770 }, { "epoch": 8.851282051282052, "grad_norm": 0.14581793546676636, "learning_rate": 3.127562473225998e-05, "loss": 0.8324, "step": 7771 }, { "epoch": 8.852421652421652, "grad_norm": 0.1763293743133545, "learning_rate": 3.127111462632296e-05, "loss": 0.7556, "step": 7772 }, { "epoch": 8.853561253561253, "grad_norm": 0.18776406347751617, "learning_rate": 3.126660430257288e-05, "loss": 0.7891, "step": 7773 }, { "epoch": 8.854700854700855, "grad_norm": 0.19078278541564941, "learning_rate": 3.126209376116641e-05, "loss": 0.6665, "step": 7774 }, { "epoch": 8.855840455840456, "grad_norm": 0.3110523819923401, "learning_rate": 3.12575830022602e-05, "loss": 0.3793, "step": 7775 }, { "epoch": 8.856980056980056, "grad_norm": 0.16890090703964233, "learning_rate": 3.125307202601093e-05, "loss": 0.672, "step": 7776 }, { "epoch": 8.858119658119659, "grad_norm": 0.20060500502586365, "learning_rate": 3.1248560832575255e-05, "loss": 0.7782, "step": 7777 }, { "epoch": 8.85925925925926, "grad_norm": 0.21224698424339294, "learning_rate": 3.124404942210989e-05, "loss": 0.6011, "step": 7778 }, { "epoch": 8.86039886039886, "grad_norm": 0.18043029308319092, "learning_rate": 3.1239537794771515e-05, "loss": 0.7435, "step": 7779 }, { "epoch": 8.861538461538462, "grad_norm": 0.18207389116287231, "learning_rate": 3.123502595071684e-05, "loss": 0.6631, "step": 7780 }, { "epoch": 8.862678062678063, "grad_norm": 0.24329136312007904, "learning_rate": 3.123051389010256e-05, "loss": 0.569, "step": 7781 }, { "epoch": 8.863817663817663, "grad_norm": 0.18057847023010254, "learning_rate": 3.122600161308541e-05, "loss": 0.8271, "step": 7782 }, { "epoch": 8.864957264957265, "grad_norm": 0.22279685735702515, "learning_rate": 3.122148911982208e-05, "loss": 0.5229, "step": 7783 }, { "epoch": 8.866096866096866, "grad_norm": 0.1838337630033493, "learning_rate": 3.1216976410469344e-05, "loss": 0.8541, "step": 7784 }, { "epoch": 8.867236467236467, "grad_norm": 0.16838793456554413, "learning_rate": 3.121246348518391e-05, "loss": 1.0952, "step": 7785 }, { "epoch": 8.868376068376069, "grad_norm": 0.22770701348781586, "learning_rate": 3.1207950344122544e-05, "loss": 0.5433, "step": 7786 }, { "epoch": 8.86951566951567, "grad_norm": 0.22541071474552155, "learning_rate": 3.120343698744198e-05, "loss": 0.6469, "step": 7787 }, { "epoch": 8.87065527065527, "grad_norm": 0.1801062375307083, "learning_rate": 3.1198923415299e-05, "loss": 0.7294, "step": 7788 }, { "epoch": 8.871794871794872, "grad_norm": 0.23137366771697998, "learning_rate": 3.119440962785036e-05, "loss": 0.7491, "step": 7789 }, { "epoch": 8.872934472934473, "grad_norm": 0.23917138576507568, "learning_rate": 3.118989562525284e-05, "loss": 0.5461, "step": 7790 }, { "epoch": 8.874074074074073, "grad_norm": 0.25182202458381653, "learning_rate": 3.118538140766322e-05, "loss": 0.6141, "step": 7791 }, { "epoch": 8.875213675213676, "grad_norm": 0.21472328901290894, "learning_rate": 3.118086697523829e-05, "loss": 0.7132, "step": 7792 }, { "epoch": 8.876353276353276, "grad_norm": 0.16811223328113556, "learning_rate": 3.1176352328134855e-05, "loss": 0.9114, "step": 7793 }, { "epoch": 8.877492877492877, "grad_norm": 0.22903700172901154, "learning_rate": 3.117183746650971e-05, "loss": 0.568, "step": 7794 }, { "epoch": 8.878632478632479, "grad_norm": 0.21215792000293732, "learning_rate": 3.116732239051969e-05, "loss": 0.8292, "step": 7795 }, { "epoch": 8.87977207977208, "grad_norm": 0.20104119181632996, "learning_rate": 3.11628071003216e-05, "loss": 0.8077, "step": 7796 }, { "epoch": 8.88091168091168, "grad_norm": 0.2236744612455368, "learning_rate": 3.1158291596072276e-05, "loss": 0.8089, "step": 7797 }, { "epoch": 8.882051282051282, "grad_norm": 0.201921284198761, "learning_rate": 3.115377587792854e-05, "loss": 0.7208, "step": 7798 }, { "epoch": 8.883190883190883, "grad_norm": 0.1841856837272644, "learning_rate": 3.114925994604725e-05, "loss": 0.8963, "step": 7799 }, { "epoch": 8.884330484330484, "grad_norm": 0.20346450805664062, "learning_rate": 3.1144743800585256e-05, "loss": 0.7813, "step": 7800 }, { "epoch": 8.885470085470086, "grad_norm": 0.2184840738773346, "learning_rate": 3.11402274416994e-05, "loss": 0.6826, "step": 7801 }, { "epoch": 8.886609686609686, "grad_norm": 0.22706322371959686, "learning_rate": 3.1135710869546576e-05, "loss": 0.7256, "step": 7802 }, { "epoch": 8.887749287749287, "grad_norm": 0.20829591155052185, "learning_rate": 3.113119408428363e-05, "loss": 0.7682, "step": 7803 }, { "epoch": 8.88888888888889, "grad_norm": 0.19905990362167358, "learning_rate": 3.1126677086067457e-05, "loss": 0.8342, "step": 7804 }, { "epoch": 8.89002849002849, "grad_norm": 0.19021393358707428, "learning_rate": 3.1122159875054924e-05, "loss": 0.7214, "step": 7805 }, { "epoch": 8.89116809116809, "grad_norm": 0.2697696089744568, "learning_rate": 3.111764245140296e-05, "loss": 0.7523, "step": 7806 }, { "epoch": 8.892307692307693, "grad_norm": 0.21230560541152954, "learning_rate": 3.1113124815268444e-05, "loss": 0.6203, "step": 7807 }, { "epoch": 8.893447293447293, "grad_norm": 0.21342356503009796, "learning_rate": 3.11086069668083e-05, "loss": 0.7195, "step": 7808 }, { "epoch": 8.894586894586894, "grad_norm": 0.18811871111392975, "learning_rate": 3.110408890617942e-05, "loss": 0.826, "step": 7809 }, { "epoch": 8.895726495726496, "grad_norm": 0.19298504292964935, "learning_rate": 3.109957063353876e-05, "loss": 0.7805, "step": 7810 }, { "epoch": 8.896866096866097, "grad_norm": 0.1965564638376236, "learning_rate": 3.109505214904323e-05, "loss": 0.8131, "step": 7811 }, { "epoch": 8.898005698005697, "grad_norm": 0.24918051064014435, "learning_rate": 3.109053345284979e-05, "loss": 0.6382, "step": 7812 }, { "epoch": 8.8991452991453, "grad_norm": 0.20430241525173187, "learning_rate": 3.108601454511536e-05, "loss": 0.6763, "step": 7813 }, { "epoch": 8.9002849002849, "grad_norm": 0.1908400058746338, "learning_rate": 3.108149542599692e-05, "loss": 0.6695, "step": 7814 }, { "epoch": 8.9014245014245, "grad_norm": 0.20428918302059174, "learning_rate": 3.1076976095651414e-05, "loss": 0.852, "step": 7815 }, { "epoch": 8.902564102564103, "grad_norm": 0.17677143216133118, "learning_rate": 3.107245655423581e-05, "loss": 0.671, "step": 7816 }, { "epoch": 8.903703703703703, "grad_norm": 0.2036582976579666, "learning_rate": 3.10679368019071e-05, "loss": 0.595, "step": 7817 }, { "epoch": 8.904843304843304, "grad_norm": 0.1899828016757965, "learning_rate": 3.106341683882226e-05, "loss": 0.8034, "step": 7818 }, { "epoch": 8.905982905982906, "grad_norm": 0.17370927333831787, "learning_rate": 3.105889666513827e-05, "loss": 0.8806, "step": 7819 }, { "epoch": 8.907122507122507, "grad_norm": 0.22656746208667755, "learning_rate": 3.105437628101214e-05, "loss": 0.6355, "step": 7820 }, { "epoch": 8.908262108262107, "grad_norm": 0.2530188262462616, "learning_rate": 3.104985568660087e-05, "loss": 0.5964, "step": 7821 }, { "epoch": 8.90940170940171, "grad_norm": 0.25722649693489075, "learning_rate": 3.104533488206148e-05, "loss": 0.5751, "step": 7822 }, { "epoch": 8.91054131054131, "grad_norm": 0.27463385462760925, "learning_rate": 3.1040813867550984e-05, "loss": 0.5416, "step": 7823 }, { "epoch": 8.91168091168091, "grad_norm": 0.18324056267738342, "learning_rate": 3.10362926432264e-05, "loss": 0.8647, "step": 7824 }, { "epoch": 8.912820512820513, "grad_norm": 0.24443140625953674, "learning_rate": 3.103177120924479e-05, "loss": 0.7451, "step": 7825 }, { "epoch": 8.913960113960114, "grad_norm": 0.1662614941596985, "learning_rate": 3.102724956576317e-05, "loss": 0.8351, "step": 7826 }, { "epoch": 8.915099715099714, "grad_norm": 0.18487805128097534, "learning_rate": 3.102272771293859e-05, "loss": 0.8716, "step": 7827 }, { "epoch": 8.916239316239317, "grad_norm": 0.163230761885643, "learning_rate": 3.101820565092812e-05, "loss": 0.8471, "step": 7828 }, { "epoch": 8.917378917378917, "grad_norm": 0.3384037911891937, "learning_rate": 3.101368337988882e-05, "loss": 0.3097, "step": 7829 }, { "epoch": 8.918518518518518, "grad_norm": 0.1944686472415924, "learning_rate": 3.1009160899977756e-05, "loss": 0.6428, "step": 7830 }, { "epoch": 8.91965811965812, "grad_norm": 0.1764814555644989, "learning_rate": 3.100463821135201e-05, "loss": 0.705, "step": 7831 }, { "epoch": 8.92079772079772, "grad_norm": 0.17810149490833282, "learning_rate": 3.100011531416866e-05, "loss": 0.8209, "step": 7832 }, { "epoch": 8.921937321937321, "grad_norm": 0.2048386037349701, "learning_rate": 3.0995592208584814e-05, "loss": 0.6529, "step": 7833 }, { "epoch": 8.923076923076923, "grad_norm": 0.16116826236248016, "learning_rate": 3.099106889475756e-05, "loss": 0.8393, "step": 7834 }, { "epoch": 8.924216524216524, "grad_norm": 0.2247179001569748, "learning_rate": 3.098654537284401e-05, "loss": 0.705, "step": 7835 }, { "epoch": 8.925356125356124, "grad_norm": 0.24455715715885162, "learning_rate": 3.098202164300128e-05, "loss": 0.7098, "step": 7836 }, { "epoch": 8.926495726495727, "grad_norm": 0.2455403357744217, "learning_rate": 3.097749770538648e-05, "loss": 0.5523, "step": 7837 }, { "epoch": 8.927635327635327, "grad_norm": 0.24926690757274628, "learning_rate": 3.097297356015674e-05, "loss": 0.583, "step": 7838 }, { "epoch": 8.928774928774928, "grad_norm": 0.19409289956092834, "learning_rate": 3.096844920746921e-05, "loss": 0.7751, "step": 7839 }, { "epoch": 8.92991452991453, "grad_norm": 0.24124380946159363, "learning_rate": 3.096392464748103e-05, "loss": 0.7231, "step": 7840 }, { "epoch": 8.93105413105413, "grad_norm": 0.2634790241718292, "learning_rate": 3.095939988034934e-05, "loss": 0.6397, "step": 7841 }, { "epoch": 8.932193732193731, "grad_norm": 0.16715000569820404, "learning_rate": 3.095487490623131e-05, "loss": 0.8968, "step": 7842 }, { "epoch": 8.933333333333334, "grad_norm": 0.17600218951702118, "learning_rate": 3.0950349725284085e-05, "loss": 0.7262, "step": 7843 }, { "epoch": 8.934472934472934, "grad_norm": 0.19259227812290192, "learning_rate": 3.094582433766486e-05, "loss": 0.9158, "step": 7844 }, { "epoch": 8.935612535612536, "grad_norm": 0.21922144293785095, "learning_rate": 3.09412987435308e-05, "loss": 0.5877, "step": 7845 }, { "epoch": 8.936752136752137, "grad_norm": 0.24164317548274994, "learning_rate": 3.0936772943039106e-05, "loss": 0.5463, "step": 7846 }, { "epoch": 8.937891737891738, "grad_norm": 0.2136332392692566, "learning_rate": 3.0932246936346955e-05, "loss": 0.7558, "step": 7847 }, { "epoch": 8.93903133903134, "grad_norm": 0.23429881036281586, "learning_rate": 3.092772072361156e-05, "loss": 0.7598, "step": 7848 }, { "epoch": 8.94017094017094, "grad_norm": 0.19063034653663635, "learning_rate": 3.0923194304990116e-05, "loss": 0.896, "step": 7849 }, { "epoch": 8.941310541310541, "grad_norm": 0.22400988638401031, "learning_rate": 3.0918667680639846e-05, "loss": 0.6574, "step": 7850 }, { "epoch": 8.942450142450143, "grad_norm": 0.23572759330272675, "learning_rate": 3.091414085071797e-05, "loss": 0.6288, "step": 7851 }, { "epoch": 8.943589743589744, "grad_norm": 0.25042036175727844, "learning_rate": 3.090961381538172e-05, "loss": 0.6406, "step": 7852 }, { "epoch": 8.944729344729344, "grad_norm": 0.18161028623580933, "learning_rate": 3.090508657478834e-05, "loss": 0.8103, "step": 7853 }, { "epoch": 8.945868945868947, "grad_norm": 0.19624114036560059, "learning_rate": 3.090055912909504e-05, "loss": 0.6675, "step": 7854 }, { "epoch": 8.947008547008547, "grad_norm": 0.15669001638889313, "learning_rate": 3.089603147845911e-05, "loss": 0.7988, "step": 7855 }, { "epoch": 8.948148148148148, "grad_norm": 0.17270700633525848, "learning_rate": 3.08915036230378e-05, "loss": 0.9235, "step": 7856 }, { "epoch": 8.94928774928775, "grad_norm": 0.18196718394756317, "learning_rate": 3.0886975562988366e-05, "loss": 0.8509, "step": 7857 }, { "epoch": 8.95042735042735, "grad_norm": 0.28779137134552, "learning_rate": 3.088244729846807e-05, "loss": 0.704, "step": 7858 }, { "epoch": 8.951566951566951, "grad_norm": 0.22087204456329346, "learning_rate": 3.0877918829634213e-05, "loss": 0.7136, "step": 7859 }, { "epoch": 8.952706552706553, "grad_norm": 0.19543227553367615, "learning_rate": 3.087339015664406e-05, "loss": 0.7373, "step": 7860 }, { "epoch": 8.953846153846154, "grad_norm": 0.1887291520833969, "learning_rate": 3.086886127965492e-05, "loss": 0.7649, "step": 7861 }, { "epoch": 8.954985754985755, "grad_norm": 0.18013887107372284, "learning_rate": 3.086433219882409e-05, "loss": 0.8074, "step": 7862 }, { "epoch": 8.956125356125357, "grad_norm": 0.2100178450345993, "learning_rate": 3.085980291430887e-05, "loss": 0.6478, "step": 7863 }, { "epoch": 8.957264957264957, "grad_norm": 0.18159078061580658, "learning_rate": 3.085527342626658e-05, "loss": 0.8595, "step": 7864 }, { "epoch": 8.958404558404558, "grad_norm": 0.18629483878612518, "learning_rate": 3.085074373485456e-05, "loss": 0.6951, "step": 7865 }, { "epoch": 8.95954415954416, "grad_norm": 0.22148196399211884, "learning_rate": 3.0846213840230106e-05, "loss": 0.7306, "step": 7866 }, { "epoch": 8.96068376068376, "grad_norm": 0.2141670435667038, "learning_rate": 3.0841683742550566e-05, "loss": 0.642, "step": 7867 }, { "epoch": 8.961823361823361, "grad_norm": 0.22931569814682007, "learning_rate": 3.083715344197329e-05, "loss": 0.5878, "step": 7868 }, { "epoch": 8.962962962962964, "grad_norm": 0.2777910530567169, "learning_rate": 3.083262293865562e-05, "loss": 0.4543, "step": 7869 }, { "epoch": 8.964102564102564, "grad_norm": 0.2475350946187973, "learning_rate": 3.082809223275492e-05, "loss": 0.6118, "step": 7870 }, { "epoch": 8.965242165242165, "grad_norm": 0.2269582450389862, "learning_rate": 3.082356132442854e-05, "loss": 0.7678, "step": 7871 }, { "epoch": 8.966381766381767, "grad_norm": 0.18349537253379822, "learning_rate": 3.081903021383387e-05, "loss": 0.8643, "step": 7872 }, { "epoch": 8.967521367521368, "grad_norm": 0.17430894076824188, "learning_rate": 3.081449890112827e-05, "loss": 0.7734, "step": 7873 }, { "epoch": 8.968660968660968, "grad_norm": 0.25528132915496826, "learning_rate": 3.080996738646914e-05, "loss": 0.5903, "step": 7874 }, { "epoch": 8.96980056980057, "grad_norm": 0.21637718379497528, "learning_rate": 3.080543567001387e-05, "loss": 0.6596, "step": 7875 }, { "epoch": 8.970940170940171, "grad_norm": 0.20959778130054474, "learning_rate": 3.080090375191984e-05, "loss": 0.6371, "step": 7876 }, { "epoch": 8.972079772079772, "grad_norm": 0.19999878108501434, "learning_rate": 3.079637163234448e-05, "loss": 0.6877, "step": 7877 }, { "epoch": 8.973219373219374, "grad_norm": 0.18591931462287903, "learning_rate": 3.0791839311445184e-05, "loss": 0.8589, "step": 7878 }, { "epoch": 8.974358974358974, "grad_norm": 0.1909388303756714, "learning_rate": 3.078730678937939e-05, "loss": 0.8331, "step": 7879 }, { "epoch": 8.975498575498575, "grad_norm": 0.20299936830997467, "learning_rate": 3.078277406630452e-05, "loss": 0.7731, "step": 7880 }, { "epoch": 8.976638176638177, "grad_norm": 0.19652584195137024, "learning_rate": 3.0778241142378e-05, "loss": 0.8612, "step": 7881 }, { "epoch": 8.977777777777778, "grad_norm": 0.1882491111755371, "learning_rate": 3.077370801775726e-05, "loss": 0.7031, "step": 7882 }, { "epoch": 8.978917378917378, "grad_norm": 0.16153548657894135, "learning_rate": 3.0769174692599785e-05, "loss": 0.8683, "step": 7883 }, { "epoch": 8.98005698005698, "grad_norm": 0.23726435005664825, "learning_rate": 3.0764641167063e-05, "loss": 0.6444, "step": 7884 }, { "epoch": 8.981196581196581, "grad_norm": 0.21085144579410553, "learning_rate": 3.076010744130437e-05, "loss": 0.8917, "step": 7885 }, { "epoch": 8.982336182336182, "grad_norm": 0.18745844066143036, "learning_rate": 3.075557351548137e-05, "loss": 0.7833, "step": 7886 }, { "epoch": 8.983475783475784, "grad_norm": 0.19731897115707397, "learning_rate": 3.075103938975148e-05, "loss": 0.8099, "step": 7887 }, { "epoch": 8.984615384615385, "grad_norm": 0.1938193440437317, "learning_rate": 3.0746505064272164e-05, "loss": 0.7111, "step": 7888 }, { "epoch": 8.985754985754985, "grad_norm": 0.21260614693164825, "learning_rate": 3.074197053920093e-05, "loss": 0.7241, "step": 7889 }, { "epoch": 8.986894586894588, "grad_norm": 0.1491367369890213, "learning_rate": 3.073743581469527e-05, "loss": 0.8752, "step": 7890 }, { "epoch": 8.988034188034188, "grad_norm": 0.30841800570487976, "learning_rate": 3.0732900890912695e-05, "loss": 0.4057, "step": 7891 }, { "epoch": 8.989173789173789, "grad_norm": 0.2244536429643631, "learning_rate": 3.072836576801069e-05, "loss": 0.6508, "step": 7892 }, { "epoch": 8.990313390313391, "grad_norm": 0.22859877347946167, "learning_rate": 3.0723830446146796e-05, "loss": 0.6319, "step": 7893 }, { "epoch": 8.991452991452991, "grad_norm": 0.20965613424777985, "learning_rate": 3.071929492547853e-05, "loss": 0.542, "step": 7894 }, { "epoch": 8.992592592592592, "grad_norm": 0.22765378654003143, "learning_rate": 3.071475920616342e-05, "loss": 0.7092, "step": 7895 }, { "epoch": 8.993732193732194, "grad_norm": 0.1661691665649414, "learning_rate": 3.071022328835902e-05, "loss": 0.898, "step": 7896 }, { "epoch": 8.994871794871795, "grad_norm": 0.22471977770328522, "learning_rate": 3.070568717222285e-05, "loss": 0.6615, "step": 7897 }, { "epoch": 8.996011396011395, "grad_norm": 0.2047819197177887, "learning_rate": 3.070115085791248e-05, "loss": 0.8607, "step": 7898 }, { "epoch": 8.997150997150998, "grad_norm": 0.15379248559474945, "learning_rate": 3.069661434558545e-05, "loss": 0.8015, "step": 7899 }, { "epoch": 8.998290598290598, "grad_norm": 0.21577894687652588, "learning_rate": 3.0692077635399354e-05, "loss": 0.7617, "step": 7900 }, { "epoch": 8.999430199430199, "grad_norm": 0.21575061976909637, "learning_rate": 3.0687540727511736e-05, "loss": 0.7665, "step": 7901 }, { "epoch": 9.0, "grad_norm": 0.3739865720272064, "learning_rate": 3.06830036220802e-05, "loss": 0.4867, "step": 7902 }, { "epoch": 9.0011396011396, "grad_norm": 0.169849693775177, "learning_rate": 3.067846631926231e-05, "loss": 0.87, "step": 7903 }, { "epoch": 9.002279202279203, "grad_norm": 0.14331378042697906, "learning_rate": 3.067392881921567e-05, "loss": 0.8204, "step": 7904 }, { "epoch": 9.003418803418803, "grad_norm": 0.20782536268234253, "learning_rate": 3.066939112209788e-05, "loss": 0.6792, "step": 7905 }, { "epoch": 9.004558404558404, "grad_norm": 0.23367100954055786, "learning_rate": 3.0664853228066545e-05, "loss": 0.6591, "step": 7906 }, { "epoch": 9.005698005698006, "grad_norm": 0.20821213722229004, "learning_rate": 3.066031513727928e-05, "loss": 0.6785, "step": 7907 }, { "epoch": 9.006837606837607, "grad_norm": 0.19033651053905487, "learning_rate": 3.0655776849893716e-05, "loss": 0.7277, "step": 7908 }, { "epoch": 9.007977207977207, "grad_norm": 0.18766376376152039, "learning_rate": 3.065123836606745e-05, "loss": 0.7571, "step": 7909 }, { "epoch": 9.00911680911681, "grad_norm": 0.18898771703243256, "learning_rate": 3.064669968595814e-05, "loss": 0.9254, "step": 7910 }, { "epoch": 9.01025641025641, "grad_norm": 0.20341551303863525, "learning_rate": 3.064216080972342e-05, "loss": 0.7038, "step": 7911 }, { "epoch": 9.01139601139601, "grad_norm": 0.1814105361700058, "learning_rate": 3.063762173752095e-05, "loss": 0.6781, "step": 7912 }, { "epoch": 9.012535612535613, "grad_norm": 0.23274095356464386, "learning_rate": 3.0633082469508364e-05, "loss": 0.4745, "step": 7913 }, { "epoch": 9.013675213675214, "grad_norm": 0.20255059003829956, "learning_rate": 3.062854300584333e-05, "loss": 0.7065, "step": 7914 }, { "epoch": 9.014814814814814, "grad_norm": 0.22046412527561188, "learning_rate": 3.062400334668353e-05, "loss": 0.6523, "step": 7915 }, { "epoch": 9.015954415954416, "grad_norm": 0.17578087747097015, "learning_rate": 3.061946349218662e-05, "loss": 0.9795, "step": 7916 }, { "epoch": 9.017094017094017, "grad_norm": 0.1903984397649765, "learning_rate": 3.061492344251029e-05, "loss": 0.8292, "step": 7917 }, { "epoch": 9.018233618233618, "grad_norm": 0.20894210040569305, "learning_rate": 3.061038319781223e-05, "loss": 0.7828, "step": 7918 }, { "epoch": 9.01937321937322, "grad_norm": 0.1967022567987442, "learning_rate": 3.060584275825013e-05, "loss": 0.6703, "step": 7919 }, { "epoch": 9.02051282051282, "grad_norm": 0.20595534145832062, "learning_rate": 3.0601302123981697e-05, "loss": 0.7336, "step": 7920 }, { "epoch": 9.021652421652421, "grad_norm": 0.22291889786720276, "learning_rate": 3.0596761295164634e-05, "loss": 0.4897, "step": 7921 }, { "epoch": 9.022792022792023, "grad_norm": 0.20241078734397888, "learning_rate": 3.059222027195666e-05, "loss": 0.7701, "step": 7922 }, { "epoch": 9.023931623931624, "grad_norm": 0.19974841177463531, "learning_rate": 3.05876790545155e-05, "loss": 0.6909, "step": 7923 }, { "epoch": 9.025071225071224, "grad_norm": 0.22207312285900116, "learning_rate": 3.058313764299888e-05, "loss": 0.6557, "step": 7924 }, { "epoch": 9.026210826210827, "grad_norm": 0.2338474541902542, "learning_rate": 3.0578596037564544e-05, "loss": 0.5804, "step": 7925 }, { "epoch": 9.027350427350427, "grad_norm": 0.19024735689163208, "learning_rate": 3.057405423837021e-05, "loss": 0.7939, "step": 7926 }, { "epoch": 9.028490028490028, "grad_norm": 0.18150648474693298, "learning_rate": 3.0569512245573654e-05, "loss": 0.7721, "step": 7927 }, { "epoch": 9.02962962962963, "grad_norm": 0.19173087179660797, "learning_rate": 3.056497005933262e-05, "loss": 0.7397, "step": 7928 }, { "epoch": 9.03076923076923, "grad_norm": 0.205705463886261, "learning_rate": 3.056042767980487e-05, "loss": 0.7156, "step": 7929 }, { "epoch": 9.031908831908831, "grad_norm": 0.21228349208831787, "learning_rate": 3.0555885107148186e-05, "loss": 0.6467, "step": 7930 }, { "epoch": 9.033048433048434, "grad_norm": 0.1990744024515152, "learning_rate": 3.0551342341520325e-05, "loss": 0.8364, "step": 7931 }, { "epoch": 9.034188034188034, "grad_norm": 0.18487299978733063, "learning_rate": 3.0546799383079074e-05, "loss": 0.7673, "step": 7932 }, { "epoch": 9.035327635327635, "grad_norm": 0.17298202216625214, "learning_rate": 3.0542256231982226e-05, "loss": 0.8777, "step": 7933 }, { "epoch": 9.036467236467237, "grad_norm": 0.17005349695682526, "learning_rate": 3.0537712888387586e-05, "loss": 0.7778, "step": 7934 }, { "epoch": 9.037606837606837, "grad_norm": 0.2257419377565384, "learning_rate": 3.0533169352452947e-05, "loss": 0.6396, "step": 7935 }, { "epoch": 9.038746438746438, "grad_norm": 0.24718356132507324, "learning_rate": 3.052862562433611e-05, "loss": 0.6649, "step": 7936 }, { "epoch": 9.03988603988604, "grad_norm": 0.15201455354690552, "learning_rate": 3.052408170419492e-05, "loss": 0.844, "step": 7937 }, { "epoch": 9.04102564102564, "grad_norm": 0.17157624661922455, "learning_rate": 3.051953759218717e-05, "loss": 0.9226, "step": 7938 }, { "epoch": 9.042165242165241, "grad_norm": 0.23245535790920258, "learning_rate": 3.05149932884707e-05, "loss": 0.4737, "step": 7939 }, { "epoch": 9.043304843304844, "grad_norm": 0.2305932641029358, "learning_rate": 3.0510448793203346e-05, "loss": 0.7596, "step": 7940 }, { "epoch": 9.044444444444444, "grad_norm": 0.20094603300094604, "learning_rate": 3.0505904106542948e-05, "loss": 0.791, "step": 7941 }, { "epoch": 9.045584045584045, "grad_norm": 0.21628820896148682, "learning_rate": 3.050135922864737e-05, "loss": 0.7386, "step": 7942 }, { "epoch": 9.046723646723647, "grad_norm": 0.23067307472229004, "learning_rate": 3.0496814159674453e-05, "loss": 0.6767, "step": 7943 }, { "epoch": 9.047863247863248, "grad_norm": 0.208968386054039, "learning_rate": 3.049226889978206e-05, "loss": 0.723, "step": 7944 }, { "epoch": 9.049002849002848, "grad_norm": 0.28499385714530945, "learning_rate": 3.048772344912807e-05, "loss": 0.3641, "step": 7945 }, { "epoch": 9.05014245014245, "grad_norm": 0.1914040595293045, "learning_rate": 3.048317780787035e-05, "loss": 0.7566, "step": 7946 }, { "epoch": 9.051282051282051, "grad_norm": 0.20092247426509857, "learning_rate": 3.0478631976166787e-05, "loss": 0.7957, "step": 7947 }, { "epoch": 9.052421652421652, "grad_norm": 0.21474263072013855, "learning_rate": 3.0474085954175262e-05, "loss": 0.5355, "step": 7948 }, { "epoch": 9.053561253561254, "grad_norm": 0.1841273009777069, "learning_rate": 3.0469539742053686e-05, "loss": 0.6731, "step": 7949 }, { "epoch": 9.054700854700855, "grad_norm": 0.18076074123382568, "learning_rate": 3.046499333995995e-05, "loss": 0.7469, "step": 7950 }, { "epoch": 9.055840455840455, "grad_norm": 0.22733916342258453, "learning_rate": 3.0460446748051973e-05, "loss": 0.6836, "step": 7951 }, { "epoch": 9.056980056980057, "grad_norm": 0.19410863518714905, "learning_rate": 3.045589996648766e-05, "loss": 0.7948, "step": 7952 }, { "epoch": 9.058119658119658, "grad_norm": 0.22652725875377655, "learning_rate": 3.045135299542494e-05, "loss": 0.625, "step": 7953 }, { "epoch": 9.059259259259258, "grad_norm": 0.22382645308971405, "learning_rate": 3.0446805835021737e-05, "loss": 0.6675, "step": 7954 }, { "epoch": 9.06039886039886, "grad_norm": 0.18913283944129944, "learning_rate": 3.044225848543598e-05, "loss": 0.7916, "step": 7955 }, { "epoch": 9.061538461538461, "grad_norm": 0.20150530338287354, "learning_rate": 3.0437710946825638e-05, "loss": 0.6819, "step": 7956 }, { "epoch": 9.062678062678062, "grad_norm": 0.1940554976463318, "learning_rate": 3.0433163219348625e-05, "loss": 0.7243, "step": 7957 }, { "epoch": 9.063817663817664, "grad_norm": 0.24352307617664337, "learning_rate": 3.042861530316292e-05, "loss": 0.6377, "step": 7958 }, { "epoch": 9.064957264957265, "grad_norm": 0.1836363524198532, "learning_rate": 3.042406719842647e-05, "loss": 0.8672, "step": 7959 }, { "epoch": 9.066096866096865, "grad_norm": 0.24205131828784943, "learning_rate": 3.0419518905297255e-05, "loss": 0.6717, "step": 7960 }, { "epoch": 9.067236467236468, "grad_norm": 0.22637714445590973, "learning_rate": 3.041497042393324e-05, "loss": 0.5596, "step": 7961 }, { "epoch": 9.068376068376068, "grad_norm": 0.1908290535211563, "learning_rate": 3.0410421754492423e-05, "loss": 0.7181, "step": 7962 }, { "epoch": 9.069515669515669, "grad_norm": 0.1839943528175354, "learning_rate": 3.040587289713277e-05, "loss": 0.8654, "step": 7963 }, { "epoch": 9.070655270655271, "grad_norm": 0.20175249874591827, "learning_rate": 3.0401323852012287e-05, "loss": 0.7141, "step": 7964 }, { "epoch": 9.071794871794872, "grad_norm": 0.3334799110889435, "learning_rate": 3.039677461928897e-05, "loss": 0.5159, "step": 7965 }, { "epoch": 9.072934472934472, "grad_norm": 0.18899498879909515, "learning_rate": 3.0392225199120832e-05, "loss": 0.8762, "step": 7966 }, { "epoch": 9.074074074074074, "grad_norm": 0.19922029972076416, "learning_rate": 3.0387675591665882e-05, "loss": 0.6303, "step": 7967 }, { "epoch": 9.075213675213675, "grad_norm": 0.2401798963546753, "learning_rate": 3.0383125797082153e-05, "loss": 0.5326, "step": 7968 }, { "epoch": 9.076353276353275, "grad_norm": 0.1890830397605896, "learning_rate": 3.0378575815527647e-05, "loss": 0.8489, "step": 7969 }, { "epoch": 9.077492877492878, "grad_norm": 0.2172982543706894, "learning_rate": 3.0374025647160422e-05, "loss": 0.5955, "step": 7970 }, { "epoch": 9.078632478632478, "grad_norm": 0.22764314711093903, "learning_rate": 3.0369475292138505e-05, "loss": 0.7766, "step": 7971 }, { "epoch": 9.079772079772079, "grad_norm": 0.2294934093952179, "learning_rate": 3.0364924750619937e-05, "loss": 0.5676, "step": 7972 }, { "epoch": 9.080911680911681, "grad_norm": 0.21727004647254944, "learning_rate": 3.0360374022762788e-05, "loss": 0.5687, "step": 7973 }, { "epoch": 9.082051282051282, "grad_norm": 0.1839292049407959, "learning_rate": 3.0355823108725106e-05, "loss": 0.8086, "step": 7974 }, { "epoch": 9.083190883190884, "grad_norm": 0.22737336158752441, "learning_rate": 3.035127200866496e-05, "loss": 0.6477, "step": 7975 }, { "epoch": 9.084330484330485, "grad_norm": 0.2086174488067627, "learning_rate": 3.0346720722740413e-05, "loss": 0.4085, "step": 7976 }, { "epoch": 9.085470085470085, "grad_norm": 0.1949806660413742, "learning_rate": 3.0342169251109566e-05, "loss": 0.8199, "step": 7977 }, { "epoch": 9.086609686609687, "grad_norm": 0.1926809549331665, "learning_rate": 3.033761759393048e-05, "loss": 0.8182, "step": 7978 }, { "epoch": 9.087749287749288, "grad_norm": 0.22032250463962555, "learning_rate": 3.033306575136126e-05, "loss": 0.5923, "step": 7979 }, { "epoch": 9.088888888888889, "grad_norm": 0.18312066793441772, "learning_rate": 3.0328513723559997e-05, "loss": 0.7698, "step": 7980 }, { "epoch": 9.090028490028491, "grad_norm": 0.16672851145267487, "learning_rate": 3.0323961510684797e-05, "loss": 0.7758, "step": 7981 }, { "epoch": 9.091168091168091, "grad_norm": 0.19653446972370148, "learning_rate": 3.0319409112893775e-05, "loss": 0.7214, "step": 7982 }, { "epoch": 9.092307692307692, "grad_norm": 0.21093419194221497, "learning_rate": 3.0314856530345055e-05, "loss": 0.642, "step": 7983 }, { "epoch": 9.093447293447294, "grad_norm": 0.16019900143146515, "learning_rate": 3.031030376319674e-05, "loss": 0.9468, "step": 7984 }, { "epoch": 9.094586894586895, "grad_norm": 0.20375417172908783, "learning_rate": 3.030575081160698e-05, "loss": 0.5477, "step": 7985 }, { "epoch": 9.095726495726495, "grad_norm": 0.45640164613723755, "learning_rate": 3.03011976757339e-05, "loss": 0.6721, "step": 7986 }, { "epoch": 9.096866096866098, "grad_norm": 0.19301548600196838, "learning_rate": 3.029664435573564e-05, "loss": 0.9278, "step": 7987 }, { "epoch": 9.098005698005698, "grad_norm": 0.23720908164978027, "learning_rate": 3.0292090851770365e-05, "loss": 0.6369, "step": 7988 }, { "epoch": 9.099145299145299, "grad_norm": 0.2241889387369156, "learning_rate": 3.0287537163996223e-05, "loss": 0.691, "step": 7989 }, { "epoch": 9.100284900284901, "grad_norm": 0.19245818257331848, "learning_rate": 3.028298329257137e-05, "loss": 0.7764, "step": 7990 }, { "epoch": 9.101424501424502, "grad_norm": 0.238912895321846, "learning_rate": 3.0278429237653976e-05, "loss": 0.6545, "step": 7991 }, { "epoch": 9.102564102564102, "grad_norm": 0.2048003375530243, "learning_rate": 3.0273874999402218e-05, "loss": 0.7816, "step": 7992 }, { "epoch": 9.103703703703705, "grad_norm": 0.19136501848697662, "learning_rate": 3.026932057797428e-05, "loss": 0.8156, "step": 7993 }, { "epoch": 9.104843304843305, "grad_norm": 0.19424991309642792, "learning_rate": 3.0264765973528353e-05, "loss": 0.8568, "step": 7994 }, { "epoch": 9.105982905982906, "grad_norm": 0.23728442192077637, "learning_rate": 3.026021118622262e-05, "loss": 0.5536, "step": 7995 }, { "epoch": 9.107122507122508, "grad_norm": 0.2579558491706848, "learning_rate": 3.0255656216215296e-05, "loss": 0.613, "step": 7996 }, { "epoch": 9.108262108262108, "grad_norm": 0.19430309534072876, "learning_rate": 3.0251101063664567e-05, "loss": 0.8128, "step": 7997 }, { "epoch": 9.109401709401709, "grad_norm": 0.17508958280086517, "learning_rate": 3.0246545728728657e-05, "loss": 0.9131, "step": 7998 }, { "epoch": 9.110541310541311, "grad_norm": 0.21832704544067383, "learning_rate": 3.024199021156579e-05, "loss": 0.7154, "step": 7999 }, { "epoch": 9.111680911680912, "grad_norm": 0.19524739682674408, "learning_rate": 3.0237434512334196e-05, "loss": 0.7135, "step": 8000 }, { "epoch": 9.112820512820512, "grad_norm": 0.19931471347808838, "learning_rate": 3.0232878631192086e-05, "loss": 0.6977, "step": 8001 }, { "epoch": 9.113960113960115, "grad_norm": 0.21324680745601654, "learning_rate": 3.0228322568297718e-05, "loss": 0.5053, "step": 8002 }, { "epoch": 9.115099715099715, "grad_norm": 0.16571569442749023, "learning_rate": 3.0223766323809326e-05, "loss": 0.8375, "step": 8003 }, { "epoch": 9.116239316239316, "grad_norm": 0.18001367151737213, "learning_rate": 3.0219209897885165e-05, "loss": 0.7627, "step": 8004 }, { "epoch": 9.117378917378918, "grad_norm": 0.20142637193202972, "learning_rate": 3.021465329068349e-05, "loss": 0.5493, "step": 8005 }, { "epoch": 9.118518518518519, "grad_norm": 0.19719557464122772, "learning_rate": 3.0210096502362566e-05, "loss": 0.7908, "step": 8006 }, { "epoch": 9.11965811965812, "grad_norm": 0.24484696984291077, "learning_rate": 3.0205539533080667e-05, "loss": 0.5803, "step": 8007 }, { "epoch": 9.120797720797722, "grad_norm": 0.25731462240219116, "learning_rate": 3.0200982382996056e-05, "loss": 0.5803, "step": 8008 }, { "epoch": 9.121937321937322, "grad_norm": 0.2253497987985611, "learning_rate": 3.019642505226703e-05, "loss": 0.5927, "step": 8009 }, { "epoch": 9.123076923076923, "grad_norm": 0.2693677842617035, "learning_rate": 3.019186754105187e-05, "loss": 0.58, "step": 8010 }, { "epoch": 9.124216524216525, "grad_norm": 0.21957387030124664, "learning_rate": 3.0187309849508878e-05, "loss": 0.8018, "step": 8011 }, { "epoch": 9.125356125356126, "grad_norm": 0.2059958577156067, "learning_rate": 3.0182751977796343e-05, "loss": 0.6538, "step": 8012 }, { "epoch": 9.126495726495726, "grad_norm": 0.21474669873714447, "learning_rate": 3.0178193926072586e-05, "loss": 0.5963, "step": 8013 }, { "epoch": 9.127635327635328, "grad_norm": 0.24345733225345612, "learning_rate": 3.0173635694495904e-05, "loss": 0.6136, "step": 8014 }, { "epoch": 9.128774928774929, "grad_norm": 0.24151192605495453, "learning_rate": 3.0169077283224635e-05, "loss": 0.5686, "step": 8015 }, { "epoch": 9.12991452991453, "grad_norm": 0.1804502010345459, "learning_rate": 3.0164518692417093e-05, "loss": 0.8, "step": 8016 }, { "epoch": 9.131054131054132, "grad_norm": 0.19830574095249176, "learning_rate": 3.0159959922231618e-05, "loss": 0.7746, "step": 8017 }, { "epoch": 9.132193732193732, "grad_norm": 0.22743438184261322, "learning_rate": 3.015540097282654e-05, "loss": 0.5336, "step": 8018 }, { "epoch": 9.133333333333333, "grad_norm": 0.17095860838890076, "learning_rate": 3.015084184436021e-05, "loss": 0.7969, "step": 8019 }, { "epoch": 9.134472934472935, "grad_norm": 0.1981702744960785, "learning_rate": 3.0146282536990978e-05, "loss": 0.7442, "step": 8020 }, { "epoch": 9.135612535612536, "grad_norm": 0.22443892061710358, "learning_rate": 3.0141723050877203e-05, "loss": 0.6122, "step": 8021 }, { "epoch": 9.136752136752136, "grad_norm": 0.26646047830581665, "learning_rate": 3.0137163386177248e-05, "loss": 0.404, "step": 8022 }, { "epoch": 9.137891737891739, "grad_norm": 0.20456752181053162, "learning_rate": 3.0132603543049477e-05, "loss": 0.6927, "step": 8023 }, { "epoch": 9.13903133903134, "grad_norm": 0.17897120118141174, "learning_rate": 3.0128043521652276e-05, "loss": 0.8187, "step": 8024 }, { "epoch": 9.14017094017094, "grad_norm": 0.2257222980260849, "learning_rate": 3.0123483322144015e-05, "loss": 0.5345, "step": 8025 }, { "epoch": 9.141310541310542, "grad_norm": 0.23555949330329895, "learning_rate": 3.0118922944683094e-05, "loss": 0.5457, "step": 8026 }, { "epoch": 9.142450142450143, "grad_norm": 0.19824659824371338, "learning_rate": 3.011436238942789e-05, "loss": 0.7135, "step": 8027 }, { "epoch": 9.143589743589743, "grad_norm": 0.1848185658454895, "learning_rate": 3.0109801656536834e-05, "loss": 0.7755, "step": 8028 }, { "epoch": 9.144729344729345, "grad_norm": 0.1790562868118286, "learning_rate": 3.01052407461683e-05, "loss": 0.6638, "step": 8029 }, { "epoch": 9.145868945868946, "grad_norm": 0.22054655849933624, "learning_rate": 3.0100679658480724e-05, "loss": 0.66, "step": 8030 }, { "epoch": 9.147008547008546, "grad_norm": 0.2575461268424988, "learning_rate": 3.009611839363251e-05, "loss": 0.6992, "step": 8031 }, { "epoch": 9.148148148148149, "grad_norm": 0.2213752269744873, "learning_rate": 3.0091556951782095e-05, "loss": 0.6647, "step": 8032 }, { "epoch": 9.14928774928775, "grad_norm": 0.20892935991287231, "learning_rate": 3.008699533308791e-05, "loss": 0.8171, "step": 8033 }, { "epoch": 9.15042735042735, "grad_norm": 0.19577014446258545, "learning_rate": 3.0082433537708376e-05, "loss": 0.7467, "step": 8034 }, { "epoch": 9.151566951566952, "grad_norm": 0.1811130791902542, "learning_rate": 3.007787156580196e-05, "loss": 0.9044, "step": 8035 }, { "epoch": 9.152706552706553, "grad_norm": 0.21809349954128265, "learning_rate": 3.007330941752709e-05, "loss": 0.6055, "step": 8036 }, { "epoch": 9.153846153846153, "grad_norm": 0.2638418972492218, "learning_rate": 3.0068747093042233e-05, "loss": 0.5942, "step": 8037 }, { "epoch": 9.154985754985756, "grad_norm": 0.20913495123386383, "learning_rate": 3.006418459250585e-05, "loss": 0.7034, "step": 8038 }, { "epoch": 9.156125356125356, "grad_norm": 0.2108583003282547, "learning_rate": 3.005962191607642e-05, "loss": 0.7311, "step": 8039 }, { "epoch": 9.157264957264957, "grad_norm": 0.17478828132152557, "learning_rate": 3.00550590639124e-05, "loss": 0.7496, "step": 8040 }, { "epoch": 9.158404558404559, "grad_norm": 0.26220399141311646, "learning_rate": 3.0050496036172283e-05, "loss": 0.6376, "step": 8041 }, { "epoch": 9.15954415954416, "grad_norm": 0.21376913785934448, "learning_rate": 3.0045932833014536e-05, "loss": 0.5264, "step": 8042 }, { "epoch": 9.16068376068376, "grad_norm": 0.18874740600585938, "learning_rate": 3.0041369454597672e-05, "loss": 0.7482, "step": 8043 }, { "epoch": 9.161823361823362, "grad_norm": 0.1886608600616455, "learning_rate": 3.0036805901080185e-05, "loss": 0.7498, "step": 8044 }, { "epoch": 9.162962962962963, "grad_norm": 0.427082359790802, "learning_rate": 3.003224217262058e-05, "loss": 0.8577, "step": 8045 }, { "epoch": 9.164102564102564, "grad_norm": 0.2096005082130432, "learning_rate": 3.0027678269377362e-05, "loss": 0.7669, "step": 8046 }, { "epoch": 9.165242165242166, "grad_norm": 0.20323561131954193, "learning_rate": 3.0023114191509056e-05, "loss": 0.5966, "step": 8047 }, { "epoch": 9.166381766381766, "grad_norm": 0.21189725399017334, "learning_rate": 3.0018549939174172e-05, "loss": 0.7716, "step": 8048 }, { "epoch": 9.167521367521367, "grad_norm": 0.18440569937229156, "learning_rate": 3.001398551253125e-05, "loss": 0.7185, "step": 8049 }, { "epoch": 9.16866096866097, "grad_norm": 0.20738254487514496, "learning_rate": 3.0009420911738827e-05, "loss": 0.6946, "step": 8050 }, { "epoch": 9.16980056980057, "grad_norm": 0.21101903915405273, "learning_rate": 3.000485613695544e-05, "loss": 0.7349, "step": 8051 }, { "epoch": 9.17094017094017, "grad_norm": 0.2050398886203766, "learning_rate": 3.0000291188339636e-05, "loss": 0.747, "step": 8052 }, { "epoch": 9.172079772079773, "grad_norm": 0.2424107789993286, "learning_rate": 2.999572606604996e-05, "loss": 0.5924, "step": 8053 }, { "epoch": 9.173219373219373, "grad_norm": 0.2001570165157318, "learning_rate": 2.9991160770244985e-05, "loss": 0.7611, "step": 8054 }, { "epoch": 9.174358974358974, "grad_norm": 0.17317825555801392, "learning_rate": 2.9986595301083277e-05, "loss": 0.9028, "step": 8055 }, { "epoch": 9.175498575498576, "grad_norm": 0.18667954206466675, "learning_rate": 2.9982029658723392e-05, "loss": 0.6992, "step": 8056 }, { "epoch": 9.176638176638177, "grad_norm": 0.2017718255519867, "learning_rate": 2.9977463843323923e-05, "loss": 0.6735, "step": 8057 }, { "epoch": 9.177777777777777, "grad_norm": 0.2579292356967926, "learning_rate": 2.997289785504344e-05, "loss": 0.6174, "step": 8058 }, { "epoch": 9.17891737891738, "grad_norm": 0.20380988717079163, "learning_rate": 2.9968331694040542e-05, "loss": 0.8659, "step": 8059 }, { "epoch": 9.18005698005698, "grad_norm": 0.206913560628891, "learning_rate": 2.996376536047382e-05, "loss": 0.7524, "step": 8060 }, { "epoch": 9.18119658119658, "grad_norm": 0.21928489208221436, "learning_rate": 2.995919885450188e-05, "loss": 0.6095, "step": 8061 }, { "epoch": 9.182336182336183, "grad_norm": 0.18263733386993408, "learning_rate": 2.9954632176283325e-05, "loss": 0.833, "step": 8062 }, { "epoch": 9.183475783475783, "grad_norm": 0.18622763454914093, "learning_rate": 2.9950065325976768e-05, "loss": 0.8737, "step": 8063 }, { "epoch": 9.184615384615384, "grad_norm": 0.2090657502412796, "learning_rate": 2.9945498303740825e-05, "loss": 0.5765, "step": 8064 }, { "epoch": 9.185754985754986, "grad_norm": 0.19382807612419128, "learning_rate": 2.9940931109734133e-05, "loss": 0.7357, "step": 8065 }, { "epoch": 9.186894586894587, "grad_norm": 0.21184006333351135, "learning_rate": 2.9936363744115316e-05, "loss": 0.6146, "step": 8066 }, { "epoch": 9.188034188034187, "grad_norm": 0.18455208837985992, "learning_rate": 2.9931796207043007e-05, "loss": 0.7379, "step": 8067 }, { "epoch": 9.18917378917379, "grad_norm": 0.1959788054227829, "learning_rate": 2.9927228498675852e-05, "loss": 0.8937, "step": 8068 }, { "epoch": 9.19031339031339, "grad_norm": 0.26601675152778625, "learning_rate": 2.992266061917251e-05, "loss": 0.6287, "step": 8069 }, { "epoch": 9.19145299145299, "grad_norm": 0.2321777194738388, "learning_rate": 2.9918092568691614e-05, "loss": 0.5539, "step": 8070 }, { "epoch": 9.192592592592593, "grad_norm": 0.1900903284549713, "learning_rate": 2.9913524347391847e-05, "loss": 0.7664, "step": 8071 }, { "epoch": 9.193732193732194, "grad_norm": 0.17832405865192413, "learning_rate": 2.9908955955431868e-05, "loss": 0.7927, "step": 8072 }, { "epoch": 9.194871794871794, "grad_norm": 0.2156316190958023, "learning_rate": 2.9904387392970347e-05, "loss": 0.5693, "step": 8073 }, { "epoch": 9.196011396011396, "grad_norm": 0.21304167807102203, "learning_rate": 2.9899818660165963e-05, "loss": 0.6855, "step": 8074 }, { "epoch": 9.197150997150997, "grad_norm": 0.1971638798713684, "learning_rate": 2.98952497571774e-05, "loss": 0.9238, "step": 8075 }, { "epoch": 9.198290598290598, "grad_norm": 0.227126345038414, "learning_rate": 2.9890680684163352e-05, "loss": 0.8005, "step": 8076 }, { "epoch": 9.1994301994302, "grad_norm": 0.14012542366981506, "learning_rate": 2.9886111441282527e-05, "loss": 0.8824, "step": 8077 }, { "epoch": 9.2005698005698, "grad_norm": 0.20587700605392456, "learning_rate": 2.98815420286936e-05, "loss": 0.6945, "step": 8078 }, { "epoch": 9.201709401709401, "grad_norm": 0.23867155611515045, "learning_rate": 2.98769724465553e-05, "loss": 0.6181, "step": 8079 }, { "epoch": 9.202849002849003, "grad_norm": 0.17290520668029785, "learning_rate": 2.9872402695026337e-05, "loss": 0.7346, "step": 8080 }, { "epoch": 9.203988603988604, "grad_norm": 0.24878010153770447, "learning_rate": 2.9867832774265424e-05, "loss": 0.5777, "step": 8081 }, { "epoch": 9.205128205128204, "grad_norm": 0.2160666584968567, "learning_rate": 2.9863262684431297e-05, "loss": 0.6324, "step": 8082 }, { "epoch": 9.206267806267807, "grad_norm": 0.19905027747154236, "learning_rate": 2.9858692425682676e-05, "loss": 0.8479, "step": 8083 }, { "epoch": 9.207407407407407, "grad_norm": 0.2422788441181183, "learning_rate": 2.985412199817832e-05, "loss": 0.7472, "step": 8084 }, { "epoch": 9.208547008547008, "grad_norm": 0.21977217495441437, "learning_rate": 2.984955140207694e-05, "loss": 0.7277, "step": 8085 }, { "epoch": 9.20968660968661, "grad_norm": 0.23536036908626556, "learning_rate": 2.9844980637537313e-05, "loss": 0.5627, "step": 8086 }, { "epoch": 9.21082621082621, "grad_norm": 0.26169732213020325, "learning_rate": 2.984040970471818e-05, "loss": 0.6232, "step": 8087 }, { "epoch": 9.211965811965811, "grad_norm": 0.21738310158252716, "learning_rate": 2.9835838603778316e-05, "loss": 0.6676, "step": 8088 }, { "epoch": 9.213105413105414, "grad_norm": 0.2095075398683548, "learning_rate": 2.9831267334876468e-05, "loss": 0.6409, "step": 8089 }, { "epoch": 9.214245014245014, "grad_norm": 0.22634997963905334, "learning_rate": 2.982669589817143e-05, "loss": 0.5905, "step": 8090 }, { "epoch": 9.215384615384615, "grad_norm": 0.21932442486286163, "learning_rate": 2.9822124293821963e-05, "loss": 0.8036, "step": 8091 }, { "epoch": 9.216524216524217, "grad_norm": 0.20687827467918396, "learning_rate": 2.981755252198686e-05, "loss": 0.7543, "step": 8092 }, { "epoch": 9.217663817663817, "grad_norm": 0.23628811538219452, "learning_rate": 2.9812980582824912e-05, "loss": 0.6928, "step": 8093 }, { "epoch": 9.218803418803418, "grad_norm": 0.21168608963489532, "learning_rate": 2.9808408476494908e-05, "loss": 0.5425, "step": 8094 }, { "epoch": 9.21994301994302, "grad_norm": 0.23283641040325165, "learning_rate": 2.980383620315566e-05, "loss": 0.7824, "step": 8095 }, { "epoch": 9.221082621082621, "grad_norm": 0.2229052186012268, "learning_rate": 2.979926376296596e-05, "loss": 0.5829, "step": 8096 }, { "epoch": 9.222222222222221, "grad_norm": 0.16488371789455414, "learning_rate": 2.9794691156084638e-05, "loss": 0.9006, "step": 8097 }, { "epoch": 9.223361823361824, "grad_norm": 0.18390604853630066, "learning_rate": 2.9790118382670506e-05, "loss": 0.7466, "step": 8098 }, { "epoch": 9.224501424501424, "grad_norm": 0.20317108929157257, "learning_rate": 2.9785545442882396e-05, "loss": 0.9282, "step": 8099 }, { "epoch": 9.225641025641025, "grad_norm": 0.18961268663406372, "learning_rate": 2.978097233687912e-05, "loss": 0.8216, "step": 8100 }, { "epoch": 9.226780626780627, "grad_norm": 0.19334463775157928, "learning_rate": 2.9776399064819537e-05, "loss": 0.7577, "step": 8101 }, { "epoch": 9.227920227920228, "grad_norm": 0.20428909361362457, "learning_rate": 2.9771825626862477e-05, "loss": 0.7447, "step": 8102 }, { "epoch": 9.229059829059828, "grad_norm": 0.18184207379817963, "learning_rate": 2.9767252023166786e-05, "loss": 0.949, "step": 8103 }, { "epoch": 9.23019943019943, "grad_norm": 0.22060103714466095, "learning_rate": 2.9762678253891325e-05, "loss": 0.5539, "step": 8104 }, { "epoch": 9.231339031339031, "grad_norm": 0.35077163577079773, "learning_rate": 2.9758104319194957e-05, "loss": 0.6337, "step": 8105 }, { "epoch": 9.232478632478632, "grad_norm": 0.22378169000148773, "learning_rate": 2.975353021923653e-05, "loss": 0.5805, "step": 8106 }, { "epoch": 9.233618233618234, "grad_norm": 0.1952681690454483, "learning_rate": 2.974895595417494e-05, "loss": 0.7213, "step": 8107 }, { "epoch": 9.234757834757835, "grad_norm": 0.2203952968120575, "learning_rate": 2.974438152416904e-05, "loss": 0.4797, "step": 8108 }, { "epoch": 9.235897435897435, "grad_norm": 0.17881755530834198, "learning_rate": 2.9739806929377722e-05, "loss": 0.7338, "step": 8109 }, { "epoch": 9.237037037037037, "grad_norm": 0.2541481852531433, "learning_rate": 2.9735232169959882e-05, "loss": 0.5651, "step": 8110 }, { "epoch": 9.238176638176638, "grad_norm": 0.21063436567783356, "learning_rate": 2.97306572460744e-05, "loss": 0.708, "step": 8111 }, { "epoch": 9.239316239316238, "grad_norm": 0.23637300729751587, "learning_rate": 2.9726082157880186e-05, "loss": 0.7502, "step": 8112 }, { "epoch": 9.24045584045584, "grad_norm": 0.20930254459381104, "learning_rate": 2.972150690553614e-05, "loss": 0.6742, "step": 8113 }, { "epoch": 9.241595441595441, "grad_norm": 0.25343096256256104, "learning_rate": 2.9716931489201173e-05, "loss": 0.5638, "step": 8114 }, { "epoch": 9.242735042735042, "grad_norm": 0.22264398634433746, "learning_rate": 2.9712355909034208e-05, "loss": 0.6037, "step": 8115 }, { "epoch": 9.243874643874644, "grad_norm": 0.23029720783233643, "learning_rate": 2.970778016519416e-05, "loss": 0.6288, "step": 8116 }, { "epoch": 9.245014245014245, "grad_norm": 0.1818740963935852, "learning_rate": 2.970320425783996e-05, "loss": 0.7519, "step": 8117 }, { "epoch": 9.246153846153845, "grad_norm": 0.22784234583377838, "learning_rate": 2.969862818713055e-05, "loss": 0.6703, "step": 8118 }, { "epoch": 9.247293447293448, "grad_norm": 0.23091241717338562, "learning_rate": 2.9694051953224848e-05, "loss": 0.7056, "step": 8119 }, { "epoch": 9.248433048433048, "grad_norm": 0.20170490443706512, "learning_rate": 2.9689475556281826e-05, "loss": 0.796, "step": 8120 }, { "epoch": 9.249572649572649, "grad_norm": 0.18195651471614838, "learning_rate": 2.9684898996460414e-05, "loss": 0.6815, "step": 8121 }, { "epoch": 9.250712250712251, "grad_norm": 0.19702638685703278, "learning_rate": 2.968032227391958e-05, "loss": 0.7324, "step": 8122 }, { "epoch": 9.251851851851852, "grad_norm": 0.22882287204265594, "learning_rate": 2.967574538881828e-05, "loss": 0.724, "step": 8123 }, { "epoch": 9.252991452991452, "grad_norm": 0.20196014642715454, "learning_rate": 2.967116834131549e-05, "loss": 0.6742, "step": 8124 }, { "epoch": 9.254131054131054, "grad_norm": 0.19326460361480713, "learning_rate": 2.9666591131570177e-05, "loss": 0.705, "step": 8125 }, { "epoch": 9.255270655270655, "grad_norm": 0.21384099125862122, "learning_rate": 2.9662013759741313e-05, "loss": 0.7464, "step": 8126 }, { "epoch": 9.256410256410255, "grad_norm": 0.21308358013629913, "learning_rate": 2.96574362259879e-05, "loss": 0.7085, "step": 8127 }, { "epoch": 9.257549857549858, "grad_norm": 0.14960002899169922, "learning_rate": 2.965285853046891e-05, "loss": 0.8266, "step": 8128 }, { "epoch": 9.258689458689458, "grad_norm": 0.20382560789585114, "learning_rate": 2.9648280673343353e-05, "loss": 0.6948, "step": 8129 }, { "epoch": 9.25982905982906, "grad_norm": 0.18864072859287262, "learning_rate": 2.964370265477022e-05, "loss": 0.6555, "step": 8130 }, { "epoch": 9.260968660968661, "grad_norm": 0.21966667473316193, "learning_rate": 2.9639124474908532e-05, "loss": 0.6743, "step": 8131 }, { "epoch": 9.262108262108262, "grad_norm": 0.1928093135356903, "learning_rate": 2.9634546133917286e-05, "loss": 0.7254, "step": 8132 }, { "epoch": 9.263247863247864, "grad_norm": 0.20280228555202484, "learning_rate": 2.9629967631955513e-05, "loss": 0.7429, "step": 8133 }, { "epoch": 9.264387464387465, "grad_norm": 0.20422202348709106, "learning_rate": 2.9625388969182227e-05, "loss": 0.7347, "step": 8134 }, { "epoch": 9.265527065527065, "grad_norm": 0.22982145845890045, "learning_rate": 2.9620810145756467e-05, "loss": 0.5859, "step": 8135 }, { "epoch": 9.266666666666667, "grad_norm": 0.16095775365829468, "learning_rate": 2.9616231161837253e-05, "loss": 0.8101, "step": 8136 }, { "epoch": 9.267806267806268, "grad_norm": 0.19835558533668518, "learning_rate": 2.9611652017583654e-05, "loss": 0.6331, "step": 8137 }, { "epoch": 9.268945868945869, "grad_norm": 0.2580896019935608, "learning_rate": 2.9607072713154684e-05, "loss": 0.2775, "step": 8138 }, { "epoch": 9.270085470085471, "grad_norm": 0.2118746042251587, "learning_rate": 2.9602493248709416e-05, "loss": 0.6997, "step": 8139 }, { "epoch": 9.271225071225071, "grad_norm": 0.22363902628421783, "learning_rate": 2.9597913624406897e-05, "loss": 0.7387, "step": 8140 }, { "epoch": 9.272364672364672, "grad_norm": 0.24798230826854706, "learning_rate": 2.9593333840406202e-05, "loss": 0.6034, "step": 8141 }, { "epoch": 9.273504273504274, "grad_norm": 0.2064630389213562, "learning_rate": 2.9588753896866377e-05, "loss": 0.5787, "step": 8142 }, { "epoch": 9.274643874643875, "grad_norm": 0.16789507865905762, "learning_rate": 2.958417379394652e-05, "loss": 0.8098, "step": 8143 }, { "epoch": 9.275783475783475, "grad_norm": 0.21151581406593323, "learning_rate": 2.9579593531805694e-05, "loss": 0.6493, "step": 8144 }, { "epoch": 9.276923076923078, "grad_norm": 0.1902713030576706, "learning_rate": 2.9575013110602994e-05, "loss": 0.7452, "step": 8145 }, { "epoch": 9.278062678062678, "grad_norm": 0.22250163555145264, "learning_rate": 2.957043253049751e-05, "loss": 0.4699, "step": 8146 }, { "epoch": 9.279202279202279, "grad_norm": 0.17782625555992126, "learning_rate": 2.9565851791648326e-05, "loss": 0.8351, "step": 8147 }, { "epoch": 9.280341880341881, "grad_norm": 0.23341424763202667, "learning_rate": 2.9561270894214565e-05, "loss": 0.7839, "step": 8148 }, { "epoch": 9.281481481481482, "grad_norm": 0.22422893345355988, "learning_rate": 2.9556689838355316e-05, "loss": 0.7115, "step": 8149 }, { "epoch": 9.282621082621082, "grad_norm": 0.20913754403591156, "learning_rate": 2.9552108624229702e-05, "loss": 0.7875, "step": 8150 }, { "epoch": 9.283760683760685, "grad_norm": 0.1924496591091156, "learning_rate": 2.954752725199683e-05, "loss": 0.8041, "step": 8151 }, { "epoch": 9.284900284900285, "grad_norm": 0.20742133259773254, "learning_rate": 2.954294572181584e-05, "loss": 0.6125, "step": 8152 }, { "epoch": 9.286039886039886, "grad_norm": 0.1897021383047104, "learning_rate": 2.9538364033845844e-05, "loss": 0.82, "step": 8153 }, { "epoch": 9.287179487179488, "grad_norm": 0.21899229288101196, "learning_rate": 2.953378218824599e-05, "loss": 0.6727, "step": 8154 }, { "epoch": 9.288319088319088, "grad_norm": 0.19185768067836761, "learning_rate": 2.952920018517541e-05, "loss": 0.6638, "step": 8155 }, { "epoch": 9.289458689458689, "grad_norm": 0.19596540927886963, "learning_rate": 2.9524618024793255e-05, "loss": 0.8122, "step": 8156 }, { "epoch": 9.290598290598291, "grad_norm": 0.22717249393463135, "learning_rate": 2.9520035707258665e-05, "loss": 0.5919, "step": 8157 }, { "epoch": 9.291737891737892, "grad_norm": 0.2406366914510727, "learning_rate": 2.951545323273081e-05, "loss": 0.7926, "step": 8158 }, { "epoch": 9.292877492877492, "grad_norm": 0.19621329009532928, "learning_rate": 2.951087060136885e-05, "loss": 0.6764, "step": 8159 }, { "epoch": 9.294017094017095, "grad_norm": 0.1909162700176239, "learning_rate": 2.9506287813331946e-05, "loss": 0.6891, "step": 8160 }, { "epoch": 9.295156695156695, "grad_norm": 0.19592273235321045, "learning_rate": 2.9501704868779278e-05, "loss": 0.7787, "step": 8161 }, { "epoch": 9.296296296296296, "grad_norm": 0.2075003981590271, "learning_rate": 2.9497121767870017e-05, "loss": 0.8056, "step": 8162 }, { "epoch": 9.297435897435898, "grad_norm": 0.2118813842535019, "learning_rate": 2.949253851076335e-05, "loss": 0.7734, "step": 8163 }, { "epoch": 9.298575498575499, "grad_norm": 0.20379863679409027, "learning_rate": 2.948795509761846e-05, "loss": 0.7292, "step": 8164 }, { "epoch": 9.2997150997151, "grad_norm": 0.20795179903507233, "learning_rate": 2.9483371528594557e-05, "loss": 0.8436, "step": 8165 }, { "epoch": 9.300854700854702, "grad_norm": 0.21498194336891174, "learning_rate": 2.9478787803850822e-05, "loss": 0.6823, "step": 8166 }, { "epoch": 9.301994301994302, "grad_norm": 0.20631302893161774, "learning_rate": 2.9474203923546478e-05, "loss": 0.6561, "step": 8167 }, { "epoch": 9.303133903133903, "grad_norm": 0.19053110480308533, "learning_rate": 2.9469619887840726e-05, "loss": 0.7629, "step": 8168 }, { "epoch": 9.304273504273505, "grad_norm": 0.21060165762901306, "learning_rate": 2.946503569689278e-05, "loss": 0.7497, "step": 8169 }, { "epoch": 9.305413105413106, "grad_norm": 0.21850259602069855, "learning_rate": 2.9460451350861873e-05, "loss": 0.8449, "step": 8170 }, { "epoch": 9.306552706552706, "grad_norm": 0.1815422624349594, "learning_rate": 2.9455866849907215e-05, "loss": 0.6673, "step": 8171 }, { "epoch": 9.307692307692308, "grad_norm": 0.16804252564907074, "learning_rate": 2.9451282194188057e-05, "loss": 0.9475, "step": 8172 }, { "epoch": 9.308831908831909, "grad_norm": 0.20566946268081665, "learning_rate": 2.9446697383863618e-05, "loss": 0.6295, "step": 8173 }, { "epoch": 9.30997150997151, "grad_norm": 0.18234986066818237, "learning_rate": 2.944211241909316e-05, "loss": 0.6918, "step": 8174 }, { "epoch": 9.311111111111112, "grad_norm": 0.2161446362733841, "learning_rate": 2.9437527300035907e-05, "loss": 0.5254, "step": 8175 }, { "epoch": 9.312250712250712, "grad_norm": 0.2099989354610443, "learning_rate": 2.943294202685114e-05, "loss": 0.9123, "step": 8176 }, { "epoch": 9.313390313390313, "grad_norm": 0.189099982380867, "learning_rate": 2.9428356599698104e-05, "loss": 0.8486, "step": 8177 }, { "epoch": 9.314529914529915, "grad_norm": 0.19961534440517426, "learning_rate": 2.942377101873607e-05, "loss": 0.7307, "step": 8178 }, { "epoch": 9.315669515669516, "grad_norm": 0.2685472071170807, "learning_rate": 2.941918528412429e-05, "loss": 0.5682, "step": 8179 }, { "epoch": 9.316809116809116, "grad_norm": 0.22447064518928528, "learning_rate": 2.9414599396022057e-05, "loss": 0.5909, "step": 8180 }, { "epoch": 9.317948717948719, "grad_norm": 0.19430287182331085, "learning_rate": 2.941001335458865e-05, "loss": 0.6627, "step": 8181 }, { "epoch": 9.31908831908832, "grad_norm": 0.19336576759815216, "learning_rate": 2.940542715998335e-05, "loss": 0.6767, "step": 8182 }, { "epoch": 9.32022792022792, "grad_norm": 0.25945714116096497, "learning_rate": 2.9400840812365448e-05, "loss": 0.5833, "step": 8183 }, { "epoch": 9.321367521367522, "grad_norm": 0.1922517716884613, "learning_rate": 2.939625431189425e-05, "loss": 0.8939, "step": 8184 }, { "epoch": 9.322507122507123, "grad_norm": 0.2269548624753952, "learning_rate": 2.9391667658729044e-05, "loss": 0.6816, "step": 8185 }, { "epoch": 9.323646723646723, "grad_norm": 0.24113623797893524, "learning_rate": 2.938708085302914e-05, "loss": 0.8185, "step": 8186 }, { "epoch": 9.324786324786325, "grad_norm": 0.18623025715351105, "learning_rate": 2.9382493894953866e-05, "loss": 0.7378, "step": 8187 }, { "epoch": 9.325925925925926, "grad_norm": 0.208524689078331, "learning_rate": 2.937790678466251e-05, "loss": 0.8193, "step": 8188 }, { "epoch": 9.327065527065526, "grad_norm": 0.238856241106987, "learning_rate": 2.937331952231443e-05, "loss": 0.6099, "step": 8189 }, { "epoch": 9.328205128205129, "grad_norm": 0.24416497349739075, "learning_rate": 2.9368732108068925e-05, "loss": 0.5675, "step": 8190 }, { "epoch": 9.32934472934473, "grad_norm": 0.20798704028129578, "learning_rate": 2.9364144542085338e-05, "loss": 0.7097, "step": 8191 }, { "epoch": 9.33048433048433, "grad_norm": 0.2220153510570526, "learning_rate": 2.935955682452301e-05, "loss": 0.6809, "step": 8192 }, { "epoch": 9.331623931623932, "grad_norm": 0.20715565979480743, "learning_rate": 2.9354968955541295e-05, "loss": 0.7386, "step": 8193 }, { "epoch": 9.332763532763533, "grad_norm": 0.1788390874862671, "learning_rate": 2.935038093529952e-05, "loss": 0.7685, "step": 8194 }, { "epoch": 9.333903133903133, "grad_norm": 0.22558629512786865, "learning_rate": 2.9345792763957058e-05, "loss": 0.5917, "step": 8195 }, { "epoch": 9.335042735042736, "grad_norm": 0.19123651087284088, "learning_rate": 2.9341204441673266e-05, "loss": 0.8431, "step": 8196 }, { "epoch": 9.336182336182336, "grad_norm": 0.23311489820480347, "learning_rate": 2.9336615968607495e-05, "loss": 0.7442, "step": 8197 }, { "epoch": 9.337321937321937, "grad_norm": 0.2288980334997177, "learning_rate": 2.9332027344919128e-05, "loss": 0.7242, "step": 8198 }, { "epoch": 9.338461538461539, "grad_norm": 0.16458222270011902, "learning_rate": 2.9327438570767545e-05, "loss": 0.9016, "step": 8199 }, { "epoch": 9.33960113960114, "grad_norm": 0.2311260849237442, "learning_rate": 2.9322849646312113e-05, "loss": 0.6548, "step": 8200 }, { "epoch": 9.34074074074074, "grad_norm": 0.1785007268190384, "learning_rate": 2.931826057171223e-05, "loss": 0.866, "step": 8201 }, { "epoch": 9.341880341880342, "grad_norm": 0.20539353787899017, "learning_rate": 2.931367134712728e-05, "loss": 0.7227, "step": 8202 }, { "epoch": 9.343019943019943, "grad_norm": 0.1755536049604416, "learning_rate": 2.9309081972716663e-05, "loss": 0.8441, "step": 8203 }, { "epoch": 9.344159544159544, "grad_norm": 0.23982207477092743, "learning_rate": 2.9304492448639774e-05, "loss": 0.5358, "step": 8204 }, { "epoch": 9.345299145299146, "grad_norm": 0.13797718286514282, "learning_rate": 2.929990277505603e-05, "loss": 1.1089, "step": 8205 }, { "epoch": 9.346438746438746, "grad_norm": 0.22900795936584473, "learning_rate": 2.9295312952124843e-05, "loss": 0.6001, "step": 8206 }, { "epoch": 9.347578347578347, "grad_norm": 0.23939545452594757, "learning_rate": 2.9290722980005615e-05, "loss": 0.5684, "step": 8207 }, { "epoch": 9.34871794871795, "grad_norm": 0.1710413694381714, "learning_rate": 2.928613285885778e-05, "loss": 0.7677, "step": 8208 }, { "epoch": 9.34985754985755, "grad_norm": 0.22461503744125366, "learning_rate": 2.928154258884076e-05, "loss": 0.6548, "step": 8209 }, { "epoch": 9.35099715099715, "grad_norm": 0.1757086217403412, "learning_rate": 2.9276952170114004e-05, "loss": 0.7924, "step": 8210 }, { "epoch": 9.352136752136753, "grad_norm": 0.19776201248168945, "learning_rate": 2.9272361602836928e-05, "loss": 0.7908, "step": 8211 }, { "epoch": 9.353276353276353, "grad_norm": 0.1843082308769226, "learning_rate": 2.926777088716899e-05, "loss": 0.6816, "step": 8212 }, { "epoch": 9.354415954415954, "grad_norm": 0.2525804042816162, "learning_rate": 2.926318002326962e-05, "loss": 0.5495, "step": 8213 }, { "epoch": 9.355555555555556, "grad_norm": 0.18370480835437775, "learning_rate": 2.92585890112983e-05, "loss": 0.8228, "step": 8214 }, { "epoch": 9.356695156695157, "grad_norm": 0.2274402230978012, "learning_rate": 2.9253997851414462e-05, "loss": 0.6225, "step": 8215 }, { "epoch": 9.357834757834757, "grad_norm": 0.18862919509410858, "learning_rate": 2.9249406543777593e-05, "loss": 0.6113, "step": 8216 }, { "epoch": 9.35897435897436, "grad_norm": 0.18409699201583862, "learning_rate": 2.9244815088547133e-05, "loss": 0.7041, "step": 8217 }, { "epoch": 9.36011396011396, "grad_norm": 0.22614800930023193, "learning_rate": 2.9240223485882582e-05, "loss": 0.6301, "step": 8218 }, { "epoch": 9.36125356125356, "grad_norm": 0.22871989011764526, "learning_rate": 2.9235631735943404e-05, "loss": 0.6535, "step": 8219 }, { "epoch": 9.362393162393163, "grad_norm": 0.2747134566307068, "learning_rate": 2.923103983888909e-05, "loss": 0.501, "step": 8220 }, { "epoch": 9.363532763532763, "grad_norm": 0.20141135156154633, "learning_rate": 2.922644779487913e-05, "loss": 0.6875, "step": 8221 }, { "epoch": 9.364672364672364, "grad_norm": 0.17061856389045715, "learning_rate": 2.9221855604073007e-05, "loss": 0.9525, "step": 8222 }, { "epoch": 9.365811965811966, "grad_norm": 0.22977714240550995, "learning_rate": 2.9217263266630235e-05, "loss": 0.7529, "step": 8223 }, { "epoch": 9.366951566951567, "grad_norm": 0.21643103659152985, "learning_rate": 2.9212670782710306e-05, "loss": 0.7464, "step": 8224 }, { "epoch": 9.368091168091167, "grad_norm": 0.22548314929008484, "learning_rate": 2.9208078152472746e-05, "loss": 0.7379, "step": 8225 }, { "epoch": 9.36923076923077, "grad_norm": 0.2928323447704315, "learning_rate": 2.920348537607705e-05, "loss": 0.3066, "step": 8226 }, { "epoch": 9.37037037037037, "grad_norm": 0.2212189882993698, "learning_rate": 2.9198892453682753e-05, "loss": 0.6476, "step": 8227 }, { "epoch": 9.37150997150997, "grad_norm": 0.19711345434188843, "learning_rate": 2.919429938544937e-05, "loss": 0.8307, "step": 8228 }, { "epoch": 9.372649572649573, "grad_norm": 0.25519493222236633, "learning_rate": 2.918970617153644e-05, "loss": 0.7157, "step": 8229 }, { "epoch": 9.373789173789174, "grad_norm": 0.22985145449638367, "learning_rate": 2.918511281210349e-05, "loss": 0.6439, "step": 8230 }, { "epoch": 9.374928774928774, "grad_norm": 0.1725936084985733, "learning_rate": 2.918051930731006e-05, "loss": 0.778, "step": 8231 }, { "epoch": 9.376068376068377, "grad_norm": 0.19835489988327026, "learning_rate": 2.9175925657315705e-05, "loss": 0.6756, "step": 8232 }, { "epoch": 9.377207977207977, "grad_norm": 0.2511741816997528, "learning_rate": 2.917133186227996e-05, "loss": 0.6039, "step": 8233 }, { "epoch": 9.378347578347578, "grad_norm": 0.2059909850358963, "learning_rate": 2.916673792236239e-05, "loss": 0.7232, "step": 8234 }, { "epoch": 9.37948717948718, "grad_norm": 0.17528991401195526, "learning_rate": 2.9162143837722555e-05, "loss": 0.6653, "step": 8235 }, { "epoch": 9.38062678062678, "grad_norm": 0.19559913873672485, "learning_rate": 2.9157549608520023e-05, "loss": 0.7836, "step": 8236 }, { "epoch": 9.381766381766381, "grad_norm": 0.20877596735954285, "learning_rate": 2.915295523491436e-05, "loss": 0.6439, "step": 8237 }, { "epoch": 9.382905982905983, "grad_norm": 0.2692725658416748, "learning_rate": 2.9148360717065138e-05, "loss": 0.5489, "step": 8238 }, { "epoch": 9.384045584045584, "grad_norm": 0.22037579119205475, "learning_rate": 2.914376605513194e-05, "loss": 0.701, "step": 8239 }, { "epoch": 9.385185185185184, "grad_norm": 0.17327138781547546, "learning_rate": 2.913917124927435e-05, "loss": 0.8007, "step": 8240 }, { "epoch": 9.386324786324787, "grad_norm": 0.18858470022678375, "learning_rate": 2.9134576299651957e-05, "loss": 0.7466, "step": 8241 }, { "epoch": 9.387464387464387, "grad_norm": 0.22589455544948578, "learning_rate": 2.912998120642436e-05, "loss": 0.8381, "step": 8242 }, { "epoch": 9.388603988603988, "grad_norm": 0.2291344255208969, "learning_rate": 2.912538596975116e-05, "loss": 0.5449, "step": 8243 }, { "epoch": 9.38974358974359, "grad_norm": 0.18037211894989014, "learning_rate": 2.912079058979197e-05, "loss": 0.8264, "step": 8244 }, { "epoch": 9.39088319088319, "grad_norm": 0.22732049226760864, "learning_rate": 2.9116195066706376e-05, "loss": 0.8301, "step": 8245 }, { "epoch": 9.392022792022791, "grad_norm": 0.2421773374080658, "learning_rate": 2.9111599400654016e-05, "loss": 0.5436, "step": 8246 }, { "epoch": 9.393162393162394, "grad_norm": 0.17228582501411438, "learning_rate": 2.9107003591794496e-05, "loss": 0.8842, "step": 8247 }, { "epoch": 9.394301994301994, "grad_norm": 0.18033257126808167, "learning_rate": 2.9102407640287448e-05, "loss": 0.927, "step": 8248 }, { "epoch": 9.395441595441595, "grad_norm": 0.17915350198745728, "learning_rate": 2.9097811546292508e-05, "loss": 0.7632, "step": 8249 }, { "epoch": 9.396581196581197, "grad_norm": 0.20919619500637054, "learning_rate": 2.90932153099693e-05, "loss": 0.6352, "step": 8250 }, { "epoch": 9.397720797720797, "grad_norm": 0.21215081214904785, "learning_rate": 2.9088618931477467e-05, "loss": 0.6774, "step": 8251 }, { "epoch": 9.398860398860398, "grad_norm": 0.17635183036327362, "learning_rate": 2.9084022410976654e-05, "loss": 0.6956, "step": 8252 }, { "epoch": 9.4, "grad_norm": 0.18748271465301514, "learning_rate": 2.907942574862651e-05, "loss": 0.7098, "step": 8253 }, { "epoch": 9.401139601139601, "grad_norm": 0.18658225238323212, "learning_rate": 2.9074828944586695e-05, "loss": 0.8669, "step": 8254 }, { "epoch": 9.402279202279201, "grad_norm": 0.21046657860279083, "learning_rate": 2.907023199901687e-05, "loss": 0.7399, "step": 8255 }, { "epoch": 9.403418803418804, "grad_norm": 0.22088541090488434, "learning_rate": 2.9065634912076683e-05, "loss": 0.7906, "step": 8256 }, { "epoch": 9.404558404558404, "grad_norm": 0.20422707498073578, "learning_rate": 2.9061037683925823e-05, "loss": 0.603, "step": 8257 }, { "epoch": 9.405698005698005, "grad_norm": 0.1929522007703781, "learning_rate": 2.905644031472395e-05, "loss": 0.6718, "step": 8258 }, { "epoch": 9.406837606837607, "grad_norm": 0.23850117623806, "learning_rate": 2.9051842804630752e-05, "loss": 0.6068, "step": 8259 }, { "epoch": 9.407977207977208, "grad_norm": 0.2040931135416031, "learning_rate": 2.9047245153805915e-05, "loss": 0.7519, "step": 8260 }, { "epoch": 9.40911680911681, "grad_norm": 0.18663831055164337, "learning_rate": 2.904264736240912e-05, "loss": 0.7596, "step": 8261 }, { "epoch": 9.41025641025641, "grad_norm": 0.18372474610805511, "learning_rate": 2.9038049430600067e-05, "loss": 0.7603, "step": 8262 }, { "epoch": 9.411396011396011, "grad_norm": 0.2466694414615631, "learning_rate": 2.9033451358538443e-05, "loss": 0.4859, "step": 8263 }, { "epoch": 9.412535612535613, "grad_norm": 0.2079930305480957, "learning_rate": 2.9028853146383977e-05, "loss": 0.6024, "step": 8264 }, { "epoch": 9.413675213675214, "grad_norm": 0.19135509431362152, "learning_rate": 2.902425479429635e-05, "loss": 0.8327, "step": 8265 }, { "epoch": 9.414814814814815, "grad_norm": 0.17083074152469635, "learning_rate": 2.9019656302435295e-05, "loss": 0.7324, "step": 8266 }, { "epoch": 9.415954415954417, "grad_norm": 0.1864568144083023, "learning_rate": 2.9015057670960522e-05, "loss": 0.8068, "step": 8267 }, { "epoch": 9.417094017094017, "grad_norm": 0.1972772479057312, "learning_rate": 2.901045890003175e-05, "loss": 0.8363, "step": 8268 }, { "epoch": 9.418233618233618, "grad_norm": 0.21849708259105682, "learning_rate": 2.900585998980871e-05, "loss": 0.6289, "step": 8269 }, { "epoch": 9.41937321937322, "grad_norm": 0.1950043886899948, "learning_rate": 2.900126094045114e-05, "loss": 0.5283, "step": 8270 }, { "epoch": 9.42051282051282, "grad_norm": 0.19965900480747223, "learning_rate": 2.8996661752118775e-05, "loss": 0.8386, "step": 8271 }, { "epoch": 9.421652421652421, "grad_norm": 0.23779959976673126, "learning_rate": 2.8992062424971357e-05, "loss": 0.7332, "step": 8272 }, { "epoch": 9.422792022792024, "grad_norm": 0.21002760529518127, "learning_rate": 2.8987462959168633e-05, "loss": 0.6746, "step": 8273 }, { "epoch": 9.423931623931624, "grad_norm": 0.19183014333248138, "learning_rate": 2.8982863354870347e-05, "loss": 0.6645, "step": 8274 }, { "epoch": 9.425071225071225, "grad_norm": 0.23862184584140778, "learning_rate": 2.8978263612236267e-05, "loss": 0.65, "step": 8275 }, { "epoch": 9.426210826210827, "grad_norm": 0.23275874555110931, "learning_rate": 2.897366373142616e-05, "loss": 0.6638, "step": 8276 }, { "epoch": 9.427350427350428, "grad_norm": 0.26541343331336975, "learning_rate": 2.8969063712599775e-05, "loss": 0.547, "step": 8277 }, { "epoch": 9.428490028490028, "grad_norm": 0.23670506477355957, "learning_rate": 2.8964463555916897e-05, "loss": 0.7398, "step": 8278 }, { "epoch": 9.42962962962963, "grad_norm": 0.17798356711864471, "learning_rate": 2.8959863261537295e-05, "loss": 0.8515, "step": 8279 }, { "epoch": 9.430769230769231, "grad_norm": 0.1822018176317215, "learning_rate": 2.8955262829620748e-05, "loss": 0.7291, "step": 8280 }, { "epoch": 9.431908831908832, "grad_norm": 0.28091150522232056, "learning_rate": 2.8950662260327055e-05, "loss": 0.5391, "step": 8281 }, { "epoch": 9.433048433048434, "grad_norm": 0.2726202607154846, "learning_rate": 2.8946061553815996e-05, "loss": 0.5993, "step": 8282 }, { "epoch": 9.434188034188034, "grad_norm": 0.17134778201580048, "learning_rate": 2.8941460710247363e-05, "loss": 0.9159, "step": 8283 }, { "epoch": 9.435327635327635, "grad_norm": 0.1888948678970337, "learning_rate": 2.8936859729780963e-05, "loss": 0.6763, "step": 8284 }, { "epoch": 9.436467236467237, "grad_norm": 0.237699955701828, "learning_rate": 2.8932258612576595e-05, "loss": 0.6358, "step": 8285 }, { "epoch": 9.437606837606838, "grad_norm": 0.20391274988651276, "learning_rate": 2.8927657358794076e-05, "loss": 0.6017, "step": 8286 }, { "epoch": 9.438746438746438, "grad_norm": 0.16478058695793152, "learning_rate": 2.8923055968593215e-05, "loss": 0.7718, "step": 8287 }, { "epoch": 9.43988603988604, "grad_norm": 0.17847026884555817, "learning_rate": 2.891845444213383e-05, "loss": 0.7971, "step": 8288 }, { "epoch": 9.441025641025641, "grad_norm": 0.26238593459129333, "learning_rate": 2.891385277957575e-05, "loss": 0.6168, "step": 8289 }, { "epoch": 9.442165242165242, "grad_norm": 0.1678786724805832, "learning_rate": 2.8909250981078796e-05, "loss": 0.8156, "step": 8290 }, { "epoch": 9.443304843304844, "grad_norm": 0.15799471735954285, "learning_rate": 2.89046490468028e-05, "loss": 0.8172, "step": 8291 }, { "epoch": 9.444444444444445, "grad_norm": 0.2266349196434021, "learning_rate": 2.8900046976907615e-05, "loss": 0.5889, "step": 8292 }, { "epoch": 9.445584045584045, "grad_norm": 0.2325068861246109, "learning_rate": 2.889544477155307e-05, "loss": 0.6788, "step": 8293 }, { "epoch": 9.446723646723648, "grad_norm": 0.24711363017559052, "learning_rate": 2.8890842430899012e-05, "loss": 0.5984, "step": 8294 }, { "epoch": 9.447863247863248, "grad_norm": 0.2359197437763214, "learning_rate": 2.88862399551053e-05, "loss": 0.6938, "step": 8295 }, { "epoch": 9.449002849002849, "grad_norm": 0.26285019516944885, "learning_rate": 2.8881637344331784e-05, "loss": 0.5129, "step": 8296 }, { "epoch": 9.450142450142451, "grad_norm": 0.26490017771720886, "learning_rate": 2.8877034598738328e-05, "loss": 0.5057, "step": 8297 }, { "epoch": 9.451282051282051, "grad_norm": 0.29601070284843445, "learning_rate": 2.8872431718484804e-05, "loss": 0.6329, "step": 8298 }, { "epoch": 9.452421652421652, "grad_norm": 0.2394779473543167, "learning_rate": 2.8867828703731075e-05, "loss": 0.776, "step": 8299 }, { "epoch": 9.453561253561254, "grad_norm": 0.17764927446842194, "learning_rate": 2.8863225554637025e-05, "loss": 0.7667, "step": 8300 }, { "epoch": 9.454700854700855, "grad_norm": 0.18035757541656494, "learning_rate": 2.8858622271362518e-05, "loss": 0.8518, "step": 8301 }, { "epoch": 9.455840455840455, "grad_norm": 0.19396564364433289, "learning_rate": 2.8854018854067445e-05, "loss": 0.6938, "step": 8302 }, { "epoch": 9.456980056980058, "grad_norm": 0.24865515530109406, "learning_rate": 2.8849415302911704e-05, "loss": 0.4668, "step": 8303 }, { "epoch": 9.458119658119658, "grad_norm": 0.21866804361343384, "learning_rate": 2.884481161805519e-05, "loss": 0.549, "step": 8304 }, { "epoch": 9.459259259259259, "grad_norm": 0.2153816670179367, "learning_rate": 2.8840207799657786e-05, "loss": 0.7232, "step": 8305 }, { "epoch": 9.460398860398861, "grad_norm": 0.2515931725502014, "learning_rate": 2.883560384787941e-05, "loss": 0.5268, "step": 8306 }, { "epoch": 9.461538461538462, "grad_norm": 0.23770812153816223, "learning_rate": 2.8830999762879963e-05, "loss": 0.4959, "step": 8307 }, { "epoch": 9.462678062678062, "grad_norm": 0.2572888135910034, "learning_rate": 2.8826395544819353e-05, "loss": 0.5319, "step": 8308 }, { "epoch": 9.463817663817665, "grad_norm": 0.2278318703174591, "learning_rate": 2.882179119385751e-05, "loss": 0.6656, "step": 8309 }, { "epoch": 9.464957264957265, "grad_norm": 0.18753720819950104, "learning_rate": 2.881718671015435e-05, "loss": 0.6344, "step": 8310 }, { "epoch": 9.466096866096866, "grad_norm": 0.14883768558502197, "learning_rate": 2.8812582093869794e-05, "loss": 0.9088, "step": 8311 }, { "epoch": 9.467236467236468, "grad_norm": 0.21777556836605072, "learning_rate": 2.8807977345163778e-05, "loss": 0.6704, "step": 8312 }, { "epoch": 9.468376068376068, "grad_norm": 0.16861961781978607, "learning_rate": 2.8803372464196237e-05, "loss": 0.7989, "step": 8313 }, { "epoch": 9.469515669515669, "grad_norm": 0.18020348250865936, "learning_rate": 2.8798767451127108e-05, "loss": 0.7701, "step": 8314 }, { "epoch": 9.470655270655271, "grad_norm": 0.26641204953193665, "learning_rate": 2.8794162306116344e-05, "loss": 0.5527, "step": 8315 }, { "epoch": 9.471794871794872, "grad_norm": 0.21417199075222015, "learning_rate": 2.878955702932389e-05, "loss": 0.7334, "step": 8316 }, { "epoch": 9.472934472934472, "grad_norm": 0.18426980078220367, "learning_rate": 2.8784951620909695e-05, "loss": 0.7961, "step": 8317 }, { "epoch": 9.474074074074075, "grad_norm": 0.19051989912986755, "learning_rate": 2.8780346081033716e-05, "loss": 0.5143, "step": 8318 }, { "epoch": 9.475213675213675, "grad_norm": 0.19523242115974426, "learning_rate": 2.8775740409855932e-05, "loss": 0.634, "step": 8319 }, { "epoch": 9.476353276353276, "grad_norm": 0.19099469482898712, "learning_rate": 2.87711346075363e-05, "loss": 0.9806, "step": 8320 }, { "epoch": 9.477492877492878, "grad_norm": 0.16670578718185425, "learning_rate": 2.8766528674234787e-05, "loss": 0.7367, "step": 8321 }, { "epoch": 9.478632478632479, "grad_norm": 0.24034541845321655, "learning_rate": 2.876192261011138e-05, "loss": 0.711, "step": 8322 }, { "epoch": 9.47977207977208, "grad_norm": 0.2211134135723114, "learning_rate": 2.875731641532605e-05, "loss": 0.7313, "step": 8323 }, { "epoch": 9.480911680911682, "grad_norm": 0.20278768241405487, "learning_rate": 2.8752710090038793e-05, "loss": 0.7777, "step": 8324 }, { "epoch": 9.482051282051282, "grad_norm": 0.2389332503080368, "learning_rate": 2.874810363440959e-05, "loss": 0.616, "step": 8325 }, { "epoch": 9.483190883190883, "grad_norm": 0.1825195848941803, "learning_rate": 2.8743497048598446e-05, "loss": 0.917, "step": 8326 }, { "epoch": 9.484330484330485, "grad_norm": 0.21460694074630737, "learning_rate": 2.873889033276535e-05, "loss": 0.7109, "step": 8327 }, { "epoch": 9.485470085470086, "grad_norm": 0.21505093574523926, "learning_rate": 2.873428348707032e-05, "loss": 0.7015, "step": 8328 }, { "epoch": 9.486609686609686, "grad_norm": 0.20415301620960236, "learning_rate": 2.8729676511673337e-05, "loss": 0.7509, "step": 8329 }, { "epoch": 9.487749287749288, "grad_norm": 0.23831972479820251, "learning_rate": 2.8725069406734446e-05, "loss": 0.7278, "step": 8330 }, { "epoch": 9.488888888888889, "grad_norm": 0.18390317261219025, "learning_rate": 2.8720462172413648e-05, "loss": 0.7367, "step": 8331 }, { "epoch": 9.49002849002849, "grad_norm": 0.2513265013694763, "learning_rate": 2.8715854808870967e-05, "loss": 0.5149, "step": 8332 }, { "epoch": 9.491168091168092, "grad_norm": 0.20089967548847198, "learning_rate": 2.871124731626643e-05, "loss": 0.8384, "step": 8333 }, { "epoch": 9.492307692307692, "grad_norm": 0.20433096587657928, "learning_rate": 2.8706639694760073e-05, "loss": 0.6738, "step": 8334 }, { "epoch": 9.493447293447293, "grad_norm": 0.1998908817768097, "learning_rate": 2.870203194451191e-05, "loss": 0.7671, "step": 8335 }, { "epoch": 9.494586894586895, "grad_norm": 0.20310935378074646, "learning_rate": 2.869742406568201e-05, "loss": 0.8301, "step": 8336 }, { "epoch": 9.495726495726496, "grad_norm": 0.25311413407325745, "learning_rate": 2.86928160584304e-05, "loss": 0.5871, "step": 8337 }, { "epoch": 9.496866096866096, "grad_norm": 0.2146860510110855, "learning_rate": 2.868820792291714e-05, "loss": 0.7608, "step": 8338 }, { "epoch": 9.498005698005699, "grad_norm": 0.24213021993637085, "learning_rate": 2.868359965930227e-05, "loss": 0.7398, "step": 8339 }, { "epoch": 9.4991452991453, "grad_norm": 0.1569492220878601, "learning_rate": 2.8678991267745854e-05, "loss": 0.9358, "step": 8340 }, { "epoch": 9.5002849002849, "grad_norm": 0.2284863442182541, "learning_rate": 2.8674382748407956e-05, "loss": 0.6213, "step": 8341 }, { "epoch": 9.501424501424502, "grad_norm": 0.20555830001831055, "learning_rate": 2.8669774101448638e-05, "loss": 0.5989, "step": 8342 }, { "epoch": 9.502564102564103, "grad_norm": 0.19570647180080414, "learning_rate": 2.8665165327027975e-05, "loss": 0.6522, "step": 8343 }, { "epoch": 9.503703703703703, "grad_norm": 0.1841321587562561, "learning_rate": 2.866055642530604e-05, "loss": 0.9094, "step": 8344 }, { "epoch": 9.504843304843305, "grad_norm": 0.2816087603569031, "learning_rate": 2.865594739644292e-05, "loss": 0.4545, "step": 8345 }, { "epoch": 9.505982905982906, "grad_norm": 0.1943245381116867, "learning_rate": 2.865133824059868e-05, "loss": 0.7018, "step": 8346 }, { "epoch": 9.507122507122507, "grad_norm": 0.18258622288703918, "learning_rate": 2.8646728957933434e-05, "loss": 0.8033, "step": 8347 }, { "epoch": 9.508262108262109, "grad_norm": 0.2008790820837021, "learning_rate": 2.8642119548607255e-05, "loss": 0.6307, "step": 8348 }, { "epoch": 9.50940170940171, "grad_norm": 0.21512305736541748, "learning_rate": 2.8637510012780256e-05, "loss": 0.6303, "step": 8349 }, { "epoch": 9.51054131054131, "grad_norm": 0.1645938903093338, "learning_rate": 2.8632900350612522e-05, "loss": 0.8429, "step": 8350 }, { "epoch": 9.511680911680912, "grad_norm": 0.20535434782505035, "learning_rate": 2.862829056226417e-05, "loss": 0.8113, "step": 8351 }, { "epoch": 9.512820512820513, "grad_norm": 0.21696172654628754, "learning_rate": 2.8623680647895312e-05, "loss": 0.7139, "step": 8352 }, { "epoch": 9.513960113960113, "grad_norm": 0.2463272511959076, "learning_rate": 2.8619070607666064e-05, "loss": 0.5647, "step": 8353 }, { "epoch": 9.515099715099716, "grad_norm": 0.21977262198925018, "learning_rate": 2.8614460441736534e-05, "loss": 0.5622, "step": 8354 }, { "epoch": 9.516239316239316, "grad_norm": 0.2143358290195465, "learning_rate": 2.860985015026686e-05, "loss": 0.8698, "step": 8355 }, { "epoch": 9.517378917378917, "grad_norm": 0.26626792550086975, "learning_rate": 2.8605239733417154e-05, "loss": 0.5889, "step": 8356 }, { "epoch": 9.518518518518519, "grad_norm": 0.2296808958053589, "learning_rate": 2.860062919134756e-05, "loss": 0.6174, "step": 8357 }, { "epoch": 9.51965811965812, "grad_norm": 0.2062365859746933, "learning_rate": 2.859601852421822e-05, "loss": 0.8162, "step": 8358 }, { "epoch": 9.52079772079772, "grad_norm": 0.24304889142513275, "learning_rate": 2.8591407732189264e-05, "loss": 0.5502, "step": 8359 }, { "epoch": 9.521937321937322, "grad_norm": 0.23465819656848907, "learning_rate": 2.8586796815420842e-05, "loss": 0.6699, "step": 8360 }, { "epoch": 9.523076923076923, "grad_norm": 0.20695257186889648, "learning_rate": 2.85821857740731e-05, "loss": 0.702, "step": 8361 }, { "epoch": 9.524216524216524, "grad_norm": 0.3112834095954895, "learning_rate": 2.8577574608306202e-05, "loss": 0.4252, "step": 8362 }, { "epoch": 9.525356125356126, "grad_norm": 0.19720891118049622, "learning_rate": 2.857296331828028e-05, "loss": 0.8422, "step": 8363 }, { "epoch": 9.526495726495726, "grad_norm": 0.17155838012695312, "learning_rate": 2.8568351904155533e-05, "loss": 0.5871, "step": 8364 }, { "epoch": 9.527635327635327, "grad_norm": 0.2342628687620163, "learning_rate": 2.8563740366092107e-05, "loss": 0.6936, "step": 8365 }, { "epoch": 9.52877492877493, "grad_norm": 0.21113745868206024, "learning_rate": 2.855912870425018e-05, "loss": 0.7266, "step": 8366 }, { "epoch": 9.52991452991453, "grad_norm": 0.18610437214374542, "learning_rate": 2.8554516918789926e-05, "loss": 0.7853, "step": 8367 }, { "epoch": 9.53105413105413, "grad_norm": 0.2376990020275116, "learning_rate": 2.8549905009871518e-05, "loss": 0.5954, "step": 8368 }, { "epoch": 9.532193732193733, "grad_norm": 0.25791552662849426, "learning_rate": 2.854529297765516e-05, "loss": 0.5568, "step": 8369 }, { "epoch": 9.533333333333333, "grad_norm": 0.20374339818954468, "learning_rate": 2.8540680822301013e-05, "loss": 0.6964, "step": 8370 }, { "epoch": 9.534472934472934, "grad_norm": 0.17722158133983612, "learning_rate": 2.8536068543969287e-05, "loss": 0.8453, "step": 8371 }, { "epoch": 9.535612535612536, "grad_norm": 0.2036590576171875, "learning_rate": 2.8531456142820183e-05, "loss": 0.5294, "step": 8372 }, { "epoch": 9.536752136752137, "grad_norm": 0.17068059742450714, "learning_rate": 2.8526843619013887e-05, "loss": 0.9828, "step": 8373 }, { "epoch": 9.537891737891737, "grad_norm": 0.2139616459608078, "learning_rate": 2.852223097271062e-05, "loss": 0.7065, "step": 8374 }, { "epoch": 9.53903133903134, "grad_norm": 0.20804154872894287, "learning_rate": 2.8517618204070585e-05, "loss": 0.6997, "step": 8375 }, { "epoch": 9.54017094017094, "grad_norm": 0.24357639253139496, "learning_rate": 2.8513005313253994e-05, "loss": 0.7099, "step": 8376 }, { "epoch": 9.54131054131054, "grad_norm": 0.22251440584659576, "learning_rate": 2.8508392300421066e-05, "loss": 0.632, "step": 8377 }, { "epoch": 9.542450142450143, "grad_norm": 0.2210913449525833, "learning_rate": 2.8503779165732024e-05, "loss": 0.7186, "step": 8378 }, { "epoch": 9.543589743589743, "grad_norm": 0.1985541135072708, "learning_rate": 2.8499165909347102e-05, "loss": 0.6945, "step": 8379 }, { "epoch": 9.544729344729344, "grad_norm": 0.19815881550312042, "learning_rate": 2.849455253142652e-05, "loss": 0.7998, "step": 8380 }, { "epoch": 9.545868945868946, "grad_norm": 0.2804073989391327, "learning_rate": 2.8489939032130524e-05, "loss": 0.5707, "step": 8381 }, { "epoch": 9.547008547008547, "grad_norm": 0.19798095524311066, "learning_rate": 2.8485325411619345e-05, "loss": 0.6787, "step": 8382 }, { "epoch": 9.548148148148147, "grad_norm": 0.23597125709056854, "learning_rate": 2.848071167005323e-05, "loss": 0.6493, "step": 8383 }, { "epoch": 9.54928774928775, "grad_norm": 0.16221386194229126, "learning_rate": 2.847609780759243e-05, "loss": 0.8093, "step": 8384 }, { "epoch": 9.55042735042735, "grad_norm": 0.19412098824977875, "learning_rate": 2.847148382439718e-05, "loss": 0.6367, "step": 8385 }, { "epoch": 9.55156695156695, "grad_norm": 0.18541525304317474, "learning_rate": 2.846686972062777e-05, "loss": 0.8264, "step": 8386 }, { "epoch": 9.552706552706553, "grad_norm": 0.15969184041023254, "learning_rate": 2.846225549644443e-05, "loss": 0.7718, "step": 8387 }, { "epoch": 9.553846153846154, "grad_norm": 0.29093435406684875, "learning_rate": 2.8457641152007437e-05, "loss": 0.5925, "step": 8388 }, { "epoch": 9.554985754985754, "grad_norm": 0.22861137986183167, "learning_rate": 2.8453026687477058e-05, "loss": 0.6109, "step": 8389 }, { "epoch": 9.556125356125357, "grad_norm": 0.18513795733451843, "learning_rate": 2.844841210301356e-05, "loss": 0.8982, "step": 8390 }, { "epoch": 9.557264957264957, "grad_norm": 0.20136123895645142, "learning_rate": 2.8443797398777232e-05, "loss": 0.7893, "step": 8391 }, { "epoch": 9.558404558404558, "grad_norm": 0.19829393923282623, "learning_rate": 2.8439182574928357e-05, "loss": 0.6484, "step": 8392 }, { "epoch": 9.55954415954416, "grad_norm": 0.16065245866775513, "learning_rate": 2.84345676316272e-05, "loss": 0.8099, "step": 8393 }, { "epoch": 9.56068376068376, "grad_norm": 0.2197786271572113, "learning_rate": 2.842995256903408e-05, "loss": 0.7164, "step": 8394 }, { "epoch": 9.561823361823361, "grad_norm": 0.2042093724012375, "learning_rate": 2.842533738730926e-05, "loss": 0.8174, "step": 8395 }, { "epoch": 9.562962962962963, "grad_norm": 0.22282445430755615, "learning_rate": 2.8420722086613057e-05, "loss": 0.8054, "step": 8396 }, { "epoch": 9.564102564102564, "grad_norm": 0.2050190269947052, "learning_rate": 2.841610666710577e-05, "loss": 0.8097, "step": 8397 }, { "epoch": 9.565242165242164, "grad_norm": 0.2720555067062378, "learning_rate": 2.8411491128947708e-05, "loss": 0.5605, "step": 8398 }, { "epoch": 9.566381766381767, "grad_norm": 0.21021820604801178, "learning_rate": 2.840687547229917e-05, "loss": 0.6591, "step": 8399 }, { "epoch": 9.567521367521367, "grad_norm": 0.20061002671718597, "learning_rate": 2.840225969732049e-05, "loss": 0.7079, "step": 8400 }, { "epoch": 9.568660968660968, "grad_norm": 0.1844741702079773, "learning_rate": 2.8397643804171957e-05, "loss": 0.8422, "step": 8401 }, { "epoch": 9.56980056980057, "grad_norm": 0.24361716210842133, "learning_rate": 2.839302779301392e-05, "loss": 0.5547, "step": 8402 }, { "epoch": 9.57094017094017, "grad_norm": 0.18238916993141174, "learning_rate": 2.8388411664006702e-05, "loss": 0.842, "step": 8403 }, { "epoch": 9.572079772079771, "grad_norm": 0.2600004971027374, "learning_rate": 2.8383795417310623e-05, "loss": 0.5572, "step": 8404 }, { "epoch": 9.573219373219374, "grad_norm": 0.2804044485092163, "learning_rate": 2.8379179053086032e-05, "loss": 0.4059, "step": 8405 }, { "epoch": 9.574358974358974, "grad_norm": 0.1579924374818802, "learning_rate": 2.8374562571493245e-05, "loss": 0.7915, "step": 8406 }, { "epoch": 9.575498575498575, "grad_norm": 0.22872616350650787, "learning_rate": 2.836994597269263e-05, "loss": 0.729, "step": 8407 }, { "epoch": 9.576638176638177, "grad_norm": 0.2375708818435669, "learning_rate": 2.836532925684452e-05, "loss": 0.7186, "step": 8408 }, { "epoch": 9.577777777777778, "grad_norm": 0.23086920380592346, "learning_rate": 2.8360712424109272e-05, "loss": 0.6302, "step": 8409 }, { "epoch": 9.578917378917378, "grad_norm": 0.17449885606765747, "learning_rate": 2.835609547464724e-05, "loss": 0.7291, "step": 8410 }, { "epoch": 9.58005698005698, "grad_norm": 0.2576386034488678, "learning_rate": 2.835147840861878e-05, "loss": 0.5856, "step": 8411 }, { "epoch": 9.581196581196581, "grad_norm": 0.1862577348947525, "learning_rate": 2.8346861226184256e-05, "loss": 0.7679, "step": 8412 }, { "epoch": 9.582336182336181, "grad_norm": 0.2355736792087555, "learning_rate": 2.8342243927504047e-05, "loss": 0.6464, "step": 8413 }, { "epoch": 9.583475783475784, "grad_norm": 0.1800006777048111, "learning_rate": 2.833762651273851e-05, "loss": 0.6567, "step": 8414 }, { "epoch": 9.584615384615384, "grad_norm": 0.18470345437526703, "learning_rate": 2.8333008982048027e-05, "loss": 0.8081, "step": 8415 }, { "epoch": 9.585754985754985, "grad_norm": 0.20250608026981354, "learning_rate": 2.8328391335592973e-05, "loss": 0.7836, "step": 8416 }, { "epoch": 9.586894586894587, "grad_norm": 0.23904845118522644, "learning_rate": 2.832377357353374e-05, "loss": 0.6268, "step": 8417 }, { "epoch": 9.588034188034188, "grad_norm": 0.20193591713905334, "learning_rate": 2.831915569603071e-05, "loss": 0.7641, "step": 8418 }, { "epoch": 9.589173789173788, "grad_norm": 0.18129391968250275, "learning_rate": 2.8314537703244265e-05, "loss": 0.7474, "step": 8419 }, { "epoch": 9.59031339031339, "grad_norm": 0.2015620619058609, "learning_rate": 2.830991959533482e-05, "loss": 0.6556, "step": 8420 }, { "epoch": 9.591452991452991, "grad_norm": 0.19163182377815247, "learning_rate": 2.830530137246276e-05, "loss": 0.8212, "step": 8421 }, { "epoch": 9.592592592592592, "grad_norm": 0.2748200297355652, "learning_rate": 2.83006830347885e-05, "loss": 0.5025, "step": 8422 }, { "epoch": 9.593732193732194, "grad_norm": 0.2064012885093689, "learning_rate": 2.829606458247243e-05, "loss": 0.8288, "step": 8423 }, { "epoch": 9.594871794871795, "grad_norm": 0.18211974203586578, "learning_rate": 2.829144601567499e-05, "loss": 0.8086, "step": 8424 }, { "epoch": 9.596011396011395, "grad_norm": 0.20421072840690613, "learning_rate": 2.8286827334556566e-05, "loss": 0.7935, "step": 8425 }, { "epoch": 9.597150997150997, "grad_norm": 0.15879186987876892, "learning_rate": 2.8282208539277594e-05, "loss": 0.8622, "step": 8426 }, { "epoch": 9.598290598290598, "grad_norm": 0.23644597828388214, "learning_rate": 2.827758962999849e-05, "loss": 0.6415, "step": 8427 }, { "epoch": 9.5994301994302, "grad_norm": 0.19257822632789612, "learning_rate": 2.827297060687969e-05, "loss": 0.7557, "step": 8428 }, { "epoch": 9.6005698005698, "grad_norm": 0.24540145695209503, "learning_rate": 2.826835147008161e-05, "loss": 0.6768, "step": 8429 }, { "epoch": 9.601709401709401, "grad_norm": 0.21854540705680847, "learning_rate": 2.8263732219764703e-05, "loss": 0.8531, "step": 8430 }, { "epoch": 9.602849002849004, "grad_norm": 0.196981742978096, "learning_rate": 2.8259112856089405e-05, "loss": 0.6341, "step": 8431 }, { "epoch": 9.603988603988604, "grad_norm": 0.21874146163463593, "learning_rate": 2.8254493379216152e-05, "loss": 0.7642, "step": 8432 }, { "epoch": 9.605128205128205, "grad_norm": 0.15934787690639496, "learning_rate": 2.8249873789305394e-05, "loss": 0.6682, "step": 8433 }, { "epoch": 9.606267806267807, "grad_norm": 0.2906241714954376, "learning_rate": 2.8245254086517575e-05, "loss": 0.5481, "step": 8434 }, { "epoch": 9.607407407407408, "grad_norm": 0.21550823748111725, "learning_rate": 2.8240634271013167e-05, "loss": 0.6166, "step": 8435 }, { "epoch": 9.608547008547008, "grad_norm": 0.21351531147956848, "learning_rate": 2.823601434295261e-05, "loss": 0.6378, "step": 8436 }, { "epoch": 9.60968660968661, "grad_norm": 0.20039942860603333, "learning_rate": 2.8231394302496383e-05, "loss": 0.8, "step": 8437 }, { "epoch": 9.610826210826211, "grad_norm": 0.17776717245578766, "learning_rate": 2.822677414980494e-05, "loss": 0.933, "step": 8438 }, { "epoch": 9.611965811965812, "grad_norm": 0.25148046016693115, "learning_rate": 2.8222153885038767e-05, "loss": 0.4991, "step": 8439 }, { "epoch": 9.613105413105414, "grad_norm": 0.20714183151721954, "learning_rate": 2.821753350835831e-05, "loss": 0.7522, "step": 8440 }, { "epoch": 9.614245014245014, "grad_norm": 0.18635442852973938, "learning_rate": 2.8212913019924086e-05, "loss": 0.7363, "step": 8441 }, { "epoch": 9.615384615384615, "grad_norm": 0.2570042610168457, "learning_rate": 2.8208292419896543e-05, "loss": 0.4784, "step": 8442 }, { "epoch": 9.616524216524217, "grad_norm": 0.2306656539440155, "learning_rate": 2.820367170843619e-05, "loss": 0.6339, "step": 8443 }, { "epoch": 9.617663817663818, "grad_norm": 0.22380559146404266, "learning_rate": 2.8199050885703504e-05, "loss": 0.5676, "step": 8444 }, { "epoch": 9.618803418803418, "grad_norm": 0.2236308753490448, "learning_rate": 2.8194429951858985e-05, "loss": 0.7608, "step": 8445 }, { "epoch": 9.61994301994302, "grad_norm": 0.21724647283554077, "learning_rate": 2.8189808907063132e-05, "loss": 0.6835, "step": 8446 }, { "epoch": 9.621082621082621, "grad_norm": 0.21728353202342987, "learning_rate": 2.818518775147644e-05, "loss": 0.7744, "step": 8447 }, { "epoch": 9.622222222222222, "grad_norm": 0.2925417125225067, "learning_rate": 2.8180566485259425e-05, "loss": 0.6382, "step": 8448 }, { "epoch": 9.623361823361824, "grad_norm": 0.157082661986351, "learning_rate": 2.8175945108572577e-05, "loss": 0.8076, "step": 8449 }, { "epoch": 9.624501424501425, "grad_norm": 0.2292805165052414, "learning_rate": 2.8171323621576428e-05, "loss": 0.6967, "step": 8450 }, { "epoch": 9.625641025641025, "grad_norm": 0.2331184446811676, "learning_rate": 2.8166702024431484e-05, "loss": 0.6488, "step": 8451 }, { "epoch": 9.626780626780628, "grad_norm": 0.20063607394695282, "learning_rate": 2.816208031729828e-05, "loss": 0.5961, "step": 8452 }, { "epoch": 9.627920227920228, "grad_norm": 0.2293260097503662, "learning_rate": 2.8157458500337318e-05, "loss": 0.6886, "step": 8453 }, { "epoch": 9.629059829059829, "grad_norm": 0.22276504337787628, "learning_rate": 2.8152836573709152e-05, "loss": 0.6444, "step": 8454 }, { "epoch": 9.630199430199431, "grad_norm": 0.25844740867614746, "learning_rate": 2.8148214537574296e-05, "loss": 0.6206, "step": 8455 }, { "epoch": 9.631339031339031, "grad_norm": 0.2520429790019989, "learning_rate": 2.8143592392093287e-05, "loss": 0.4121, "step": 8456 }, { "epoch": 9.632478632478632, "grad_norm": 0.2038751244544983, "learning_rate": 2.8138970137426675e-05, "loss": 0.7226, "step": 8457 }, { "epoch": 9.633618233618234, "grad_norm": 0.2728098928928375, "learning_rate": 2.8134347773734996e-05, "loss": 0.5881, "step": 8458 }, { "epoch": 9.634757834757835, "grad_norm": 0.1946948915719986, "learning_rate": 2.81297253011788e-05, "loss": 0.9104, "step": 8459 }, { "epoch": 9.635897435897435, "grad_norm": 0.18152308464050293, "learning_rate": 2.812510271991864e-05, "loss": 0.655, "step": 8460 }, { "epoch": 9.637037037037038, "grad_norm": 0.17640575766563416, "learning_rate": 2.8120480030115065e-05, "loss": 0.7666, "step": 8461 }, { "epoch": 9.638176638176638, "grad_norm": 0.2305549532175064, "learning_rate": 2.8115857231928638e-05, "loss": 0.6757, "step": 8462 }, { "epoch": 9.639316239316239, "grad_norm": 0.28918159008026123, "learning_rate": 2.8111234325519924e-05, "loss": 0.4352, "step": 8463 }, { "epoch": 9.640455840455841, "grad_norm": 0.19419384002685547, "learning_rate": 2.810661131104948e-05, "loss": 0.7367, "step": 8464 }, { "epoch": 9.641595441595442, "grad_norm": 0.18359477818012238, "learning_rate": 2.8101988188677887e-05, "loss": 0.7335, "step": 8465 }, { "epoch": 9.642735042735042, "grad_norm": 0.27075687050819397, "learning_rate": 2.8097364958565707e-05, "loss": 0.4621, "step": 8466 }, { "epoch": 9.643874643874645, "grad_norm": 0.15775761008262634, "learning_rate": 2.809274162087353e-05, "loss": 0.8219, "step": 8467 }, { "epoch": 9.645014245014245, "grad_norm": 0.267281174659729, "learning_rate": 2.808811817576193e-05, "loss": 0.55, "step": 8468 }, { "epoch": 9.646153846153846, "grad_norm": 0.2001519352197647, "learning_rate": 2.8083494623391492e-05, "loss": 0.7807, "step": 8469 }, { "epoch": 9.647293447293448, "grad_norm": 0.215030699968338, "learning_rate": 2.807887096392281e-05, "loss": 0.7114, "step": 8470 }, { "epoch": 9.648433048433048, "grad_norm": 0.26007959246635437, "learning_rate": 2.8074247197516474e-05, "loss": 0.5199, "step": 8471 }, { "epoch": 9.649572649572649, "grad_norm": 0.2286146730184555, "learning_rate": 2.8069623324333072e-05, "loss": 0.6747, "step": 8472 }, { "epoch": 9.650712250712251, "grad_norm": 0.24231044948101044, "learning_rate": 2.806499934453321e-05, "loss": 0.6583, "step": 8473 }, { "epoch": 9.651851851851852, "grad_norm": 0.22648459672927856, "learning_rate": 2.8060375258277494e-05, "loss": 0.5948, "step": 8474 }, { "epoch": 9.652991452991452, "grad_norm": 0.21289721131324768, "learning_rate": 2.805575106572653e-05, "loss": 0.7268, "step": 8475 }, { "epoch": 9.654131054131055, "grad_norm": 0.2718423902988434, "learning_rate": 2.8051126767040932e-05, "loss": 0.6427, "step": 8476 }, { "epoch": 9.655270655270655, "grad_norm": 0.19400310516357422, "learning_rate": 2.8046502362381304e-05, "loss": 0.8316, "step": 8477 }, { "epoch": 9.656410256410256, "grad_norm": 0.1917850524187088, "learning_rate": 2.8041877851908276e-05, "loss": 0.7485, "step": 8478 }, { "epoch": 9.657549857549858, "grad_norm": 0.14941884577274323, "learning_rate": 2.803725323578246e-05, "loss": 0.9673, "step": 8479 }, { "epoch": 9.658689458689459, "grad_norm": 0.19698305428028107, "learning_rate": 2.8032628514164493e-05, "loss": 0.6372, "step": 8480 }, { "epoch": 9.65982905982906, "grad_norm": 0.23960430920124054, "learning_rate": 2.8028003687214993e-05, "loss": 0.7234, "step": 8481 }, { "epoch": 9.660968660968662, "grad_norm": 0.22642646729946136, "learning_rate": 2.8023378755094605e-05, "loss": 0.6822, "step": 8482 }, { "epoch": 9.662108262108262, "grad_norm": 0.16527873277664185, "learning_rate": 2.8018753717963952e-05, "loss": 0.826, "step": 8483 }, { "epoch": 9.663247863247863, "grad_norm": 0.1699703484773636, "learning_rate": 2.8014128575983678e-05, "loss": 0.9285, "step": 8484 }, { "epoch": 9.664387464387465, "grad_norm": 0.2637763023376465, "learning_rate": 2.8009503329314428e-05, "loss": 0.617, "step": 8485 }, { "epoch": 9.665527065527066, "grad_norm": 0.2114337682723999, "learning_rate": 2.800487797811686e-05, "loss": 0.7386, "step": 8486 }, { "epoch": 9.666666666666666, "grad_norm": 0.20547452569007874, "learning_rate": 2.800025252255162e-05, "loss": 0.7259, "step": 8487 }, { "epoch": 9.667806267806268, "grad_norm": 0.1680016666650772, "learning_rate": 2.7995626962779354e-05, "loss": 0.9245, "step": 8488 }, { "epoch": 9.668945868945869, "grad_norm": 0.2023734748363495, "learning_rate": 2.799100129896072e-05, "loss": 0.8274, "step": 8489 }, { "epoch": 9.67008547008547, "grad_norm": 0.2510012984275818, "learning_rate": 2.7986375531256386e-05, "loss": 0.8166, "step": 8490 }, { "epoch": 9.671225071225072, "grad_norm": 0.21623970568180084, "learning_rate": 2.798174965982702e-05, "loss": 0.8594, "step": 8491 }, { "epoch": 9.672364672364672, "grad_norm": 0.21272176504135132, "learning_rate": 2.797712368483329e-05, "loss": 0.7567, "step": 8492 }, { "epoch": 9.673504273504273, "grad_norm": 0.18072400987148285, "learning_rate": 2.7972497606435866e-05, "loss": 0.8329, "step": 8493 }, { "epoch": 9.674643874643875, "grad_norm": 0.21961405873298645, "learning_rate": 2.7967871424795433e-05, "loss": 0.6713, "step": 8494 }, { "epoch": 9.675783475783476, "grad_norm": 0.20609161257743835, "learning_rate": 2.7963245140072653e-05, "loss": 0.779, "step": 8495 }, { "epoch": 9.676923076923076, "grad_norm": 0.25514522194862366, "learning_rate": 2.7958618752428223e-05, "loss": 0.432, "step": 8496 }, { "epoch": 9.678062678062679, "grad_norm": 0.17109011113643646, "learning_rate": 2.7953992262022834e-05, "loss": 0.8166, "step": 8497 }, { "epoch": 9.67920227920228, "grad_norm": 0.21235273778438568, "learning_rate": 2.7949365669017157e-05, "loss": 0.6714, "step": 8498 }, { "epoch": 9.68034188034188, "grad_norm": 0.23909524083137512, "learning_rate": 2.7944738973571915e-05, "loss": 0.5226, "step": 8499 }, { "epoch": 9.681481481481482, "grad_norm": 0.24783433973789215, "learning_rate": 2.7940112175847784e-05, "loss": 0.4658, "step": 8500 }, { "epoch": 9.682621082621083, "grad_norm": 0.22748740017414093, "learning_rate": 2.793548527600546e-05, "loss": 0.5848, "step": 8501 }, { "epoch": 9.683760683760683, "grad_norm": 0.19321490824222565, "learning_rate": 2.7930858274205672e-05, "loss": 0.778, "step": 8502 }, { "epoch": 9.684900284900285, "grad_norm": 0.22267040610313416, "learning_rate": 2.7926231170609117e-05, "loss": 0.5523, "step": 8503 }, { "epoch": 9.686039886039886, "grad_norm": 0.20147395133972168, "learning_rate": 2.7921603965376503e-05, "loss": 0.7143, "step": 8504 }, { "epoch": 9.687179487179487, "grad_norm": 0.2002495974302292, "learning_rate": 2.7916976658668552e-05, "loss": 0.5548, "step": 8505 }, { "epoch": 9.688319088319089, "grad_norm": 0.23312561213970184, "learning_rate": 2.7912349250645975e-05, "loss": 0.7927, "step": 8506 }, { "epoch": 9.68945868945869, "grad_norm": 0.16021563112735748, "learning_rate": 2.7907721741469506e-05, "loss": 0.6837, "step": 8507 }, { "epoch": 9.69059829059829, "grad_norm": 0.1961434930562973, "learning_rate": 2.7903094131299855e-05, "loss": 0.7884, "step": 8508 }, { "epoch": 9.691737891737892, "grad_norm": 0.19885706901550293, "learning_rate": 2.7898466420297777e-05, "loss": 0.7048, "step": 8509 }, { "epoch": 9.692877492877493, "grad_norm": 0.2357889711856842, "learning_rate": 2.7893838608623972e-05, "loss": 0.55, "step": 8510 }, { "epoch": 9.694017094017093, "grad_norm": 0.23352935910224915, "learning_rate": 2.7889210696439205e-05, "loss": 0.7906, "step": 8511 }, { "epoch": 9.695156695156696, "grad_norm": 0.18605369329452515, "learning_rate": 2.7884582683904205e-05, "loss": 0.9088, "step": 8512 }, { "epoch": 9.696296296296296, "grad_norm": 0.1729101985692978, "learning_rate": 2.7879954571179705e-05, "loss": 0.6314, "step": 8513 }, { "epoch": 9.697435897435897, "grad_norm": 0.2530671954154968, "learning_rate": 2.787532635842648e-05, "loss": 0.6066, "step": 8514 }, { "epoch": 9.698575498575499, "grad_norm": 0.23992305994033813, "learning_rate": 2.787069804580525e-05, "loss": 0.6046, "step": 8515 }, { "epoch": 9.6997150997151, "grad_norm": 0.1798892617225647, "learning_rate": 2.7866069633476787e-05, "loss": 0.7873, "step": 8516 }, { "epoch": 9.7008547008547, "grad_norm": 0.21981604397296906, "learning_rate": 2.7861441121601838e-05, "loss": 0.5761, "step": 8517 }, { "epoch": 9.701994301994302, "grad_norm": 0.1948310285806656, "learning_rate": 2.785681251034118e-05, "loss": 0.759, "step": 8518 }, { "epoch": 9.703133903133903, "grad_norm": 0.21624235808849335, "learning_rate": 2.7852183799855558e-05, "loss": 0.6875, "step": 8519 }, { "epoch": 9.704273504273504, "grad_norm": 0.23673588037490845, "learning_rate": 2.7847554990305756e-05, "loss": 0.8201, "step": 8520 }, { "epoch": 9.705413105413106, "grad_norm": 0.20163588225841522, "learning_rate": 2.7842926081852533e-05, "loss": 0.7495, "step": 8521 }, { "epoch": 9.706552706552706, "grad_norm": 0.18318887054920197, "learning_rate": 2.7838297074656677e-05, "loss": 0.924, "step": 8522 }, { "epoch": 9.707692307692307, "grad_norm": 0.2320988029241562, "learning_rate": 2.7833667968878952e-05, "loss": 0.6304, "step": 8523 }, { "epoch": 9.70883190883191, "grad_norm": 0.21639958024024963, "learning_rate": 2.7829038764680143e-05, "loss": 0.6043, "step": 8524 }, { "epoch": 9.70997150997151, "grad_norm": 0.21211375296115875, "learning_rate": 2.7824409462221047e-05, "loss": 0.8569, "step": 8525 }, { "epoch": 9.71111111111111, "grad_norm": 0.20413239300251007, "learning_rate": 2.7819780061662436e-05, "loss": 0.7475, "step": 8526 }, { "epoch": 9.712250712250713, "grad_norm": 0.2298765927553177, "learning_rate": 2.7815150563165116e-05, "loss": 0.6669, "step": 8527 }, { "epoch": 9.713390313390313, "grad_norm": 0.19131982326507568, "learning_rate": 2.7810520966889863e-05, "loss": 0.6668, "step": 8528 }, { "epoch": 9.714529914529914, "grad_norm": 0.2224835306406021, "learning_rate": 2.78058912729975e-05, "loss": 0.8234, "step": 8529 }, { "epoch": 9.715669515669516, "grad_norm": 0.17884741723537445, "learning_rate": 2.7801261481648807e-05, "loss": 0.7831, "step": 8530 }, { "epoch": 9.716809116809117, "grad_norm": 0.22266241908073425, "learning_rate": 2.7796631593004608e-05, "loss": 0.6715, "step": 8531 }, { "epoch": 9.717948717948717, "grad_norm": 0.2213110327720642, "learning_rate": 2.77920016072257e-05, "loss": 0.7549, "step": 8532 }, { "epoch": 9.71908831908832, "grad_norm": 0.21489235758781433, "learning_rate": 2.77873715244729e-05, "loss": 0.7453, "step": 8533 }, { "epoch": 9.72022792022792, "grad_norm": 0.2805543839931488, "learning_rate": 2.7782741344907008e-05, "loss": 0.5758, "step": 8534 }, { "epoch": 9.72136752136752, "grad_norm": 0.20469795167446136, "learning_rate": 2.777811106868887e-05, "loss": 0.7528, "step": 8535 }, { "epoch": 9.722507122507123, "grad_norm": 0.21546640992164612, "learning_rate": 2.777348069597929e-05, "loss": 0.8487, "step": 8536 }, { "epoch": 9.723646723646723, "grad_norm": 0.2270408421754837, "learning_rate": 2.7768850226939108e-05, "loss": 0.6702, "step": 8537 }, { "epoch": 9.724786324786324, "grad_norm": 0.20566929876804352, "learning_rate": 2.7764219661729135e-05, "loss": 0.7752, "step": 8538 }, { "epoch": 9.725925925925926, "grad_norm": 0.21666070818901062, "learning_rate": 2.775958900051021e-05, "loss": 0.6814, "step": 8539 }, { "epoch": 9.727065527065527, "grad_norm": 0.19858340919017792, "learning_rate": 2.7754958243443174e-05, "loss": 0.5329, "step": 8540 }, { "epoch": 9.728205128205127, "grad_norm": 0.18944010138511658, "learning_rate": 2.7750327390688856e-05, "loss": 0.8507, "step": 8541 }, { "epoch": 9.72934472934473, "grad_norm": 0.21177950501441956, "learning_rate": 2.7745696442408114e-05, "loss": 0.7086, "step": 8542 }, { "epoch": 9.73048433048433, "grad_norm": 0.19310303032398224, "learning_rate": 2.774106539876178e-05, "loss": 0.8071, "step": 8543 }, { "epoch": 9.73162393162393, "grad_norm": 0.2265753597021103, "learning_rate": 2.773643425991071e-05, "loss": 0.7076, "step": 8544 }, { "epoch": 9.732763532763533, "grad_norm": 0.25541719794273376, "learning_rate": 2.773180302601574e-05, "loss": 0.5428, "step": 8545 }, { "epoch": 9.733903133903134, "grad_norm": 0.25014615058898926, "learning_rate": 2.7727171697237747e-05, "loss": 0.5156, "step": 8546 }, { "epoch": 9.735042735042736, "grad_norm": 0.24546726047992706, "learning_rate": 2.772254027373758e-05, "loss": 0.582, "step": 8547 }, { "epoch": 9.736182336182337, "grad_norm": 0.20591729879379272, "learning_rate": 2.771790875567611e-05, "loss": 0.8366, "step": 8548 }, { "epoch": 9.737321937321937, "grad_norm": 0.20504891872406006, "learning_rate": 2.7713277143214184e-05, "loss": 0.7317, "step": 8549 }, { "epoch": 9.73846153846154, "grad_norm": 0.21834293007850647, "learning_rate": 2.770864543651268e-05, "loss": 0.6696, "step": 8550 }, { "epoch": 9.73960113960114, "grad_norm": 0.1837180107831955, "learning_rate": 2.7704013635732477e-05, "loss": 0.7439, "step": 8551 }, { "epoch": 9.74074074074074, "grad_norm": 0.1709396094083786, "learning_rate": 2.7699381741034447e-05, "loss": 0.8316, "step": 8552 }, { "epoch": 9.741880341880343, "grad_norm": 0.22776567935943604, "learning_rate": 2.769474975257946e-05, "loss": 0.6377, "step": 8553 }, { "epoch": 9.743019943019943, "grad_norm": 0.18397639691829681, "learning_rate": 2.769011767052841e-05, "loss": 0.7457, "step": 8554 }, { "epoch": 9.744159544159544, "grad_norm": 0.2865777015686035, "learning_rate": 2.768548549504217e-05, "loss": 0.6397, "step": 8555 }, { "epoch": 9.745299145299146, "grad_norm": 0.19933927059173584, "learning_rate": 2.7680853226281632e-05, "loss": 0.8608, "step": 8556 }, { "epoch": 9.746438746438747, "grad_norm": 0.2017425298690796, "learning_rate": 2.76762208644077e-05, "loss": 0.7348, "step": 8557 }, { "epoch": 9.747578347578347, "grad_norm": 0.21706503629684448, "learning_rate": 2.7671588409581245e-05, "loss": 0.7129, "step": 8558 }, { "epoch": 9.74871794871795, "grad_norm": 0.20259225368499756, "learning_rate": 2.7666955861963185e-05, "loss": 0.6993, "step": 8559 }, { "epoch": 9.74985754985755, "grad_norm": 0.18231546878814697, "learning_rate": 2.766232322171441e-05, "loss": 0.7069, "step": 8560 }, { "epoch": 9.75099715099715, "grad_norm": 0.24575591087341309, "learning_rate": 2.765769048899583e-05, "loss": 0.6927, "step": 8561 }, { "epoch": 9.752136752136753, "grad_norm": 0.18512912094593048, "learning_rate": 2.7653057663968345e-05, "loss": 0.842, "step": 8562 }, { "epoch": 9.753276353276354, "grad_norm": 0.20622968673706055, "learning_rate": 2.7648424746792883e-05, "loss": 0.5028, "step": 8563 }, { "epoch": 9.754415954415954, "grad_norm": 0.20363913476467133, "learning_rate": 2.7643791737630338e-05, "loss": 0.7849, "step": 8564 }, { "epoch": 9.755555555555556, "grad_norm": 0.1941133737564087, "learning_rate": 2.763915863664164e-05, "loss": 0.7551, "step": 8565 }, { "epoch": 9.756695156695157, "grad_norm": 0.24222709238529205, "learning_rate": 2.7634525443987698e-05, "loss": 0.6559, "step": 8566 }, { "epoch": 9.757834757834758, "grad_norm": 0.2114405781030655, "learning_rate": 2.7629892159829445e-05, "loss": 0.6597, "step": 8567 }, { "epoch": 9.75897435897436, "grad_norm": 0.2961379587650299, "learning_rate": 2.7625258784327806e-05, "loss": 0.6928, "step": 8568 }, { "epoch": 9.76011396011396, "grad_norm": 0.19896571338176727, "learning_rate": 2.762062531764371e-05, "loss": 0.6149, "step": 8569 }, { "epoch": 9.761253561253561, "grad_norm": 0.2416018843650818, "learning_rate": 2.7615991759938092e-05, "loss": 0.518, "step": 8570 }, { "epoch": 9.762393162393163, "grad_norm": 0.22412768006324768, "learning_rate": 2.7611358111371886e-05, "loss": 0.6632, "step": 8571 }, { "epoch": 9.763532763532764, "grad_norm": 0.23438774049282074, "learning_rate": 2.760672437210603e-05, "loss": 0.6062, "step": 8572 }, { "epoch": 9.764672364672364, "grad_norm": 0.2205171287059784, "learning_rate": 2.7602090542301468e-05, "loss": 0.6381, "step": 8573 }, { "epoch": 9.765811965811967, "grad_norm": 0.22630876302719116, "learning_rate": 2.759745662211915e-05, "loss": 0.7963, "step": 8574 }, { "epoch": 9.766951566951567, "grad_norm": 0.23154351115226746, "learning_rate": 2.7592822611720016e-05, "loss": 0.5376, "step": 8575 }, { "epoch": 9.768091168091168, "grad_norm": 0.21721205115318298, "learning_rate": 2.7588188511265023e-05, "loss": 0.5719, "step": 8576 }, { "epoch": 9.76923076923077, "grad_norm": 0.22528202831745148, "learning_rate": 2.7583554320915124e-05, "loss": 0.5727, "step": 8577 }, { "epoch": 9.77037037037037, "grad_norm": 0.22177764773368835, "learning_rate": 2.7578920040831273e-05, "loss": 0.7164, "step": 8578 }, { "epoch": 9.771509971509971, "grad_norm": 0.2995389997959137, "learning_rate": 2.7574285671174444e-05, "loss": 0.5085, "step": 8579 }, { "epoch": 9.772649572649573, "grad_norm": 0.1767067164182663, "learning_rate": 2.7569651212105602e-05, "loss": 0.8467, "step": 8580 }, { "epoch": 9.773789173789174, "grad_norm": 0.2124761939048767, "learning_rate": 2.7565016663785692e-05, "loss": 0.7081, "step": 8581 }, { "epoch": 9.774928774928775, "grad_norm": 0.2243443727493286, "learning_rate": 2.7560382026375714e-05, "loss": 0.5879, "step": 8582 }, { "epoch": 9.776068376068377, "grad_norm": 0.2404758781194687, "learning_rate": 2.7555747300036617e-05, "loss": 0.7576, "step": 8583 }, { "epoch": 9.777207977207977, "grad_norm": 0.2202008217573166, "learning_rate": 2.7551112484929385e-05, "loss": 0.7425, "step": 8584 }, { "epoch": 9.778347578347578, "grad_norm": 0.20645569264888763, "learning_rate": 2.7546477581215006e-05, "loss": 0.7184, "step": 8585 }, { "epoch": 9.77948717948718, "grad_norm": 0.174381285905838, "learning_rate": 2.7541842589054463e-05, "loss": 0.8345, "step": 8586 }, { "epoch": 9.78062678062678, "grad_norm": 0.20073771476745605, "learning_rate": 2.753720750860873e-05, "loss": 0.783, "step": 8587 }, { "epoch": 9.781766381766381, "grad_norm": 0.20477743446826935, "learning_rate": 2.753257234003881e-05, "loss": 0.8477, "step": 8588 }, { "epoch": 9.782905982905984, "grad_norm": 0.2629711925983429, "learning_rate": 2.7527937083505677e-05, "loss": 0.7077, "step": 8589 }, { "epoch": 9.784045584045584, "grad_norm": 0.16403615474700928, "learning_rate": 2.7523301739170343e-05, "loss": 1.0227, "step": 8590 }, { "epoch": 9.785185185185185, "grad_norm": 0.23415862023830414, "learning_rate": 2.7518666307193803e-05, "loss": 0.7995, "step": 8591 }, { "epoch": 9.786324786324787, "grad_norm": 0.22708284854888916, "learning_rate": 2.751403078773705e-05, "loss": 0.6878, "step": 8592 }, { "epoch": 9.787464387464388, "grad_norm": 0.2390306442975998, "learning_rate": 2.75093951809611e-05, "loss": 0.5568, "step": 8593 }, { "epoch": 9.788603988603988, "grad_norm": 0.19914384186267853, "learning_rate": 2.750475948702695e-05, "loss": 0.7419, "step": 8594 }, { "epoch": 9.78974358974359, "grad_norm": 0.2082630842924118, "learning_rate": 2.7500123706095614e-05, "loss": 0.8309, "step": 8595 }, { "epoch": 9.790883190883191, "grad_norm": 0.237423375248909, "learning_rate": 2.749548783832811e-05, "loss": 0.6335, "step": 8596 }, { "epoch": 9.792022792022792, "grad_norm": 0.24349914491176605, "learning_rate": 2.7490851883885454e-05, "loss": 0.6496, "step": 8597 }, { "epoch": 9.793162393162394, "grad_norm": 0.22968685626983643, "learning_rate": 2.7486215842928658e-05, "loss": 0.6947, "step": 8598 }, { "epoch": 9.794301994301994, "grad_norm": 0.20698566734790802, "learning_rate": 2.748157971561875e-05, "loss": 0.7617, "step": 8599 }, { "epoch": 9.795441595441595, "grad_norm": 0.203297957777977, "learning_rate": 2.7476943502116752e-05, "loss": 0.6874, "step": 8600 }, { "epoch": 9.796581196581197, "grad_norm": 0.2980722486972809, "learning_rate": 2.747230720258369e-05, "loss": 0.6644, "step": 8601 }, { "epoch": 9.797720797720798, "grad_norm": 0.2615736722946167, "learning_rate": 2.7467670817180612e-05, "loss": 0.5958, "step": 8602 }, { "epoch": 9.798860398860398, "grad_norm": 0.25255948305130005, "learning_rate": 2.7463034346068532e-05, "loss": 0.4842, "step": 8603 }, { "epoch": 9.8, "grad_norm": 0.20887507498264313, "learning_rate": 2.7458397789408503e-05, "loss": 0.8247, "step": 8604 }, { "epoch": 9.801139601139601, "grad_norm": 0.19382621347904205, "learning_rate": 2.7453761147361552e-05, "loss": 0.8055, "step": 8605 }, { "epoch": 9.802279202279202, "grad_norm": 0.20952007174491882, "learning_rate": 2.7449124420088725e-05, "loss": 0.8727, "step": 8606 }, { "epoch": 9.803418803418804, "grad_norm": 0.20277926325798035, "learning_rate": 2.7444487607751075e-05, "loss": 0.7493, "step": 8607 }, { "epoch": 9.804558404558405, "grad_norm": 0.23700590431690216, "learning_rate": 2.7439850710509652e-05, "loss": 0.5966, "step": 8608 }, { "epoch": 9.805698005698005, "grad_norm": 0.1902497410774231, "learning_rate": 2.74352137285255e-05, "loss": 0.8184, "step": 8609 }, { "epoch": 9.806837606837608, "grad_norm": 0.25543636083602905, "learning_rate": 2.743057666195968e-05, "loss": 0.5711, "step": 8610 }, { "epoch": 9.807977207977208, "grad_norm": 0.21522241830825806, "learning_rate": 2.7425939510973243e-05, "loss": 0.7143, "step": 8611 }, { "epoch": 9.809116809116809, "grad_norm": 0.1974640190601349, "learning_rate": 2.7421302275727263e-05, "loss": 0.805, "step": 8612 }, { "epoch": 9.810256410256411, "grad_norm": 0.22378608584403992, "learning_rate": 2.7416664956382793e-05, "loss": 0.6073, "step": 8613 }, { "epoch": 9.811396011396011, "grad_norm": 0.21204134821891785, "learning_rate": 2.7412027553100905e-05, "loss": 0.6566, "step": 8614 }, { "epoch": 9.812535612535612, "grad_norm": 0.2479940950870514, "learning_rate": 2.7407390066042665e-05, "loss": 0.6049, "step": 8615 }, { "epoch": 9.813675213675214, "grad_norm": 0.20882539451122284, "learning_rate": 2.740275249536915e-05, "loss": 0.7491, "step": 8616 }, { "epoch": 9.814814814814815, "grad_norm": 0.2341488152742386, "learning_rate": 2.739811484124143e-05, "loss": 0.7128, "step": 8617 }, { "epoch": 9.815954415954415, "grad_norm": 0.208388552069664, "learning_rate": 2.7393477103820586e-05, "loss": 0.7572, "step": 8618 }, { "epoch": 9.817094017094018, "grad_norm": 0.19041959941387177, "learning_rate": 2.738883928326771e-05, "loss": 0.7578, "step": 8619 }, { "epoch": 9.818233618233618, "grad_norm": 0.20121407508850098, "learning_rate": 2.7384201379743868e-05, "loss": 0.7521, "step": 8620 }, { "epoch": 9.819373219373219, "grad_norm": 0.20214299857616425, "learning_rate": 2.7379563393410158e-05, "loss": 0.836, "step": 8621 }, { "epoch": 9.820512820512821, "grad_norm": 0.18229928612709045, "learning_rate": 2.737492532442766e-05, "loss": 0.8039, "step": 8622 }, { "epoch": 9.821652421652422, "grad_norm": 0.23522637784481049, "learning_rate": 2.7370287172957483e-05, "loss": 0.763, "step": 8623 }, { "epoch": 9.822792022792022, "grad_norm": 0.21190528571605682, "learning_rate": 2.736564893916071e-05, "loss": 0.6686, "step": 8624 }, { "epoch": 9.823931623931625, "grad_norm": 0.21622231602668762, "learning_rate": 2.7361010623198445e-05, "loss": 0.6791, "step": 8625 }, { "epoch": 9.825071225071225, "grad_norm": 0.18883854150772095, "learning_rate": 2.735637222523179e-05, "loss": 0.7064, "step": 8626 }, { "epoch": 9.826210826210826, "grad_norm": 0.2039935290813446, "learning_rate": 2.735173374542185e-05, "loss": 0.7518, "step": 8627 }, { "epoch": 9.827350427350428, "grad_norm": 0.19084110856056213, "learning_rate": 2.7347095183929716e-05, "loss": 0.8417, "step": 8628 }, { "epoch": 9.828490028490029, "grad_norm": 0.17323356866836548, "learning_rate": 2.734245654091653e-05, "loss": 0.9049, "step": 8629 }, { "epoch": 9.829629629629629, "grad_norm": 0.2370297610759735, "learning_rate": 2.733781781654337e-05, "loss": 0.6323, "step": 8630 }, { "epoch": 9.830769230769231, "grad_norm": 0.22816629707813263, "learning_rate": 2.733317901097138e-05, "loss": 0.5564, "step": 8631 }, { "epoch": 9.831908831908832, "grad_norm": 0.2457447350025177, "learning_rate": 2.7328540124361662e-05, "loss": 0.6702, "step": 8632 }, { "epoch": 9.833048433048432, "grad_norm": 0.21235868334770203, "learning_rate": 2.732390115687534e-05, "loss": 0.7327, "step": 8633 }, { "epoch": 9.834188034188035, "grad_norm": 0.2475348562002182, "learning_rate": 2.731926210867355e-05, "loss": 0.6758, "step": 8634 }, { "epoch": 9.835327635327635, "grad_norm": 0.20760078728199005, "learning_rate": 2.7314622979917398e-05, "loss": 0.6831, "step": 8635 }, { "epoch": 9.836467236467236, "grad_norm": 0.1691773384809494, "learning_rate": 2.7309983770768033e-05, "loss": 1.002, "step": 8636 }, { "epoch": 9.837606837606838, "grad_norm": 0.1824226826429367, "learning_rate": 2.7305344481386576e-05, "loss": 0.7868, "step": 8637 }, { "epoch": 9.838746438746439, "grad_norm": 0.19655545055866241, "learning_rate": 2.7300705111934165e-05, "loss": 0.7432, "step": 8638 }, { "epoch": 9.83988603988604, "grad_norm": 0.17179349064826965, "learning_rate": 2.7296065662571935e-05, "loss": 0.792, "step": 8639 }, { "epoch": 9.841025641025642, "grad_norm": 0.273364394903183, "learning_rate": 2.7291426133461034e-05, "loss": 0.6771, "step": 8640 }, { "epoch": 9.842165242165242, "grad_norm": 0.20707401633262634, "learning_rate": 2.72867865247626e-05, "loss": 0.8326, "step": 8641 }, { "epoch": 9.843304843304843, "grad_norm": 0.22714680433273315, "learning_rate": 2.7282146836637783e-05, "loss": 0.6631, "step": 8642 }, { "epoch": 9.844444444444445, "grad_norm": 0.204382985830307, "learning_rate": 2.7277507069247732e-05, "loss": 0.5877, "step": 8643 }, { "epoch": 9.845584045584046, "grad_norm": 0.22668123245239258, "learning_rate": 2.727286722275359e-05, "loss": 0.5728, "step": 8644 }, { "epoch": 9.846723646723646, "grad_norm": 0.2232021540403366, "learning_rate": 2.7268227297316525e-05, "loss": 0.628, "step": 8645 }, { "epoch": 9.847863247863248, "grad_norm": 0.22668619453907013, "learning_rate": 2.726358729309769e-05, "loss": 0.6794, "step": 8646 }, { "epoch": 9.849002849002849, "grad_norm": 0.17795786261558533, "learning_rate": 2.725894721025824e-05, "loss": 0.7246, "step": 8647 }, { "epoch": 9.85014245014245, "grad_norm": 0.21658514440059662, "learning_rate": 2.7254307048959345e-05, "loss": 0.6665, "step": 8648 }, { "epoch": 9.851282051282052, "grad_norm": 0.1895408183336258, "learning_rate": 2.724966680936216e-05, "loss": 0.8378, "step": 8649 }, { "epoch": 9.852421652421652, "grad_norm": 0.20734098553657532, "learning_rate": 2.7245026491627862e-05, "loss": 0.7603, "step": 8650 }, { "epoch": 9.853561253561253, "grad_norm": 0.2191520631313324, "learning_rate": 2.7240386095917624e-05, "loss": 0.8362, "step": 8651 }, { "epoch": 9.854700854700855, "grad_norm": 0.1940697282552719, "learning_rate": 2.7235745622392617e-05, "loss": 0.817, "step": 8652 }, { "epoch": 9.855840455840456, "grad_norm": 0.19857257604599, "learning_rate": 2.7231105071214015e-05, "loss": 0.6497, "step": 8653 }, { "epoch": 9.856980056980056, "grad_norm": 0.1806962490081787, "learning_rate": 2.722646444254299e-05, "loss": 0.824, "step": 8654 }, { "epoch": 9.858119658119659, "grad_norm": 0.18675166368484497, "learning_rate": 2.7221823736540742e-05, "loss": 0.6341, "step": 8655 }, { "epoch": 9.85925925925926, "grad_norm": 0.20888161659240723, "learning_rate": 2.7217182953368442e-05, "loss": 0.8685, "step": 8656 }, { "epoch": 9.86039886039886, "grad_norm": 0.1748548150062561, "learning_rate": 2.721254209318728e-05, "loss": 0.8589, "step": 8657 }, { "epoch": 9.861538461538462, "grad_norm": 0.21147984266281128, "learning_rate": 2.720790115615845e-05, "loss": 0.7148, "step": 8658 }, { "epoch": 9.862678062678063, "grad_norm": 0.1968158334493637, "learning_rate": 2.7203260142443137e-05, "loss": 0.8946, "step": 8659 }, { "epoch": 9.863817663817663, "grad_norm": 0.21211007237434387, "learning_rate": 2.7198619052202545e-05, "loss": 0.6632, "step": 8660 }, { "epoch": 9.864957264957265, "grad_norm": 0.24838034808635712, "learning_rate": 2.719397788559786e-05, "loss": 0.7353, "step": 8661 }, { "epoch": 9.866096866096866, "grad_norm": 0.19553515315055847, "learning_rate": 2.7189336642790297e-05, "loss": 0.7358, "step": 8662 }, { "epoch": 9.867236467236467, "grad_norm": 0.20539981126785278, "learning_rate": 2.7184695323941046e-05, "loss": 0.7182, "step": 8663 }, { "epoch": 9.868376068376069, "grad_norm": 0.19742438197135925, "learning_rate": 2.718005392921132e-05, "loss": 0.8297, "step": 8664 }, { "epoch": 9.86951566951567, "grad_norm": 0.37122297286987305, "learning_rate": 2.7175412458762328e-05, "loss": 0.4323, "step": 8665 }, { "epoch": 9.87065527065527, "grad_norm": 0.17589843273162842, "learning_rate": 2.717077091275528e-05, "loss": 0.8281, "step": 8666 }, { "epoch": 9.871794871794872, "grad_norm": 0.15270763635635376, "learning_rate": 2.7166129291351382e-05, "loss": 0.7995, "step": 8667 }, { "epoch": 9.872934472934473, "grad_norm": 0.1615580916404724, "learning_rate": 2.7161487594711866e-05, "loss": 1.0074, "step": 8668 }, { "epoch": 9.874074074074073, "grad_norm": 0.1597990244626999, "learning_rate": 2.7156845822997934e-05, "loss": 1.0043, "step": 8669 }, { "epoch": 9.875213675213676, "grad_norm": 0.20243625342845917, "learning_rate": 2.715220397637082e-05, "loss": 0.802, "step": 8670 }, { "epoch": 9.876353276353276, "grad_norm": 0.20510946214199066, "learning_rate": 2.7147562054991737e-05, "loss": 0.7117, "step": 8671 }, { "epoch": 9.877492877492877, "grad_norm": 0.2172488123178482, "learning_rate": 2.714292005902192e-05, "loss": 0.7563, "step": 8672 }, { "epoch": 9.878632478632479, "grad_norm": 0.23654407262802124, "learning_rate": 2.71382779886226e-05, "loss": 0.7729, "step": 8673 }, { "epoch": 9.87977207977208, "grad_norm": 0.21040476858615875, "learning_rate": 2.713363584395501e-05, "loss": 0.7365, "step": 8674 }, { "epoch": 9.88091168091168, "grad_norm": 0.2197181135416031, "learning_rate": 2.7128993625180366e-05, "loss": 0.6031, "step": 8675 }, { "epoch": 9.882051282051282, "grad_norm": 0.20595000684261322, "learning_rate": 2.712435133245993e-05, "loss": 0.6825, "step": 8676 }, { "epoch": 9.883190883190883, "grad_norm": 0.24480144679546356, "learning_rate": 2.7119708965954925e-05, "loss": 0.4792, "step": 8677 }, { "epoch": 9.884330484330484, "grad_norm": 0.28222033381462097, "learning_rate": 2.71150665258266e-05, "loss": 0.4097, "step": 8678 }, { "epoch": 9.885470085470086, "grad_norm": 0.21712517738342285, "learning_rate": 2.7110424012236197e-05, "loss": 0.6584, "step": 8679 }, { "epoch": 9.886609686609686, "grad_norm": 0.24547608196735382, "learning_rate": 2.7105781425344966e-05, "loss": 0.7856, "step": 8680 }, { "epoch": 9.887749287749287, "grad_norm": 0.19895847141742706, "learning_rate": 2.7101138765314154e-05, "loss": 0.7386, "step": 8681 }, { "epoch": 9.88888888888889, "grad_norm": 0.19437870383262634, "learning_rate": 2.7096496032305013e-05, "loss": 0.989, "step": 8682 }, { "epoch": 9.89002849002849, "grad_norm": 0.22372710704803467, "learning_rate": 2.7091853226478804e-05, "loss": 0.6882, "step": 8683 }, { "epoch": 9.89116809116809, "grad_norm": 0.2139015793800354, "learning_rate": 2.7087210347996772e-05, "loss": 0.7785, "step": 8684 }, { "epoch": 9.892307692307693, "grad_norm": 0.23976415395736694, "learning_rate": 2.7082567397020192e-05, "loss": 0.6503, "step": 8685 }, { "epoch": 9.893447293447293, "grad_norm": 0.2095179259777069, "learning_rate": 2.7077924373710316e-05, "loss": 0.7618, "step": 8686 }, { "epoch": 9.894586894586894, "grad_norm": 0.2666536271572113, "learning_rate": 2.7073281278228417e-05, "loss": 0.3682, "step": 8687 }, { "epoch": 9.895726495726496, "grad_norm": 0.21089398860931396, "learning_rate": 2.7068638110735745e-05, "loss": 0.9049, "step": 8688 }, { "epoch": 9.896866096866097, "grad_norm": 0.1931055635213852, "learning_rate": 2.7063994871393595e-05, "loss": 0.6098, "step": 8689 }, { "epoch": 9.898005698005697, "grad_norm": 0.18896043300628662, "learning_rate": 2.705935156036322e-05, "loss": 0.8139, "step": 8690 }, { "epoch": 9.8991452991453, "grad_norm": 0.2181152105331421, "learning_rate": 2.705470817780591e-05, "loss": 0.5171, "step": 8691 }, { "epoch": 9.9002849002849, "grad_norm": 0.15674111247062683, "learning_rate": 2.7050064723882927e-05, "loss": 0.8819, "step": 8692 }, { "epoch": 9.9014245014245, "grad_norm": 0.24580217897891998, "learning_rate": 2.7045421198755567e-05, "loss": 0.6895, "step": 8693 }, { "epoch": 9.902564102564103, "grad_norm": 0.2359219491481781, "learning_rate": 2.7040777602585098e-05, "loss": 0.7899, "step": 8694 }, { "epoch": 9.903703703703703, "grad_norm": 0.1815393567085266, "learning_rate": 2.703613393553281e-05, "loss": 0.7982, "step": 8695 }, { "epoch": 9.904843304843304, "grad_norm": 0.20599913597106934, "learning_rate": 2.7031490197759997e-05, "loss": 0.6765, "step": 8696 }, { "epoch": 9.905982905982906, "grad_norm": 0.22677111625671387, "learning_rate": 2.7026846389427934e-05, "loss": 0.7334, "step": 8697 }, { "epoch": 9.907122507122507, "grad_norm": 0.1993129551410675, "learning_rate": 2.702220251069793e-05, "loss": 0.821, "step": 8698 }, { "epoch": 9.908262108262107, "grad_norm": 0.2008620649576187, "learning_rate": 2.701755856173126e-05, "loss": 0.786, "step": 8699 }, { "epoch": 9.90940170940171, "grad_norm": 0.31846582889556885, "learning_rate": 2.7012914542689244e-05, "loss": 0.7542, "step": 8700 }, { "epoch": 9.91054131054131, "grad_norm": 0.1618286669254303, "learning_rate": 2.7008270453733166e-05, "loss": 0.946, "step": 8701 }, { "epoch": 9.91168091168091, "grad_norm": 0.21714560687541962, "learning_rate": 2.7003626295024338e-05, "loss": 0.684, "step": 8702 }, { "epoch": 9.912820512820513, "grad_norm": 0.22912001609802246, "learning_rate": 2.699898206672405e-05, "loss": 0.7059, "step": 8703 }, { "epoch": 9.913960113960114, "grad_norm": 0.22242160141468048, "learning_rate": 2.6994337768993628e-05, "loss": 0.7202, "step": 8704 }, { "epoch": 9.915099715099714, "grad_norm": 0.20211511850357056, "learning_rate": 2.6989693401994353e-05, "loss": 0.7771, "step": 8705 }, { "epoch": 9.916239316239317, "grad_norm": 0.22866898775100708, "learning_rate": 2.698504896588757e-05, "loss": 0.6256, "step": 8706 }, { "epoch": 9.917378917378917, "grad_norm": 0.18713034689426422, "learning_rate": 2.6980404460834575e-05, "loss": 0.8939, "step": 8707 }, { "epoch": 9.918518518518518, "grad_norm": 0.22166401147842407, "learning_rate": 2.6975759886996688e-05, "loss": 0.638, "step": 8708 }, { "epoch": 9.91965811965812, "grad_norm": 0.19920535385608673, "learning_rate": 2.697111524453522e-05, "loss": 0.7802, "step": 8709 }, { "epoch": 9.92079772079772, "grad_norm": 0.22823186218738556, "learning_rate": 2.69664705336115e-05, "loss": 0.6718, "step": 8710 }, { "epoch": 9.921937321937321, "grad_norm": 0.19130191206932068, "learning_rate": 2.696182575438686e-05, "loss": 0.8057, "step": 8711 }, { "epoch": 9.923076923076923, "grad_norm": 0.18313159048557281, "learning_rate": 2.6957180907022604e-05, "loss": 0.7729, "step": 8712 }, { "epoch": 9.924216524216524, "grad_norm": 0.18810537457466125, "learning_rate": 2.6952535991680085e-05, "loss": 0.5622, "step": 8713 }, { "epoch": 9.925356125356124, "grad_norm": 0.21185599267482758, "learning_rate": 2.6947891008520615e-05, "loss": 0.8193, "step": 8714 }, { "epoch": 9.926495726495727, "grad_norm": 0.19980546832084656, "learning_rate": 2.6943245957705536e-05, "loss": 0.6878, "step": 8715 }, { "epoch": 9.927635327635327, "grad_norm": 0.21896570920944214, "learning_rate": 2.6938600839396173e-05, "loss": 0.6131, "step": 8716 }, { "epoch": 9.928774928774928, "grad_norm": 0.2273005247116089, "learning_rate": 2.693395565375388e-05, "loss": 0.705, "step": 8717 }, { "epoch": 9.92991452991453, "grad_norm": 0.1967950165271759, "learning_rate": 2.6929310400939983e-05, "loss": 0.8488, "step": 8718 }, { "epoch": 9.93105413105413, "grad_norm": 0.22161847352981567, "learning_rate": 2.692466508111584e-05, "loss": 0.5494, "step": 8719 }, { "epoch": 9.932193732193731, "grad_norm": 0.2432965487241745, "learning_rate": 2.6920019694442776e-05, "loss": 0.6465, "step": 8720 }, { "epoch": 9.933333333333334, "grad_norm": 0.23050159215927124, "learning_rate": 2.6915374241082157e-05, "loss": 0.6693, "step": 8721 }, { "epoch": 9.934472934472934, "grad_norm": 0.19228944182395935, "learning_rate": 2.691072872119531e-05, "loss": 0.8071, "step": 8722 }, { "epoch": 9.935612535612536, "grad_norm": 0.21512329578399658, "learning_rate": 2.690608313494361e-05, "loss": 0.6593, "step": 8723 }, { "epoch": 9.936752136752137, "grad_norm": 0.1586693525314331, "learning_rate": 2.6901437482488396e-05, "loss": 0.7464, "step": 8724 }, { "epoch": 9.937891737891738, "grad_norm": 0.21485713124275208, "learning_rate": 2.6896791763991035e-05, "loss": 0.8258, "step": 8725 }, { "epoch": 9.93903133903134, "grad_norm": 0.1861257255077362, "learning_rate": 2.6892145979612875e-05, "loss": 0.7165, "step": 8726 }, { "epoch": 9.94017094017094, "grad_norm": 0.2221956104040146, "learning_rate": 2.6887500129515288e-05, "loss": 0.6439, "step": 8727 }, { "epoch": 9.941310541310541, "grad_norm": 0.1893671303987503, "learning_rate": 2.6882854213859628e-05, "loss": 0.7803, "step": 8728 }, { "epoch": 9.942450142450143, "grad_norm": 0.22396253049373627, "learning_rate": 2.6878208232807254e-05, "loss": 0.63, "step": 8729 }, { "epoch": 9.943589743589744, "grad_norm": 0.2166588455438614, "learning_rate": 2.6873562186519558e-05, "loss": 0.6246, "step": 8730 }, { "epoch": 9.944729344729344, "grad_norm": 0.18603435158729553, "learning_rate": 2.6868916075157884e-05, "loss": 0.5585, "step": 8731 }, { "epoch": 9.945868945868947, "grad_norm": 0.18738475441932678, "learning_rate": 2.686426989888362e-05, "loss": 0.7881, "step": 8732 }, { "epoch": 9.947008547008547, "grad_norm": 0.24822686612606049, "learning_rate": 2.685962365785813e-05, "loss": 0.6375, "step": 8733 }, { "epoch": 9.948148148148148, "grad_norm": 0.21976767480373383, "learning_rate": 2.6854977352242805e-05, "loss": 0.703, "step": 8734 }, { "epoch": 9.94928774928775, "grad_norm": 0.22957386076450348, "learning_rate": 2.6850330982199007e-05, "loss": 0.6039, "step": 8735 }, { "epoch": 9.95042735042735, "grad_norm": 0.19751594960689545, "learning_rate": 2.6845684547888133e-05, "loss": 0.8182, "step": 8736 }, { "epoch": 9.951566951566951, "grad_norm": 0.23954753577709198, "learning_rate": 2.6841038049471556e-05, "loss": 0.6498, "step": 8737 }, { "epoch": 9.952706552706553, "grad_norm": 0.19721907377243042, "learning_rate": 2.6836391487110662e-05, "loss": 0.7403, "step": 8738 }, { "epoch": 9.953846153846154, "grad_norm": 0.2065429985523224, "learning_rate": 2.6831744860966846e-05, "loss": 0.7021, "step": 8739 }, { "epoch": 9.954985754985755, "grad_norm": 0.2047637403011322, "learning_rate": 2.6827098171201493e-05, "loss": 0.7156, "step": 8740 }, { "epoch": 9.956125356125357, "grad_norm": 0.24892592430114746, "learning_rate": 2.6822451417975996e-05, "loss": 0.4964, "step": 8741 }, { "epoch": 9.957264957264957, "grad_norm": 0.25440195202827454, "learning_rate": 2.6817804601451747e-05, "loss": 0.4231, "step": 8742 }, { "epoch": 9.958404558404558, "grad_norm": 0.21469826996326447, "learning_rate": 2.6813157721790146e-05, "loss": 0.6043, "step": 8743 }, { "epoch": 9.95954415954416, "grad_norm": 0.20514146983623505, "learning_rate": 2.680851077915259e-05, "loss": 0.9217, "step": 8744 }, { "epoch": 9.96068376068376, "grad_norm": 0.19511772692203522, "learning_rate": 2.6803863773700482e-05, "loss": 0.7281, "step": 8745 }, { "epoch": 9.961823361823361, "grad_norm": 0.16267132759094238, "learning_rate": 2.6799216705595226e-05, "loss": 0.6238, "step": 8746 }, { "epoch": 9.962962962962964, "grad_norm": 0.19934651255607605, "learning_rate": 2.679456957499823e-05, "loss": 0.7066, "step": 8747 }, { "epoch": 9.964102564102564, "grad_norm": 0.14993909001350403, "learning_rate": 2.6789922382070887e-05, "loss": 1.0104, "step": 8748 }, { "epoch": 9.965242165242165, "grad_norm": 0.19606129825115204, "learning_rate": 2.6785275126974624e-05, "loss": 0.6397, "step": 8749 }, { "epoch": 9.966381766381767, "grad_norm": 0.19740979373455048, "learning_rate": 2.678062780987084e-05, "loss": 0.6837, "step": 8750 }, { "epoch": 9.967521367521368, "grad_norm": 0.24982787668704987, "learning_rate": 2.6775980430920966e-05, "loss": 0.6456, "step": 8751 }, { "epoch": 9.968660968660968, "grad_norm": 0.20626413822174072, "learning_rate": 2.67713329902864e-05, "loss": 0.6292, "step": 8752 }, { "epoch": 9.96980056980057, "grad_norm": 0.16925214231014252, "learning_rate": 2.6766685488128572e-05, "loss": 0.8886, "step": 8753 }, { "epoch": 9.970940170940171, "grad_norm": 0.23425494134426117, "learning_rate": 2.6762037924608897e-05, "loss": 0.6506, "step": 8754 }, { "epoch": 9.972079772079772, "grad_norm": 0.1767222136259079, "learning_rate": 2.6757390299888795e-05, "loss": 0.802, "step": 8755 }, { "epoch": 9.973219373219374, "grad_norm": 0.2280825823545456, "learning_rate": 2.6752742614129706e-05, "loss": 0.7288, "step": 8756 }, { "epoch": 9.974358974358974, "grad_norm": 0.2023814171552658, "learning_rate": 2.674809486749304e-05, "loss": 0.7595, "step": 8757 }, { "epoch": 9.975498575498575, "grad_norm": 0.2898479402065277, "learning_rate": 2.674344706014023e-05, "loss": 0.6536, "step": 8758 }, { "epoch": 9.976638176638177, "grad_norm": 0.25874680280685425, "learning_rate": 2.673879919223271e-05, "loss": 0.5329, "step": 8759 }, { "epoch": 9.977777777777778, "grad_norm": 0.20675835013389587, "learning_rate": 2.673415126393191e-05, "loss": 0.945, "step": 8760 }, { "epoch": 9.978917378917378, "grad_norm": 0.23534797132015228, "learning_rate": 2.672950327539927e-05, "loss": 0.6476, "step": 8761 }, { "epoch": 9.98005698005698, "grad_norm": 0.2391483187675476, "learning_rate": 2.6724855226796232e-05, "loss": 0.6501, "step": 8762 }, { "epoch": 9.981196581196581, "grad_norm": 0.2711732089519501, "learning_rate": 2.672020711828422e-05, "loss": 0.6641, "step": 8763 }, { "epoch": 9.982336182336182, "grad_norm": 0.2136969417333603, "learning_rate": 2.6715558950024687e-05, "loss": 0.8328, "step": 8764 }, { "epoch": 9.983475783475784, "grad_norm": 0.18608087301254272, "learning_rate": 2.6710910722179074e-05, "loss": 0.6553, "step": 8765 }, { "epoch": 9.984615384615385, "grad_norm": 0.1654229313135147, "learning_rate": 2.6706262434908818e-05, "loss": 0.6155, "step": 8766 }, { "epoch": 9.985754985754985, "grad_norm": 0.18682758510112762, "learning_rate": 2.6701614088375383e-05, "loss": 0.6275, "step": 8767 }, { "epoch": 9.986894586894588, "grad_norm": 0.25929149985313416, "learning_rate": 2.6696965682740217e-05, "loss": 0.5789, "step": 8768 }, { "epoch": 9.988034188034188, "grad_norm": 0.22134925425052643, "learning_rate": 2.669231721816476e-05, "loss": 0.5432, "step": 8769 }, { "epoch": 9.989173789173789, "grad_norm": 0.19857807457447052, "learning_rate": 2.6687668694810475e-05, "loss": 0.852, "step": 8770 }, { "epoch": 9.990313390313391, "grad_norm": 0.22108644247055054, "learning_rate": 2.668302011283881e-05, "loss": 0.8514, "step": 8771 }, { "epoch": 9.991452991452991, "grad_norm": 0.19870635867118835, "learning_rate": 2.667837147241123e-05, "loss": 0.6856, "step": 8772 }, { "epoch": 9.992592592592592, "grad_norm": 0.24190519750118256, "learning_rate": 2.6673722773689196e-05, "loss": 0.4523, "step": 8773 }, { "epoch": 9.993732193732194, "grad_norm": 0.19627119600772858, "learning_rate": 2.666907401683416e-05, "loss": 1.0093, "step": 8774 }, { "epoch": 9.994871794871795, "grad_norm": 0.1963256150484085, "learning_rate": 2.6664425202007604e-05, "loss": 0.814, "step": 8775 }, { "epoch": 9.996011396011395, "grad_norm": 0.2213260382413864, "learning_rate": 2.6659776329370977e-05, "loss": 0.7355, "step": 8776 }, { "epoch": 9.997150997150998, "grad_norm": 0.2520917057991028, "learning_rate": 2.6655127399085755e-05, "loss": 0.4771, "step": 8777 }, { "epoch": 9.998290598290598, "grad_norm": 0.24032898247241974, "learning_rate": 2.6650478411313407e-05, "loss": 0.6357, "step": 8778 }, { "epoch": 9.999430199430199, "grad_norm": 0.18503084778785706, "learning_rate": 2.6645829366215407e-05, "loss": 0.586, "step": 8779 }, { "epoch": 10.0, "grad_norm": 0.554973840713501, "learning_rate": 2.6641180263953224e-05, "loss": 0.5772, "step": 8780 }, { "epoch": 10.0011396011396, "grad_norm": 0.27616214752197266, "learning_rate": 2.663653110468834e-05, "loss": 0.4348, "step": 8781 }, { "epoch": 10.002279202279203, "grad_norm": 0.21189844608306885, "learning_rate": 2.663188188858222e-05, "loss": 0.5661, "step": 8782 }, { "epoch": 10.003418803418803, "grad_norm": 0.19806593656539917, "learning_rate": 2.6627232615796367e-05, "loss": 0.7006, "step": 8783 }, { "epoch": 10.004558404558404, "grad_norm": 0.22303634881973267, "learning_rate": 2.6622583286492243e-05, "loss": 0.522, "step": 8784 }, { "epoch": 10.005698005698006, "grad_norm": 0.18687784671783447, "learning_rate": 2.661793390083135e-05, "loss": 0.7389, "step": 8785 }, { "epoch": 10.006837606837607, "grad_norm": 0.18081918358802795, "learning_rate": 2.661328445897515e-05, "loss": 0.7818, "step": 8786 }, { "epoch": 10.007977207977207, "grad_norm": 0.2659003436565399, "learning_rate": 2.6608634961085155e-05, "loss": 0.5833, "step": 8787 }, { "epoch": 10.00911680911681, "grad_norm": 0.18005883693695068, "learning_rate": 2.6603985407322834e-05, "loss": 0.8288, "step": 8788 }, { "epoch": 10.01025641025641, "grad_norm": 0.21639782190322876, "learning_rate": 2.6599335797849695e-05, "loss": 0.6301, "step": 8789 }, { "epoch": 10.01139601139601, "grad_norm": 0.19014930725097656, "learning_rate": 2.6594686132827227e-05, "loss": 0.7667, "step": 8790 }, { "epoch": 10.012535612535613, "grad_norm": 0.22009646892547607, "learning_rate": 2.659003641241692e-05, "loss": 0.6251, "step": 8791 }, { "epoch": 10.013675213675214, "grad_norm": 0.24239443242549896, "learning_rate": 2.658538663678028e-05, "loss": 0.5819, "step": 8792 }, { "epoch": 10.014814814814814, "grad_norm": 0.20067726075649261, "learning_rate": 2.6580736806078792e-05, "loss": 0.7514, "step": 8793 }, { "epoch": 10.015954415954416, "grad_norm": 0.17462316155433655, "learning_rate": 2.657608692047398e-05, "loss": 0.8196, "step": 8794 }, { "epoch": 10.017094017094017, "grad_norm": 0.18429355323314667, "learning_rate": 2.657143698012733e-05, "loss": 0.7408, "step": 8795 }, { "epoch": 10.018233618233618, "grad_norm": 0.19342951476573944, "learning_rate": 2.6566786985200355e-05, "loss": 0.7168, "step": 8796 }, { "epoch": 10.01937321937322, "grad_norm": 0.25074589252471924, "learning_rate": 2.6562136935854553e-05, "loss": 0.61, "step": 8797 }, { "epoch": 10.02051282051282, "grad_norm": 0.2421872615814209, "learning_rate": 2.655748683225145e-05, "loss": 0.5853, "step": 8798 }, { "epoch": 10.021652421652421, "grad_norm": 0.19836491346359253, "learning_rate": 2.655283667455253e-05, "loss": 0.4798, "step": 8799 }, { "epoch": 10.022792022792023, "grad_norm": 0.2570490837097168, "learning_rate": 2.6548186462919334e-05, "loss": 0.4748, "step": 8800 }, { "epoch": 10.023931623931624, "grad_norm": 0.20948342978954315, "learning_rate": 2.6543536197513358e-05, "loss": 0.838, "step": 8801 }, { "epoch": 10.025071225071224, "grad_norm": 0.2177944779396057, "learning_rate": 2.6538885878496135e-05, "loss": 0.8068, "step": 8802 }, { "epoch": 10.026210826210827, "grad_norm": 0.2718174457550049, "learning_rate": 2.6534235506029165e-05, "loss": 0.5763, "step": 8803 }, { "epoch": 10.027350427350427, "grad_norm": 0.1805044412612915, "learning_rate": 2.6529585080273976e-05, "loss": 0.7984, "step": 8804 }, { "epoch": 10.028490028490028, "grad_norm": 0.22032229602336884, "learning_rate": 2.65249346013921e-05, "loss": 0.7791, "step": 8805 }, { "epoch": 10.02962962962963, "grad_norm": 0.19147786498069763, "learning_rate": 2.6520284069545044e-05, "loss": 0.6717, "step": 8806 }, { "epoch": 10.03076923076923, "grad_norm": 0.18560323119163513, "learning_rate": 2.6515633484894342e-05, "loss": 0.7543, "step": 8807 }, { "epoch": 10.031908831908831, "grad_norm": 0.1824505776166916, "learning_rate": 2.6510982847601525e-05, "loss": 0.7975, "step": 8808 }, { "epoch": 10.033048433048434, "grad_norm": 0.18685854971408844, "learning_rate": 2.6506332157828118e-05, "loss": 0.7852, "step": 8809 }, { "epoch": 10.034188034188034, "grad_norm": 0.17172589898109436, "learning_rate": 2.650168141573564e-05, "loss": 0.799, "step": 8810 }, { "epoch": 10.035327635327635, "grad_norm": 0.16937170922756195, "learning_rate": 2.6497030621485654e-05, "loss": 0.7407, "step": 8811 }, { "epoch": 10.036467236467237, "grad_norm": 0.24063147604465485, "learning_rate": 2.6492379775239668e-05, "loss": 0.5964, "step": 8812 }, { "epoch": 10.037606837606837, "grad_norm": 0.17484796047210693, "learning_rate": 2.6487728877159233e-05, "loss": 0.7741, "step": 8813 }, { "epoch": 10.038746438746438, "grad_norm": 0.17384153604507446, "learning_rate": 2.6483077927405882e-05, "loss": 0.8228, "step": 8814 }, { "epoch": 10.03988603988604, "grad_norm": 0.20385192334651947, "learning_rate": 2.6478426926141154e-05, "loss": 0.6191, "step": 8815 }, { "epoch": 10.04102564102564, "grad_norm": 0.18895360827445984, "learning_rate": 2.6473775873526595e-05, "loss": 0.6375, "step": 8816 }, { "epoch": 10.042165242165241, "grad_norm": 0.15249338746070862, "learning_rate": 2.6469124769723748e-05, "loss": 0.9603, "step": 8817 }, { "epoch": 10.043304843304844, "grad_norm": 0.24605506658554077, "learning_rate": 2.646447361489416e-05, "loss": 0.7325, "step": 8818 }, { "epoch": 10.044444444444444, "grad_norm": 0.17037871479988098, "learning_rate": 2.645982240919937e-05, "loss": 0.7791, "step": 8819 }, { "epoch": 10.045584045584045, "grad_norm": 0.19802500307559967, "learning_rate": 2.6455171152800933e-05, "loss": 0.6591, "step": 8820 }, { "epoch": 10.046723646723647, "grad_norm": 0.21303272247314453, "learning_rate": 2.64505198458604e-05, "loss": 0.5241, "step": 8821 }, { "epoch": 10.047863247863248, "grad_norm": 0.1992967426776886, "learning_rate": 2.6445868488539327e-05, "loss": 0.7214, "step": 8822 }, { "epoch": 10.049002849002848, "grad_norm": 0.22691546380519867, "learning_rate": 2.6441217080999264e-05, "loss": 0.7292, "step": 8823 }, { "epoch": 10.05014245014245, "grad_norm": 0.23367862403392792, "learning_rate": 2.643656562340177e-05, "loss": 0.5527, "step": 8824 }, { "epoch": 10.051282051282051, "grad_norm": 0.22327671945095062, "learning_rate": 2.64319141159084e-05, "loss": 0.4207, "step": 8825 }, { "epoch": 10.052421652421652, "grad_norm": 0.2017182856798172, "learning_rate": 2.642726255868071e-05, "loss": 0.588, "step": 8826 }, { "epoch": 10.053561253561254, "grad_norm": 0.18831363320350647, "learning_rate": 2.6422610951880272e-05, "loss": 0.7952, "step": 8827 }, { "epoch": 10.054700854700855, "grad_norm": 0.19915471971035004, "learning_rate": 2.641795929566865e-05, "loss": 0.6541, "step": 8828 }, { "epoch": 10.055840455840455, "grad_norm": 0.1804710179567337, "learning_rate": 2.6413307590207393e-05, "loss": 0.7298, "step": 8829 }, { "epoch": 10.056980056980057, "grad_norm": 0.20258505642414093, "learning_rate": 2.640865583565808e-05, "loss": 0.9078, "step": 8830 }, { "epoch": 10.058119658119658, "grad_norm": 0.20247726142406464, "learning_rate": 2.6404004032182272e-05, "loss": 0.6612, "step": 8831 }, { "epoch": 10.059259259259258, "grad_norm": 0.208050936460495, "learning_rate": 2.6399352179941543e-05, "loss": 0.6559, "step": 8832 }, { "epoch": 10.06039886039886, "grad_norm": 0.17111118137836456, "learning_rate": 2.639470027909747e-05, "loss": 0.7809, "step": 8833 }, { "epoch": 10.061538461538461, "grad_norm": 0.2093285322189331, "learning_rate": 2.639004832981162e-05, "loss": 0.5354, "step": 8834 }, { "epoch": 10.062678062678062, "grad_norm": 0.23502209782600403, "learning_rate": 2.6385396332245572e-05, "loss": 0.7269, "step": 8835 }, { "epoch": 10.063817663817664, "grad_norm": 0.20847783982753754, "learning_rate": 2.638074428656089e-05, "loss": 0.8058, "step": 8836 }, { "epoch": 10.064957264957265, "grad_norm": 0.2332785576581955, "learning_rate": 2.637609219291917e-05, "loss": 0.5732, "step": 8837 }, { "epoch": 10.066096866096865, "grad_norm": 0.22393395006656647, "learning_rate": 2.637144005148197e-05, "loss": 0.6826, "step": 8838 }, { "epoch": 10.067236467236468, "grad_norm": 0.25773870944976807, "learning_rate": 2.6366787862410902e-05, "loss": 0.6802, "step": 8839 }, { "epoch": 10.068376068376068, "grad_norm": 0.20467348396778107, "learning_rate": 2.6362135625867524e-05, "loss": 0.7429, "step": 8840 }, { "epoch": 10.069515669515669, "grad_norm": 0.23453713953495026, "learning_rate": 2.635748334201344e-05, "loss": 0.6959, "step": 8841 }, { "epoch": 10.070655270655271, "grad_norm": 0.18696318566799164, "learning_rate": 2.6352831011010216e-05, "loss": 0.7494, "step": 8842 }, { "epoch": 10.071794871794872, "grad_norm": 0.17967763543128967, "learning_rate": 2.634817863301945e-05, "loss": 0.7552, "step": 8843 }, { "epoch": 10.072934472934472, "grad_norm": 0.18998804688453674, "learning_rate": 2.6343526208202735e-05, "loss": 0.7303, "step": 8844 }, { "epoch": 10.074074074074074, "grad_norm": 0.22002533078193665, "learning_rate": 2.6338873736721664e-05, "loss": 0.4131, "step": 8845 }, { "epoch": 10.075213675213675, "grad_norm": 0.24679215252399445, "learning_rate": 2.6334221218737826e-05, "loss": 0.5834, "step": 8846 }, { "epoch": 10.076353276353275, "grad_norm": 0.24645689129829407, "learning_rate": 2.6329568654412812e-05, "loss": 0.561, "step": 8847 }, { "epoch": 10.077492877492878, "grad_norm": 0.19638419151306152, "learning_rate": 2.6324916043908226e-05, "loss": 0.6671, "step": 8848 }, { "epoch": 10.078632478632478, "grad_norm": 0.2307794988155365, "learning_rate": 2.6320263387385658e-05, "loss": 0.531, "step": 8849 }, { "epoch": 10.079772079772079, "grad_norm": 0.22745245695114136, "learning_rate": 2.6315610685006715e-05, "loss": 0.6205, "step": 8850 }, { "epoch": 10.080911680911681, "grad_norm": 0.1542021632194519, "learning_rate": 2.6310957936932994e-05, "loss": 0.7889, "step": 8851 }, { "epoch": 10.082051282051282, "grad_norm": 0.19776001572608948, "learning_rate": 2.63063051433261e-05, "loss": 0.7851, "step": 8852 }, { "epoch": 10.083190883190884, "grad_norm": 0.25708770751953125, "learning_rate": 2.6301652304347633e-05, "loss": 0.6544, "step": 8853 }, { "epoch": 10.084330484330485, "grad_norm": 0.19531428813934326, "learning_rate": 2.62969994201592e-05, "loss": 0.627, "step": 8854 }, { "epoch": 10.085470085470085, "grad_norm": 0.2588439881801605, "learning_rate": 2.6292346490922415e-05, "loss": 0.7755, "step": 8855 }, { "epoch": 10.086609686609687, "grad_norm": 0.17775475978851318, "learning_rate": 2.6287693516798884e-05, "loss": 0.8084, "step": 8856 }, { "epoch": 10.087749287749288, "grad_norm": 0.1992853581905365, "learning_rate": 2.628304049795021e-05, "loss": 0.9049, "step": 8857 }, { "epoch": 10.088888888888889, "grad_norm": 0.2174171358346939, "learning_rate": 2.6278387434538022e-05, "loss": 0.8111, "step": 8858 }, { "epoch": 10.090028490028491, "grad_norm": 0.23244938254356384, "learning_rate": 2.627373432672391e-05, "loss": 0.6628, "step": 8859 }, { "epoch": 10.091168091168091, "grad_norm": 0.21566203236579895, "learning_rate": 2.6269081174669507e-05, "loss": 0.5991, "step": 8860 }, { "epoch": 10.092307692307692, "grad_norm": 0.23708350956439972, "learning_rate": 2.626442797853642e-05, "loss": 0.8406, "step": 8861 }, { "epoch": 10.093447293447294, "grad_norm": 0.27565261721611023, "learning_rate": 2.6259774738486283e-05, "loss": 0.6404, "step": 8862 }, { "epoch": 10.094586894586895, "grad_norm": 0.23175352811813354, "learning_rate": 2.62551214546807e-05, "loss": 0.7613, "step": 8863 }, { "epoch": 10.095726495726495, "grad_norm": 0.20315219461917877, "learning_rate": 2.6250468127281298e-05, "loss": 0.7884, "step": 8864 }, { "epoch": 10.096866096866098, "grad_norm": 0.18797439336776733, "learning_rate": 2.6245814756449692e-05, "loss": 0.8777, "step": 8865 }, { "epoch": 10.098005698005698, "grad_norm": 0.21581979095935822, "learning_rate": 2.624116134234752e-05, "loss": 0.724, "step": 8866 }, { "epoch": 10.099145299145299, "grad_norm": 0.1983931064605713, "learning_rate": 2.6236507885136404e-05, "loss": 0.5552, "step": 8867 }, { "epoch": 10.100284900284901, "grad_norm": 0.214815154671669, "learning_rate": 2.6231854384977965e-05, "loss": 0.5143, "step": 8868 }, { "epoch": 10.101424501424502, "grad_norm": 0.19817157089710236, "learning_rate": 2.622720084203384e-05, "loss": 0.7363, "step": 8869 }, { "epoch": 10.102564102564102, "grad_norm": 0.20763924717903137, "learning_rate": 2.6222547256465646e-05, "loss": 0.5611, "step": 8870 }, { "epoch": 10.103703703703705, "grad_norm": 0.22716370224952698, "learning_rate": 2.621789362843503e-05, "loss": 0.6832, "step": 8871 }, { "epoch": 10.104843304843305, "grad_norm": 0.23576398193836212, "learning_rate": 2.6213239958103614e-05, "loss": 0.8271, "step": 8872 }, { "epoch": 10.105982905982906, "grad_norm": 0.23042328655719757, "learning_rate": 2.6208586245633043e-05, "loss": 0.728, "step": 8873 }, { "epoch": 10.107122507122508, "grad_norm": 0.17731733620166779, "learning_rate": 2.6203932491184946e-05, "loss": 0.7988, "step": 8874 }, { "epoch": 10.108262108262108, "grad_norm": 0.20725920796394348, "learning_rate": 2.6199278694920963e-05, "loss": 0.8181, "step": 8875 }, { "epoch": 10.109401709401709, "grad_norm": 0.16817273199558258, "learning_rate": 2.6194624857002736e-05, "loss": 0.767, "step": 8876 }, { "epoch": 10.110541310541311, "grad_norm": 0.25306424498558044, "learning_rate": 2.6189970977591898e-05, "loss": 0.5109, "step": 8877 }, { "epoch": 10.111680911680912, "grad_norm": 0.18125019967556, "learning_rate": 2.61853170568501e-05, "loss": 0.8867, "step": 8878 }, { "epoch": 10.112820512820512, "grad_norm": 0.21355712413787842, "learning_rate": 2.6180663094938973e-05, "loss": 0.769, "step": 8879 }, { "epoch": 10.113960113960115, "grad_norm": 0.1993047297000885, "learning_rate": 2.6176009092020175e-05, "loss": 0.7364, "step": 8880 }, { "epoch": 10.115099715099715, "grad_norm": 0.2002207636833191, "learning_rate": 2.6171355048255354e-05, "loss": 0.5944, "step": 8881 }, { "epoch": 10.116239316239316, "grad_norm": 0.20867647230625153, "learning_rate": 2.616670096380614e-05, "loss": 0.5918, "step": 8882 }, { "epoch": 10.117378917378918, "grad_norm": 0.21191585063934326, "learning_rate": 2.6162046838834197e-05, "loss": 0.5892, "step": 8883 }, { "epoch": 10.118518518518519, "grad_norm": 0.18844129145145416, "learning_rate": 2.6157392673501175e-05, "loss": 0.7598, "step": 8884 }, { "epoch": 10.11965811965812, "grad_norm": 0.1850503534078598, "learning_rate": 2.6152738467968717e-05, "loss": 0.7819, "step": 8885 }, { "epoch": 10.120797720797722, "grad_norm": 0.2429039627313614, "learning_rate": 2.614808422239849e-05, "loss": 0.5887, "step": 8886 }, { "epoch": 10.121937321937322, "grad_norm": 0.1783706396818161, "learning_rate": 2.6143429936952126e-05, "loss": 0.8119, "step": 8887 }, { "epoch": 10.123076923076923, "grad_norm": 0.21580688655376434, "learning_rate": 2.6138775611791304e-05, "loss": 0.6471, "step": 8888 }, { "epoch": 10.124216524216525, "grad_norm": 0.24312008917331696, "learning_rate": 2.6134121247077676e-05, "loss": 0.668, "step": 8889 }, { "epoch": 10.125356125356126, "grad_norm": 0.16163799166679382, "learning_rate": 2.61294668429729e-05, "loss": 0.8689, "step": 8890 }, { "epoch": 10.126495726495726, "grad_norm": 0.1943366676568985, "learning_rate": 2.6124812399638632e-05, "loss": 0.8453, "step": 8891 }, { "epoch": 10.127635327635328, "grad_norm": 0.21864405274391174, "learning_rate": 2.612015791723654e-05, "loss": 0.6204, "step": 8892 }, { "epoch": 10.128774928774929, "grad_norm": 0.22353316843509674, "learning_rate": 2.611550339592827e-05, "loss": 0.7569, "step": 8893 }, { "epoch": 10.12991452991453, "grad_norm": 0.21298466622829437, "learning_rate": 2.6110848835875506e-05, "loss": 0.7513, "step": 8894 }, { "epoch": 10.131054131054132, "grad_norm": 0.23452836275100708, "learning_rate": 2.6106194237239916e-05, "loss": 0.6682, "step": 8895 }, { "epoch": 10.132193732193732, "grad_norm": 0.30738216638565063, "learning_rate": 2.6101539600183146e-05, "loss": 0.3911, "step": 8896 }, { "epoch": 10.133333333333333, "grad_norm": 0.20332716405391693, "learning_rate": 2.6096884924866886e-05, "loss": 0.5929, "step": 8897 }, { "epoch": 10.134472934472935, "grad_norm": 0.17943629622459412, "learning_rate": 2.6092230211452783e-05, "loss": 0.7025, "step": 8898 }, { "epoch": 10.135612535612536, "grad_norm": 0.21828137338161469, "learning_rate": 2.6087575460102536e-05, "loss": 0.7702, "step": 8899 }, { "epoch": 10.136752136752136, "grad_norm": 0.1929798424243927, "learning_rate": 2.6082920670977795e-05, "loss": 0.6992, "step": 8900 }, { "epoch": 10.137891737891739, "grad_norm": 0.24545778334140778, "learning_rate": 2.6078265844240247e-05, "loss": 0.5378, "step": 8901 }, { "epoch": 10.13903133903134, "grad_norm": 0.27415305376052856, "learning_rate": 2.6073610980051548e-05, "loss": 0.4073, "step": 8902 }, { "epoch": 10.14017094017094, "grad_norm": 0.19098827242851257, "learning_rate": 2.6068956078573402e-05, "loss": 0.7139, "step": 8903 }, { "epoch": 10.141310541310542, "grad_norm": 0.1778879463672638, "learning_rate": 2.6064301139967458e-05, "loss": 0.8728, "step": 8904 }, { "epoch": 10.142450142450143, "grad_norm": 0.2150803506374359, "learning_rate": 2.6059646164395417e-05, "loss": 0.6732, "step": 8905 }, { "epoch": 10.143589743589743, "grad_norm": 0.24175742268562317, "learning_rate": 2.6054991152018947e-05, "loss": 0.6842, "step": 8906 }, { "epoch": 10.144729344729345, "grad_norm": 0.19339333474636078, "learning_rate": 2.605033610299974e-05, "loss": 0.7744, "step": 8907 }, { "epoch": 10.145868945868946, "grad_norm": 0.17048229277133942, "learning_rate": 2.6045681017499462e-05, "loss": 1.0048, "step": 8908 }, { "epoch": 10.147008547008546, "grad_norm": 0.21198265254497528, "learning_rate": 2.604102589567981e-05, "loss": 0.8113, "step": 8909 }, { "epoch": 10.148148148148149, "grad_norm": 0.234702005982399, "learning_rate": 2.6036370737702476e-05, "loss": 0.5801, "step": 8910 }, { "epoch": 10.14928774928775, "grad_norm": 0.22299301624298096, "learning_rate": 2.6031715543729125e-05, "loss": 0.6885, "step": 8911 }, { "epoch": 10.15042735042735, "grad_norm": 0.23150447010993958, "learning_rate": 2.602706031392146e-05, "loss": 0.6336, "step": 8912 }, { "epoch": 10.151566951566952, "grad_norm": 0.18681679666042328, "learning_rate": 2.602240504844116e-05, "loss": 0.8529, "step": 8913 }, { "epoch": 10.152706552706553, "grad_norm": 0.20661205053329468, "learning_rate": 2.601774974744993e-05, "loss": 0.6779, "step": 8914 }, { "epoch": 10.153846153846153, "grad_norm": 0.22453156113624573, "learning_rate": 2.6013094411109447e-05, "loss": 0.5817, "step": 8915 }, { "epoch": 10.154985754985756, "grad_norm": 0.20682759582996368, "learning_rate": 2.6008439039581412e-05, "loss": 0.5923, "step": 8916 }, { "epoch": 10.156125356125356, "grad_norm": 0.20472748577594757, "learning_rate": 2.6003783633027512e-05, "loss": 0.6358, "step": 8917 }, { "epoch": 10.157264957264957, "grad_norm": 0.21450816094875336, "learning_rate": 2.599912819160945e-05, "loss": 0.7361, "step": 8918 }, { "epoch": 10.158404558404559, "grad_norm": 0.2018934190273285, "learning_rate": 2.5994472715488917e-05, "loss": 0.6788, "step": 8919 }, { "epoch": 10.15954415954416, "grad_norm": 0.1991586983203888, "learning_rate": 2.5989817204827606e-05, "loss": 0.6879, "step": 8920 }, { "epoch": 10.16068376068376, "grad_norm": 0.22522181272506714, "learning_rate": 2.5985161659787227e-05, "loss": 0.5518, "step": 8921 }, { "epoch": 10.161823361823362, "grad_norm": 0.23106618225574493, "learning_rate": 2.5980506080529475e-05, "loss": 0.5903, "step": 8922 }, { "epoch": 10.162962962962963, "grad_norm": 0.24910876154899597, "learning_rate": 2.5975850467216045e-05, "loss": 0.5767, "step": 8923 }, { "epoch": 10.164102564102564, "grad_norm": 0.1799972653388977, "learning_rate": 2.5971194820008656e-05, "loss": 0.6332, "step": 8924 }, { "epoch": 10.165242165242166, "grad_norm": 0.21993529796600342, "learning_rate": 2.596653913906899e-05, "loss": 0.5968, "step": 8925 }, { "epoch": 10.166381766381766, "grad_norm": 0.19376817345619202, "learning_rate": 2.5961883424558764e-05, "loss": 0.6556, "step": 8926 }, { "epoch": 10.167521367521367, "grad_norm": 0.19510741531848907, "learning_rate": 2.5957227676639683e-05, "loss": 0.8258, "step": 8927 }, { "epoch": 10.16866096866097, "grad_norm": 0.2588106691837311, "learning_rate": 2.5952571895473454e-05, "loss": 0.7846, "step": 8928 }, { "epoch": 10.16980056980057, "grad_norm": 0.232466459274292, "learning_rate": 2.5947916081221786e-05, "loss": 0.5949, "step": 8929 }, { "epoch": 10.17094017094017, "grad_norm": 0.17070221900939941, "learning_rate": 2.5943260234046374e-05, "loss": 0.7432, "step": 8930 }, { "epoch": 10.172079772079773, "grad_norm": 0.2561127841472626, "learning_rate": 2.5938604354108948e-05, "loss": 0.4896, "step": 8931 }, { "epoch": 10.173219373219373, "grad_norm": 0.18016211688518524, "learning_rate": 2.593394844157121e-05, "loss": 0.7407, "step": 8932 }, { "epoch": 10.174358974358974, "grad_norm": 0.26223939657211304, "learning_rate": 2.5929292496594875e-05, "loss": 0.8116, "step": 8933 }, { "epoch": 10.175498575498576, "grad_norm": 0.2142946422100067, "learning_rate": 2.5924636519341654e-05, "loss": 0.7046, "step": 8934 }, { "epoch": 10.176638176638177, "grad_norm": 0.21094518899917603, "learning_rate": 2.591998050997327e-05, "loss": 0.5874, "step": 8935 }, { "epoch": 10.177777777777777, "grad_norm": 0.16477617621421814, "learning_rate": 2.5915324468651425e-05, "loss": 0.818, "step": 8936 }, { "epoch": 10.17891737891738, "grad_norm": 0.24145513772964478, "learning_rate": 2.5910668395537845e-05, "loss": 0.331, "step": 8937 }, { "epoch": 10.18005698005698, "grad_norm": 0.2187226116657257, "learning_rate": 2.5906012290794253e-05, "loss": 0.635, "step": 8938 }, { "epoch": 10.18119658119658, "grad_norm": 0.2143223136663437, "learning_rate": 2.5901356154582363e-05, "loss": 0.7668, "step": 8939 }, { "epoch": 10.182336182336183, "grad_norm": 0.2693440914154053, "learning_rate": 2.5896699987063888e-05, "loss": 0.4283, "step": 8940 }, { "epoch": 10.183475783475783, "grad_norm": 0.20912426710128784, "learning_rate": 2.5892043788400567e-05, "loss": 0.7052, "step": 8941 }, { "epoch": 10.184615384615384, "grad_norm": 0.15379904210567474, "learning_rate": 2.58873875587541e-05, "loss": 0.7951, "step": 8942 }, { "epoch": 10.185754985754986, "grad_norm": 0.21134737133979797, "learning_rate": 2.5882731298286227e-05, "loss": 0.7501, "step": 8943 }, { "epoch": 10.186894586894587, "grad_norm": 0.2301952987909317, "learning_rate": 2.587807500715867e-05, "loss": 0.6642, "step": 8944 }, { "epoch": 10.188034188034187, "grad_norm": 0.18264517188072205, "learning_rate": 2.587341868553315e-05, "loss": 0.8658, "step": 8945 }, { "epoch": 10.18917378917379, "grad_norm": 0.2299744039773941, "learning_rate": 2.5868762333571405e-05, "loss": 0.5489, "step": 8946 }, { "epoch": 10.19031339031339, "grad_norm": 0.15143314003944397, "learning_rate": 2.5864105951435147e-05, "loss": 0.9025, "step": 8947 }, { "epoch": 10.19145299145299, "grad_norm": 0.22624920308589935, "learning_rate": 2.5859449539286117e-05, "loss": 0.7747, "step": 8948 }, { "epoch": 10.192592592592593, "grad_norm": 0.1977541744709015, "learning_rate": 2.585479309728604e-05, "loss": 0.6921, "step": 8949 }, { "epoch": 10.193732193732194, "grad_norm": 0.21025383472442627, "learning_rate": 2.5850136625596648e-05, "loss": 0.6524, "step": 8950 }, { "epoch": 10.194871794871794, "grad_norm": 0.16774974763393402, "learning_rate": 2.5845480124379674e-05, "loss": 0.7247, "step": 8951 }, { "epoch": 10.196011396011396, "grad_norm": 0.15837088227272034, "learning_rate": 2.5840823593796853e-05, "loss": 0.7806, "step": 8952 }, { "epoch": 10.197150997150997, "grad_norm": 0.18233932554721832, "learning_rate": 2.5836167034009913e-05, "loss": 0.9084, "step": 8953 }, { "epoch": 10.198290598290598, "grad_norm": 0.18736779689788818, "learning_rate": 2.5831510445180584e-05, "loss": 0.71, "step": 8954 }, { "epoch": 10.1994301994302, "grad_norm": 0.2276691049337387, "learning_rate": 2.5826853827470626e-05, "loss": 0.7951, "step": 8955 }, { "epoch": 10.2005698005698, "grad_norm": 0.1869109719991684, "learning_rate": 2.5822197181041752e-05, "loss": 0.8659, "step": 8956 }, { "epoch": 10.201709401709401, "grad_norm": 0.17995122075080872, "learning_rate": 2.5817540506055715e-05, "loss": 0.7442, "step": 8957 }, { "epoch": 10.202849002849003, "grad_norm": 0.19730986654758453, "learning_rate": 2.5812883802674237e-05, "loss": 0.7441, "step": 8958 }, { "epoch": 10.203988603988604, "grad_norm": 0.1917230784893036, "learning_rate": 2.5808227071059076e-05, "loss": 0.8383, "step": 8959 }, { "epoch": 10.205128205128204, "grad_norm": 0.2498435229063034, "learning_rate": 2.580357031137196e-05, "loss": 0.719, "step": 8960 }, { "epoch": 10.206267806267807, "grad_norm": 0.26864364743232727, "learning_rate": 2.5798913523774648e-05, "loss": 0.5109, "step": 8961 }, { "epoch": 10.207407407407407, "grad_norm": 0.19821903109550476, "learning_rate": 2.5794256708428864e-05, "loss": 0.7657, "step": 8962 }, { "epoch": 10.208547008547008, "grad_norm": 0.21681812405586243, "learning_rate": 2.5789599865496373e-05, "loss": 0.7247, "step": 8963 }, { "epoch": 10.20968660968661, "grad_norm": 0.2496197521686554, "learning_rate": 2.5784942995138894e-05, "loss": 0.5004, "step": 8964 }, { "epoch": 10.21082621082621, "grad_norm": 0.20248307287693024, "learning_rate": 2.5780286097518184e-05, "loss": 0.7017, "step": 8965 }, { "epoch": 10.211965811965811, "grad_norm": 0.2095515877008438, "learning_rate": 2.5775629172795996e-05, "loss": 0.6386, "step": 8966 }, { "epoch": 10.213105413105414, "grad_norm": 0.231080561876297, "learning_rate": 2.5770972221134083e-05, "loss": 0.6178, "step": 8967 }, { "epoch": 10.214245014245014, "grad_norm": 0.20979668200016022, "learning_rate": 2.5766315242694172e-05, "loss": 0.6536, "step": 8968 }, { "epoch": 10.215384615384615, "grad_norm": 0.201679989695549, "learning_rate": 2.5761658237638037e-05, "loss": 0.6553, "step": 8969 }, { "epoch": 10.216524216524217, "grad_norm": 0.2316141426563263, "learning_rate": 2.5757001206127406e-05, "loss": 0.714, "step": 8970 }, { "epoch": 10.217663817663817, "grad_norm": 0.26693350076675415, "learning_rate": 2.5752344148324042e-05, "loss": 0.5044, "step": 8971 }, { "epoch": 10.218803418803418, "grad_norm": 0.2370952069759369, "learning_rate": 2.574768706438971e-05, "loss": 0.7485, "step": 8972 }, { "epoch": 10.21994301994302, "grad_norm": 0.23007620871067047, "learning_rate": 2.5743029954486137e-05, "loss": 0.7276, "step": 8973 }, { "epoch": 10.221082621082621, "grad_norm": 0.19329151511192322, "learning_rate": 2.57383728187751e-05, "loss": 0.643, "step": 8974 }, { "epoch": 10.222222222222221, "grad_norm": 0.1936943382024765, "learning_rate": 2.5733715657418332e-05, "loss": 0.7795, "step": 8975 }, { "epoch": 10.223361823361824, "grad_norm": 0.22971801459789276, "learning_rate": 2.5729058470577606e-05, "loss": 0.7746, "step": 8976 }, { "epoch": 10.224501424501424, "grad_norm": 0.18487688899040222, "learning_rate": 2.5724401258414675e-05, "loss": 0.8174, "step": 8977 }, { "epoch": 10.225641025641025, "grad_norm": 0.19056154787540436, "learning_rate": 2.57197440210913e-05, "loss": 0.7458, "step": 8978 }, { "epoch": 10.226780626780627, "grad_norm": 0.2256275713443756, "learning_rate": 2.571508675876923e-05, "loss": 0.8264, "step": 8979 }, { "epoch": 10.227920227920228, "grad_norm": 0.2609212398529053, "learning_rate": 2.5710429471610242e-05, "loss": 0.6448, "step": 8980 }, { "epoch": 10.229059829059828, "grad_norm": 0.2109462469816208, "learning_rate": 2.5705772159776066e-05, "loss": 0.6721, "step": 8981 }, { "epoch": 10.23019943019943, "grad_norm": 0.19234539568424225, "learning_rate": 2.5701114823428496e-05, "loss": 0.8113, "step": 8982 }, { "epoch": 10.231339031339031, "grad_norm": 0.21584919095039368, "learning_rate": 2.5696457462729274e-05, "loss": 0.6119, "step": 8983 }, { "epoch": 10.232478632478632, "grad_norm": 0.2534079849720001, "learning_rate": 2.5691800077840177e-05, "loss": 0.6815, "step": 8984 }, { "epoch": 10.233618233618234, "grad_norm": 0.23516786098480225, "learning_rate": 2.5687142668922954e-05, "loss": 0.6465, "step": 8985 }, { "epoch": 10.234757834757835, "grad_norm": 0.19675037264823914, "learning_rate": 2.568248523613938e-05, "loss": 0.8126, "step": 8986 }, { "epoch": 10.235897435897435, "grad_norm": 0.19146771728992462, "learning_rate": 2.5677827779651216e-05, "loss": 0.636, "step": 8987 }, { "epoch": 10.237037037037037, "grad_norm": 0.20973047614097595, "learning_rate": 2.5673170299620224e-05, "loss": 0.7118, "step": 8988 }, { "epoch": 10.238176638176638, "grad_norm": 0.20711617171764374, "learning_rate": 2.566851279620818e-05, "loss": 0.6087, "step": 8989 }, { "epoch": 10.239316239316238, "grad_norm": 0.21229654550552368, "learning_rate": 2.5663855269576852e-05, "loss": 0.5955, "step": 8990 }, { "epoch": 10.24045584045584, "grad_norm": 0.2514808177947998, "learning_rate": 2.5659197719888005e-05, "loss": 0.5145, "step": 8991 }, { "epoch": 10.241595441595441, "grad_norm": 0.2512102723121643, "learning_rate": 2.5654540147303396e-05, "loss": 0.5224, "step": 8992 }, { "epoch": 10.242735042735042, "grad_norm": 0.2813994884490967, "learning_rate": 2.5649882551984823e-05, "loss": 0.6023, "step": 8993 }, { "epoch": 10.243874643874644, "grad_norm": 0.2017429769039154, "learning_rate": 2.564522493409403e-05, "loss": 0.5882, "step": 8994 }, { "epoch": 10.245014245014245, "grad_norm": 0.18968677520751953, "learning_rate": 2.564056729379281e-05, "loss": 0.8654, "step": 8995 }, { "epoch": 10.246153846153845, "grad_norm": 0.20515674352645874, "learning_rate": 2.5635909631242923e-05, "loss": 0.7102, "step": 8996 }, { "epoch": 10.247293447293448, "grad_norm": 0.20291048288345337, "learning_rate": 2.5631251946606145e-05, "loss": 0.7177, "step": 8997 }, { "epoch": 10.248433048433048, "grad_norm": 0.19213135540485382, "learning_rate": 2.5626594240044245e-05, "loss": 0.6859, "step": 8998 }, { "epoch": 10.249572649572649, "grad_norm": 0.20880888402462006, "learning_rate": 2.5621936511719015e-05, "loss": 0.7952, "step": 8999 }, { "epoch": 10.250712250712251, "grad_norm": 0.198693186044693, "learning_rate": 2.5617278761792206e-05, "loss": 0.5229, "step": 9000 }, { "epoch": 10.251851851851852, "grad_norm": 0.24556277692317963, "learning_rate": 2.5612620990425624e-05, "loss": 0.6393, "step": 9001 }, { "epoch": 10.252991452991452, "grad_norm": 0.21111436188220978, "learning_rate": 2.5607963197781022e-05, "loss": 0.5632, "step": 9002 }, { "epoch": 10.254131054131054, "grad_norm": 0.21761007606983185, "learning_rate": 2.560330538402018e-05, "loss": 0.6946, "step": 9003 }, { "epoch": 10.255270655270655, "grad_norm": 0.19828318059444427, "learning_rate": 2.55986475493049e-05, "loss": 0.8467, "step": 9004 }, { "epoch": 10.256410256410255, "grad_norm": 0.25546392798423767, "learning_rate": 2.5593989693796927e-05, "loss": 0.7468, "step": 9005 }, { "epoch": 10.257549857549858, "grad_norm": 0.17720606923103333, "learning_rate": 2.5589331817658076e-05, "loss": 0.8694, "step": 9006 }, { "epoch": 10.258689458689458, "grad_norm": 0.2313048243522644, "learning_rate": 2.5584673921050097e-05, "loss": 0.7185, "step": 9007 }, { "epoch": 10.25982905982906, "grad_norm": 0.251424640417099, "learning_rate": 2.5580016004134787e-05, "loss": 0.6053, "step": 9008 }, { "epoch": 10.260968660968661, "grad_norm": 0.22431643307209015, "learning_rate": 2.5575358067073924e-05, "loss": 0.7118, "step": 9009 }, { "epoch": 10.262108262108262, "grad_norm": 0.19620449841022491, "learning_rate": 2.5570700110029294e-05, "loss": 0.6793, "step": 9010 }, { "epoch": 10.263247863247864, "grad_norm": 0.2035008817911148, "learning_rate": 2.5566042133162684e-05, "loss": 0.7385, "step": 9011 }, { "epoch": 10.264387464387465, "grad_norm": 0.23007768392562866, "learning_rate": 2.5561384136635873e-05, "loss": 0.6546, "step": 9012 }, { "epoch": 10.265527065527065, "grad_norm": 0.16230762004852295, "learning_rate": 2.5556726120610648e-05, "loss": 0.8617, "step": 9013 }, { "epoch": 10.266666666666667, "grad_norm": 0.1794481724500656, "learning_rate": 2.555206808524879e-05, "loss": 0.8314, "step": 9014 }, { "epoch": 10.267806267806268, "grad_norm": 0.20571161806583405, "learning_rate": 2.554741003071209e-05, "loss": 0.7129, "step": 9015 }, { "epoch": 10.268945868945869, "grad_norm": 0.31665948033332825, "learning_rate": 2.5542751957162336e-05, "loss": 0.5694, "step": 9016 }, { "epoch": 10.270085470085471, "grad_norm": 0.19845467805862427, "learning_rate": 2.553809386476131e-05, "loss": 0.7631, "step": 9017 }, { "epoch": 10.271225071225071, "grad_norm": 0.19833163917064667, "learning_rate": 2.5533435753670815e-05, "loss": 0.6717, "step": 9018 }, { "epoch": 10.272364672364672, "grad_norm": 0.27582642436027527, "learning_rate": 2.5528777624052617e-05, "loss": 0.4747, "step": 9019 }, { "epoch": 10.273504273504274, "grad_norm": 0.1753959208726883, "learning_rate": 2.552411947606852e-05, "loss": 0.7539, "step": 9020 }, { "epoch": 10.274643874643875, "grad_norm": 0.20944949984550476, "learning_rate": 2.551946130988032e-05, "loss": 0.5579, "step": 9021 }, { "epoch": 10.275783475783475, "grad_norm": 0.22696365416049957, "learning_rate": 2.5514803125649796e-05, "loss": 0.6115, "step": 9022 }, { "epoch": 10.276923076923078, "grad_norm": 0.2090187817811966, "learning_rate": 2.5510144923538744e-05, "loss": 0.7988, "step": 9023 }, { "epoch": 10.278062678062678, "grad_norm": 0.2817879915237427, "learning_rate": 2.5505486703708952e-05, "loss": 0.6709, "step": 9024 }, { "epoch": 10.279202279202279, "grad_norm": 0.1797921508550644, "learning_rate": 2.550082846632222e-05, "loss": 0.8487, "step": 9025 }, { "epoch": 10.280341880341881, "grad_norm": 0.24475227296352386, "learning_rate": 2.5496170211540332e-05, "loss": 0.6877, "step": 9026 }, { "epoch": 10.281481481481482, "grad_norm": 0.2791805565357208, "learning_rate": 2.5491511939525098e-05, "loss": 0.42, "step": 9027 }, { "epoch": 10.282621082621082, "grad_norm": 0.25618109107017517, "learning_rate": 2.5486853650438297e-05, "loss": 0.686, "step": 9028 }, { "epoch": 10.283760683760685, "grad_norm": 0.20590481162071228, "learning_rate": 2.5482195344441734e-05, "loss": 0.7595, "step": 9029 }, { "epoch": 10.284900284900285, "grad_norm": 0.20788438618183136, "learning_rate": 2.5477537021697202e-05, "loss": 0.7769, "step": 9030 }, { "epoch": 10.286039886039886, "grad_norm": 0.1922910213470459, "learning_rate": 2.5472878682366484e-05, "loss": 0.6286, "step": 9031 }, { "epoch": 10.287179487179488, "grad_norm": 0.21666008234024048, "learning_rate": 2.5468220326611404e-05, "loss": 0.7785, "step": 9032 }, { "epoch": 10.288319088319088, "grad_norm": 0.24231019616127014, "learning_rate": 2.5463561954593736e-05, "loss": 0.5637, "step": 9033 }, { "epoch": 10.289458689458689, "grad_norm": 0.20492561161518097, "learning_rate": 2.545890356647529e-05, "loss": 0.6971, "step": 9034 }, { "epoch": 10.290598290598291, "grad_norm": 0.20477353036403656, "learning_rate": 2.5454245162417857e-05, "loss": 0.7399, "step": 9035 }, { "epoch": 10.291737891737892, "grad_norm": 0.2130691260099411, "learning_rate": 2.5449586742583244e-05, "loss": 0.5531, "step": 9036 }, { "epoch": 10.292877492877492, "grad_norm": 0.20497938990592957, "learning_rate": 2.5444928307133243e-05, "loss": 0.8259, "step": 9037 }, { "epoch": 10.294017094017095, "grad_norm": 0.2435048669576645, "learning_rate": 2.5440269856229658e-05, "loss": 0.6226, "step": 9038 }, { "epoch": 10.295156695156695, "grad_norm": 0.18705344200134277, "learning_rate": 2.54356113900343e-05, "loss": 0.478, "step": 9039 }, { "epoch": 10.296296296296296, "grad_norm": 0.19532230496406555, "learning_rate": 2.543095290870895e-05, "loss": 0.8921, "step": 9040 }, { "epoch": 10.297435897435898, "grad_norm": 0.17932532727718353, "learning_rate": 2.5426294412415424e-05, "loss": 0.7321, "step": 9041 }, { "epoch": 10.298575498575499, "grad_norm": 0.2342417687177658, "learning_rate": 2.5421635901315514e-05, "loss": 0.6227, "step": 9042 }, { "epoch": 10.2997150997151, "grad_norm": 0.23890571296215057, "learning_rate": 2.541697737557104e-05, "loss": 0.5157, "step": 9043 }, { "epoch": 10.300854700854702, "grad_norm": 0.20371253788471222, "learning_rate": 2.5412318835343794e-05, "loss": 0.7216, "step": 9044 }, { "epoch": 10.301994301994302, "grad_norm": 0.22724924981594086, "learning_rate": 2.5407660280795577e-05, "loss": 0.7191, "step": 9045 }, { "epoch": 10.303133903133903, "grad_norm": 0.20855417847633362, "learning_rate": 2.5403001712088198e-05, "loss": 0.6524, "step": 9046 }, { "epoch": 10.304273504273505, "grad_norm": 0.2518128752708435, "learning_rate": 2.539834312938346e-05, "loss": 0.42, "step": 9047 }, { "epoch": 10.305413105413106, "grad_norm": 0.2135232836008072, "learning_rate": 2.5393684532843172e-05, "loss": 0.4781, "step": 9048 }, { "epoch": 10.306552706552706, "grad_norm": 0.19639183580875397, "learning_rate": 2.538902592262914e-05, "loss": 0.7857, "step": 9049 }, { "epoch": 10.307692307692308, "grad_norm": 0.2120703160762787, "learning_rate": 2.5384367298903165e-05, "loss": 0.7064, "step": 9050 }, { "epoch": 10.308831908831909, "grad_norm": 0.17633357644081116, "learning_rate": 2.537970866182706e-05, "loss": 0.6981, "step": 9051 }, { "epoch": 10.30997150997151, "grad_norm": 0.24732357263565063, "learning_rate": 2.5375050011562623e-05, "loss": 0.6496, "step": 9052 }, { "epoch": 10.311111111111112, "grad_norm": 0.21399758756160736, "learning_rate": 2.5370391348271666e-05, "loss": 0.6885, "step": 9053 }, { "epoch": 10.312250712250712, "grad_norm": 0.20664972066879272, "learning_rate": 2.5365732672116006e-05, "loss": 0.7207, "step": 9054 }, { "epoch": 10.313390313390313, "grad_norm": 0.2515653073787689, "learning_rate": 2.5361073983257444e-05, "loss": 0.542, "step": 9055 }, { "epoch": 10.314529914529915, "grad_norm": 0.21457159519195557, "learning_rate": 2.535641528185778e-05, "loss": 0.5081, "step": 9056 }, { "epoch": 10.315669515669516, "grad_norm": 0.23283055424690247, "learning_rate": 2.5351756568078848e-05, "loss": 0.7544, "step": 9057 }, { "epoch": 10.316809116809116, "grad_norm": 0.21763315796852112, "learning_rate": 2.5347097842082428e-05, "loss": 0.6422, "step": 9058 }, { "epoch": 10.317948717948719, "grad_norm": 0.199775829911232, "learning_rate": 2.5342439104030357e-05, "loss": 0.6528, "step": 9059 }, { "epoch": 10.31908831908832, "grad_norm": 0.19493013620376587, "learning_rate": 2.533778035408443e-05, "loss": 0.7672, "step": 9060 }, { "epoch": 10.32022792022792, "grad_norm": 0.1942964792251587, "learning_rate": 2.533312159240647e-05, "loss": 0.6985, "step": 9061 }, { "epoch": 10.321367521367522, "grad_norm": 0.2270016074180603, "learning_rate": 2.5328462819158268e-05, "loss": 0.6211, "step": 9062 }, { "epoch": 10.322507122507123, "grad_norm": 0.22101987898349762, "learning_rate": 2.532380403450166e-05, "loss": 0.7484, "step": 9063 }, { "epoch": 10.323646723646723, "grad_norm": 0.19234544038772583, "learning_rate": 2.5319145238598437e-05, "loss": 0.6808, "step": 9064 }, { "epoch": 10.324786324786325, "grad_norm": 0.23379816114902496, "learning_rate": 2.5314486431610424e-05, "loss": 0.6194, "step": 9065 }, { "epoch": 10.325925925925926, "grad_norm": 0.18350186944007874, "learning_rate": 2.530982761369944e-05, "loss": 0.7657, "step": 9066 }, { "epoch": 10.327065527065526, "grad_norm": 0.20779354870319366, "learning_rate": 2.5305168785027285e-05, "loss": 0.6434, "step": 9067 }, { "epoch": 10.328205128205129, "grad_norm": 0.32120004296302795, "learning_rate": 2.5300509945755783e-05, "loss": 0.7547, "step": 9068 }, { "epoch": 10.32934472934473, "grad_norm": 0.18596863746643066, "learning_rate": 2.5295851096046735e-05, "loss": 0.8268, "step": 9069 }, { "epoch": 10.33048433048433, "grad_norm": 0.21592064201831818, "learning_rate": 2.529119223606197e-05, "loss": 0.5348, "step": 9070 }, { "epoch": 10.331623931623932, "grad_norm": 0.23385873436927795, "learning_rate": 2.5286533365963298e-05, "loss": 0.7101, "step": 9071 }, { "epoch": 10.332763532763533, "grad_norm": 0.18794724345207214, "learning_rate": 2.5281874485912544e-05, "loss": 0.798, "step": 9072 }, { "epoch": 10.333903133903133, "grad_norm": 0.24771134555339813, "learning_rate": 2.52772155960715e-05, "loss": 0.6846, "step": 9073 }, { "epoch": 10.335042735042736, "grad_norm": 0.22779740393161774, "learning_rate": 2.5272556696602005e-05, "loss": 0.7489, "step": 9074 }, { "epoch": 10.336182336182336, "grad_norm": 0.19389639794826508, "learning_rate": 2.5267897787665856e-05, "loss": 0.9602, "step": 9075 }, { "epoch": 10.337321937321937, "grad_norm": 0.1903086155653, "learning_rate": 2.5263238869424893e-05, "loss": 0.7167, "step": 9076 }, { "epoch": 10.338461538461539, "grad_norm": 0.19804181158542633, "learning_rate": 2.525857994204091e-05, "loss": 0.8767, "step": 9077 }, { "epoch": 10.33960113960114, "grad_norm": 0.22120852768421173, "learning_rate": 2.5253921005675747e-05, "loss": 0.5659, "step": 9078 }, { "epoch": 10.34074074074074, "grad_norm": 0.24915292859077454, "learning_rate": 2.5249262060491195e-05, "loss": 0.553, "step": 9079 }, { "epoch": 10.341880341880342, "grad_norm": 0.20701691508293152, "learning_rate": 2.5244603106649095e-05, "loss": 0.7905, "step": 9080 }, { "epoch": 10.343019943019943, "grad_norm": 0.19434839487075806, "learning_rate": 2.5239944144311252e-05, "loss": 0.7948, "step": 9081 }, { "epoch": 10.344159544159544, "grad_norm": 0.20556633174419403, "learning_rate": 2.523528517363949e-05, "loss": 0.755, "step": 9082 }, { "epoch": 10.345299145299146, "grad_norm": 0.18583397567272186, "learning_rate": 2.5230626194795633e-05, "loss": 0.8336, "step": 9083 }, { "epoch": 10.346438746438746, "grad_norm": 0.17649589478969574, "learning_rate": 2.522596720794148e-05, "loss": 0.7059, "step": 9084 }, { "epoch": 10.347578347578347, "grad_norm": 0.19684922695159912, "learning_rate": 2.522130821323888e-05, "loss": 0.5711, "step": 9085 }, { "epoch": 10.34871794871795, "grad_norm": 0.21036779880523682, "learning_rate": 2.5216649210849624e-05, "loss": 0.7529, "step": 9086 }, { "epoch": 10.34985754985755, "grad_norm": 0.268655002117157, "learning_rate": 2.5211990200935552e-05, "loss": 0.6021, "step": 9087 }, { "epoch": 10.35099715099715, "grad_norm": 0.2607416808605194, "learning_rate": 2.5207331183658478e-05, "loss": 0.5326, "step": 9088 }, { "epoch": 10.352136752136753, "grad_norm": 0.23464538156986237, "learning_rate": 2.5202672159180224e-05, "loss": 0.5043, "step": 9089 }, { "epoch": 10.353276353276353, "grad_norm": 0.24285195767879486, "learning_rate": 2.51980131276626e-05, "loss": 0.5214, "step": 9090 }, { "epoch": 10.354415954415954, "grad_norm": 0.22683288156986237, "learning_rate": 2.5193354089267446e-05, "loss": 0.7075, "step": 9091 }, { "epoch": 10.355555555555556, "grad_norm": 0.246296688914299, "learning_rate": 2.5188695044156564e-05, "loss": 0.6478, "step": 9092 }, { "epoch": 10.356695156695157, "grad_norm": 0.2281445860862732, "learning_rate": 2.5184035992491785e-05, "loss": 0.5469, "step": 9093 }, { "epoch": 10.357834757834757, "grad_norm": 0.2476527839899063, "learning_rate": 2.517937693443493e-05, "loss": 0.5761, "step": 9094 }, { "epoch": 10.35897435897436, "grad_norm": 0.204382985830307, "learning_rate": 2.517471787014783e-05, "loss": 0.7355, "step": 9095 }, { "epoch": 10.36011396011396, "grad_norm": 0.21977843344211578, "learning_rate": 2.5170058799792283e-05, "loss": 0.5223, "step": 9096 }, { "epoch": 10.36125356125356, "grad_norm": 0.23635803163051605, "learning_rate": 2.5165399723530138e-05, "loss": 0.5094, "step": 9097 }, { "epoch": 10.362393162393163, "grad_norm": 0.25939029455184937, "learning_rate": 2.5160740641523202e-05, "loss": 0.5848, "step": 9098 }, { "epoch": 10.363532763532763, "grad_norm": 0.1882343739271164, "learning_rate": 2.5156081553933302e-05, "loss": 0.8578, "step": 9099 }, { "epoch": 10.364672364672364, "grad_norm": 0.16867879033088684, "learning_rate": 2.5151422460922263e-05, "loss": 0.8172, "step": 9100 }, { "epoch": 10.365811965811966, "grad_norm": 0.18571686744689941, "learning_rate": 2.5146763362651898e-05, "loss": 0.7812, "step": 9101 }, { "epoch": 10.366951566951567, "grad_norm": 0.2958218455314636, "learning_rate": 2.5142104259284048e-05, "loss": 0.4948, "step": 9102 }, { "epoch": 10.368091168091167, "grad_norm": 0.18356817960739136, "learning_rate": 2.513744515098051e-05, "loss": 0.6299, "step": 9103 }, { "epoch": 10.36923076923077, "grad_norm": 0.1769261211156845, "learning_rate": 2.5132786037903138e-05, "loss": 0.7487, "step": 9104 }, { "epoch": 10.37037037037037, "grad_norm": 0.20499293506145477, "learning_rate": 2.5128126920213736e-05, "loss": 0.6185, "step": 9105 }, { "epoch": 10.37150997150997, "grad_norm": 0.24470171332359314, "learning_rate": 2.5123467798074142e-05, "loss": 0.7261, "step": 9106 }, { "epoch": 10.372649572649573, "grad_norm": 0.1934887170791626, "learning_rate": 2.511880867164616e-05, "loss": 0.7357, "step": 9107 }, { "epoch": 10.373789173789174, "grad_norm": 0.2458781898021698, "learning_rate": 2.511414954109163e-05, "loss": 0.591, "step": 9108 }, { "epoch": 10.374928774928774, "grad_norm": 0.25309842824935913, "learning_rate": 2.510949040657238e-05, "loss": 0.7098, "step": 9109 }, { "epoch": 10.376068376068377, "grad_norm": 0.24694037437438965, "learning_rate": 2.5104831268250217e-05, "loss": 0.4802, "step": 9110 }, { "epoch": 10.377207977207977, "grad_norm": 0.3160017728805542, "learning_rate": 2.5100172126286986e-05, "loss": 0.8525, "step": 9111 }, { "epoch": 10.378347578347578, "grad_norm": 0.19555474817752838, "learning_rate": 2.5095512980844495e-05, "loss": 0.8472, "step": 9112 }, { "epoch": 10.37948717948718, "grad_norm": 0.2248135209083557, "learning_rate": 2.5090853832084576e-05, "loss": 0.5696, "step": 9113 }, { "epoch": 10.38062678062678, "grad_norm": 0.26771700382232666, "learning_rate": 2.508619468016905e-05, "loss": 0.5819, "step": 9114 }, { "epoch": 10.381766381766381, "grad_norm": 0.22154538333415985, "learning_rate": 2.5081535525259753e-05, "loss": 0.6595, "step": 9115 }, { "epoch": 10.382905982905983, "grad_norm": 0.1972038894891739, "learning_rate": 2.5076876367518497e-05, "loss": 0.6492, "step": 9116 }, { "epoch": 10.384045584045584, "grad_norm": 0.22136695683002472, "learning_rate": 2.5072217207107118e-05, "loss": 0.7124, "step": 9117 }, { "epoch": 10.385185185185184, "grad_norm": 0.19860419631004333, "learning_rate": 2.5067558044187434e-05, "loss": 0.5594, "step": 9118 }, { "epoch": 10.386324786324787, "grad_norm": 0.2616336941719055, "learning_rate": 2.506289887892127e-05, "loss": 0.533, "step": 9119 }, { "epoch": 10.387464387464387, "grad_norm": 0.23004023730754852, "learning_rate": 2.505823971147046e-05, "loss": 0.7366, "step": 9120 }, { "epoch": 10.388603988603988, "grad_norm": 0.28769731521606445, "learning_rate": 2.5053580541996825e-05, "loss": 0.7604, "step": 9121 }, { "epoch": 10.38974358974359, "grad_norm": 0.2059997171163559, "learning_rate": 2.5048921370662188e-05, "loss": 0.83, "step": 9122 }, { "epoch": 10.39088319088319, "grad_norm": 0.21634230017662048, "learning_rate": 2.504426219762838e-05, "loss": 0.781, "step": 9123 }, { "epoch": 10.392022792022791, "grad_norm": 0.19295218586921692, "learning_rate": 2.5039603023057224e-05, "loss": 0.804, "step": 9124 }, { "epoch": 10.393162393162394, "grad_norm": 0.24506190419197083, "learning_rate": 2.5034943847110543e-05, "loss": 0.6323, "step": 9125 }, { "epoch": 10.394301994301994, "grad_norm": 0.24567458033561707, "learning_rate": 2.5030284669950165e-05, "loss": 0.528, "step": 9126 }, { "epoch": 10.395441595441595, "grad_norm": 0.20807871222496033, "learning_rate": 2.5025625491737918e-05, "loss": 0.7562, "step": 9127 }, { "epoch": 10.396581196581197, "grad_norm": 0.18178433179855347, "learning_rate": 2.5020966312635636e-05, "loss": 0.7226, "step": 9128 }, { "epoch": 10.397720797720797, "grad_norm": 0.24313658475875854, "learning_rate": 2.501630713280513e-05, "loss": 0.5879, "step": 9129 }, { "epoch": 10.398860398860398, "grad_norm": 0.17432768642902374, "learning_rate": 2.5011647952408228e-05, "loss": 0.8408, "step": 9130 }, { "epoch": 10.4, "grad_norm": 0.2106270045042038, "learning_rate": 2.500698877160676e-05, "loss": 0.6269, "step": 9131 }, { "epoch": 10.401139601139601, "grad_norm": 0.1890728622674942, "learning_rate": 2.500232959056256e-05, "loss": 0.8182, "step": 9132 }, { "epoch": 10.402279202279201, "grad_norm": 0.15904033184051514, "learning_rate": 2.4997670409437442e-05, "loss": 0.7987, "step": 9133 }, { "epoch": 10.403418803418804, "grad_norm": 0.24167656898498535, "learning_rate": 2.4993011228393245e-05, "loss": 0.6376, "step": 9134 }, { "epoch": 10.404558404558404, "grad_norm": 0.26482895016670227, "learning_rate": 2.4988352047591778e-05, "loss": 0.5588, "step": 9135 }, { "epoch": 10.405698005698005, "grad_norm": 0.22393764555454254, "learning_rate": 2.4983692867194883e-05, "loss": 0.6064, "step": 9136 }, { "epoch": 10.406837606837607, "grad_norm": 0.19010251760482788, "learning_rate": 2.4979033687364377e-05, "loss": 0.8819, "step": 9137 }, { "epoch": 10.407977207977208, "grad_norm": 0.2721906304359436, "learning_rate": 2.4974374508262088e-05, "loss": 0.4509, "step": 9138 }, { "epoch": 10.40911680911681, "grad_norm": 0.17516137659549713, "learning_rate": 2.4969715330049837e-05, "loss": 0.8094, "step": 9139 }, { "epoch": 10.41025641025641, "grad_norm": 0.19217105209827423, "learning_rate": 2.496505615288946e-05, "loss": 0.7668, "step": 9140 }, { "epoch": 10.411396011396011, "grad_norm": 0.18340454995632172, "learning_rate": 2.4960396976942785e-05, "loss": 0.8836, "step": 9141 }, { "epoch": 10.412535612535613, "grad_norm": 0.2205548882484436, "learning_rate": 2.4955737802371624e-05, "loss": 0.73, "step": 9142 }, { "epoch": 10.413675213675214, "grad_norm": 0.2542133629322052, "learning_rate": 2.4951078629337818e-05, "loss": 0.5596, "step": 9143 }, { "epoch": 10.414814814814815, "grad_norm": 0.18789978325366974, "learning_rate": 2.494641945800318e-05, "loss": 0.7735, "step": 9144 }, { "epoch": 10.415954415954417, "grad_norm": 0.22405776381492615, "learning_rate": 2.494176028852955e-05, "loss": 0.7452, "step": 9145 }, { "epoch": 10.417094017094017, "grad_norm": 0.24683396518230438, "learning_rate": 2.4937101121078733e-05, "loss": 0.6412, "step": 9146 }, { "epoch": 10.418233618233618, "grad_norm": 0.22049246728420258, "learning_rate": 2.493244195581258e-05, "loss": 0.5547, "step": 9147 }, { "epoch": 10.41937321937322, "grad_norm": 0.224561870098114, "learning_rate": 2.492778279289289e-05, "loss": 0.6858, "step": 9148 }, { "epoch": 10.42051282051282, "grad_norm": 0.25088754296302795, "learning_rate": 2.4923123632481516e-05, "loss": 0.6639, "step": 9149 }, { "epoch": 10.421652421652421, "grad_norm": 0.22070088982582092, "learning_rate": 2.4918464474740246e-05, "loss": 0.7119, "step": 9150 }, { "epoch": 10.422792022792024, "grad_norm": 0.21628473699092865, "learning_rate": 2.4913805319830953e-05, "loss": 0.7809, "step": 9151 }, { "epoch": 10.423931623931624, "grad_norm": 0.18613789975643158, "learning_rate": 2.4909146167915426e-05, "loss": 0.7631, "step": 9152 }, { "epoch": 10.425071225071225, "grad_norm": 0.20988789200782776, "learning_rate": 2.490448701915551e-05, "loss": 0.7389, "step": 9153 }, { "epoch": 10.426210826210827, "grad_norm": 0.20752722024917603, "learning_rate": 2.489982787371302e-05, "loss": 0.8322, "step": 9154 }, { "epoch": 10.427350427350428, "grad_norm": 0.2517094612121582, "learning_rate": 2.4895168731749785e-05, "loss": 0.7053, "step": 9155 }, { "epoch": 10.428490028490028, "grad_norm": 0.26300299167633057, "learning_rate": 2.4890509593427623e-05, "loss": 0.5408, "step": 9156 }, { "epoch": 10.42962962962963, "grad_norm": 0.21144193410873413, "learning_rate": 2.488585045890837e-05, "loss": 0.56, "step": 9157 }, { "epoch": 10.430769230769231, "grad_norm": 0.3269287943840027, "learning_rate": 2.4881191328353844e-05, "loss": 0.6435, "step": 9158 }, { "epoch": 10.431908831908832, "grad_norm": 0.21544551849365234, "learning_rate": 2.487653220192587e-05, "loss": 0.8135, "step": 9159 }, { "epoch": 10.433048433048434, "grad_norm": 0.2292126566171646, "learning_rate": 2.4871873079786266e-05, "loss": 0.6312, "step": 9160 }, { "epoch": 10.434188034188034, "grad_norm": 0.19939513504505157, "learning_rate": 2.486721396209686e-05, "loss": 0.7441, "step": 9161 }, { "epoch": 10.435327635327635, "grad_norm": 0.240824356675148, "learning_rate": 2.4862554849019486e-05, "loss": 0.8579, "step": 9162 }, { "epoch": 10.436467236467237, "grad_norm": 0.18286165595054626, "learning_rate": 2.4857895740715958e-05, "loss": 0.6545, "step": 9163 }, { "epoch": 10.437606837606838, "grad_norm": 0.20635467767715454, "learning_rate": 2.4853236637348108e-05, "loss": 0.71, "step": 9164 }, { "epoch": 10.438746438746438, "grad_norm": 0.16474197804927826, "learning_rate": 2.4848577539077743e-05, "loss": 0.9268, "step": 9165 }, { "epoch": 10.43988603988604, "grad_norm": 0.29166439175605774, "learning_rate": 2.4843918446066703e-05, "loss": 0.5326, "step": 9166 }, { "epoch": 10.441025641025641, "grad_norm": 0.2056402862071991, "learning_rate": 2.48392593584768e-05, "loss": 0.695, "step": 9167 }, { "epoch": 10.442165242165242, "grad_norm": 0.25406187772750854, "learning_rate": 2.4834600276469868e-05, "loss": 0.5937, "step": 9168 }, { "epoch": 10.443304843304844, "grad_norm": 0.2251429706811905, "learning_rate": 2.482994120020772e-05, "loss": 0.6429, "step": 9169 }, { "epoch": 10.444444444444445, "grad_norm": 0.19202420115470886, "learning_rate": 2.4825282129852185e-05, "loss": 0.8191, "step": 9170 }, { "epoch": 10.445584045584045, "grad_norm": 0.2481834441423416, "learning_rate": 2.4820623065565074e-05, "loss": 0.6438, "step": 9171 }, { "epoch": 10.446723646723648, "grad_norm": 0.20098984241485596, "learning_rate": 2.4815964007508214e-05, "loss": 0.7943, "step": 9172 }, { "epoch": 10.447863247863248, "grad_norm": 0.2452813982963562, "learning_rate": 2.4811304955843442e-05, "loss": 0.4899, "step": 9173 }, { "epoch": 10.449002849002849, "grad_norm": 0.22470121085643768, "learning_rate": 2.480664591073256e-05, "loss": 0.7734, "step": 9174 }, { "epoch": 10.450142450142451, "grad_norm": 0.20485779643058777, "learning_rate": 2.4801986872337405e-05, "loss": 0.729, "step": 9175 }, { "epoch": 10.451282051282051, "grad_norm": 0.1785956621170044, "learning_rate": 2.4797327840819786e-05, "loss": 0.9572, "step": 9176 }, { "epoch": 10.452421652421652, "grad_norm": 0.1983714997768402, "learning_rate": 2.479266881634153e-05, "loss": 0.7518, "step": 9177 }, { "epoch": 10.453561253561254, "grad_norm": 0.2530549168586731, "learning_rate": 2.478800979906445e-05, "loss": 0.5448, "step": 9178 }, { "epoch": 10.454700854700855, "grad_norm": 0.18680530786514282, "learning_rate": 2.4783350789150382e-05, "loss": 0.7539, "step": 9179 }, { "epoch": 10.455840455840455, "grad_norm": 0.18843460083007812, "learning_rate": 2.477869178676113e-05, "loss": 0.8563, "step": 9180 }, { "epoch": 10.456980056980058, "grad_norm": 0.1869039237499237, "learning_rate": 2.4774032792058528e-05, "loss": 0.7921, "step": 9181 }, { "epoch": 10.458119658119658, "grad_norm": 0.19671130180358887, "learning_rate": 2.476937380520438e-05, "loss": 0.6853, "step": 9182 }, { "epoch": 10.459259259259259, "grad_norm": 0.16532261669635773, "learning_rate": 2.476471482636051e-05, "loss": 0.8053, "step": 9183 }, { "epoch": 10.460398860398861, "grad_norm": 0.20217740535736084, "learning_rate": 2.476005585568875e-05, "loss": 0.6541, "step": 9184 }, { "epoch": 10.461538461538462, "grad_norm": 0.2208511084318161, "learning_rate": 2.475539689335091e-05, "loss": 0.6965, "step": 9185 }, { "epoch": 10.462678062678062, "grad_norm": 0.2560354471206665, "learning_rate": 2.4750737939508807e-05, "loss": 0.5133, "step": 9186 }, { "epoch": 10.463817663817665, "grad_norm": 0.24244089424610138, "learning_rate": 2.4746078994324262e-05, "loss": 0.8707, "step": 9187 }, { "epoch": 10.464957264957265, "grad_norm": 0.21213071048259735, "learning_rate": 2.4741420057959095e-05, "loss": 0.6993, "step": 9188 }, { "epoch": 10.466096866096866, "grad_norm": 0.25305524468421936, "learning_rate": 2.4736761130575113e-05, "loss": 0.5913, "step": 9189 }, { "epoch": 10.467236467236468, "grad_norm": 0.2465902715921402, "learning_rate": 2.473210221233415e-05, "loss": 0.523, "step": 9190 }, { "epoch": 10.468376068376068, "grad_norm": 0.1970120519399643, "learning_rate": 2.4727443303398004e-05, "loss": 0.772, "step": 9191 }, { "epoch": 10.469515669515669, "grad_norm": 0.2229175716638565, "learning_rate": 2.4722784403928507e-05, "loss": 0.4972, "step": 9192 }, { "epoch": 10.470655270655271, "grad_norm": 0.1660129576921463, "learning_rate": 2.471812551408747e-05, "loss": 0.7344, "step": 9193 }, { "epoch": 10.471794871794872, "grad_norm": 0.24172258377075195, "learning_rate": 2.4713466634036697e-05, "loss": 0.6205, "step": 9194 }, { "epoch": 10.472934472934472, "grad_norm": 0.1414172202348709, "learning_rate": 2.4708807763938025e-05, "loss": 0.8975, "step": 9195 }, { "epoch": 10.474074074074075, "grad_norm": 0.21612918376922607, "learning_rate": 2.4704148903953267e-05, "loss": 0.7639, "step": 9196 }, { "epoch": 10.475213675213675, "grad_norm": 0.27035245299339294, "learning_rate": 2.4699490054244223e-05, "loss": 0.5958, "step": 9197 }, { "epoch": 10.476353276353276, "grad_norm": 0.25695329904556274, "learning_rate": 2.469483121497272e-05, "loss": 0.6122, "step": 9198 }, { "epoch": 10.477492877492878, "grad_norm": 0.2563282251358032, "learning_rate": 2.4690172386300566e-05, "loss": 0.5376, "step": 9199 }, { "epoch": 10.478632478632479, "grad_norm": 0.3040502071380615, "learning_rate": 2.4685513568389582e-05, "loss": 0.5579, "step": 9200 }, { "epoch": 10.47977207977208, "grad_norm": 0.20318159461021423, "learning_rate": 2.468085476140157e-05, "loss": 0.8939, "step": 9201 }, { "epoch": 10.480911680911682, "grad_norm": 0.18470041453838348, "learning_rate": 2.4676195965498354e-05, "loss": 0.7726, "step": 9202 }, { "epoch": 10.482051282051282, "grad_norm": 0.2489010989665985, "learning_rate": 2.4671537180841738e-05, "loss": 0.5073, "step": 9203 }, { "epoch": 10.483190883190883, "grad_norm": 0.18530192971229553, "learning_rate": 2.4666878407593548e-05, "loss": 0.5438, "step": 9204 }, { "epoch": 10.484330484330485, "grad_norm": 0.2398802638053894, "learning_rate": 2.466221964591557e-05, "loss": 0.6481, "step": 9205 }, { "epoch": 10.485470085470086, "grad_norm": 0.3169020414352417, "learning_rate": 2.4657560895969642e-05, "loss": 0.6789, "step": 9206 }, { "epoch": 10.486609686609686, "grad_norm": 0.2191244512796402, "learning_rate": 2.465290215791757e-05, "loss": 0.7045, "step": 9207 }, { "epoch": 10.487749287749288, "grad_norm": 0.2541128396987915, "learning_rate": 2.4648243431921155e-05, "loss": 0.6094, "step": 9208 }, { "epoch": 10.488888888888889, "grad_norm": 0.18173936009407043, "learning_rate": 2.4643584718142222e-05, "loss": 0.7499, "step": 9209 }, { "epoch": 10.49002849002849, "grad_norm": 0.22637872397899628, "learning_rate": 2.4638926016742565e-05, "loss": 0.7103, "step": 9210 }, { "epoch": 10.491168091168092, "grad_norm": 0.2989262640476227, "learning_rate": 2.4634267327884003e-05, "loss": 0.5129, "step": 9211 }, { "epoch": 10.492307692307692, "grad_norm": 0.31442877650260925, "learning_rate": 2.4629608651728336e-05, "loss": 0.7376, "step": 9212 }, { "epoch": 10.493447293447293, "grad_norm": 0.26555660367012024, "learning_rate": 2.4624949988437386e-05, "loss": 0.5071, "step": 9213 }, { "epoch": 10.494586894586895, "grad_norm": 0.22290778160095215, "learning_rate": 2.462029133817295e-05, "loss": 0.6802, "step": 9214 }, { "epoch": 10.495726495726496, "grad_norm": 0.1604098081588745, "learning_rate": 2.4615632701096847e-05, "loss": 0.8559, "step": 9215 }, { "epoch": 10.496866096866096, "grad_norm": 0.26704758405685425, "learning_rate": 2.4610974077370867e-05, "loss": 0.4808, "step": 9216 }, { "epoch": 10.498005698005699, "grad_norm": 0.2803448438644409, "learning_rate": 2.4606315467156827e-05, "loss": 0.398, "step": 9217 }, { "epoch": 10.4991452991453, "grad_norm": 0.26626428961753845, "learning_rate": 2.4601656870616545e-05, "loss": 0.656, "step": 9218 }, { "epoch": 10.5002849002849, "grad_norm": 0.18743109703063965, "learning_rate": 2.4596998287911804e-05, "loss": 0.9127, "step": 9219 }, { "epoch": 10.501424501424502, "grad_norm": 0.18646414577960968, "learning_rate": 2.459233971920443e-05, "loss": 0.7461, "step": 9220 }, { "epoch": 10.502564102564103, "grad_norm": 0.19627082347869873, "learning_rate": 2.458768116465621e-05, "loss": 0.7265, "step": 9221 }, { "epoch": 10.503703703703703, "grad_norm": 0.2861216068267822, "learning_rate": 2.4583022624428967e-05, "loss": 0.4033, "step": 9222 }, { "epoch": 10.504843304843305, "grad_norm": 0.2405533343553543, "learning_rate": 2.4578364098684488e-05, "loss": 0.5667, "step": 9223 }, { "epoch": 10.505982905982906, "grad_norm": 0.22812910377979279, "learning_rate": 2.4573705587584586e-05, "loss": 0.7377, "step": 9224 }, { "epoch": 10.507122507122507, "grad_norm": 0.21617454290390015, "learning_rate": 2.456904709129106e-05, "loss": 0.4541, "step": 9225 }, { "epoch": 10.508262108262109, "grad_norm": 0.1916038990020752, "learning_rate": 2.4564388609965717e-05, "loss": 0.7945, "step": 9226 }, { "epoch": 10.50940170940171, "grad_norm": 0.23479774594306946, "learning_rate": 2.4559730143770345e-05, "loss": 0.6864, "step": 9227 }, { "epoch": 10.51054131054131, "grad_norm": 0.2074567824602127, "learning_rate": 2.455507169286676e-05, "loss": 0.7917, "step": 9228 }, { "epoch": 10.511680911680912, "grad_norm": 0.23633286356925964, "learning_rate": 2.455041325741676e-05, "loss": 0.6294, "step": 9229 }, { "epoch": 10.512820512820513, "grad_norm": 0.19726577401161194, "learning_rate": 2.454575483758215e-05, "loss": 0.7558, "step": 9230 }, { "epoch": 10.513960113960113, "grad_norm": 0.18224595487117767, "learning_rate": 2.4541096433524717e-05, "loss": 0.9511, "step": 9231 }, { "epoch": 10.515099715099716, "grad_norm": 0.26174822449684143, "learning_rate": 2.453643804540627e-05, "loss": 0.6631, "step": 9232 }, { "epoch": 10.516239316239316, "grad_norm": 0.25406506657600403, "learning_rate": 2.45317796733886e-05, "loss": 0.5523, "step": 9233 }, { "epoch": 10.517378917378917, "grad_norm": 0.16637858748435974, "learning_rate": 2.4527121317633518e-05, "loss": 0.6464, "step": 9234 }, { "epoch": 10.518518518518519, "grad_norm": 0.2870800495147705, "learning_rate": 2.4522462978302807e-05, "loss": 0.6354, "step": 9235 }, { "epoch": 10.51965811965812, "grad_norm": 0.22352823615074158, "learning_rate": 2.4517804655558275e-05, "loss": 0.5128, "step": 9236 }, { "epoch": 10.52079772079772, "grad_norm": 0.2656743824481964, "learning_rate": 2.451314634956171e-05, "loss": 0.653, "step": 9237 }, { "epoch": 10.521937321937322, "grad_norm": 0.21166376769542694, "learning_rate": 2.4508488060474898e-05, "loss": 0.8678, "step": 9238 }, { "epoch": 10.523076923076923, "grad_norm": 0.21826742589473724, "learning_rate": 2.4503829788459664e-05, "loss": 0.6687, "step": 9239 }, { "epoch": 10.524216524216524, "grad_norm": 0.20269319415092468, "learning_rate": 2.4499171533677782e-05, "loss": 0.8563, "step": 9240 }, { "epoch": 10.525356125356126, "grad_norm": 0.20262055099010468, "learning_rate": 2.4494513296291054e-05, "loss": 0.625, "step": 9241 }, { "epoch": 10.526495726495726, "grad_norm": 0.218480184674263, "learning_rate": 2.4489855076461262e-05, "loss": 0.8716, "step": 9242 }, { "epoch": 10.527635327635327, "grad_norm": 0.23494336009025574, "learning_rate": 2.4485196874350213e-05, "loss": 0.6349, "step": 9243 }, { "epoch": 10.52877492877493, "grad_norm": 0.19913607835769653, "learning_rate": 2.4480538690119685e-05, "loss": 0.4072, "step": 9244 }, { "epoch": 10.52991452991453, "grad_norm": 0.25605538487434387, "learning_rate": 2.4475880523931487e-05, "loss": 0.5784, "step": 9245 }, { "epoch": 10.53105413105413, "grad_norm": 0.23360183835029602, "learning_rate": 2.447122237594739e-05, "loss": 0.5614, "step": 9246 }, { "epoch": 10.532193732193733, "grad_norm": 0.22565987706184387, "learning_rate": 2.4466564246329197e-05, "loss": 0.6422, "step": 9247 }, { "epoch": 10.533333333333333, "grad_norm": 0.2499256432056427, "learning_rate": 2.4461906135238693e-05, "loss": 0.664, "step": 9248 }, { "epoch": 10.534472934472934, "grad_norm": 0.29015761613845825, "learning_rate": 2.4457248042837663e-05, "loss": 0.4316, "step": 9249 }, { "epoch": 10.535612535612536, "grad_norm": 0.20617923140525818, "learning_rate": 2.4452589969287913e-05, "loss": 0.7275, "step": 9250 }, { "epoch": 10.536752136752137, "grad_norm": 0.291335791349411, "learning_rate": 2.4447931914751214e-05, "loss": 0.6367, "step": 9251 }, { "epoch": 10.537891737891737, "grad_norm": 0.18563072383403778, "learning_rate": 2.444327387938936e-05, "loss": 0.8131, "step": 9252 }, { "epoch": 10.53903133903134, "grad_norm": 0.17296385765075684, "learning_rate": 2.443861586336413e-05, "loss": 0.7935, "step": 9253 }, { "epoch": 10.54017094017094, "grad_norm": 0.1931563913822174, "learning_rate": 2.4433957866837322e-05, "loss": 0.7372, "step": 9254 }, { "epoch": 10.54131054131054, "grad_norm": 0.22677938640117645, "learning_rate": 2.4429299889970708e-05, "loss": 0.7651, "step": 9255 }, { "epoch": 10.542450142450143, "grad_norm": 0.20666667819023132, "learning_rate": 2.4424641932926085e-05, "loss": 0.6628, "step": 9256 }, { "epoch": 10.543589743589743, "grad_norm": 0.2504830062389374, "learning_rate": 2.4419983995865222e-05, "loss": 0.8023, "step": 9257 }, { "epoch": 10.544729344729344, "grad_norm": 0.18859297037124634, "learning_rate": 2.441532607894992e-05, "loss": 0.7016, "step": 9258 }, { "epoch": 10.545868945868946, "grad_norm": 0.19908884167671204, "learning_rate": 2.441066818234194e-05, "loss": 0.7568, "step": 9259 }, { "epoch": 10.547008547008547, "grad_norm": 0.23446115851402283, "learning_rate": 2.4406010306203068e-05, "loss": 0.5168, "step": 9260 }, { "epoch": 10.548148148148147, "grad_norm": 0.21079429984092712, "learning_rate": 2.440135245069511e-05, "loss": 0.8225, "step": 9261 }, { "epoch": 10.54928774928775, "grad_norm": 0.24125830829143524, "learning_rate": 2.4396694615979815e-05, "loss": 0.6538, "step": 9262 }, { "epoch": 10.55042735042735, "grad_norm": 0.2429075837135315, "learning_rate": 2.4392036802218983e-05, "loss": 0.5568, "step": 9263 }, { "epoch": 10.55156695156695, "grad_norm": 0.177407369017601, "learning_rate": 2.438737900957438e-05, "loss": 0.8232, "step": 9264 }, { "epoch": 10.552706552706553, "grad_norm": 0.2627646028995514, "learning_rate": 2.4382721238207796e-05, "loss": 0.5178, "step": 9265 }, { "epoch": 10.553846153846154, "grad_norm": 0.26185911893844604, "learning_rate": 2.4378063488280994e-05, "loss": 0.4895, "step": 9266 }, { "epoch": 10.554985754985754, "grad_norm": 0.20316262543201447, "learning_rate": 2.437340575995576e-05, "loss": 0.721, "step": 9267 }, { "epoch": 10.556125356125357, "grad_norm": 0.26230934262275696, "learning_rate": 2.4368748053393868e-05, "loss": 0.6229, "step": 9268 }, { "epoch": 10.557264957264957, "grad_norm": 0.19705653190612793, "learning_rate": 2.4364090368757093e-05, "loss": 0.73, "step": 9269 }, { "epoch": 10.558404558404558, "grad_norm": 0.24998103082180023, "learning_rate": 2.43594327062072e-05, "loss": 0.6415, "step": 9270 }, { "epoch": 10.55954415954416, "grad_norm": 0.19474878907203674, "learning_rate": 2.4354775065905968e-05, "loss": 0.6854, "step": 9271 }, { "epoch": 10.56068376068376, "grad_norm": 0.23544877767562866, "learning_rate": 2.435011744801518e-05, "loss": 0.7466, "step": 9272 }, { "epoch": 10.561823361823361, "grad_norm": 0.22600121796131134, "learning_rate": 2.4345459852696603e-05, "loss": 0.3899, "step": 9273 }, { "epoch": 10.562962962962963, "grad_norm": 0.25168585777282715, "learning_rate": 2.4340802280112e-05, "loss": 0.6318, "step": 9274 }, { "epoch": 10.564102564102564, "grad_norm": 0.19967123866081238, "learning_rate": 2.4336144730423154e-05, "loss": 0.5816, "step": 9275 }, { "epoch": 10.565242165242164, "grad_norm": 0.18906304240226746, "learning_rate": 2.433148720379182e-05, "loss": 0.7827, "step": 9276 }, { "epoch": 10.566381766381767, "grad_norm": 0.2090078741312027, "learning_rate": 2.432682970037978e-05, "loss": 0.679, "step": 9277 }, { "epoch": 10.567521367521367, "grad_norm": 0.21482527256011963, "learning_rate": 2.432217222034879e-05, "loss": 0.7569, "step": 9278 }, { "epoch": 10.568660968660968, "grad_norm": 0.2805091440677643, "learning_rate": 2.431751476386063e-05, "loss": 0.5128, "step": 9279 }, { "epoch": 10.56980056980057, "grad_norm": 0.18089546263217926, "learning_rate": 2.4312857331077052e-05, "loss": 1.0565, "step": 9280 }, { "epoch": 10.57094017094017, "grad_norm": 0.202863410115242, "learning_rate": 2.4308199922159833e-05, "loss": 0.6263, "step": 9281 }, { "epoch": 10.572079772079771, "grad_norm": 0.2146240621805191, "learning_rate": 2.4303542537270725e-05, "loss": 0.7396, "step": 9282 }, { "epoch": 10.573219373219374, "grad_norm": 0.23256784677505493, "learning_rate": 2.4298885176571503e-05, "loss": 0.7889, "step": 9283 }, { "epoch": 10.574358974358974, "grad_norm": 0.15683580935001373, "learning_rate": 2.4294227840223933e-05, "loss": 0.7822, "step": 9284 }, { "epoch": 10.575498575498575, "grad_norm": 0.23500467836856842, "learning_rate": 2.4289570528389764e-05, "loss": 0.4418, "step": 9285 }, { "epoch": 10.576638176638177, "grad_norm": 0.1923016458749771, "learning_rate": 2.4284913241230773e-05, "loss": 0.8286, "step": 9286 }, { "epoch": 10.577777777777778, "grad_norm": 0.20705291628837585, "learning_rate": 2.42802559789087e-05, "loss": 0.6979, "step": 9287 }, { "epoch": 10.578917378917378, "grad_norm": 0.2262626737356186, "learning_rate": 2.427559874158533e-05, "loss": 0.6143, "step": 9288 }, { "epoch": 10.58005698005698, "grad_norm": 0.16545400023460388, "learning_rate": 2.4270941529422396e-05, "loss": 0.81, "step": 9289 }, { "epoch": 10.581196581196581, "grad_norm": 0.22591204941272736, "learning_rate": 2.4266284342581677e-05, "loss": 0.6341, "step": 9290 }, { "epoch": 10.582336182336181, "grad_norm": 0.2036348134279251, "learning_rate": 2.4261627181224914e-05, "loss": 0.6356, "step": 9291 }, { "epoch": 10.583475783475784, "grad_norm": 0.25399860739707947, "learning_rate": 2.4256970045513875e-05, "loss": 0.619, "step": 9292 }, { "epoch": 10.584615384615384, "grad_norm": 0.17429670691490173, "learning_rate": 2.42523129356103e-05, "loss": 0.759, "step": 9293 }, { "epoch": 10.585754985754985, "grad_norm": 0.22275413572788239, "learning_rate": 2.4247655851675953e-05, "loss": 0.6622, "step": 9294 }, { "epoch": 10.586894586894587, "grad_norm": 0.26823070645332336, "learning_rate": 2.42429987938726e-05, "loss": 0.3567, "step": 9295 }, { "epoch": 10.588034188034188, "grad_norm": 0.20424869656562805, "learning_rate": 2.423834176236197e-05, "loss": 0.7477, "step": 9296 }, { "epoch": 10.589173789173788, "grad_norm": 0.2231968194246292, "learning_rate": 2.4233684757305833e-05, "loss": 0.8067, "step": 9297 }, { "epoch": 10.59031339031339, "grad_norm": 0.19134043157100677, "learning_rate": 2.4229027778865923e-05, "loss": 0.7333, "step": 9298 }, { "epoch": 10.591452991452991, "grad_norm": 0.22105371952056885, "learning_rate": 2.4224370827204007e-05, "loss": 0.6057, "step": 9299 }, { "epoch": 10.592592592592592, "grad_norm": 0.17917677760124207, "learning_rate": 2.4219713902481818e-05, "loss": 0.7783, "step": 9300 }, { "epoch": 10.593732193732194, "grad_norm": 0.22497954964637756, "learning_rate": 2.421505700486112e-05, "loss": 0.5128, "step": 9301 }, { "epoch": 10.594871794871795, "grad_norm": 0.2308385968208313, "learning_rate": 2.4210400134503643e-05, "loss": 0.6799, "step": 9302 }, { "epoch": 10.596011396011395, "grad_norm": 0.20483887195587158, "learning_rate": 2.4205743291571145e-05, "loss": 0.7108, "step": 9303 }, { "epoch": 10.597150997150997, "grad_norm": 0.2276771366596222, "learning_rate": 2.4201086476225358e-05, "loss": 0.6361, "step": 9304 }, { "epoch": 10.598290598290598, "grad_norm": 0.1998155415058136, "learning_rate": 2.4196429688628035e-05, "loss": 0.7152, "step": 9305 }, { "epoch": 10.5994301994302, "grad_norm": 0.23089168965816498, "learning_rate": 2.4191772928940933e-05, "loss": 0.7714, "step": 9306 }, { "epoch": 10.6005698005698, "grad_norm": 0.21779486536979675, "learning_rate": 2.4187116197325766e-05, "loss": 0.867, "step": 9307 }, { "epoch": 10.601709401709401, "grad_norm": 0.20363807678222656, "learning_rate": 2.4182459493944294e-05, "loss": 0.7719, "step": 9308 }, { "epoch": 10.602849002849004, "grad_norm": 0.1687886267900467, "learning_rate": 2.4177802818958257e-05, "loss": 0.8526, "step": 9309 }, { "epoch": 10.603988603988604, "grad_norm": 0.17591343820095062, "learning_rate": 2.4173146172529383e-05, "loss": 0.7253, "step": 9310 }, { "epoch": 10.605128205128205, "grad_norm": 0.22673729062080383, "learning_rate": 2.416848955481942e-05, "loss": 0.5707, "step": 9311 }, { "epoch": 10.606267806267807, "grad_norm": 0.20079649984836578, "learning_rate": 2.4163832965990097e-05, "loss": 0.7127, "step": 9312 }, { "epoch": 10.607407407407408, "grad_norm": 0.23676098883152008, "learning_rate": 2.415917640620316e-05, "loss": 0.556, "step": 9313 }, { "epoch": 10.608547008547008, "grad_norm": 0.24559113383293152, "learning_rate": 2.4154519875620332e-05, "loss": 0.6615, "step": 9314 }, { "epoch": 10.60968660968661, "grad_norm": 0.2337861955165863, "learning_rate": 2.414986337440335e-05, "loss": 0.7069, "step": 9315 }, { "epoch": 10.610826210826211, "grad_norm": 0.23466922342777252, "learning_rate": 2.4145206902713963e-05, "loss": 0.5664, "step": 9316 }, { "epoch": 10.611965811965812, "grad_norm": 0.2164163887500763, "learning_rate": 2.4140550460713885e-05, "loss": 0.7887, "step": 9317 }, { "epoch": 10.613105413105414, "grad_norm": 0.22938133776187897, "learning_rate": 2.4135894048564855e-05, "loss": 0.7338, "step": 9318 }, { "epoch": 10.614245014245014, "grad_norm": 0.16698004305362701, "learning_rate": 2.41312376664286e-05, "loss": 0.7036, "step": 9319 }, { "epoch": 10.615384615384615, "grad_norm": 0.20773845911026, "learning_rate": 2.4126581314466855e-05, "loss": 0.7996, "step": 9320 }, { "epoch": 10.616524216524217, "grad_norm": 0.18643100559711456, "learning_rate": 2.4121924992841334e-05, "loss": 0.7027, "step": 9321 }, { "epoch": 10.617663817663818, "grad_norm": 0.2112257182598114, "learning_rate": 2.411726870171378e-05, "loss": 0.6154, "step": 9322 }, { "epoch": 10.618803418803418, "grad_norm": 0.21273544430732727, "learning_rate": 2.4112612441245906e-05, "loss": 0.8325, "step": 9323 }, { "epoch": 10.61994301994302, "grad_norm": 0.24942681193351746, "learning_rate": 2.410795621159945e-05, "loss": 0.6568, "step": 9324 }, { "epoch": 10.621082621082621, "grad_norm": 0.19099989533424377, "learning_rate": 2.4103300012936118e-05, "loss": 0.8224, "step": 9325 }, { "epoch": 10.622222222222222, "grad_norm": 0.16613510251045227, "learning_rate": 2.409864384541764e-05, "loss": 0.8944, "step": 9326 }, { "epoch": 10.623361823361824, "grad_norm": 0.2000277191400528, "learning_rate": 2.409398770920575e-05, "loss": 0.7425, "step": 9327 }, { "epoch": 10.624501424501425, "grad_norm": 0.23993152379989624, "learning_rate": 2.408933160446215e-05, "loss": 0.8044, "step": 9328 }, { "epoch": 10.625641025641025, "grad_norm": 0.15128369629383087, "learning_rate": 2.4084675531348577e-05, "loss": 0.7737, "step": 9329 }, { "epoch": 10.626780626780628, "grad_norm": 0.2629297375679016, "learning_rate": 2.4080019490026736e-05, "loss": 0.5529, "step": 9330 }, { "epoch": 10.627920227920228, "grad_norm": 0.17327046394348145, "learning_rate": 2.4075363480658352e-05, "loss": 0.9431, "step": 9331 }, { "epoch": 10.629059829059829, "grad_norm": 0.19449730217456818, "learning_rate": 2.407070750340513e-05, "loss": 0.6353, "step": 9332 }, { "epoch": 10.630199430199431, "grad_norm": 0.20580913126468658, "learning_rate": 2.4066051558428797e-05, "loss": 0.5459, "step": 9333 }, { "epoch": 10.631339031339031, "grad_norm": 0.2111603319644928, "learning_rate": 2.4061395645891058e-05, "loss": 0.7031, "step": 9334 }, { "epoch": 10.632478632478632, "grad_norm": 0.2038755714893341, "learning_rate": 2.4056739765953635e-05, "loss": 0.8113, "step": 9335 }, { "epoch": 10.633618233618234, "grad_norm": 0.2601739168167114, "learning_rate": 2.405208391877823e-05, "loss": 0.5135, "step": 9336 }, { "epoch": 10.634757834757835, "grad_norm": 0.22661654651165009, "learning_rate": 2.404742810452655e-05, "loss": 0.4459, "step": 9337 }, { "epoch": 10.635897435897435, "grad_norm": 0.17968492209911346, "learning_rate": 2.4042772323360323e-05, "loss": 0.7283, "step": 9338 }, { "epoch": 10.637037037037038, "grad_norm": 0.21282139420509338, "learning_rate": 2.4038116575441235e-05, "loss": 0.7586, "step": 9339 }, { "epoch": 10.638176638176638, "grad_norm": 0.23090216517448425, "learning_rate": 2.4033460860931013e-05, "loss": 0.772, "step": 9340 }, { "epoch": 10.639316239316239, "grad_norm": 0.2081783413887024, "learning_rate": 2.402880517999135e-05, "loss": 0.7908, "step": 9341 }, { "epoch": 10.640455840455841, "grad_norm": 0.21366605162620544, "learning_rate": 2.4024149532783958e-05, "loss": 0.7416, "step": 9342 }, { "epoch": 10.641595441595442, "grad_norm": 0.1726526916027069, "learning_rate": 2.401949391947053e-05, "loss": 0.5983, "step": 9343 }, { "epoch": 10.642735042735042, "grad_norm": 0.19404564797878265, "learning_rate": 2.4014838340212782e-05, "loss": 0.8304, "step": 9344 }, { "epoch": 10.643874643874645, "grad_norm": 0.25574666261672974, "learning_rate": 2.40101827951724e-05, "loss": 0.6129, "step": 9345 }, { "epoch": 10.645014245014245, "grad_norm": 0.268744558095932, "learning_rate": 2.40055272845111e-05, "loss": 0.7402, "step": 9346 }, { "epoch": 10.646153846153846, "grad_norm": 0.22246092557907104, "learning_rate": 2.4000871808390562e-05, "loss": 0.6153, "step": 9347 }, { "epoch": 10.647293447293448, "grad_norm": 0.20332017540931702, "learning_rate": 2.399621636697249e-05, "loss": 0.7302, "step": 9348 }, { "epoch": 10.648433048433048, "grad_norm": 0.22714626789093018, "learning_rate": 2.399156096041859e-05, "loss": 0.548, "step": 9349 }, { "epoch": 10.649572649572649, "grad_norm": 0.21403177082538605, "learning_rate": 2.3986905588890555e-05, "loss": 0.5772, "step": 9350 }, { "epoch": 10.650712250712251, "grad_norm": 0.16222523152828217, "learning_rate": 2.3982250252550072e-05, "loss": 1.0908, "step": 9351 }, { "epoch": 10.651851851851852, "grad_norm": 0.2693890929222107, "learning_rate": 2.397759495155884e-05, "loss": 0.5959, "step": 9352 }, { "epoch": 10.652991452991452, "grad_norm": 0.22275036573410034, "learning_rate": 2.3972939686078543e-05, "loss": 0.8052, "step": 9353 }, { "epoch": 10.654131054131055, "grad_norm": 0.21302983164787292, "learning_rate": 2.396828445627088e-05, "loss": 0.6546, "step": 9354 }, { "epoch": 10.655270655270655, "grad_norm": 0.22570183873176575, "learning_rate": 2.3963629262297533e-05, "loss": 0.7182, "step": 9355 }, { "epoch": 10.656410256410256, "grad_norm": 0.2311042696237564, "learning_rate": 2.3958974104320193e-05, "loss": 0.721, "step": 9356 }, { "epoch": 10.657549857549858, "grad_norm": 0.2063482701778412, "learning_rate": 2.395431898250054e-05, "loss": 0.8842, "step": 9357 }, { "epoch": 10.658689458689459, "grad_norm": 0.22663186490535736, "learning_rate": 2.3949663897000272e-05, "loss": 0.5803, "step": 9358 }, { "epoch": 10.65982905982906, "grad_norm": 0.21537287533283234, "learning_rate": 2.3945008847981052e-05, "loss": 0.7621, "step": 9359 }, { "epoch": 10.660968660968662, "grad_norm": 0.18531012535095215, "learning_rate": 2.3940353835604582e-05, "loss": 0.8327, "step": 9360 }, { "epoch": 10.662108262108262, "grad_norm": 0.18514885008335114, "learning_rate": 2.3935698860032545e-05, "loss": 0.6698, "step": 9361 }, { "epoch": 10.663247863247863, "grad_norm": 0.23315702378749847, "learning_rate": 2.3931043921426604e-05, "loss": 0.59, "step": 9362 }, { "epoch": 10.664387464387465, "grad_norm": 0.21217089891433716, "learning_rate": 2.3926389019948454e-05, "loss": 0.6775, "step": 9363 }, { "epoch": 10.665527065527066, "grad_norm": 0.22452345490455627, "learning_rate": 2.3921734155759762e-05, "loss": 0.5892, "step": 9364 }, { "epoch": 10.666666666666666, "grad_norm": 0.21150195598602295, "learning_rate": 2.3917079329022214e-05, "loss": 0.6751, "step": 9365 }, { "epoch": 10.667806267806268, "grad_norm": 0.26901328563690186, "learning_rate": 2.391242453989747e-05, "loss": 0.5041, "step": 9366 }, { "epoch": 10.668945868945869, "grad_norm": 0.2344285100698471, "learning_rate": 2.390776978854722e-05, "loss": 0.5478, "step": 9367 }, { "epoch": 10.67008547008547, "grad_norm": 0.24526192247867584, "learning_rate": 2.3903115075133126e-05, "loss": 0.7689, "step": 9368 }, { "epoch": 10.671225071225072, "grad_norm": 0.26171600818634033, "learning_rate": 2.3898460399816863e-05, "loss": 0.5725, "step": 9369 }, { "epoch": 10.672364672364672, "grad_norm": 0.2161663919687271, "learning_rate": 2.3893805762760096e-05, "loss": 0.871, "step": 9370 }, { "epoch": 10.673504273504273, "grad_norm": 0.20319776237010956, "learning_rate": 2.388915116412449e-05, "loss": 0.5461, "step": 9371 }, { "epoch": 10.674643874643875, "grad_norm": 0.2261560708284378, "learning_rate": 2.388449660407173e-05, "loss": 0.6596, "step": 9372 }, { "epoch": 10.675783475783476, "grad_norm": 0.18702539801597595, "learning_rate": 2.387984208276347e-05, "loss": 0.8152, "step": 9373 }, { "epoch": 10.676923076923076, "grad_norm": 0.16738612949848175, "learning_rate": 2.3875187600361377e-05, "loss": 0.8331, "step": 9374 }, { "epoch": 10.678062678062679, "grad_norm": 0.18628069758415222, "learning_rate": 2.3870533157027106e-05, "loss": 0.7551, "step": 9375 }, { "epoch": 10.67920227920228, "grad_norm": 0.23995763063430786, "learning_rate": 2.386587875292233e-05, "loss": 0.5629, "step": 9376 }, { "epoch": 10.68034188034188, "grad_norm": 0.167999729514122, "learning_rate": 2.3861224388208698e-05, "loss": 0.953, "step": 9377 }, { "epoch": 10.681481481481482, "grad_norm": 0.18648307025432587, "learning_rate": 2.385657006304788e-05, "loss": 0.8309, "step": 9378 }, { "epoch": 10.682621082621083, "grad_norm": 0.2104375660419464, "learning_rate": 2.3851915777601523e-05, "loss": 0.745, "step": 9379 }, { "epoch": 10.683760683760683, "grad_norm": 0.23774532973766327, "learning_rate": 2.3847261532031295e-05, "loss": 0.6492, "step": 9380 }, { "epoch": 10.684900284900285, "grad_norm": 0.20850513875484467, "learning_rate": 2.3842607326498838e-05, "loss": 0.8143, "step": 9381 }, { "epoch": 10.686039886039886, "grad_norm": 0.21070361137390137, "learning_rate": 2.3837953161165805e-05, "loss": 0.9054, "step": 9382 }, { "epoch": 10.687179487179487, "grad_norm": 1.9932860136032104, "learning_rate": 2.3833299036193865e-05, "loss": 0.744, "step": 9383 }, { "epoch": 10.688319088319089, "grad_norm": 0.2588234841823578, "learning_rate": 2.3828644951744652e-05, "loss": 0.6634, "step": 9384 }, { "epoch": 10.68945868945869, "grad_norm": 0.1745430827140808, "learning_rate": 2.3823990907979828e-05, "loss": 0.65, "step": 9385 }, { "epoch": 10.69059829059829, "grad_norm": 0.19741609692573547, "learning_rate": 2.381933690506103e-05, "loss": 0.5539, "step": 9386 }, { "epoch": 10.691737891737892, "grad_norm": 0.16858762502670288, "learning_rate": 2.3814682943149904e-05, "loss": 0.7887, "step": 9387 }, { "epoch": 10.692877492877493, "grad_norm": 0.19676348567008972, "learning_rate": 2.3810029022408108e-05, "loss": 0.7957, "step": 9388 }, { "epoch": 10.694017094017093, "grad_norm": 0.23177698254585266, "learning_rate": 2.380537514299727e-05, "loss": 0.6576, "step": 9389 }, { "epoch": 10.695156695156696, "grad_norm": 0.19935131072998047, "learning_rate": 2.3800721305079043e-05, "loss": 0.8319, "step": 9390 }, { "epoch": 10.696296296296296, "grad_norm": 0.18709790706634521, "learning_rate": 2.379606750881506e-05, "loss": 0.8519, "step": 9391 }, { "epoch": 10.697435897435897, "grad_norm": 0.24430139362812042, "learning_rate": 2.3791413754366966e-05, "loss": 0.709, "step": 9392 }, { "epoch": 10.698575498575499, "grad_norm": 0.19017036259174347, "learning_rate": 2.3786760041896385e-05, "loss": 0.7555, "step": 9393 }, { "epoch": 10.6997150997151, "grad_norm": 0.20336014032363892, "learning_rate": 2.3782106371564972e-05, "loss": 0.6461, "step": 9394 }, { "epoch": 10.7008547008547, "grad_norm": 0.18139511346817017, "learning_rate": 2.377745274353436e-05, "loss": 0.9409, "step": 9395 }, { "epoch": 10.701994301994302, "grad_norm": 0.19394861161708832, "learning_rate": 2.3772799157966167e-05, "loss": 0.8289, "step": 9396 }, { "epoch": 10.703133903133903, "grad_norm": 0.23802120983600616, "learning_rate": 2.376814561502204e-05, "loss": 0.6504, "step": 9397 }, { "epoch": 10.704273504273504, "grad_norm": 0.24348127841949463, "learning_rate": 2.37634921148636e-05, "loss": 0.8562, "step": 9398 }, { "epoch": 10.705413105413106, "grad_norm": 0.23403213918209076, "learning_rate": 2.3758838657652485e-05, "loss": 0.6461, "step": 9399 }, { "epoch": 10.706552706552706, "grad_norm": 0.29111894965171814, "learning_rate": 2.375418524355031e-05, "loss": 0.5882, "step": 9400 }, { "epoch": 10.707692307692307, "grad_norm": 0.1834484487771988, "learning_rate": 2.3749531872718715e-05, "loss": 0.8172, "step": 9401 }, { "epoch": 10.70883190883191, "grad_norm": 0.23306624591350555, "learning_rate": 2.3744878545319307e-05, "loss": 0.772, "step": 9402 }, { "epoch": 10.70997150997151, "grad_norm": 0.2537129819393158, "learning_rate": 2.3740225261513727e-05, "loss": 0.4566, "step": 9403 }, { "epoch": 10.71111111111111, "grad_norm": 0.2458798736333847, "learning_rate": 2.3735572021463578e-05, "loss": 0.5786, "step": 9404 }, { "epoch": 10.712250712250713, "grad_norm": 0.22602513432502747, "learning_rate": 2.3730918825330495e-05, "loss": 0.6162, "step": 9405 }, { "epoch": 10.713390313390313, "grad_norm": 0.3012692928314209, "learning_rate": 2.3726265673276093e-05, "loss": 0.3404, "step": 9406 }, { "epoch": 10.714529914529914, "grad_norm": 0.2674121558666229, "learning_rate": 2.3721612565461987e-05, "loss": 0.7307, "step": 9407 }, { "epoch": 10.715669515669516, "grad_norm": 0.20394116640090942, "learning_rate": 2.3716959502049792e-05, "loss": 0.7418, "step": 9408 }, { "epoch": 10.716809116809117, "grad_norm": 0.2481083869934082, "learning_rate": 2.3712306483201122e-05, "loss": 0.6607, "step": 9409 }, { "epoch": 10.717948717948717, "grad_norm": 0.21053311228752136, "learning_rate": 2.370765350907759e-05, "loss": 0.7188, "step": 9410 }, { "epoch": 10.71908831908832, "grad_norm": 0.23599965870380402, "learning_rate": 2.3703000579840802e-05, "loss": 0.6121, "step": 9411 }, { "epoch": 10.72022792022792, "grad_norm": 0.2498737871646881, "learning_rate": 2.369834769565238e-05, "loss": 0.7792, "step": 9412 }, { "epoch": 10.72136752136752, "grad_norm": 0.24417205154895782, "learning_rate": 2.369369485667391e-05, "loss": 0.5859, "step": 9413 }, { "epoch": 10.722507122507123, "grad_norm": 0.30487245321273804, "learning_rate": 2.368904206306702e-05, "loss": 0.669, "step": 9414 }, { "epoch": 10.723646723646723, "grad_norm": 0.26632317900657654, "learning_rate": 2.3684389314993287e-05, "loss": 0.6994, "step": 9415 }, { "epoch": 10.724786324786324, "grad_norm": 0.18595530092716217, "learning_rate": 2.3679736612614344e-05, "loss": 0.82, "step": 9416 }, { "epoch": 10.725925925925926, "grad_norm": 0.17614929378032684, "learning_rate": 2.367508395609178e-05, "loss": 0.8156, "step": 9417 }, { "epoch": 10.727065527065527, "grad_norm": 0.2698836922645569, "learning_rate": 2.367043134558719e-05, "loss": 0.6949, "step": 9418 }, { "epoch": 10.728205128205127, "grad_norm": 0.2092035710811615, "learning_rate": 2.3665778781262183e-05, "loss": 0.7827, "step": 9419 }, { "epoch": 10.72934472934473, "grad_norm": 0.20295335352420807, "learning_rate": 2.3661126263278342e-05, "loss": 0.874, "step": 9420 }, { "epoch": 10.73048433048433, "grad_norm": 0.23088309168815613, "learning_rate": 2.365647379179727e-05, "loss": 0.6252, "step": 9421 }, { "epoch": 10.73162393162393, "grad_norm": 0.251143217086792, "learning_rate": 2.365182136698055e-05, "loss": 0.6036, "step": 9422 }, { "epoch": 10.732763532763533, "grad_norm": 0.16860376298427582, "learning_rate": 2.3647168988989796e-05, "loss": 0.92, "step": 9423 }, { "epoch": 10.733903133903134, "grad_norm": 0.22184909880161285, "learning_rate": 2.3642516657986574e-05, "loss": 0.8504, "step": 9424 }, { "epoch": 10.735042735042736, "grad_norm": 0.2654574513435364, "learning_rate": 2.3637864374132485e-05, "loss": 0.5537, "step": 9425 }, { "epoch": 10.736182336182337, "grad_norm": 0.2587309181690216, "learning_rate": 2.3633212137589097e-05, "loss": 0.8296, "step": 9426 }, { "epoch": 10.737321937321937, "grad_norm": 0.2274673581123352, "learning_rate": 2.3628559948518025e-05, "loss": 0.772, "step": 9427 }, { "epoch": 10.73846153846154, "grad_norm": 0.21977035701274872, "learning_rate": 2.3623907807080838e-05, "loss": 0.6141, "step": 9428 }, { "epoch": 10.73960113960114, "grad_norm": 0.20789380371570587, "learning_rate": 2.3619255713439114e-05, "loss": 0.8131, "step": 9429 }, { "epoch": 10.74074074074074, "grad_norm": 0.21119746565818787, "learning_rate": 2.3614603667754437e-05, "loss": 0.729, "step": 9430 }, { "epoch": 10.741880341880343, "grad_norm": 0.21227894723415375, "learning_rate": 2.3609951670188387e-05, "loss": 0.587, "step": 9431 }, { "epoch": 10.743019943019943, "grad_norm": 0.2010239213705063, "learning_rate": 2.360529972090253e-05, "loss": 0.6677, "step": 9432 }, { "epoch": 10.744159544159544, "grad_norm": 0.2607322633266449, "learning_rate": 2.3600647820058462e-05, "loss": 0.5946, "step": 9433 }, { "epoch": 10.745299145299146, "grad_norm": 0.22844435274600983, "learning_rate": 2.3595995967817733e-05, "loss": 0.63, "step": 9434 }, { "epoch": 10.746438746438747, "grad_norm": 0.1804017275571823, "learning_rate": 2.359134416434193e-05, "loss": 0.6988, "step": 9435 }, { "epoch": 10.747578347578347, "grad_norm": 0.27404120564460754, "learning_rate": 2.3586692409792616e-05, "loss": 0.539, "step": 9436 }, { "epoch": 10.74871794871795, "grad_norm": 0.16426067054271698, "learning_rate": 2.3582040704331353e-05, "loss": 0.7869, "step": 9437 }, { "epoch": 10.74985754985755, "grad_norm": 0.22315113246440887, "learning_rate": 2.3577389048119727e-05, "loss": 0.8015, "step": 9438 }, { "epoch": 10.75099715099715, "grad_norm": 0.18692094087600708, "learning_rate": 2.3572737441319287e-05, "loss": 0.8446, "step": 9439 }, { "epoch": 10.752136752136753, "grad_norm": 0.1945079118013382, "learning_rate": 2.3568085884091606e-05, "loss": 0.6903, "step": 9440 }, { "epoch": 10.753276353276354, "grad_norm": 0.17877104878425598, "learning_rate": 2.3563434376598233e-05, "loss": 0.7957, "step": 9441 }, { "epoch": 10.754415954415954, "grad_norm": 0.20790593326091766, "learning_rate": 2.3558782919000742e-05, "loss": 0.7003, "step": 9442 }, { "epoch": 10.755555555555556, "grad_norm": 0.2612801194190979, "learning_rate": 2.355413151146068e-05, "loss": 0.8619, "step": 9443 }, { "epoch": 10.756695156695157, "grad_norm": 0.249959334731102, "learning_rate": 2.3549480154139607e-05, "loss": 0.6737, "step": 9444 }, { "epoch": 10.757834757834758, "grad_norm": 0.20381850004196167, "learning_rate": 2.3544828847199073e-05, "loss": 0.6509, "step": 9445 }, { "epoch": 10.75897435897436, "grad_norm": 0.18282800912857056, "learning_rate": 2.3540177590800645e-05, "loss": 0.8114, "step": 9446 }, { "epoch": 10.76011396011396, "grad_norm": 0.23745818436145782, "learning_rate": 2.3535526385105853e-05, "loss": 0.6211, "step": 9447 }, { "epoch": 10.761253561253561, "grad_norm": 0.20694097876548767, "learning_rate": 2.3530875230276254e-05, "loss": 0.663, "step": 9448 }, { "epoch": 10.762393162393163, "grad_norm": 0.2063913643360138, "learning_rate": 2.3526224126473407e-05, "loss": 0.7413, "step": 9449 }, { "epoch": 10.763532763532764, "grad_norm": 0.27030718326568604, "learning_rate": 2.3521573073858845e-05, "loss": 0.3244, "step": 9450 }, { "epoch": 10.764672364672364, "grad_norm": 0.23140282928943634, "learning_rate": 2.3516922072594124e-05, "loss": 0.5387, "step": 9451 }, { "epoch": 10.765811965811967, "grad_norm": 0.19600550830364227, "learning_rate": 2.351227112284077e-05, "loss": 0.6774, "step": 9452 }, { "epoch": 10.766951566951567, "grad_norm": 0.2009340524673462, "learning_rate": 2.3507620224760338e-05, "loss": 0.5952, "step": 9453 }, { "epoch": 10.768091168091168, "grad_norm": 0.19685262441635132, "learning_rate": 2.3502969378514352e-05, "loss": 0.805, "step": 9454 }, { "epoch": 10.76923076923077, "grad_norm": 0.1902560442686081, "learning_rate": 2.349831858426436e-05, "loss": 0.7882, "step": 9455 }, { "epoch": 10.77037037037037, "grad_norm": 0.21719329059123993, "learning_rate": 2.3493667842171894e-05, "loss": 0.8039, "step": 9456 }, { "epoch": 10.771509971509971, "grad_norm": 0.18993858993053436, "learning_rate": 2.3489017152398488e-05, "loss": 0.7998, "step": 9457 }, { "epoch": 10.772649572649573, "grad_norm": 0.28052687644958496, "learning_rate": 2.3484366515105667e-05, "loss": 0.5439, "step": 9458 }, { "epoch": 10.773789173789174, "grad_norm": 0.3004973828792572, "learning_rate": 2.3479715930454958e-05, "loss": 0.415, "step": 9459 }, { "epoch": 10.774928774928775, "grad_norm": 0.2153039127588272, "learning_rate": 2.3475065398607907e-05, "loss": 0.8786, "step": 9460 }, { "epoch": 10.776068376068377, "grad_norm": 0.20625334978103638, "learning_rate": 2.3470414919726023e-05, "loss": 0.6469, "step": 9461 }, { "epoch": 10.777207977207977, "grad_norm": 0.20467057824134827, "learning_rate": 2.346576449397084e-05, "loss": 0.6427, "step": 9462 }, { "epoch": 10.778347578347578, "grad_norm": 0.2243751436471939, "learning_rate": 2.346111412150387e-05, "loss": 0.9085, "step": 9463 }, { "epoch": 10.77948717948718, "grad_norm": 0.2660139799118042, "learning_rate": 2.3456463802486645e-05, "loss": 0.6218, "step": 9464 }, { "epoch": 10.78062678062678, "grad_norm": 0.1993478685617447, "learning_rate": 2.345181353708067e-05, "loss": 0.9086, "step": 9465 }, { "epoch": 10.781766381766381, "grad_norm": 0.2073563188314438, "learning_rate": 2.3447163325447477e-05, "loss": 0.7225, "step": 9466 }, { "epoch": 10.782905982905984, "grad_norm": 0.2541685998439789, "learning_rate": 2.3442513167748563e-05, "loss": 0.6917, "step": 9467 }, { "epoch": 10.784045584045584, "grad_norm": 0.20809699594974518, "learning_rate": 2.3437863064145453e-05, "loss": 0.6991, "step": 9468 }, { "epoch": 10.785185185185185, "grad_norm": 0.17374277114868164, "learning_rate": 2.3433213014799658e-05, "loss": 0.7477, "step": 9469 }, { "epoch": 10.786324786324787, "grad_norm": 0.1751074641942978, "learning_rate": 2.3428563019872674e-05, "loss": 0.9247, "step": 9470 }, { "epoch": 10.787464387464388, "grad_norm": 0.22171640396118164, "learning_rate": 2.3423913079526023e-05, "loss": 0.7436, "step": 9471 }, { "epoch": 10.788603988603988, "grad_norm": 0.23284953832626343, "learning_rate": 2.3419263193921207e-05, "loss": 0.6626, "step": 9472 }, { "epoch": 10.78974358974359, "grad_norm": 0.2270098626613617, "learning_rate": 2.3414613363219725e-05, "loss": 0.7602, "step": 9473 }, { "epoch": 10.790883190883191, "grad_norm": 0.20243126153945923, "learning_rate": 2.3409963587583086e-05, "loss": 0.8009, "step": 9474 }, { "epoch": 10.792022792022792, "grad_norm": 0.23066499829292297, "learning_rate": 2.340531386717278e-05, "loss": 0.6073, "step": 9475 }, { "epoch": 10.793162393162394, "grad_norm": 0.20418374240398407, "learning_rate": 2.340066420215031e-05, "loss": 0.793, "step": 9476 }, { "epoch": 10.794301994301994, "grad_norm": 0.16435778141021729, "learning_rate": 2.339601459267717e-05, "loss": 0.7931, "step": 9477 }, { "epoch": 10.795441595441595, "grad_norm": 0.20960985124111176, "learning_rate": 2.3391365038914857e-05, "loss": 0.6735, "step": 9478 }, { "epoch": 10.796581196581197, "grad_norm": 0.18827983736991882, "learning_rate": 2.3386715541024855e-05, "loss": 0.7043, "step": 9479 }, { "epoch": 10.797720797720798, "grad_norm": 0.27252548933029175, "learning_rate": 2.3382066099168664e-05, "loss": 0.4469, "step": 9480 }, { "epoch": 10.798860398860398, "grad_norm": 0.2389608919620514, "learning_rate": 2.3377416713507756e-05, "loss": 0.624, "step": 9481 }, { "epoch": 10.8, "grad_norm": 0.25767797231674194, "learning_rate": 2.3372767384203632e-05, "loss": 0.5268, "step": 9482 }, { "epoch": 10.801139601139601, "grad_norm": 0.19765399396419525, "learning_rate": 2.336811811141778e-05, "loss": 0.7206, "step": 9483 }, { "epoch": 10.802279202279202, "grad_norm": 0.22773143649101257, "learning_rate": 2.3363468895311667e-05, "loss": 0.7796, "step": 9484 }, { "epoch": 10.803418803418804, "grad_norm": 0.22266587615013123, "learning_rate": 2.3358819736046782e-05, "loss": 0.5538, "step": 9485 }, { "epoch": 10.804558404558405, "grad_norm": 0.1914573609828949, "learning_rate": 2.33541706337846e-05, "loss": 0.7803, "step": 9486 }, { "epoch": 10.805698005698005, "grad_norm": 0.21992233395576477, "learning_rate": 2.3349521588686602e-05, "loss": 0.6589, "step": 9487 }, { "epoch": 10.806837606837608, "grad_norm": 0.17620261013507843, "learning_rate": 2.334487260091425e-05, "loss": 0.7919, "step": 9488 }, { "epoch": 10.807977207977208, "grad_norm": 0.1932898461818695, "learning_rate": 2.3340223670629032e-05, "loss": 0.7954, "step": 9489 }, { "epoch": 10.809116809116809, "grad_norm": 0.22569020092487335, "learning_rate": 2.3335574797992406e-05, "loss": 0.8335, "step": 9490 }, { "epoch": 10.810256410256411, "grad_norm": 0.22013911604881287, "learning_rate": 2.3330925983165845e-05, "loss": 0.7486, "step": 9491 }, { "epoch": 10.811396011396011, "grad_norm": 0.16830658912658691, "learning_rate": 2.332627722631081e-05, "loss": 0.6876, "step": 9492 }, { "epoch": 10.812535612535612, "grad_norm": 0.2666875720024109, "learning_rate": 2.332162852758877e-05, "loss": 0.6808, "step": 9493 }, { "epoch": 10.813675213675214, "grad_norm": 0.17709283530712128, "learning_rate": 2.3316979887161194e-05, "loss": 0.7737, "step": 9494 }, { "epoch": 10.814814814814815, "grad_norm": 0.17941021919250488, "learning_rate": 2.3312331305189534e-05, "loss": 0.7108, "step": 9495 }, { "epoch": 10.815954415954415, "grad_norm": 0.22202011942863464, "learning_rate": 2.330768278183525e-05, "loss": 0.8362, "step": 9496 }, { "epoch": 10.817094017094018, "grad_norm": 0.19255803525447845, "learning_rate": 2.330303431725979e-05, "loss": 0.6976, "step": 9497 }, { "epoch": 10.818233618233618, "grad_norm": 0.22026483714580536, "learning_rate": 2.329838591162462e-05, "loss": 0.747, "step": 9498 }, { "epoch": 10.819373219373219, "grad_norm": 0.1821742206811905, "learning_rate": 2.3293737565091185e-05, "loss": 0.8356, "step": 9499 }, { "epoch": 10.820512820512821, "grad_norm": 0.232045516371727, "learning_rate": 2.3289089277820942e-05, "loss": 0.5012, "step": 9500 }, { "epoch": 10.821652421652422, "grad_norm": 0.19723717868328094, "learning_rate": 2.3284441049975326e-05, "loss": 0.6696, "step": 9501 }, { "epoch": 10.822792022792022, "grad_norm": 0.23821412026882172, "learning_rate": 2.3279792881715794e-05, "loss": 0.6626, "step": 9502 }, { "epoch": 10.823931623931625, "grad_norm": 0.22820888459682465, "learning_rate": 2.327514477320377e-05, "loss": 0.5946, "step": 9503 }, { "epoch": 10.825071225071225, "grad_norm": 0.21242305636405945, "learning_rate": 2.327049672460073e-05, "loss": 0.7893, "step": 9504 }, { "epoch": 10.826210826210826, "grad_norm": 0.1972009539604187, "learning_rate": 2.326584873606809e-05, "loss": 0.8423, "step": 9505 }, { "epoch": 10.827350427350428, "grad_norm": 0.17009088397026062, "learning_rate": 2.3261200807767295e-05, "loss": 0.6882, "step": 9506 }, { "epoch": 10.828490028490029, "grad_norm": 0.21269096434116364, "learning_rate": 2.3256552939859775e-05, "loss": 0.7192, "step": 9507 }, { "epoch": 10.829629629629629, "grad_norm": 0.19281990826129913, "learning_rate": 2.325190513250697e-05, "loss": 0.7661, "step": 9508 }, { "epoch": 10.830769230769231, "grad_norm": 0.23136039078235626, "learning_rate": 2.32472573858703e-05, "loss": 0.5096, "step": 9509 }, { "epoch": 10.831908831908832, "grad_norm": 0.1862705945968628, "learning_rate": 2.3242609700111208e-05, "loss": 0.8891, "step": 9510 }, { "epoch": 10.833048433048432, "grad_norm": 0.198419451713562, "learning_rate": 2.3237962075391105e-05, "loss": 0.8216, "step": 9511 }, { "epoch": 10.834188034188035, "grad_norm": 0.23318611085414886, "learning_rate": 2.3233314511871437e-05, "loss": 0.5296, "step": 9512 }, { "epoch": 10.835327635327635, "grad_norm": 0.23531882464885712, "learning_rate": 2.3228667009713608e-05, "loss": 0.7086, "step": 9513 }, { "epoch": 10.836467236467236, "grad_norm": 0.19806161522865295, "learning_rate": 2.3224019569079033e-05, "loss": 0.7197, "step": 9514 }, { "epoch": 10.837606837606838, "grad_norm": 0.22317473590373993, "learning_rate": 2.3219372190129156e-05, "loss": 0.602, "step": 9515 }, { "epoch": 10.838746438746439, "grad_norm": 0.20388571918010712, "learning_rate": 2.321472487302538e-05, "loss": 0.7902, "step": 9516 }, { "epoch": 10.83988603988604, "grad_norm": 0.2629711925983429, "learning_rate": 2.3210077617929116e-05, "loss": 0.5558, "step": 9517 }, { "epoch": 10.841025641025642, "grad_norm": 0.1970510184764862, "learning_rate": 2.3205430425001778e-05, "loss": 0.8118, "step": 9518 }, { "epoch": 10.842165242165242, "grad_norm": 0.2255692481994629, "learning_rate": 2.320078329440478e-05, "loss": 0.7234, "step": 9519 }, { "epoch": 10.843304843304843, "grad_norm": 0.18998631834983826, "learning_rate": 2.3196136226299524e-05, "loss": 0.9203, "step": 9520 }, { "epoch": 10.844444444444445, "grad_norm": 0.22631555795669556, "learning_rate": 2.319148922084742e-05, "loss": 0.7307, "step": 9521 }, { "epoch": 10.845584045584046, "grad_norm": 0.22473649680614471, "learning_rate": 2.318684227820986e-05, "loss": 0.6334, "step": 9522 }, { "epoch": 10.846723646723646, "grad_norm": 0.24857501685619354, "learning_rate": 2.3182195398548266e-05, "loss": 0.6308, "step": 9523 }, { "epoch": 10.847863247863248, "grad_norm": 0.19463545083999634, "learning_rate": 2.3177548582024013e-05, "loss": 0.8542, "step": 9524 }, { "epoch": 10.849002849002849, "grad_norm": 0.22861404716968536, "learning_rate": 2.317290182879851e-05, "loss": 0.7474, "step": 9525 }, { "epoch": 10.85014245014245, "grad_norm": 0.20999133586883545, "learning_rate": 2.3168255139033157e-05, "loss": 0.7299, "step": 9526 }, { "epoch": 10.851282051282052, "grad_norm": 0.2135160118341446, "learning_rate": 2.3163608512889337e-05, "loss": 0.6717, "step": 9527 }, { "epoch": 10.852421652421652, "grad_norm": 0.2210507094860077, "learning_rate": 2.315896195052845e-05, "loss": 0.6291, "step": 9528 }, { "epoch": 10.853561253561253, "grad_norm": 0.17308570444583893, "learning_rate": 2.315431545211187e-05, "loss": 0.9724, "step": 9529 }, { "epoch": 10.854700854700855, "grad_norm": 0.1988760232925415, "learning_rate": 2.3149669017800995e-05, "loss": 0.7288, "step": 9530 }, { "epoch": 10.855840455840456, "grad_norm": 0.19695118069648743, "learning_rate": 2.31450226477572e-05, "loss": 0.6422, "step": 9531 }, { "epoch": 10.856980056980056, "grad_norm": 0.1963445246219635, "learning_rate": 2.3140376342141877e-05, "loss": 0.7182, "step": 9532 }, { "epoch": 10.858119658119659, "grad_norm": 0.2153501659631729, "learning_rate": 2.3135730101116386e-05, "loss": 0.6257, "step": 9533 }, { "epoch": 10.85925925925926, "grad_norm": 0.24595047533512115, "learning_rate": 2.3131083924842125e-05, "loss": 0.4354, "step": 9534 }, { "epoch": 10.86039886039886, "grad_norm": 0.2607561945915222, "learning_rate": 2.3126437813480455e-05, "loss": 0.4597, "step": 9535 }, { "epoch": 10.861538461538462, "grad_norm": 0.15536083281040192, "learning_rate": 2.3121791767192745e-05, "loss": 0.8255, "step": 9536 }, { "epoch": 10.862678062678063, "grad_norm": 0.2075459510087967, "learning_rate": 2.3117145786140378e-05, "loss": 0.7696, "step": 9537 }, { "epoch": 10.863817663817663, "grad_norm": 0.21588337421417236, "learning_rate": 2.3112499870484715e-05, "loss": 0.6296, "step": 9538 }, { "epoch": 10.864957264957265, "grad_norm": 0.1848537176847458, "learning_rate": 2.3107854020387128e-05, "loss": 0.8752, "step": 9539 }, { "epoch": 10.866096866096866, "grad_norm": 0.23128467798233032, "learning_rate": 2.3103208236008967e-05, "loss": 0.6872, "step": 9540 }, { "epoch": 10.867236467236467, "grad_norm": 0.20321355760097504, "learning_rate": 2.3098562517511607e-05, "loss": 0.8158, "step": 9541 }, { "epoch": 10.868376068376069, "grad_norm": 0.2519338130950928, "learning_rate": 2.3093916865056395e-05, "loss": 0.779, "step": 9542 }, { "epoch": 10.86951566951567, "grad_norm": 0.24459229409694672, "learning_rate": 2.3089271278804696e-05, "loss": 0.3526, "step": 9543 }, { "epoch": 10.87065527065527, "grad_norm": 0.18571284413337708, "learning_rate": 2.3084625758917856e-05, "loss": 0.8751, "step": 9544 }, { "epoch": 10.871794871794872, "grad_norm": 0.21225886046886444, "learning_rate": 2.3079980305557233e-05, "loss": 0.6816, "step": 9545 }, { "epoch": 10.872934472934473, "grad_norm": 0.24481455981731415, "learning_rate": 2.3075334918884173e-05, "loss": 0.7486, "step": 9546 }, { "epoch": 10.874074074074073, "grad_norm": 0.2009333223104477, "learning_rate": 2.3070689599060012e-05, "loss": 0.7127, "step": 9547 }, { "epoch": 10.875213675213676, "grad_norm": 0.2494220733642578, "learning_rate": 2.306604434624612e-05, "loss": 0.602, "step": 9548 }, { "epoch": 10.876353276353276, "grad_norm": 0.20982743799686432, "learning_rate": 2.3061399160603826e-05, "loss": 0.6367, "step": 9549 }, { "epoch": 10.877492877492877, "grad_norm": 0.30105796456336975, "learning_rate": 2.305675404229447e-05, "loss": 0.5472, "step": 9550 }, { "epoch": 10.878632478632479, "grad_norm": 0.23179566860198975, "learning_rate": 2.305210899147939e-05, "loss": 0.7156, "step": 9551 }, { "epoch": 10.87977207977208, "grad_norm": 0.27807697653770447, "learning_rate": 2.3047464008319917e-05, "loss": 0.4736, "step": 9552 }, { "epoch": 10.88091168091168, "grad_norm": 0.3016132414340973, "learning_rate": 2.3042819092977398e-05, "loss": 0.7421, "step": 9553 }, { "epoch": 10.882051282051282, "grad_norm": 0.18473359942436218, "learning_rate": 2.303817424561315e-05, "loss": 0.8034, "step": 9554 }, { "epoch": 10.883190883190883, "grad_norm": 0.25154560804367065, "learning_rate": 2.3033529466388502e-05, "loss": 0.5372, "step": 9555 }, { "epoch": 10.884330484330484, "grad_norm": 0.19905391335487366, "learning_rate": 2.3028884755464784e-05, "loss": 0.8261, "step": 9556 }, { "epoch": 10.885470085470086, "grad_norm": 0.2105431705713272, "learning_rate": 2.3024240113003324e-05, "loss": 1.0053, "step": 9557 }, { "epoch": 10.886609686609686, "grad_norm": 0.18650519847869873, "learning_rate": 2.3019595539165428e-05, "loss": 0.6904, "step": 9558 }, { "epoch": 10.887749287749287, "grad_norm": 0.19791394472122192, "learning_rate": 2.301495103411243e-05, "loss": 0.7383, "step": 9559 }, { "epoch": 10.88888888888889, "grad_norm": 0.23774339258670807, "learning_rate": 2.3010306598005646e-05, "loss": 0.8357, "step": 9560 }, { "epoch": 10.89002849002849, "grad_norm": 0.2502046823501587, "learning_rate": 2.300566223100638e-05, "loss": 0.6744, "step": 9561 }, { "epoch": 10.89116809116809, "grad_norm": 0.2330288589000702, "learning_rate": 2.3001017933275957e-05, "loss": 0.6214, "step": 9562 }, { "epoch": 10.892307692307693, "grad_norm": 0.27952054142951965, "learning_rate": 2.299637370497567e-05, "loss": 0.5266, "step": 9563 }, { "epoch": 10.893447293447293, "grad_norm": 0.23781828582286835, "learning_rate": 2.299172954626684e-05, "loss": 0.7795, "step": 9564 }, { "epoch": 10.894586894586894, "grad_norm": 0.19092990458011627, "learning_rate": 2.2987085457310762e-05, "loss": 0.7526, "step": 9565 }, { "epoch": 10.895726495726496, "grad_norm": 0.30046021938323975, "learning_rate": 2.2982441438268746e-05, "loss": 0.5764, "step": 9566 }, { "epoch": 10.896866096866097, "grad_norm": 0.20064473152160645, "learning_rate": 2.2977797489302084e-05, "loss": 0.7293, "step": 9567 }, { "epoch": 10.898005698005697, "grad_norm": 0.2130577266216278, "learning_rate": 2.2973153610572076e-05, "loss": 0.7348, "step": 9568 }, { "epoch": 10.8991452991453, "grad_norm": 0.1925908476114273, "learning_rate": 2.296850980224001e-05, "loss": 0.706, "step": 9569 }, { "epoch": 10.9002849002849, "grad_norm": 0.21011324226856232, "learning_rate": 2.296386606446719e-05, "loss": 0.6495, "step": 9570 }, { "epoch": 10.9014245014245, "grad_norm": 0.21512216329574585, "learning_rate": 2.2959222397414908e-05, "loss": 0.6298, "step": 9571 }, { "epoch": 10.902564102564103, "grad_norm": 0.2582896053791046, "learning_rate": 2.2954578801244435e-05, "loss": 0.4469, "step": 9572 }, { "epoch": 10.903703703703703, "grad_norm": 0.18377749621868134, "learning_rate": 2.2949935276117075e-05, "loss": 0.6704, "step": 9573 }, { "epoch": 10.904843304843304, "grad_norm": 0.2577991485595703, "learning_rate": 2.2945291822194093e-05, "loss": 0.884, "step": 9574 }, { "epoch": 10.905982905982906, "grad_norm": 0.22379299998283386, "learning_rate": 2.2940648439636784e-05, "loss": 0.6633, "step": 9575 }, { "epoch": 10.907122507122507, "grad_norm": 0.20631907880306244, "learning_rate": 2.2936005128606407e-05, "loss": 0.8447, "step": 9576 }, { "epoch": 10.908262108262107, "grad_norm": 0.21610593795776367, "learning_rate": 2.2931361889264257e-05, "loss": 0.7037, "step": 9577 }, { "epoch": 10.90940170940171, "grad_norm": 0.24314944446086884, "learning_rate": 2.2926718721771595e-05, "loss": 0.7203, "step": 9578 }, { "epoch": 10.91054131054131, "grad_norm": 0.19356660544872284, "learning_rate": 2.2922075626289694e-05, "loss": 0.6483, "step": 9579 }, { "epoch": 10.91168091168091, "grad_norm": 0.20415425300598145, "learning_rate": 2.2917432602979807e-05, "loss": 0.7931, "step": 9580 }, { "epoch": 10.912820512820513, "grad_norm": 0.18701380491256714, "learning_rate": 2.2912789652003227e-05, "loss": 0.8832, "step": 9581 }, { "epoch": 10.913960113960114, "grad_norm": 0.22453537583351135, "learning_rate": 2.2908146773521198e-05, "loss": 0.4972, "step": 9582 }, { "epoch": 10.915099715099714, "grad_norm": 0.21279367804527283, "learning_rate": 2.290350396769499e-05, "loss": 0.6631, "step": 9583 }, { "epoch": 10.916239316239317, "grad_norm": 0.21103893220424652, "learning_rate": 2.289886123468585e-05, "loss": 0.8214, "step": 9584 }, { "epoch": 10.917378917378917, "grad_norm": 0.21029996871948242, "learning_rate": 2.289421857465504e-05, "loss": 0.6253, "step": 9585 }, { "epoch": 10.918518518518518, "grad_norm": 0.23768191039562225, "learning_rate": 2.2889575987763806e-05, "loss": 0.7939, "step": 9586 }, { "epoch": 10.91965811965812, "grad_norm": 0.21049334108829498, "learning_rate": 2.288493347417341e-05, "loss": 0.7907, "step": 9587 }, { "epoch": 10.92079772079772, "grad_norm": 0.2304389923810959, "learning_rate": 2.288029103404508e-05, "loss": 0.6486, "step": 9588 }, { "epoch": 10.921937321937321, "grad_norm": 0.2353249192237854, "learning_rate": 2.2875648667540082e-05, "loss": 0.5568, "step": 9589 }, { "epoch": 10.923076923076923, "grad_norm": 0.2526458203792572, "learning_rate": 2.2871006374819636e-05, "loss": 0.8105, "step": 9590 }, { "epoch": 10.924216524216524, "grad_norm": 0.17263050377368927, "learning_rate": 2.2866364156044994e-05, "loss": 0.9349, "step": 9591 }, { "epoch": 10.925356125356124, "grad_norm": 0.28131937980651855, "learning_rate": 2.2861722011377403e-05, "loss": 0.6651, "step": 9592 }, { "epoch": 10.926495726495727, "grad_norm": 0.2113354653120041, "learning_rate": 2.2857079940978077e-05, "loss": 0.5469, "step": 9593 }, { "epoch": 10.927635327635327, "grad_norm": 0.25539398193359375, "learning_rate": 2.285243794500827e-05, "loss": 0.6967, "step": 9594 }, { "epoch": 10.928774928774928, "grad_norm": 0.24127545952796936, "learning_rate": 2.2847796023629188e-05, "loss": 0.6924, "step": 9595 }, { "epoch": 10.92991452991453, "grad_norm": 0.2079651802778244, "learning_rate": 2.2843154177002075e-05, "loss": 0.6441, "step": 9596 }, { "epoch": 10.93105413105413, "grad_norm": 0.2402574121952057, "learning_rate": 2.2838512405288143e-05, "loss": 0.5708, "step": 9597 }, { "epoch": 10.932193732193731, "grad_norm": 0.24295979738235474, "learning_rate": 2.2833870708648624e-05, "loss": 0.6287, "step": 9598 }, { "epoch": 10.933333333333334, "grad_norm": 0.1972571313381195, "learning_rate": 2.2829229087244727e-05, "loss": 0.7331, "step": 9599 }, { "epoch": 10.934472934472934, "grad_norm": 0.2275058478116989, "learning_rate": 2.282458754123768e-05, "loss": 0.564, "step": 9600 }, { "epoch": 10.935612535612536, "grad_norm": 0.20724822580814362, "learning_rate": 2.2819946070788685e-05, "loss": 0.7242, "step": 9601 }, { "epoch": 10.936752136752137, "grad_norm": 0.19633297622203827, "learning_rate": 2.2815304676058953e-05, "loss": 0.7411, "step": 9602 }, { "epoch": 10.937891737891738, "grad_norm": 0.22073744237422943, "learning_rate": 2.281066335720971e-05, "loss": 0.5131, "step": 9603 }, { "epoch": 10.93903133903134, "grad_norm": 0.18169017136096954, "learning_rate": 2.280602211440214e-05, "loss": 0.8153, "step": 9604 }, { "epoch": 10.94017094017094, "grad_norm": 0.23444296419620514, "learning_rate": 2.2801380947797464e-05, "loss": 0.586, "step": 9605 }, { "epoch": 10.941310541310541, "grad_norm": 0.22850699722766876, "learning_rate": 2.2796739857556866e-05, "loss": 0.7663, "step": 9606 }, { "epoch": 10.942450142450143, "grad_norm": 0.20789551734924316, "learning_rate": 2.2792098843841557e-05, "loss": 0.5332, "step": 9607 }, { "epoch": 10.943589743589744, "grad_norm": 0.3431934118270874, "learning_rate": 2.278745790681272e-05, "loss": 0.5089, "step": 9608 }, { "epoch": 10.944729344729344, "grad_norm": 0.21108171343803406, "learning_rate": 2.2782817046631567e-05, "loss": 0.6472, "step": 9609 }, { "epoch": 10.945868945868947, "grad_norm": 0.177011176943779, "learning_rate": 2.2778176263459264e-05, "loss": 0.7573, "step": 9610 }, { "epoch": 10.947008547008547, "grad_norm": 0.23713651299476624, "learning_rate": 2.2773535557457015e-05, "loss": 0.7791, "step": 9611 }, { "epoch": 10.948148148148148, "grad_norm": 0.15720611810684204, "learning_rate": 2.2768894928785997e-05, "loss": 0.7181, "step": 9612 }, { "epoch": 10.94928774928775, "grad_norm": 0.19592933356761932, "learning_rate": 2.2764254377607395e-05, "loss": 0.6866, "step": 9613 }, { "epoch": 10.95042735042735, "grad_norm": 0.20813706517219543, "learning_rate": 2.275961390408238e-05, "loss": 0.845, "step": 9614 }, { "epoch": 10.951566951566951, "grad_norm": 0.23146525025367737, "learning_rate": 2.2754973508372134e-05, "loss": 0.5723, "step": 9615 }, { "epoch": 10.952706552706553, "grad_norm": 0.204881489276886, "learning_rate": 2.2750333190637845e-05, "loss": 0.7392, "step": 9616 }, { "epoch": 10.953846153846154, "grad_norm": 0.2550255060195923, "learning_rate": 2.274569295104066e-05, "loss": 0.6184, "step": 9617 }, { "epoch": 10.954985754985755, "grad_norm": 0.1903819739818573, "learning_rate": 2.2741052789741767e-05, "loss": 0.9335, "step": 9618 }, { "epoch": 10.956125356125357, "grad_norm": 0.2497108280658722, "learning_rate": 2.2736412706902314e-05, "loss": 0.5935, "step": 9619 }, { "epoch": 10.957264957264957, "grad_norm": 0.19148176908493042, "learning_rate": 2.273177270268348e-05, "loss": 0.7955, "step": 9620 }, { "epoch": 10.958404558404558, "grad_norm": 0.21798738837242126, "learning_rate": 2.2727132777246413e-05, "loss": 0.8257, "step": 9621 }, { "epoch": 10.95954415954416, "grad_norm": 0.20050744712352753, "learning_rate": 2.272249293075228e-05, "loss": 0.61, "step": 9622 }, { "epoch": 10.96068376068376, "grad_norm": 0.17249904572963715, "learning_rate": 2.2717853163362226e-05, "loss": 0.8241, "step": 9623 }, { "epoch": 10.961823361823361, "grad_norm": 0.23640227317810059, "learning_rate": 2.271321347523741e-05, "loss": 0.6602, "step": 9624 }, { "epoch": 10.962962962962964, "grad_norm": 0.18986733257770538, "learning_rate": 2.2708573866538965e-05, "loss": 0.6984, "step": 9625 }, { "epoch": 10.964102564102564, "grad_norm": 0.22353368997573853, "learning_rate": 2.2703934337428068e-05, "loss": 0.6482, "step": 9626 }, { "epoch": 10.965242165242165, "grad_norm": 0.2648194432258606, "learning_rate": 2.269929488806584e-05, "loss": 0.5999, "step": 9627 }, { "epoch": 10.966381766381767, "grad_norm": 0.20746877789497375, "learning_rate": 2.269465551861343e-05, "loss": 0.7823, "step": 9628 }, { "epoch": 10.967521367521368, "grad_norm": 0.19003280997276306, "learning_rate": 2.269001622923197e-05, "loss": 0.704, "step": 9629 }, { "epoch": 10.968660968660968, "grad_norm": 0.22691278159618378, "learning_rate": 2.2685377020082604e-05, "loss": 0.5462, "step": 9630 }, { "epoch": 10.96980056980057, "grad_norm": 0.19482676684856415, "learning_rate": 2.2680737891326457e-05, "loss": 0.9215, "step": 9631 }, { "epoch": 10.970940170940171, "grad_norm": 0.21007443964481354, "learning_rate": 2.2676098843124662e-05, "loss": 0.6382, "step": 9632 }, { "epoch": 10.972079772079772, "grad_norm": 0.19882825016975403, "learning_rate": 2.267145987563834e-05, "loss": 0.7259, "step": 9633 }, { "epoch": 10.973219373219374, "grad_norm": 0.1697952300310135, "learning_rate": 2.2666820989028628e-05, "loss": 0.8101, "step": 9634 }, { "epoch": 10.974358974358974, "grad_norm": 0.21096788346767426, "learning_rate": 2.2662182183456632e-05, "loss": 0.7524, "step": 9635 }, { "epoch": 10.975498575498575, "grad_norm": 0.19336964190006256, "learning_rate": 2.2657543459083474e-05, "loss": 0.8086, "step": 9636 }, { "epoch": 10.976638176638177, "grad_norm": 0.24253030121326447, "learning_rate": 2.2652904816070283e-05, "loss": 0.6149, "step": 9637 }, { "epoch": 10.977777777777778, "grad_norm": 0.20174847543239594, "learning_rate": 2.2648266254578156e-05, "loss": 0.7214, "step": 9638 }, { "epoch": 10.978917378917378, "grad_norm": 0.2170076072216034, "learning_rate": 2.2643627774768218e-05, "loss": 0.5366, "step": 9639 }, { "epoch": 10.98005698005698, "grad_norm": 0.24862629175186157, "learning_rate": 2.2638989376801557e-05, "loss": 0.5583, "step": 9640 }, { "epoch": 10.981196581196581, "grad_norm": 0.2333691269159317, "learning_rate": 2.2634351060839297e-05, "loss": 0.5767, "step": 9641 }, { "epoch": 10.982336182336182, "grad_norm": 0.20830348134040833, "learning_rate": 2.2629712827042523e-05, "loss": 0.6416, "step": 9642 }, { "epoch": 10.983475783475784, "grad_norm": 0.24028995633125305, "learning_rate": 2.2625074675572347e-05, "loss": 0.5828, "step": 9643 }, { "epoch": 10.984615384615385, "grad_norm": 0.27821603417396545, "learning_rate": 2.2620436606589855e-05, "loss": 0.5707, "step": 9644 }, { "epoch": 10.985754985754985, "grad_norm": 0.20995180308818817, "learning_rate": 2.2615798620256144e-05, "loss": 0.7726, "step": 9645 }, { "epoch": 10.986894586894588, "grad_norm": 0.26799318194389343, "learning_rate": 2.2611160716732302e-05, "loss": 0.6795, "step": 9646 }, { "epoch": 10.988034188034188, "grad_norm": 0.17716257274150848, "learning_rate": 2.260652289617941e-05, "loss": 0.6196, "step": 9647 }, { "epoch": 10.989173789173789, "grad_norm": 0.23527143895626068, "learning_rate": 2.2601885158758572e-05, "loss": 0.6737, "step": 9648 }, { "epoch": 10.990313390313391, "grad_norm": 0.18669234216213226, "learning_rate": 2.2597247504630852e-05, "loss": 0.6653, "step": 9649 }, { "epoch": 10.991452991452991, "grad_norm": 0.19625809788703918, "learning_rate": 2.259260993395734e-05, "loss": 0.6879, "step": 9650 }, { "epoch": 10.992592592592592, "grad_norm": 0.17003391683101654, "learning_rate": 2.25879724468991e-05, "loss": 0.9956, "step": 9651 }, { "epoch": 10.993732193732194, "grad_norm": 0.19090116024017334, "learning_rate": 2.2583335043617216e-05, "loss": 0.8175, "step": 9652 }, { "epoch": 10.994871794871795, "grad_norm": 0.1942262053489685, "learning_rate": 2.2578697724272743e-05, "loss": 0.8009, "step": 9653 }, { "epoch": 10.996011396011395, "grad_norm": 0.18930639326572418, "learning_rate": 2.2574060489026763e-05, "loss": 0.856, "step": 9654 }, { "epoch": 10.997150997150998, "grad_norm": 0.20694051682949066, "learning_rate": 2.256942333804033e-05, "loss": 0.611, "step": 9655 }, { "epoch": 10.998290598290598, "grad_norm": 0.23336829245090485, "learning_rate": 2.2564786271474513e-05, "loss": 0.6213, "step": 9656 }, { "epoch": 10.999430199430199, "grad_norm": 0.21446311473846436, "learning_rate": 2.256014928949036e-05, "loss": 0.84, "step": 9657 }, { "epoch": 11.0, "grad_norm": 0.3496212065219879, "learning_rate": 2.2555512392248927e-05, "loss": 0.5784, "step": 9658 }, { "epoch": 11.0011396011396, "grad_norm": 0.2207029163837433, "learning_rate": 2.2550875579911278e-05, "loss": 0.5183, "step": 9659 }, { "epoch": 11.002279202279203, "grad_norm": 0.17076687514781952, "learning_rate": 2.2546238852638457e-05, "loss": 0.9158, "step": 9660 }, { "epoch": 11.003418803418803, "grad_norm": 0.21002976596355438, "learning_rate": 2.2541602210591506e-05, "loss": 0.5735, "step": 9661 }, { "epoch": 11.004558404558404, "grad_norm": 0.20029595494270325, "learning_rate": 2.2536965653931474e-05, "loss": 0.812, "step": 9662 }, { "epoch": 11.005698005698006, "grad_norm": 0.24149265885353088, "learning_rate": 2.2532329182819394e-05, "loss": 0.4502, "step": 9663 }, { "epoch": 11.006837606837607, "grad_norm": 0.1931881606578827, "learning_rate": 2.2527692797416314e-05, "loss": 0.7628, "step": 9664 }, { "epoch": 11.007977207977207, "grad_norm": 0.20538873970508575, "learning_rate": 2.2523056497883253e-05, "loss": 0.7331, "step": 9665 }, { "epoch": 11.00911680911681, "grad_norm": 0.22720561921596527, "learning_rate": 2.2518420284381262e-05, "loss": 0.3993, "step": 9666 }, { "epoch": 11.01025641025641, "grad_norm": 0.23557770252227783, "learning_rate": 2.2513784157071348e-05, "loss": 0.6348, "step": 9667 }, { "epoch": 11.01139601139601, "grad_norm": 0.27387315034866333, "learning_rate": 2.2509148116114555e-05, "loss": 0.4139, "step": 9668 }, { "epoch": 11.012535612535613, "grad_norm": 0.2563721239566803, "learning_rate": 2.250451216167189e-05, "loss": 0.701, "step": 9669 }, { "epoch": 11.013675213675214, "grad_norm": 0.23487509787082672, "learning_rate": 2.2499876293904382e-05, "loss": 0.5986, "step": 9670 }, { "epoch": 11.014814814814814, "grad_norm": 0.24869404733181, "learning_rate": 2.2495240512973053e-05, "loss": 0.614, "step": 9671 }, { "epoch": 11.015954415954416, "grad_norm": 0.20076461136341095, "learning_rate": 2.2490604819038903e-05, "loss": 0.925, "step": 9672 }, { "epoch": 11.017094017094017, "grad_norm": 0.2506314814090729, "learning_rate": 2.2485969212262953e-05, "loss": 0.6251, "step": 9673 }, { "epoch": 11.018233618233618, "grad_norm": 0.20979809761047363, "learning_rate": 2.2481333692806203e-05, "loss": 0.6028, "step": 9674 }, { "epoch": 11.01937321937322, "grad_norm": 0.18014901876449585, "learning_rate": 2.2476698260829663e-05, "loss": 0.7324, "step": 9675 }, { "epoch": 11.02051282051282, "grad_norm": 0.24035555124282837, "learning_rate": 2.247206291649433e-05, "loss": 0.515, "step": 9676 }, { "epoch": 11.021652421652421, "grad_norm": 0.21577733755111694, "learning_rate": 2.2467427659961203e-05, "loss": 0.7592, "step": 9677 }, { "epoch": 11.022792022792023, "grad_norm": 0.17657114565372467, "learning_rate": 2.2462792491391276e-05, "loss": 0.7243, "step": 9678 }, { "epoch": 11.023931623931624, "grad_norm": 0.21607467532157898, "learning_rate": 2.245815741094555e-05, "loss": 0.6467, "step": 9679 }, { "epoch": 11.025071225071224, "grad_norm": 0.21872879564762115, "learning_rate": 2.2453522418784996e-05, "loss": 0.4416, "step": 9680 }, { "epoch": 11.026210826210827, "grad_norm": 0.19626228511333466, "learning_rate": 2.2448887515070614e-05, "loss": 0.7154, "step": 9681 }, { "epoch": 11.027350427350427, "grad_norm": 0.19481174647808075, "learning_rate": 2.244425269996339e-05, "loss": 0.8605, "step": 9682 }, { "epoch": 11.028490028490028, "grad_norm": 0.21111561357975006, "learning_rate": 2.2439617973624296e-05, "loss": 0.6984, "step": 9683 }, { "epoch": 11.02962962962963, "grad_norm": 0.21285225450992584, "learning_rate": 2.243498333621431e-05, "loss": 0.6571, "step": 9684 }, { "epoch": 11.03076923076923, "grad_norm": 0.21245799958705902, "learning_rate": 2.2430348787894407e-05, "loss": 0.626, "step": 9685 }, { "epoch": 11.031908831908831, "grad_norm": 0.23150581121444702, "learning_rate": 2.242571432882556e-05, "loss": 0.6927, "step": 9686 }, { "epoch": 11.033048433048434, "grad_norm": 0.20729854702949524, "learning_rate": 2.242107995916873e-05, "loss": 0.6512, "step": 9687 }, { "epoch": 11.034188034188034, "grad_norm": 0.187919020652771, "learning_rate": 2.241644567908489e-05, "loss": 0.7536, "step": 9688 }, { "epoch": 11.035327635327635, "grad_norm": 0.20111168920993805, "learning_rate": 2.2411811488734986e-05, "loss": 0.6246, "step": 9689 }, { "epoch": 11.036467236467237, "grad_norm": 0.20703180134296417, "learning_rate": 2.240717738828e-05, "loss": 0.7803, "step": 9690 }, { "epoch": 11.037606837606837, "grad_norm": 0.25329071283340454, "learning_rate": 2.2402543377880858e-05, "loss": 0.6766, "step": 9691 }, { "epoch": 11.038746438746438, "grad_norm": 0.18658600747585297, "learning_rate": 2.2397909457698535e-05, "loss": 0.731, "step": 9692 }, { "epoch": 11.03988603988604, "grad_norm": 0.20471811294555664, "learning_rate": 2.2393275627893977e-05, "loss": 0.6684, "step": 9693 }, { "epoch": 11.04102564102564, "grad_norm": 0.19438152015209198, "learning_rate": 2.2388641888628116e-05, "loss": 0.706, "step": 9694 }, { "epoch": 11.042165242165241, "grad_norm": 0.19723469018936157, "learning_rate": 2.2384008240061914e-05, "loss": 0.6144, "step": 9695 }, { "epoch": 11.043304843304844, "grad_norm": 0.20604175329208374, "learning_rate": 2.237937468235629e-05, "loss": 0.6555, "step": 9696 }, { "epoch": 11.044444444444444, "grad_norm": 0.20576262474060059, "learning_rate": 2.23747412156722e-05, "loss": 0.5842, "step": 9697 }, { "epoch": 11.045584045584045, "grad_norm": 0.2670564651489258, "learning_rate": 2.237010784017056e-05, "loss": 0.452, "step": 9698 }, { "epoch": 11.046723646723647, "grad_norm": 0.21373350918293, "learning_rate": 2.236547455601231e-05, "loss": 0.7006, "step": 9699 }, { "epoch": 11.047863247863248, "grad_norm": 0.22663438320159912, "learning_rate": 2.2360841363358374e-05, "loss": 0.5797, "step": 9700 }, { "epoch": 11.049002849002848, "grad_norm": 0.20244929194450378, "learning_rate": 2.2356208262369675e-05, "loss": 0.8084, "step": 9701 }, { "epoch": 11.05014245014245, "grad_norm": 0.24661555886268616, "learning_rate": 2.235157525320712e-05, "loss": 0.5419, "step": 9702 }, { "epoch": 11.051282051282051, "grad_norm": 0.21187053620815277, "learning_rate": 2.2346942336031654e-05, "loss": 0.8437, "step": 9703 }, { "epoch": 11.052421652421652, "grad_norm": 0.174552321434021, "learning_rate": 2.234230951100417e-05, "loss": 0.6676, "step": 9704 }, { "epoch": 11.053561253561254, "grad_norm": 0.18161781132221222, "learning_rate": 2.2337676778285597e-05, "loss": 0.7831, "step": 9705 }, { "epoch": 11.054700854700855, "grad_norm": 0.22311250865459442, "learning_rate": 2.2333044138036817e-05, "loss": 0.8345, "step": 9706 }, { "epoch": 11.055840455840455, "grad_norm": 0.18615169823169708, "learning_rate": 2.232841159041876e-05, "loss": 0.7275, "step": 9707 }, { "epoch": 11.056980056980057, "grad_norm": 0.1606191247701645, "learning_rate": 2.232377913559231e-05, "loss": 0.6179, "step": 9708 }, { "epoch": 11.058119658119658, "grad_norm": 0.226676344871521, "learning_rate": 2.2319146773718373e-05, "loss": 0.574, "step": 9709 }, { "epoch": 11.059259259259258, "grad_norm": 0.2089432179927826, "learning_rate": 2.2314514504957834e-05, "loss": 0.6092, "step": 9710 }, { "epoch": 11.06039886039886, "grad_norm": 0.19234353303909302, "learning_rate": 2.23098823294716e-05, "loss": 0.8133, "step": 9711 }, { "epoch": 11.061538461538461, "grad_norm": 0.1979590207338333, "learning_rate": 2.2305250247420544e-05, "loss": 0.9034, "step": 9712 }, { "epoch": 11.062678062678062, "grad_norm": 0.19378747045993805, "learning_rate": 2.2300618258965552e-05, "loss": 0.7448, "step": 9713 }, { "epoch": 11.063817663817664, "grad_norm": 0.19898751378059387, "learning_rate": 2.2295986364267525e-05, "loss": 0.6707, "step": 9714 }, { "epoch": 11.064957264957265, "grad_norm": 0.22572246193885803, "learning_rate": 2.229135456348732e-05, "loss": 0.6467, "step": 9715 }, { "epoch": 11.066096866096865, "grad_norm": 0.17180564999580383, "learning_rate": 2.228672285678582e-05, "loss": 0.6771, "step": 9716 }, { "epoch": 11.067236467236468, "grad_norm": 0.17500701546669006, "learning_rate": 2.22820912443239e-05, "loss": 0.7246, "step": 9717 }, { "epoch": 11.068376068376068, "grad_norm": 0.19284003973007202, "learning_rate": 2.2277459726262426e-05, "loss": 0.7178, "step": 9718 }, { "epoch": 11.069515669515669, "grad_norm": 0.20313270390033722, "learning_rate": 2.2272828302762255e-05, "loss": 0.6786, "step": 9719 }, { "epoch": 11.070655270655271, "grad_norm": 0.26945075392723083, "learning_rate": 2.2268196973984267e-05, "loss": 0.6193, "step": 9720 }, { "epoch": 11.071794871794872, "grad_norm": 0.21798992156982422, "learning_rate": 2.2263565740089304e-05, "loss": 0.7099, "step": 9721 }, { "epoch": 11.072934472934472, "grad_norm": 0.19746606051921844, "learning_rate": 2.225893460123823e-05, "loss": 0.7334, "step": 9722 }, { "epoch": 11.074074074074074, "grad_norm": 0.21590682864189148, "learning_rate": 2.2254303557591895e-05, "loss": 0.8148, "step": 9723 }, { "epoch": 11.075213675213675, "grad_norm": 0.24861934781074524, "learning_rate": 2.224967260931114e-05, "loss": 0.6105, "step": 9724 }, { "epoch": 11.076353276353275, "grad_norm": 0.17617283761501312, "learning_rate": 2.2245041756556832e-05, "loss": 0.6675, "step": 9725 }, { "epoch": 11.077492877492878, "grad_norm": 0.20351779460906982, "learning_rate": 2.2240410999489793e-05, "loss": 0.4936, "step": 9726 }, { "epoch": 11.078632478632478, "grad_norm": 0.223384290933609, "learning_rate": 2.2235780338270875e-05, "loss": 0.6656, "step": 9727 }, { "epoch": 11.079772079772079, "grad_norm": 0.26108142733573914, "learning_rate": 2.2231149773060898e-05, "loss": 0.4883, "step": 9728 }, { "epoch": 11.080911680911681, "grad_norm": 0.19692204892635345, "learning_rate": 2.222651930402071e-05, "loss": 0.6913, "step": 9729 }, { "epoch": 11.082051282051282, "grad_norm": 0.2371622622013092, "learning_rate": 2.2221888931311132e-05, "loss": 0.6264, "step": 9730 }, { "epoch": 11.083190883190884, "grad_norm": 0.19994454085826874, "learning_rate": 2.2217258655092994e-05, "loss": 0.5097, "step": 9731 }, { "epoch": 11.084330484330485, "grad_norm": 0.16892565786838531, "learning_rate": 2.2212628475527113e-05, "loss": 0.7712, "step": 9732 }, { "epoch": 11.085470085470085, "grad_norm": 0.17062221467494965, "learning_rate": 2.2207998392774317e-05, "loss": 0.8953, "step": 9733 }, { "epoch": 11.086609686609687, "grad_norm": 0.26964378356933594, "learning_rate": 2.2203368406995405e-05, "loss": 0.6229, "step": 9734 }, { "epoch": 11.087749287749288, "grad_norm": 0.24692533910274506, "learning_rate": 2.2198738518351192e-05, "loss": 0.7107, "step": 9735 }, { "epoch": 11.088888888888889, "grad_norm": 0.25305140018463135, "learning_rate": 2.2194108727002507e-05, "loss": 0.4131, "step": 9736 }, { "epoch": 11.090028490028491, "grad_norm": 0.20555326342582703, "learning_rate": 2.218947903311014e-05, "loss": 0.6852, "step": 9737 }, { "epoch": 11.091168091168091, "grad_norm": 0.2388518899679184, "learning_rate": 2.218484943683489e-05, "loss": 0.7015, "step": 9738 }, { "epoch": 11.092307692307692, "grad_norm": 0.21486225724220276, "learning_rate": 2.218021993833757e-05, "loss": 0.7757, "step": 9739 }, { "epoch": 11.093447293447294, "grad_norm": 0.20804311335086823, "learning_rate": 2.217559053777896e-05, "loss": 0.8216, "step": 9740 }, { "epoch": 11.094586894586895, "grad_norm": 0.226480633020401, "learning_rate": 2.217096123531986e-05, "loss": 0.3399, "step": 9741 }, { "epoch": 11.095726495726495, "grad_norm": 0.20435543358325958, "learning_rate": 2.2166332031121053e-05, "loss": 0.683, "step": 9742 }, { "epoch": 11.096866096866098, "grad_norm": 0.22503264248371124, "learning_rate": 2.2161702925343332e-05, "loss": 0.7535, "step": 9743 }, { "epoch": 11.098005698005698, "grad_norm": 0.2349788248538971, "learning_rate": 2.215707391814747e-05, "loss": 0.6157, "step": 9744 }, { "epoch": 11.099145299145299, "grad_norm": 0.2529385983943939, "learning_rate": 2.2152445009694253e-05, "loss": 0.5379, "step": 9745 }, { "epoch": 11.100284900284901, "grad_norm": 0.26216834783554077, "learning_rate": 2.214781620014444e-05, "loss": 0.5371, "step": 9746 }, { "epoch": 11.101424501424502, "grad_norm": 0.25731146335601807, "learning_rate": 2.214318748965882e-05, "loss": 0.7146, "step": 9747 }, { "epoch": 11.102564102564102, "grad_norm": 0.22455823421478271, "learning_rate": 2.2138558878398164e-05, "loss": 0.6028, "step": 9748 }, { "epoch": 11.103703703703705, "grad_norm": 0.20155535638332367, "learning_rate": 2.213393036652322e-05, "loss": 0.6991, "step": 9749 }, { "epoch": 11.104843304843305, "grad_norm": 0.17237181961536407, "learning_rate": 2.2129301954194756e-05, "loss": 0.8365, "step": 9750 }, { "epoch": 11.105982905982906, "grad_norm": 0.23253187537193298, "learning_rate": 2.212467364157353e-05, "loss": 0.5382, "step": 9751 }, { "epoch": 11.107122507122508, "grad_norm": 0.18165284395217896, "learning_rate": 2.2120045428820297e-05, "loss": 0.7427, "step": 9752 }, { "epoch": 11.108262108262108, "grad_norm": 0.22816559672355652, "learning_rate": 2.21154173160958e-05, "loss": 0.649, "step": 9753 }, { "epoch": 11.109401709401709, "grad_norm": 0.20542609691619873, "learning_rate": 2.21107893035608e-05, "loss": 0.623, "step": 9754 }, { "epoch": 11.110541310541311, "grad_norm": 0.167438343167305, "learning_rate": 2.210616139137603e-05, "loss": 0.8256, "step": 9755 }, { "epoch": 11.111680911680912, "grad_norm": 0.21956948935985565, "learning_rate": 2.210153357970224e-05, "loss": 0.7568, "step": 9756 }, { "epoch": 11.112820512820512, "grad_norm": 0.19001701474189758, "learning_rate": 2.209690586870014e-05, "loss": 0.8796, "step": 9757 }, { "epoch": 11.113960113960115, "grad_norm": 0.2250533252954483, "learning_rate": 2.2092278258530493e-05, "loss": 0.6293, "step": 9758 }, { "epoch": 11.115099715099715, "grad_norm": 0.30526164174079895, "learning_rate": 2.2087650749354028e-05, "loss": 0.4394, "step": 9759 }, { "epoch": 11.116239316239316, "grad_norm": 0.2091488093137741, "learning_rate": 2.208302334133145e-05, "loss": 0.6569, "step": 9760 }, { "epoch": 11.117378917378918, "grad_norm": 0.27805742621421814, "learning_rate": 2.20783960346235e-05, "loss": 0.4609, "step": 9761 }, { "epoch": 11.118518518518519, "grad_norm": 0.2604121267795563, "learning_rate": 2.2073768829390885e-05, "loss": 0.5672, "step": 9762 }, { "epoch": 11.11965811965812, "grad_norm": 0.2439527064561844, "learning_rate": 2.206914172579433e-05, "loss": 0.7369, "step": 9763 }, { "epoch": 11.120797720797722, "grad_norm": 0.20667394995689392, "learning_rate": 2.206451472399454e-05, "loss": 0.697, "step": 9764 }, { "epoch": 11.121937321937322, "grad_norm": 0.26415547728538513, "learning_rate": 2.2059887824152232e-05, "loss": 0.5126, "step": 9765 }, { "epoch": 11.123076923076923, "grad_norm": 0.17649686336517334, "learning_rate": 2.2055261026428097e-05, "loss": 0.8044, "step": 9766 }, { "epoch": 11.124216524216525, "grad_norm": 0.1769997775554657, "learning_rate": 2.205063433098285e-05, "loss": 0.8358, "step": 9767 }, { "epoch": 11.125356125356126, "grad_norm": 0.19905953109264374, "learning_rate": 2.2046007737977175e-05, "loss": 0.7311, "step": 9768 }, { "epoch": 11.126495726495726, "grad_norm": 0.2069428563117981, "learning_rate": 2.2041381247571776e-05, "loss": 0.7473, "step": 9769 }, { "epoch": 11.127635327635328, "grad_norm": 0.19244493544101715, "learning_rate": 2.2036754859927353e-05, "loss": 0.7852, "step": 9770 }, { "epoch": 11.128774928774929, "grad_norm": 0.2240649312734604, "learning_rate": 2.2032128575204576e-05, "loss": 0.6966, "step": 9771 }, { "epoch": 11.12991452991453, "grad_norm": 0.2013823390007019, "learning_rate": 2.202750239356414e-05, "loss": 0.7629, "step": 9772 }, { "epoch": 11.131054131054132, "grad_norm": 0.18561714887619019, "learning_rate": 2.2022876315166712e-05, "loss": 0.6058, "step": 9773 }, { "epoch": 11.132193732193732, "grad_norm": 0.20360417664051056, "learning_rate": 2.2018250340172984e-05, "loss": 0.7447, "step": 9774 }, { "epoch": 11.133333333333333, "grad_norm": 0.19969744980335236, "learning_rate": 2.201362446874362e-05, "loss": 0.6927, "step": 9775 }, { "epoch": 11.134472934472935, "grad_norm": 0.1618545651435852, "learning_rate": 2.200899870103929e-05, "loss": 0.6203, "step": 9776 }, { "epoch": 11.135612535612536, "grad_norm": 0.23471501469612122, "learning_rate": 2.200437303722066e-05, "loss": 0.6579, "step": 9777 }, { "epoch": 11.136752136752136, "grad_norm": 0.15572519600391388, "learning_rate": 2.1999747477448397e-05, "loss": 0.9026, "step": 9778 }, { "epoch": 11.137891737891739, "grad_norm": 0.18805311620235443, "learning_rate": 2.1995122021883136e-05, "loss": 0.9989, "step": 9779 }, { "epoch": 11.13903133903134, "grad_norm": 0.23099660873413086, "learning_rate": 2.1990496670685568e-05, "loss": 0.5055, "step": 9780 }, { "epoch": 11.14017094017094, "grad_norm": 0.18490153551101685, "learning_rate": 2.198587142401632e-05, "loss": 0.8941, "step": 9781 }, { "epoch": 11.141310541310542, "grad_norm": 0.20048584043979645, "learning_rate": 2.1981246282036057e-05, "loss": 0.7024, "step": 9782 }, { "epoch": 11.142450142450143, "grad_norm": 0.19016273319721222, "learning_rate": 2.19766212449054e-05, "loss": 0.7242, "step": 9783 }, { "epoch": 11.143589743589743, "grad_norm": 0.2155466228723526, "learning_rate": 2.1971996312785013e-05, "loss": 0.6497, "step": 9784 }, { "epoch": 11.144729344729345, "grad_norm": 0.2170635163784027, "learning_rate": 2.196737148583551e-05, "loss": 0.7111, "step": 9785 }, { "epoch": 11.145868945868946, "grad_norm": 0.1944105327129364, "learning_rate": 2.1962746764217545e-05, "loss": 0.8279, "step": 9786 }, { "epoch": 11.147008547008546, "grad_norm": 0.2234017252922058, "learning_rate": 2.195812214809173e-05, "loss": 0.4085, "step": 9787 }, { "epoch": 11.148148148148149, "grad_norm": 0.24321797490119934, "learning_rate": 2.1953497637618702e-05, "loss": 0.6206, "step": 9788 }, { "epoch": 11.14928774928775, "grad_norm": 0.17733822762966156, "learning_rate": 2.1948873232959077e-05, "loss": 0.6844, "step": 9789 }, { "epoch": 11.15042735042735, "grad_norm": 0.22268755733966827, "learning_rate": 2.194424893427347e-05, "loss": 0.5847, "step": 9790 }, { "epoch": 11.151566951566952, "grad_norm": 0.2131318598985672, "learning_rate": 2.1939624741722508e-05, "loss": 0.728, "step": 9791 }, { "epoch": 11.152706552706553, "grad_norm": 0.20112057030200958, "learning_rate": 2.1935000655466793e-05, "loss": 0.5825, "step": 9792 }, { "epoch": 11.153846153846153, "grad_norm": 0.25268763303756714, "learning_rate": 2.1930376675666937e-05, "loss": 0.4938, "step": 9793 }, { "epoch": 11.154985754985756, "grad_norm": 0.2349073886871338, "learning_rate": 2.1925752802483535e-05, "loss": 0.6303, "step": 9794 }, { "epoch": 11.156125356125356, "grad_norm": 0.26190075278282166, "learning_rate": 2.19211290360772e-05, "loss": 0.6914, "step": 9795 }, { "epoch": 11.157264957264957, "grad_norm": 0.20109465718269348, "learning_rate": 2.1916505376608514e-05, "loss": 0.8024, "step": 9796 }, { "epoch": 11.158404558404559, "grad_norm": 0.19945837557315826, "learning_rate": 2.191188182423808e-05, "loss": 0.59, "step": 9797 }, { "epoch": 11.15954415954416, "grad_norm": 0.2376086711883545, "learning_rate": 2.1907258379126477e-05, "loss": 0.7078, "step": 9798 }, { "epoch": 11.16068376068376, "grad_norm": 0.1969120353460312, "learning_rate": 2.19026350414343e-05, "loss": 0.7601, "step": 9799 }, { "epoch": 11.161823361823362, "grad_norm": 0.25738725066185, "learning_rate": 2.1898011811322122e-05, "loss": 0.5418, "step": 9800 }, { "epoch": 11.162962962962963, "grad_norm": 0.22950421273708344, "learning_rate": 2.189338868895052e-05, "loss": 0.7164, "step": 9801 }, { "epoch": 11.164102564102564, "grad_norm": 0.20779669284820557, "learning_rate": 2.188876567448008e-05, "loss": 0.5155, "step": 9802 }, { "epoch": 11.165242165242166, "grad_norm": 0.20574414730072021, "learning_rate": 2.1884142768071365e-05, "loss": 0.502, "step": 9803 }, { "epoch": 11.166381766381766, "grad_norm": 0.18326134979724884, "learning_rate": 2.187951996988494e-05, "loss": 0.7151, "step": 9804 }, { "epoch": 11.167521367521367, "grad_norm": 0.1888405978679657, "learning_rate": 2.1874897280081362e-05, "loss": 0.8161, "step": 9805 }, { "epoch": 11.16866096866097, "grad_norm": 0.20030201971530914, "learning_rate": 2.1870274698821204e-05, "loss": 0.692, "step": 9806 }, { "epoch": 11.16980056980057, "grad_norm": 0.1764650195837021, "learning_rate": 2.1865652226265006e-05, "loss": 0.9014, "step": 9807 }, { "epoch": 11.17094017094017, "grad_norm": 0.22131212055683136, "learning_rate": 2.186102986257333e-05, "loss": 0.522, "step": 9808 }, { "epoch": 11.172079772079773, "grad_norm": 0.24243780970573425, "learning_rate": 2.185640760790672e-05, "loss": 0.5526, "step": 9809 }, { "epoch": 11.173219373219373, "grad_norm": 0.22127288579940796, "learning_rate": 2.1851785462425717e-05, "loss": 0.6612, "step": 9810 }, { "epoch": 11.174358974358974, "grad_norm": 0.2630375921726227, "learning_rate": 2.1847163426290857e-05, "loss": 0.4255, "step": 9811 }, { "epoch": 11.175498575498576, "grad_norm": 0.196077361702919, "learning_rate": 2.1842541499662677e-05, "loss": 0.655, "step": 9812 }, { "epoch": 11.176638176638177, "grad_norm": 0.17487069964408875, "learning_rate": 2.1837919682701727e-05, "loss": 0.7687, "step": 9813 }, { "epoch": 11.177777777777777, "grad_norm": 0.23239560425281525, "learning_rate": 2.1833297975568515e-05, "loss": 0.5071, "step": 9814 }, { "epoch": 11.17891737891738, "grad_norm": 0.23284125328063965, "learning_rate": 2.1828676378423578e-05, "loss": 0.5937, "step": 9815 }, { "epoch": 11.18005698005698, "grad_norm": 0.28568243980407715, "learning_rate": 2.182405489142743e-05, "loss": 0.4378, "step": 9816 }, { "epoch": 11.18119658119658, "grad_norm": 0.20135775208473206, "learning_rate": 2.1819433514740584e-05, "loss": 0.7704, "step": 9817 }, { "epoch": 11.182336182336183, "grad_norm": 0.19652235507965088, "learning_rate": 2.1814812248523566e-05, "loss": 0.756, "step": 9818 }, { "epoch": 11.183475783475783, "grad_norm": 0.215298593044281, "learning_rate": 2.181019109293687e-05, "loss": 0.6081, "step": 9819 }, { "epoch": 11.184615384615384, "grad_norm": 0.21990089118480682, "learning_rate": 2.180557004814102e-05, "loss": 0.6343, "step": 9820 }, { "epoch": 11.185754985754986, "grad_norm": 0.2597898244857788, "learning_rate": 2.18009491142965e-05, "loss": 0.5453, "step": 9821 }, { "epoch": 11.186894586894587, "grad_norm": 0.2307051569223404, "learning_rate": 2.1796328291563818e-05, "loss": 0.6102, "step": 9822 }, { "epoch": 11.188034188034187, "grad_norm": 0.17483721673488617, "learning_rate": 2.1791707580103456e-05, "loss": 0.8583, "step": 9823 }, { "epoch": 11.18917378917379, "grad_norm": 0.2380446195602417, "learning_rate": 2.1787086980075916e-05, "loss": 0.6745, "step": 9824 }, { "epoch": 11.19031339031339, "grad_norm": 0.24400512874126434, "learning_rate": 2.1782466491641688e-05, "loss": 0.4882, "step": 9825 }, { "epoch": 11.19145299145299, "grad_norm": 0.20268435776233673, "learning_rate": 2.177784611496124e-05, "loss": 0.9009, "step": 9826 }, { "epoch": 11.192592592592593, "grad_norm": 0.21574735641479492, "learning_rate": 2.1773225850195062e-05, "loss": 0.6727, "step": 9827 }, { "epoch": 11.193732193732194, "grad_norm": 0.2220917046070099, "learning_rate": 2.176860569750362e-05, "loss": 0.6873, "step": 9828 }, { "epoch": 11.194871794871794, "grad_norm": 0.16839267313480377, "learning_rate": 2.1763985657047393e-05, "loss": 0.8842, "step": 9829 }, { "epoch": 11.196011396011396, "grad_norm": 0.17957979440689087, "learning_rate": 2.1759365728986842e-05, "loss": 0.9364, "step": 9830 }, { "epoch": 11.197150997150997, "grad_norm": 0.20117245614528656, "learning_rate": 2.175474591348243e-05, "loss": 0.9111, "step": 9831 }, { "epoch": 11.198290598290598, "grad_norm": 0.21290776133537292, "learning_rate": 2.1750126210694616e-05, "loss": 0.6402, "step": 9832 }, { "epoch": 11.1994301994302, "grad_norm": 0.2107255607843399, "learning_rate": 2.174550662078386e-05, "loss": 0.7428, "step": 9833 }, { "epoch": 11.2005698005698, "grad_norm": 0.19782611727714539, "learning_rate": 2.1740887143910594e-05, "loss": 0.8813, "step": 9834 }, { "epoch": 11.201709401709401, "grad_norm": 0.18664376437664032, "learning_rate": 2.1736267780235292e-05, "loss": 0.6515, "step": 9835 }, { "epoch": 11.202849002849003, "grad_norm": 0.21450534462928772, "learning_rate": 2.173164852991839e-05, "loss": 0.5894, "step": 9836 }, { "epoch": 11.203988603988604, "grad_norm": 0.23006367683410645, "learning_rate": 2.1727029393120313e-05, "loss": 0.737, "step": 9837 }, { "epoch": 11.205128205128204, "grad_norm": 0.22872765362262726, "learning_rate": 2.1722410370001513e-05, "loss": 0.7273, "step": 9838 }, { "epoch": 11.206267806267807, "grad_norm": 0.21571621298789978, "learning_rate": 2.1717791460722412e-05, "loss": 0.7711, "step": 9839 }, { "epoch": 11.207407407407407, "grad_norm": 0.2251289188861847, "learning_rate": 2.171317266544344e-05, "loss": 0.5974, "step": 9840 }, { "epoch": 11.208547008547008, "grad_norm": 0.2158321589231491, "learning_rate": 2.170855398432502e-05, "loss": 0.8194, "step": 9841 }, { "epoch": 11.20968660968661, "grad_norm": 0.19628766179084778, "learning_rate": 2.170393541752757e-05, "loss": 0.8734, "step": 9842 }, { "epoch": 11.21082621082621, "grad_norm": 0.18358439207077026, "learning_rate": 2.1699316965211507e-05, "loss": 0.7593, "step": 9843 }, { "epoch": 11.211965811965811, "grad_norm": 0.17860673367977142, "learning_rate": 2.169469862753725e-05, "loss": 0.8906, "step": 9844 }, { "epoch": 11.213105413105414, "grad_norm": 0.2071894109249115, "learning_rate": 2.169008040466519e-05, "loss": 0.7339, "step": 9845 }, { "epoch": 11.214245014245014, "grad_norm": 0.2334849238395691, "learning_rate": 2.1685462296755734e-05, "loss": 0.5959, "step": 9846 }, { "epoch": 11.215384615384615, "grad_norm": 0.19137489795684814, "learning_rate": 2.16808443039693e-05, "loss": 0.5544, "step": 9847 }, { "epoch": 11.216524216524217, "grad_norm": 0.2179301381111145, "learning_rate": 2.1676226426466268e-05, "loss": 0.7019, "step": 9848 }, { "epoch": 11.217663817663817, "grad_norm": 0.18027031421661377, "learning_rate": 2.1671608664407033e-05, "loss": 0.8747, "step": 9849 }, { "epoch": 11.218803418803418, "grad_norm": 0.2572190463542938, "learning_rate": 2.166699101795198e-05, "loss": 0.6616, "step": 9850 }, { "epoch": 11.21994301994302, "grad_norm": 0.2263181507587433, "learning_rate": 2.16623734872615e-05, "loss": 0.686, "step": 9851 }, { "epoch": 11.221082621082621, "grad_norm": 0.1940896362066269, "learning_rate": 2.1657756072495962e-05, "loss": 0.7975, "step": 9852 }, { "epoch": 11.222222222222221, "grad_norm": 0.22373104095458984, "learning_rate": 2.165313877381575e-05, "loss": 0.4646, "step": 9853 }, { "epoch": 11.223361823361824, "grad_norm": 0.21348810195922852, "learning_rate": 2.1648521591381228e-05, "loss": 0.597, "step": 9854 }, { "epoch": 11.224501424501424, "grad_norm": 0.23158220946788788, "learning_rate": 2.1643904525352774e-05, "loss": 0.5146, "step": 9855 }, { "epoch": 11.225641025641025, "grad_norm": 0.20586775243282318, "learning_rate": 2.1639287575890737e-05, "loss": 0.6993, "step": 9856 }, { "epoch": 11.226780626780627, "grad_norm": 0.19456638395786285, "learning_rate": 2.1634670743155482e-05, "loss": 0.7805, "step": 9857 }, { "epoch": 11.227920227920228, "grad_norm": 0.29578936100006104, "learning_rate": 2.1630054027307372e-05, "loss": 0.6898, "step": 9858 }, { "epoch": 11.229059829059828, "grad_norm": 0.18408513069152832, "learning_rate": 2.1625437428506757e-05, "loss": 0.8714, "step": 9859 }, { "epoch": 11.23019943019943, "grad_norm": 0.239699587225914, "learning_rate": 2.1620820946913977e-05, "loss": 0.5641, "step": 9860 }, { "epoch": 11.231339031339031, "grad_norm": 0.21915483474731445, "learning_rate": 2.161620458268938e-05, "loss": 0.6919, "step": 9861 }, { "epoch": 11.232478632478632, "grad_norm": 0.18734203279018402, "learning_rate": 2.1611588335993303e-05, "loss": 0.8573, "step": 9862 }, { "epoch": 11.233618233618234, "grad_norm": 0.21176421642303467, "learning_rate": 2.1606972206986082e-05, "loss": 0.8366, "step": 9863 }, { "epoch": 11.234757834757835, "grad_norm": 0.2217629849910736, "learning_rate": 2.1602356195828046e-05, "loss": 0.6826, "step": 9864 }, { "epoch": 11.235897435897435, "grad_norm": 0.2744024097919464, "learning_rate": 2.1597740302679527e-05, "loss": 0.6763, "step": 9865 }, { "epoch": 11.237037037037037, "grad_norm": 0.2232164740562439, "learning_rate": 2.1593124527700832e-05, "loss": 0.6673, "step": 9866 }, { "epoch": 11.238176638176638, "grad_norm": 0.23179891705513, "learning_rate": 2.1588508871052305e-05, "loss": 0.6066, "step": 9867 }, { "epoch": 11.239316239316238, "grad_norm": 0.18228130042552948, "learning_rate": 2.158389333289423e-05, "loss": 0.697, "step": 9868 }, { "epoch": 11.24045584045584, "grad_norm": 0.1845717430114746, "learning_rate": 2.1579277913386942e-05, "loss": 0.7356, "step": 9869 }, { "epoch": 11.241595441595441, "grad_norm": 0.21241462230682373, "learning_rate": 2.1574662612690743e-05, "loss": 0.6656, "step": 9870 }, { "epoch": 11.242735042735042, "grad_norm": 0.19101712107658386, "learning_rate": 2.157004743096593e-05, "loss": 0.6139, "step": 9871 }, { "epoch": 11.243874643874644, "grad_norm": 0.17314413189888, "learning_rate": 2.1565432368372802e-05, "loss": 0.7731, "step": 9872 }, { "epoch": 11.245014245014245, "grad_norm": 0.20264515280723572, "learning_rate": 2.1560817425071648e-05, "loss": 0.5648, "step": 9873 }, { "epoch": 11.246153846153845, "grad_norm": 0.23518063127994537, "learning_rate": 2.155620260122277e-05, "loss": 0.5511, "step": 9874 }, { "epoch": 11.247293447293448, "grad_norm": 0.2438165247440338, "learning_rate": 2.155158789698644e-05, "loss": 0.3496, "step": 9875 }, { "epoch": 11.248433048433048, "grad_norm": 0.2112264782190323, "learning_rate": 2.1546973312522955e-05, "loss": 0.6901, "step": 9876 }, { "epoch": 11.249572649572649, "grad_norm": 0.2107095569372177, "learning_rate": 2.1542358847992572e-05, "loss": 0.7665, "step": 9877 }, { "epoch": 11.250712250712251, "grad_norm": 0.17880818247795105, "learning_rate": 2.1537744503555584e-05, "loss": 0.8022, "step": 9878 }, { "epoch": 11.251851851851852, "grad_norm": 0.27017727494239807, "learning_rate": 2.1533130279372236e-05, "loss": 0.6106, "step": 9879 }, { "epoch": 11.252991452991452, "grad_norm": 0.22795578837394714, "learning_rate": 2.1528516175602814e-05, "loss": 0.795, "step": 9880 }, { "epoch": 11.254131054131054, "grad_norm": 0.18021778762340546, "learning_rate": 2.152390219240758e-05, "loss": 0.716, "step": 9881 }, { "epoch": 11.255270655270655, "grad_norm": 0.19654394686222076, "learning_rate": 2.1519288329946773e-05, "loss": 0.6681, "step": 9882 }, { "epoch": 11.256410256410255, "grad_norm": 0.23984694480895996, "learning_rate": 2.151467458838066e-05, "loss": 0.77, "step": 9883 }, { "epoch": 11.257549857549858, "grad_norm": 0.23274153470993042, "learning_rate": 2.151006096786948e-05, "loss": 0.7865, "step": 9884 }, { "epoch": 11.258689458689458, "grad_norm": 0.18167702853679657, "learning_rate": 2.1505447468573488e-05, "loss": 0.7253, "step": 9885 }, { "epoch": 11.25982905982906, "grad_norm": 0.23046329617500305, "learning_rate": 2.1500834090652904e-05, "loss": 0.6818, "step": 9886 }, { "epoch": 11.260968660968661, "grad_norm": 0.24770574271678925, "learning_rate": 2.1496220834267982e-05, "loss": 0.562, "step": 9887 }, { "epoch": 11.262108262108262, "grad_norm": 0.18659816682338715, "learning_rate": 2.1491607699578943e-05, "loss": 0.6392, "step": 9888 }, { "epoch": 11.263247863247864, "grad_norm": 0.2297334223985672, "learning_rate": 2.148699468674602e-05, "loss": 0.6751, "step": 9889 }, { "epoch": 11.264387464387465, "grad_norm": 0.21482333540916443, "learning_rate": 2.148238179592942e-05, "loss": 0.6046, "step": 9890 }, { "epoch": 11.265527065527065, "grad_norm": 0.20615600049495697, "learning_rate": 2.1477769027289384e-05, "loss": 0.6861, "step": 9891 }, { "epoch": 11.266666666666667, "grad_norm": 0.1993170827627182, "learning_rate": 2.147315638098612e-05, "loss": 0.7783, "step": 9892 }, { "epoch": 11.267806267806268, "grad_norm": 0.1882879137992859, "learning_rate": 2.1468543857179823e-05, "loss": 0.6327, "step": 9893 }, { "epoch": 11.268945868945869, "grad_norm": 0.17871378362178802, "learning_rate": 2.1463931456030715e-05, "loss": 0.7525, "step": 9894 }, { "epoch": 11.270085470085471, "grad_norm": 0.2027646005153656, "learning_rate": 2.1459319177698993e-05, "loss": 0.9119, "step": 9895 }, { "epoch": 11.271225071225071, "grad_norm": 0.2039962261915207, "learning_rate": 2.145470702234485e-05, "loss": 0.696, "step": 9896 }, { "epoch": 11.272364672364672, "grad_norm": 0.2487625777721405, "learning_rate": 2.1450094990128484e-05, "loss": 0.3707, "step": 9897 }, { "epoch": 11.273504273504274, "grad_norm": 0.19862404465675354, "learning_rate": 2.144548308121008e-05, "loss": 0.6894, "step": 9898 }, { "epoch": 11.274643874643875, "grad_norm": 0.19828101992607117, "learning_rate": 2.1440871295749828e-05, "loss": 0.6912, "step": 9899 }, { "epoch": 11.275783475783475, "grad_norm": 0.20451399683952332, "learning_rate": 2.14362596339079e-05, "loss": 0.6508, "step": 9900 }, { "epoch": 11.276923076923078, "grad_norm": 0.2265573889017105, "learning_rate": 2.1431648095844463e-05, "loss": 0.7613, "step": 9901 }, { "epoch": 11.278062678062678, "grad_norm": 0.2591131031513214, "learning_rate": 2.1427036681719718e-05, "loss": 0.6222, "step": 9902 }, { "epoch": 11.279202279202279, "grad_norm": 0.23359829187393188, "learning_rate": 2.1422425391693807e-05, "loss": 0.841, "step": 9903 }, { "epoch": 11.280341880341881, "grad_norm": 0.21924211084842682, "learning_rate": 2.1417814225926908e-05, "loss": 0.8135, "step": 9904 }, { "epoch": 11.281481481481482, "grad_norm": 0.19912149012088776, "learning_rate": 2.141320318457916e-05, "loss": 0.7128, "step": 9905 }, { "epoch": 11.282621082621082, "grad_norm": 0.22762812674045563, "learning_rate": 2.1408592267810742e-05, "loss": 0.7108, "step": 9906 }, { "epoch": 11.283760683760685, "grad_norm": 0.1968327760696411, "learning_rate": 2.1403981475781783e-05, "loss": 0.7217, "step": 9907 }, { "epoch": 11.284900284900285, "grad_norm": 0.2060253620147705, "learning_rate": 2.1399370808652443e-05, "loss": 0.7564, "step": 9908 }, { "epoch": 11.286039886039886, "grad_norm": 0.17061100900173187, "learning_rate": 2.139476026658285e-05, "loss": 0.8732, "step": 9909 }, { "epoch": 11.287179487179488, "grad_norm": 0.18478287756443024, "learning_rate": 2.139014984973315e-05, "loss": 0.6568, "step": 9910 }, { "epoch": 11.288319088319088, "grad_norm": 0.1702798455953598, "learning_rate": 2.138553955826347e-05, "loss": 0.7676, "step": 9911 }, { "epoch": 11.289458689458689, "grad_norm": 0.2569606900215149, "learning_rate": 2.1380929392333938e-05, "loss": 0.6848, "step": 9912 }, { "epoch": 11.290598290598291, "grad_norm": 0.16441692411899567, "learning_rate": 2.1376319352104687e-05, "loss": 0.8524, "step": 9913 }, { "epoch": 11.291737891737892, "grad_norm": 0.23033924400806427, "learning_rate": 2.1371709437735828e-05, "loss": 0.6069, "step": 9914 }, { "epoch": 11.292877492877492, "grad_norm": 0.18105411529541016, "learning_rate": 2.136709964938748e-05, "loss": 0.7156, "step": 9915 }, { "epoch": 11.294017094017095, "grad_norm": 0.26649007201194763, "learning_rate": 2.136248998721975e-05, "loss": 0.5712, "step": 9916 }, { "epoch": 11.295156695156695, "grad_norm": 0.19740529358386993, "learning_rate": 2.135788045139275e-05, "loss": 0.7514, "step": 9917 }, { "epoch": 11.296296296296296, "grad_norm": 0.21046902239322662, "learning_rate": 2.135327104206657e-05, "loss": 0.7903, "step": 9918 }, { "epoch": 11.297435897435898, "grad_norm": 0.25303810834884644, "learning_rate": 2.1348661759401325e-05, "loss": 0.6965, "step": 9919 }, { "epoch": 11.298575498575499, "grad_norm": 0.20778560638427734, "learning_rate": 2.134405260355709e-05, "loss": 0.6021, "step": 9920 }, { "epoch": 11.2997150997151, "grad_norm": 0.20996792614459991, "learning_rate": 2.1339443574693967e-05, "loss": 0.7712, "step": 9921 }, { "epoch": 11.300854700854702, "grad_norm": 0.2482452243566513, "learning_rate": 2.133483467297203e-05, "loss": 0.6228, "step": 9922 }, { "epoch": 11.301994301994302, "grad_norm": 0.20021946728229523, "learning_rate": 2.133022589855136e-05, "loss": 0.629, "step": 9923 }, { "epoch": 11.303133903133903, "grad_norm": 0.21196231245994568, "learning_rate": 2.132561725159205e-05, "loss": 0.7473, "step": 9924 }, { "epoch": 11.304273504273505, "grad_norm": 0.15690581500530243, "learning_rate": 2.132100873225415e-05, "loss": 0.866, "step": 9925 }, { "epoch": 11.305413105413106, "grad_norm": 0.19401024281978607, "learning_rate": 2.1316400340697737e-05, "loss": 0.7815, "step": 9926 }, { "epoch": 11.306552706552706, "grad_norm": 0.2112329602241516, "learning_rate": 2.1311792077082864e-05, "loss": 0.7375, "step": 9927 }, { "epoch": 11.307692307692308, "grad_norm": 0.1995096653699875, "learning_rate": 2.13071839415696e-05, "loss": 0.8467, "step": 9928 }, { "epoch": 11.308831908831909, "grad_norm": 0.17122425138950348, "learning_rate": 2.130257593431799e-05, "loss": 0.8192, "step": 9929 }, { "epoch": 11.30997150997151, "grad_norm": 0.2505601644515991, "learning_rate": 2.1297968055488092e-05, "loss": 0.5322, "step": 9930 }, { "epoch": 11.311111111111112, "grad_norm": 0.26188337802886963, "learning_rate": 2.129336030523994e-05, "loss": 0.4682, "step": 9931 }, { "epoch": 11.312250712250712, "grad_norm": 0.2111901044845581, "learning_rate": 2.128875268373358e-05, "loss": 0.6826, "step": 9932 }, { "epoch": 11.313390313390313, "grad_norm": 0.21512557566165924, "learning_rate": 2.128414519112904e-05, "loss": 0.6091, "step": 9933 }, { "epoch": 11.314529914529915, "grad_norm": 0.2135426253080368, "learning_rate": 2.1279537827586355e-05, "loss": 0.512, "step": 9934 }, { "epoch": 11.315669515669516, "grad_norm": 0.19052626192569733, "learning_rate": 2.1274930593265553e-05, "loss": 0.9078, "step": 9935 }, { "epoch": 11.316809116809116, "grad_norm": 0.21458658576011658, "learning_rate": 2.1270323488326662e-05, "loss": 0.7339, "step": 9936 }, { "epoch": 11.317948717948719, "grad_norm": 0.25585073232650757, "learning_rate": 2.1265716512929687e-05, "loss": 0.5645, "step": 9937 }, { "epoch": 11.31908831908832, "grad_norm": 0.21846206486225128, "learning_rate": 2.1261109667234656e-05, "loss": 0.7172, "step": 9938 }, { "epoch": 11.32022792022792, "grad_norm": 0.16231784224510193, "learning_rate": 2.1256502951401556e-05, "loss": 0.7898, "step": 9939 }, { "epoch": 11.321367521367522, "grad_norm": 0.24106508493423462, "learning_rate": 2.1251896365590416e-05, "loss": 0.7337, "step": 9940 }, { "epoch": 11.322507122507123, "grad_norm": 0.18453285098075867, "learning_rate": 2.1247289909961213e-05, "loss": 0.7732, "step": 9941 }, { "epoch": 11.323646723646723, "grad_norm": 0.22222761809825897, "learning_rate": 2.1242683584673954e-05, "loss": 0.5541, "step": 9942 }, { "epoch": 11.324786324786325, "grad_norm": 0.2011951208114624, "learning_rate": 2.1238077389888628e-05, "loss": 0.6079, "step": 9943 }, { "epoch": 11.325925925925926, "grad_norm": 0.22160476446151733, "learning_rate": 2.123347132576522e-05, "loss": 0.46, "step": 9944 }, { "epoch": 11.327065527065526, "grad_norm": 0.1950511336326599, "learning_rate": 2.1228865392463704e-05, "loss": 0.6339, "step": 9945 }, { "epoch": 11.328205128205129, "grad_norm": 0.20166008174419403, "learning_rate": 2.1224259590144067e-05, "loss": 0.905, "step": 9946 }, { "epoch": 11.32934472934473, "grad_norm": 0.23275083303451538, "learning_rate": 2.1219653918966283e-05, "loss": 0.7707, "step": 9947 }, { "epoch": 11.33048433048433, "grad_norm": 0.2161443531513214, "learning_rate": 2.1215048379090308e-05, "loss": 0.6015, "step": 9948 }, { "epoch": 11.331623931623932, "grad_norm": 0.17823894321918488, "learning_rate": 2.1210442970676116e-05, "loss": 0.749, "step": 9949 }, { "epoch": 11.332763532763533, "grad_norm": 0.2397448718547821, "learning_rate": 2.120583769388366e-05, "loss": 0.6788, "step": 9950 }, { "epoch": 11.333903133903133, "grad_norm": 0.17530478537082672, "learning_rate": 2.1201232548872895e-05, "loss": 1.0001, "step": 9951 }, { "epoch": 11.335042735042736, "grad_norm": 0.3379182517528534, "learning_rate": 2.119662753580377e-05, "loss": 0.6362, "step": 9952 }, { "epoch": 11.336182336182336, "grad_norm": 0.204105406999588, "learning_rate": 2.119202265483623e-05, "loss": 0.758, "step": 9953 }, { "epoch": 11.337321937321937, "grad_norm": 0.27438604831695557, "learning_rate": 2.1187417906130212e-05, "loss": 0.5061, "step": 9954 }, { "epoch": 11.338461538461539, "grad_norm": 0.2232668250799179, "learning_rate": 2.1182813289845664e-05, "loss": 0.6069, "step": 9955 }, { "epoch": 11.33960113960114, "grad_norm": 0.20483927428722382, "learning_rate": 2.1178208806142495e-05, "loss": 0.7208, "step": 9956 }, { "epoch": 11.34074074074074, "grad_norm": 0.21186719834804535, "learning_rate": 2.1173604455180646e-05, "loss": 0.6007, "step": 9957 }, { "epoch": 11.341880341880342, "grad_norm": 0.17334899306297302, "learning_rate": 2.1169000237120047e-05, "loss": 0.6903, "step": 9958 }, { "epoch": 11.343019943019943, "grad_norm": 0.20546293258666992, "learning_rate": 2.1164396152120594e-05, "loss": 0.5858, "step": 9959 }, { "epoch": 11.344159544159544, "grad_norm": 0.2439803183078766, "learning_rate": 2.115979220034222e-05, "loss": 0.6547, "step": 9960 }, { "epoch": 11.345299145299146, "grad_norm": 0.21434029936790466, "learning_rate": 2.115518838194482e-05, "loss": 0.5725, "step": 9961 }, { "epoch": 11.346438746438746, "grad_norm": 0.22616712749004364, "learning_rate": 2.1150584697088302e-05, "loss": 0.5493, "step": 9962 }, { "epoch": 11.347578347578347, "grad_norm": 0.2434915006160736, "learning_rate": 2.1145981145932557e-05, "loss": 0.5391, "step": 9963 }, { "epoch": 11.34871794871795, "grad_norm": 0.19631722569465637, "learning_rate": 2.1141377728637495e-05, "loss": 0.7343, "step": 9964 }, { "epoch": 11.34985754985755, "grad_norm": 0.1945822387933731, "learning_rate": 2.1136774445362988e-05, "loss": 0.7902, "step": 9965 }, { "epoch": 11.35099715099715, "grad_norm": 0.2038748562335968, "learning_rate": 2.1132171296268935e-05, "loss": 0.6883, "step": 9966 }, { "epoch": 11.352136752136753, "grad_norm": 0.22871534526348114, "learning_rate": 2.1127568281515198e-05, "loss": 0.6013, "step": 9967 }, { "epoch": 11.353276353276353, "grad_norm": 0.2101345807313919, "learning_rate": 2.1122965401261667e-05, "loss": 0.8981, "step": 9968 }, { "epoch": 11.354415954415954, "grad_norm": 0.2620834708213806, "learning_rate": 2.1118362655668218e-05, "loss": 0.5471, "step": 9969 }, { "epoch": 11.355555555555556, "grad_norm": 0.27750974893569946, "learning_rate": 2.1113760044894703e-05, "loss": 0.4879, "step": 9970 }, { "epoch": 11.356695156695157, "grad_norm": 0.20539726316928864, "learning_rate": 2.1109157569100997e-05, "loss": 0.6741, "step": 9971 }, { "epoch": 11.357834757834757, "grad_norm": 0.19485247135162354, "learning_rate": 2.110455522844694e-05, "loss": 0.5967, "step": 9972 }, { "epoch": 11.35897435897436, "grad_norm": 0.1940249800682068, "learning_rate": 2.1099953023092394e-05, "loss": 0.7604, "step": 9973 }, { "epoch": 11.36011396011396, "grad_norm": 0.20628228783607483, "learning_rate": 2.1095350953197202e-05, "loss": 0.7112, "step": 9974 }, { "epoch": 11.36125356125356, "grad_norm": 0.2294902205467224, "learning_rate": 2.109074901892121e-05, "loss": 0.6547, "step": 9975 }, { "epoch": 11.362393162393163, "grad_norm": 0.22965016961097717, "learning_rate": 2.108614722042426e-05, "loss": 0.532, "step": 9976 }, { "epoch": 11.363532763532763, "grad_norm": 0.19517067074775696, "learning_rate": 2.1081545557866176e-05, "loss": 0.5695, "step": 9977 }, { "epoch": 11.364672364672364, "grad_norm": 0.17772167921066284, "learning_rate": 2.1076944031406787e-05, "loss": 0.8712, "step": 9978 }, { "epoch": 11.365811965811966, "grad_norm": 0.19007042050361633, "learning_rate": 2.1072342641205926e-05, "loss": 0.69, "step": 9979 }, { "epoch": 11.366951566951567, "grad_norm": 0.2111845165491104, "learning_rate": 2.1067741387423404e-05, "loss": 0.7293, "step": 9980 }, { "epoch": 11.368091168091167, "grad_norm": 0.23049502074718475, "learning_rate": 2.1063140270219042e-05, "loss": 0.71, "step": 9981 }, { "epoch": 11.36923076923077, "grad_norm": 0.233639195561409, "learning_rate": 2.105853928975264e-05, "loss": 0.5686, "step": 9982 }, { "epoch": 11.37037037037037, "grad_norm": 0.21934100985527039, "learning_rate": 2.1053938446184013e-05, "loss": 0.6469, "step": 9983 }, { "epoch": 11.37150997150997, "grad_norm": 0.17151589691638947, "learning_rate": 2.1049337739672947e-05, "loss": 0.7946, "step": 9984 }, { "epoch": 11.372649572649573, "grad_norm": 0.17608436942100525, "learning_rate": 2.1044737170379254e-05, "loss": 0.8317, "step": 9985 }, { "epoch": 11.373789173789174, "grad_norm": 0.25941383838653564, "learning_rate": 2.104013673846271e-05, "loss": 0.5901, "step": 9986 }, { "epoch": 11.374928774928774, "grad_norm": 0.18507154285907745, "learning_rate": 2.1035536444083112e-05, "loss": 0.62, "step": 9987 }, { "epoch": 11.376068376068377, "grad_norm": 0.23103323578834534, "learning_rate": 2.103093628740023e-05, "loss": 0.7076, "step": 9988 }, { "epoch": 11.377207977207977, "grad_norm": 0.25450006127357483, "learning_rate": 2.1026336268573843e-05, "loss": 0.6977, "step": 9989 }, { "epoch": 11.378347578347578, "grad_norm": 0.23431473970413208, "learning_rate": 2.1021736387763732e-05, "loss": 0.4698, "step": 9990 }, { "epoch": 11.37948717948718, "grad_norm": 0.2532706558704376, "learning_rate": 2.1017136645129655e-05, "loss": 0.6794, "step": 9991 }, { "epoch": 11.38062678062678, "grad_norm": 0.2173871099948883, "learning_rate": 2.1012537040831376e-05, "loss": 0.5126, "step": 9992 }, { "epoch": 11.381766381766381, "grad_norm": 0.18048971891403198, "learning_rate": 2.1007937575028645e-05, "loss": 0.7476, "step": 9993 }, { "epoch": 11.382905982905983, "grad_norm": 0.2078721523284912, "learning_rate": 2.100333824788123e-05, "loss": 0.7912, "step": 9994 }, { "epoch": 11.384045584045584, "grad_norm": 0.1874990612268448, "learning_rate": 2.0998739059548865e-05, "loss": 0.8126, "step": 9995 }, { "epoch": 11.385185185185184, "grad_norm": 0.17952513694763184, "learning_rate": 2.0994140010191296e-05, "loss": 0.8035, "step": 9996 }, { "epoch": 11.386324786324787, "grad_norm": 0.21788497269153595, "learning_rate": 2.0989541099968257e-05, "loss": 0.726, "step": 9997 }, { "epoch": 11.387464387464387, "grad_norm": 0.2562752366065979, "learning_rate": 2.098494232903949e-05, "loss": 0.6623, "step": 9998 }, { "epoch": 11.388603988603988, "grad_norm": 0.21369265019893646, "learning_rate": 2.0980343697564714e-05, "loss": 0.6807, "step": 9999 }, { "epoch": 11.38974358974359, "grad_norm": 0.2155311405658722, "learning_rate": 2.0975745205703648e-05, "loss": 0.743, "step": 10000 }, { "epoch": 11.39088319088319, "grad_norm": 0.18946735560894012, "learning_rate": 2.0971146853616032e-05, "loss": 0.83, "step": 10001 }, { "epoch": 11.392022792022791, "grad_norm": 0.21094633638858795, "learning_rate": 2.0966548641461552e-05, "loss": 0.8659, "step": 10002 }, { "epoch": 11.393162393162394, "grad_norm": 0.23790320754051208, "learning_rate": 2.0961950569399942e-05, "loss": 0.7358, "step": 10003 }, { "epoch": 11.394301994301994, "grad_norm": 0.2713351845741272, "learning_rate": 2.0957352637590884e-05, "loss": 0.6183, "step": 10004 }, { "epoch": 11.395441595441595, "grad_norm": 0.1856396645307541, "learning_rate": 2.0952754846194094e-05, "loss": 0.7679, "step": 10005 }, { "epoch": 11.396581196581197, "grad_norm": 0.21260108053684235, "learning_rate": 2.0948157195369253e-05, "loss": 0.852, "step": 10006 }, { "epoch": 11.397720797720797, "grad_norm": 0.18913598358631134, "learning_rate": 2.094355968527606e-05, "loss": 0.6075, "step": 10007 }, { "epoch": 11.398860398860398, "grad_norm": 0.18461109697818756, "learning_rate": 2.0938962316074186e-05, "loss": 0.8129, "step": 10008 }, { "epoch": 11.4, "grad_norm": 0.1944519281387329, "learning_rate": 2.0934365087923326e-05, "loss": 0.6931, "step": 10009 }, { "epoch": 11.401139601139601, "grad_norm": 0.2516137957572937, "learning_rate": 2.0929768000983144e-05, "loss": 0.5684, "step": 10010 }, { "epoch": 11.402279202279201, "grad_norm": 0.21535034477710724, "learning_rate": 2.0925171055413307e-05, "loss": 0.6137, "step": 10011 }, { "epoch": 11.403418803418804, "grad_norm": 0.21665510535240173, "learning_rate": 2.0920574251373486e-05, "loss": 0.7792, "step": 10012 }, { "epoch": 11.404558404558404, "grad_norm": 0.1865619719028473, "learning_rate": 2.091597758902335e-05, "loss": 0.763, "step": 10013 }, { "epoch": 11.405698005698005, "grad_norm": 0.21660961210727692, "learning_rate": 2.0911381068522535e-05, "loss": 0.7896, "step": 10014 }, { "epoch": 11.406837606837607, "grad_norm": 0.2145041972398758, "learning_rate": 2.0906784690030705e-05, "loss": 0.7066, "step": 10015 }, { "epoch": 11.407977207977208, "grad_norm": 0.18325629830360413, "learning_rate": 2.0902188453707498e-05, "loss": 0.7039, "step": 10016 }, { "epoch": 11.40911680911681, "grad_norm": 0.18648825585842133, "learning_rate": 2.0897592359712554e-05, "loss": 0.9134, "step": 10017 }, { "epoch": 11.41025641025641, "grad_norm": 0.20018558204174042, "learning_rate": 2.0892996408205507e-05, "loss": 0.773, "step": 10018 }, { "epoch": 11.411396011396011, "grad_norm": 0.18970166146755219, "learning_rate": 2.0888400599345993e-05, "loss": 0.6343, "step": 10019 }, { "epoch": 11.412535612535613, "grad_norm": 0.20653219521045685, "learning_rate": 2.088380493329363e-05, "loss": 0.631, "step": 10020 }, { "epoch": 11.413675213675214, "grad_norm": 0.24264267086982727, "learning_rate": 2.0879209410208044e-05, "loss": 0.6642, "step": 10021 }, { "epoch": 11.414814814814815, "grad_norm": 0.2044183909893036, "learning_rate": 2.087461403024884e-05, "loss": 0.4789, "step": 10022 }, { "epoch": 11.415954415954417, "grad_norm": 0.17189714312553406, "learning_rate": 2.087001879357564e-05, "loss": 0.7172, "step": 10023 }, { "epoch": 11.417094017094017, "grad_norm": 0.18149492144584656, "learning_rate": 2.0865423700348045e-05, "loss": 0.7652, "step": 10024 }, { "epoch": 11.418233618233618, "grad_norm": 0.30384448170661926, "learning_rate": 2.0860828750725655e-05, "loss": 0.6009, "step": 10025 }, { "epoch": 11.41937321937322, "grad_norm": 0.20158112049102783, "learning_rate": 2.085623394486807e-05, "loss": 0.6967, "step": 10026 }, { "epoch": 11.42051282051282, "grad_norm": 0.205727681517601, "learning_rate": 2.085163928293487e-05, "loss": 0.769, "step": 10027 }, { "epoch": 11.421652421652421, "grad_norm": 0.18823014199733734, "learning_rate": 2.084704476508565e-05, "loss": 0.8645, "step": 10028 }, { "epoch": 11.422792022792024, "grad_norm": 0.27690598368644714, "learning_rate": 2.0842450391479982e-05, "loss": 0.6291, "step": 10029 }, { "epoch": 11.423931623931624, "grad_norm": 0.19192315638065338, "learning_rate": 2.0837856162277447e-05, "loss": 0.6176, "step": 10030 }, { "epoch": 11.425071225071225, "grad_norm": 0.2199936807155609, "learning_rate": 2.0833262077637612e-05, "loss": 0.4781, "step": 10031 }, { "epoch": 11.426210826210827, "grad_norm": 0.2021743208169937, "learning_rate": 2.0828668137720048e-05, "loss": 0.8053, "step": 10032 }, { "epoch": 11.427350427350428, "grad_norm": 0.2495376467704773, "learning_rate": 2.08240743426843e-05, "loss": 0.6379, "step": 10033 }, { "epoch": 11.428490028490028, "grad_norm": 0.23179969191551208, "learning_rate": 2.081948069268994e-05, "loss": 0.7341, "step": 10034 }, { "epoch": 11.42962962962963, "grad_norm": 0.22318165004253387, "learning_rate": 2.0814887187896513e-05, "loss": 0.4811, "step": 10035 }, { "epoch": 11.430769230769231, "grad_norm": 0.18104930222034454, "learning_rate": 2.0810293828463563e-05, "loss": 0.9114, "step": 10036 }, { "epoch": 11.431908831908832, "grad_norm": 0.2621336877346039, "learning_rate": 2.0805700614550633e-05, "loss": 0.4802, "step": 10037 }, { "epoch": 11.433048433048434, "grad_norm": 0.20020677149295807, "learning_rate": 2.080110754631725e-05, "loss": 0.6964, "step": 10038 }, { "epoch": 11.434188034188034, "grad_norm": 0.21619516611099243, "learning_rate": 2.0796514623922956e-05, "loss": 0.6739, "step": 10039 }, { "epoch": 11.435327635327635, "grad_norm": 0.15959975123405457, "learning_rate": 2.0791921847527263e-05, "loss": 0.8328, "step": 10040 }, { "epoch": 11.436467236467237, "grad_norm": 0.21069657802581787, "learning_rate": 2.07873292172897e-05, "loss": 0.8459, "step": 10041 }, { "epoch": 11.437606837606838, "grad_norm": 0.20942659676074982, "learning_rate": 2.0782736733369774e-05, "loss": 0.7156, "step": 10042 }, { "epoch": 11.438746438746438, "grad_norm": 0.18864844739437103, "learning_rate": 2.0778144395927002e-05, "loss": 0.7452, "step": 10043 }, { "epoch": 11.43988603988604, "grad_norm": 0.21977639198303223, "learning_rate": 2.077355220512088e-05, "loss": 0.7005, "step": 10044 }, { "epoch": 11.441025641025641, "grad_norm": 0.19923043251037598, "learning_rate": 2.0768960161110913e-05, "loss": 0.6548, "step": 10045 }, { "epoch": 11.442165242165242, "grad_norm": 0.24817754328250885, "learning_rate": 2.07643682640566e-05, "loss": 0.4917, "step": 10046 }, { "epoch": 11.443304843304844, "grad_norm": 0.22018466889858246, "learning_rate": 2.075977651411742e-05, "loss": 0.6338, "step": 10047 }, { "epoch": 11.444444444444445, "grad_norm": 0.20591236650943756, "learning_rate": 2.0755184911452873e-05, "loss": 0.6555, "step": 10048 }, { "epoch": 11.445584045584045, "grad_norm": 0.18632829189300537, "learning_rate": 2.0750593456222416e-05, "loss": 0.6364, "step": 10049 }, { "epoch": 11.446723646723648, "grad_norm": 0.23025359213352203, "learning_rate": 2.074600214858554e-05, "loss": 0.6342, "step": 10050 }, { "epoch": 11.447863247863248, "grad_norm": 0.24396909773349762, "learning_rate": 2.0741410988701706e-05, "loss": 0.6499, "step": 10051 }, { "epoch": 11.449002849002849, "grad_norm": 0.2707434892654419, "learning_rate": 2.0736819976730385e-05, "loss": 0.3684, "step": 10052 }, { "epoch": 11.450142450142451, "grad_norm": 0.19455496966838837, "learning_rate": 2.0732229112831023e-05, "loss": 0.6864, "step": 10053 }, { "epoch": 11.451282051282051, "grad_norm": 0.20653720200061798, "learning_rate": 2.0727638397163078e-05, "loss": 0.9295, "step": 10054 }, { "epoch": 11.452421652421652, "grad_norm": 0.1749943643808365, "learning_rate": 2.0723047829886e-05, "loss": 0.9289, "step": 10055 }, { "epoch": 11.453561253561254, "grad_norm": 0.1685071885585785, "learning_rate": 2.071845741115924e-05, "loss": 0.7246, "step": 10056 }, { "epoch": 11.454700854700855, "grad_norm": 0.18324369192123413, "learning_rate": 2.0713867141142223e-05, "loss": 0.8333, "step": 10057 }, { "epoch": 11.455840455840455, "grad_norm": 0.21532200276851654, "learning_rate": 2.070927701999439e-05, "loss": 0.6711, "step": 10058 }, { "epoch": 11.456980056980058, "grad_norm": 0.2650965452194214, "learning_rate": 2.0704687047875166e-05, "loss": 0.3715, "step": 10059 }, { "epoch": 11.458119658119658, "grad_norm": 0.1903204321861267, "learning_rate": 2.0700097224943975e-05, "loss": 0.741, "step": 10060 }, { "epoch": 11.459259259259259, "grad_norm": 0.20154109597206116, "learning_rate": 2.0695507551360228e-05, "loss": 0.7265, "step": 10061 }, { "epoch": 11.460398860398861, "grad_norm": 0.20880548655986786, "learning_rate": 2.0690918027283346e-05, "loss": 0.5857, "step": 10062 }, { "epoch": 11.461538461538462, "grad_norm": 0.23127515614032745, "learning_rate": 2.0686328652872723e-05, "loss": 0.6208, "step": 10063 }, { "epoch": 11.462678062678062, "grad_norm": 0.25560060143470764, "learning_rate": 2.068173942828778e-05, "loss": 0.5022, "step": 10064 }, { "epoch": 11.463817663817665, "grad_norm": 0.1762644499540329, "learning_rate": 2.067715035368789e-05, "loss": 0.7456, "step": 10065 }, { "epoch": 11.464957264957265, "grad_norm": 0.23545634746551514, "learning_rate": 2.0672561429232458e-05, "loss": 0.5954, "step": 10066 }, { "epoch": 11.466096866096866, "grad_norm": 0.23931023478507996, "learning_rate": 2.066797265508087e-05, "loss": 0.7562, "step": 10067 }, { "epoch": 11.467236467236468, "grad_norm": 0.28010281920433044, "learning_rate": 2.0663384031392504e-05, "loss": 0.6851, "step": 10068 }, { "epoch": 11.468376068376068, "grad_norm": 0.20878314971923828, "learning_rate": 2.0658795558326743e-05, "loss": 0.7336, "step": 10069 }, { "epoch": 11.469515669515669, "grad_norm": 0.2539985179901123, "learning_rate": 2.0654207236042945e-05, "loss": 0.3877, "step": 10070 }, { "epoch": 11.470655270655271, "grad_norm": 0.2357361912727356, "learning_rate": 2.0649619064700485e-05, "loss": 0.4854, "step": 10071 }, { "epoch": 11.471794871794872, "grad_norm": 0.22556303441524506, "learning_rate": 2.0645031044458714e-05, "loss": 0.7931, "step": 10072 }, { "epoch": 11.472934472934472, "grad_norm": 0.23753730952739716, "learning_rate": 2.064044317547699e-05, "loss": 0.7017, "step": 10073 }, { "epoch": 11.474074074074075, "grad_norm": 0.18520833551883698, "learning_rate": 2.0635855457914665e-05, "loss": 0.8661, "step": 10074 }, { "epoch": 11.475213675213675, "grad_norm": 0.2340151071548462, "learning_rate": 2.0631267891931087e-05, "loss": 0.6815, "step": 10075 }, { "epoch": 11.476353276353276, "grad_norm": 0.1645916998386383, "learning_rate": 2.0626680477685585e-05, "loss": 0.7149, "step": 10076 }, { "epoch": 11.477492877492878, "grad_norm": 0.17714065313339233, "learning_rate": 2.0622093215337484e-05, "loss": 0.7027, "step": 10077 }, { "epoch": 11.478632478632479, "grad_norm": 0.22676534950733185, "learning_rate": 2.0617506105046143e-05, "loss": 0.6457, "step": 10078 }, { "epoch": 11.47977207977208, "grad_norm": 0.2152765840291977, "learning_rate": 2.0612919146970858e-05, "loss": 0.7855, "step": 10079 }, { "epoch": 11.480911680911682, "grad_norm": 0.26596543192863464, "learning_rate": 2.060833234127096e-05, "loss": 0.5108, "step": 10080 }, { "epoch": 11.482051282051282, "grad_norm": 0.2279624491930008, "learning_rate": 2.060374568810575e-05, "loss": 0.8589, "step": 10081 }, { "epoch": 11.483190883190883, "grad_norm": 0.2043648511171341, "learning_rate": 2.0599159187634554e-05, "loss": 0.6467, "step": 10082 }, { "epoch": 11.484330484330485, "grad_norm": 0.20626182854175568, "learning_rate": 2.0594572840016653e-05, "loss": 0.785, "step": 10083 }, { "epoch": 11.485470085470086, "grad_norm": 0.23982909321784973, "learning_rate": 2.0589986645411356e-05, "loss": 0.3791, "step": 10084 }, { "epoch": 11.486609686609686, "grad_norm": 0.22007319331169128, "learning_rate": 2.058540060397795e-05, "loss": 0.7334, "step": 10085 }, { "epoch": 11.487749287749288, "grad_norm": 0.20136313140392303, "learning_rate": 2.0580814715875722e-05, "loss": 0.8072, "step": 10086 }, { "epoch": 11.488888888888889, "grad_norm": 0.21298660337924957, "learning_rate": 2.0576228981263947e-05, "loss": 0.773, "step": 10087 }, { "epoch": 11.49002849002849, "grad_norm": 0.21085113286972046, "learning_rate": 2.057164340030191e-05, "loss": 0.6982, "step": 10088 }, { "epoch": 11.491168091168092, "grad_norm": 0.21780793368816376, "learning_rate": 2.056705797314886e-05, "loss": 0.5916, "step": 10089 }, { "epoch": 11.492307692307692, "grad_norm": 0.17957738041877747, "learning_rate": 2.0562472699964092e-05, "loss": 0.833, "step": 10090 }, { "epoch": 11.493447293447293, "grad_norm": 0.20545561611652374, "learning_rate": 2.055788758090685e-05, "loss": 0.8376, "step": 10091 }, { "epoch": 11.494586894586895, "grad_norm": 0.16835999488830566, "learning_rate": 2.0553302616136388e-05, "loss": 0.9204, "step": 10092 }, { "epoch": 11.495726495726496, "grad_norm": 0.1910863220691681, "learning_rate": 2.0548717805811952e-05, "loss": 0.8283, "step": 10093 }, { "epoch": 11.496866096866096, "grad_norm": 0.18515583872795105, "learning_rate": 2.054413315009279e-05, "loss": 0.7845, "step": 10094 }, { "epoch": 11.498005698005699, "grad_norm": 0.2434854954481125, "learning_rate": 2.0539548649138136e-05, "loss": 0.5831, "step": 10095 }, { "epoch": 11.4991452991453, "grad_norm": 0.188311368227005, "learning_rate": 2.0534964303107227e-05, "loss": 1.0584, "step": 10096 }, { "epoch": 11.5002849002849, "grad_norm": 0.23528143763542175, "learning_rate": 2.053038011215928e-05, "loss": 0.7664, "step": 10097 }, { "epoch": 11.501424501424502, "grad_norm": 0.21836930513381958, "learning_rate": 2.052579607645353e-05, "loss": 0.7048, "step": 10098 }, { "epoch": 11.502564102564103, "grad_norm": 0.15405632555484772, "learning_rate": 2.052121219614918e-05, "loss": 0.8787, "step": 10099 }, { "epoch": 11.503703703703703, "grad_norm": 0.2536327540874481, "learning_rate": 2.0516628471405446e-05, "loss": 0.3659, "step": 10100 }, { "epoch": 11.504843304843305, "grad_norm": 0.19951657950878143, "learning_rate": 2.0512044902381543e-05, "loss": 0.6577, "step": 10101 }, { "epoch": 11.505982905982906, "grad_norm": 0.20418404042720795, "learning_rate": 2.0507461489236655e-05, "loss": 0.7083, "step": 10102 }, { "epoch": 11.507122507122507, "grad_norm": 0.24461476504802704, "learning_rate": 2.0502878232129992e-05, "loss": 0.7458, "step": 10103 }, { "epoch": 11.508262108262109, "grad_norm": 0.22976773977279663, "learning_rate": 2.0498295131220728e-05, "loss": 0.7533, "step": 10104 }, { "epoch": 11.50940170940171, "grad_norm": 0.24391049146652222, "learning_rate": 2.049371218666806e-05, "loss": 0.5373, "step": 10105 }, { "epoch": 11.51054131054131, "grad_norm": 0.19305101037025452, "learning_rate": 2.0489129398631153e-05, "loss": 0.67, "step": 10106 }, { "epoch": 11.511680911680912, "grad_norm": 0.15466570854187012, "learning_rate": 2.0484546767269196e-05, "loss": 0.813, "step": 10107 }, { "epoch": 11.512820512820513, "grad_norm": 0.20475360751152039, "learning_rate": 2.0479964292741338e-05, "loss": 0.7488, "step": 10108 }, { "epoch": 11.513960113960113, "grad_norm": 0.23447205126285553, "learning_rate": 2.0475381975206757e-05, "loss": 0.7557, "step": 10109 }, { "epoch": 11.515099715099716, "grad_norm": 0.20030346512794495, "learning_rate": 2.04707998148246e-05, "loss": 0.7338, "step": 10110 }, { "epoch": 11.516239316239316, "grad_norm": 0.30252522230148315, "learning_rate": 2.0466217811754012e-05, "loss": 0.4304, "step": 10111 }, { "epoch": 11.517378917378917, "grad_norm": 0.23555758595466614, "learning_rate": 2.046163596615416e-05, "loss": 0.4149, "step": 10112 }, { "epoch": 11.518518518518519, "grad_norm": 0.19094648957252502, "learning_rate": 2.045705427818416e-05, "loss": 0.8094, "step": 10113 }, { "epoch": 11.51965811965812, "grad_norm": 0.20734092593193054, "learning_rate": 2.0452472748003172e-05, "loss": 0.7286, "step": 10114 }, { "epoch": 11.52079772079772, "grad_norm": 0.20282462239265442, "learning_rate": 2.0447891375770304e-05, "loss": 0.7188, "step": 10115 }, { "epoch": 11.521937321937322, "grad_norm": 0.2904427647590637, "learning_rate": 2.044331016164469e-05, "loss": 0.5736, "step": 10116 }, { "epoch": 11.523076923076923, "grad_norm": 0.18206043541431427, "learning_rate": 2.043872910578544e-05, "loss": 0.6829, "step": 10117 }, { "epoch": 11.524216524216524, "grad_norm": 0.25908827781677246, "learning_rate": 2.0434148208351676e-05, "loss": 0.6485, "step": 10118 }, { "epoch": 11.525356125356126, "grad_norm": 0.25546640157699585, "learning_rate": 2.0429567469502497e-05, "loss": 0.608, "step": 10119 }, { "epoch": 11.526495726495726, "grad_norm": 0.2341763824224472, "learning_rate": 2.0424986889397016e-05, "loss": 0.6356, "step": 10120 }, { "epoch": 11.527635327635327, "grad_norm": 0.18672235310077667, "learning_rate": 2.0420406468194316e-05, "loss": 0.7211, "step": 10121 }, { "epoch": 11.52877492877493, "grad_norm": 0.2325475960969925, "learning_rate": 2.0415826206053483e-05, "loss": 0.8141, "step": 10122 }, { "epoch": 11.52991452991453, "grad_norm": 0.19826440513134003, "learning_rate": 2.041124610313363e-05, "loss": 0.6876, "step": 10123 }, { "epoch": 11.53105413105413, "grad_norm": 0.22760896384716034, "learning_rate": 2.0406666159593807e-05, "loss": 0.7007, "step": 10124 }, { "epoch": 11.532193732193733, "grad_norm": 0.21469786763191223, "learning_rate": 2.040208637559311e-05, "loss": 0.5696, "step": 10125 }, { "epoch": 11.533333333333333, "grad_norm": 0.19663602113723755, "learning_rate": 2.039750675129059e-05, "loss": 0.9364, "step": 10126 }, { "epoch": 11.534472934472934, "grad_norm": 0.24786512553691864, "learning_rate": 2.0392927286845322e-05, "loss": 0.4917, "step": 10127 }, { "epoch": 11.535612535612536, "grad_norm": 0.1861061155796051, "learning_rate": 2.0388347982416356e-05, "loss": 0.8017, "step": 10128 }, { "epoch": 11.536752136752137, "grad_norm": 0.23585036396980286, "learning_rate": 2.038376883816275e-05, "loss": 0.718, "step": 10129 }, { "epoch": 11.537891737891737, "grad_norm": 0.20937484502792358, "learning_rate": 2.0379189854243542e-05, "loss": 0.7784, "step": 10130 }, { "epoch": 11.53903133903134, "grad_norm": 0.2120661437511444, "learning_rate": 2.0374611030817782e-05, "loss": 0.7323, "step": 10131 }, { "epoch": 11.54017094017094, "grad_norm": 0.20045341551303864, "learning_rate": 2.0370032368044497e-05, "loss": 0.7793, "step": 10132 }, { "epoch": 11.54131054131054, "grad_norm": 0.21167951822280884, "learning_rate": 2.0365453866082716e-05, "loss": 0.7675, "step": 10133 }, { "epoch": 11.542450142450143, "grad_norm": 0.1805192083120346, "learning_rate": 2.036087552509147e-05, "loss": 0.7142, "step": 10134 }, { "epoch": 11.543589743589743, "grad_norm": 0.19848741590976715, "learning_rate": 2.035629734522978e-05, "loss": 0.6463, "step": 10135 }, { "epoch": 11.544729344729344, "grad_norm": 0.24124781787395477, "learning_rate": 2.0351719326656652e-05, "loss": 0.5639, "step": 10136 }, { "epoch": 11.545868945868946, "grad_norm": 0.24485869705677032, "learning_rate": 2.0347141469531094e-05, "loss": 0.601, "step": 10137 }, { "epoch": 11.547008547008547, "grad_norm": 0.21613085269927979, "learning_rate": 2.0342563774012107e-05, "loss": 0.5691, "step": 10138 }, { "epoch": 11.548148148148147, "grad_norm": 0.20234043896198273, "learning_rate": 2.0337986240258696e-05, "loss": 0.7644, "step": 10139 }, { "epoch": 11.54928774928775, "grad_norm": 0.19480158388614655, "learning_rate": 2.0333408868429832e-05, "loss": 0.6856, "step": 10140 }, { "epoch": 11.55042735042735, "grad_norm": 0.20556823909282684, "learning_rate": 2.0328831658684518e-05, "loss": 0.7236, "step": 10141 }, { "epoch": 11.55156695156695, "grad_norm": 0.24399961531162262, "learning_rate": 2.0324254611181724e-05, "loss": 0.6854, "step": 10142 }, { "epoch": 11.552706552706553, "grad_norm": 0.22309578955173492, "learning_rate": 2.031967772608043e-05, "loss": 0.6263, "step": 10143 }, { "epoch": 11.553846153846154, "grad_norm": 0.21229925751686096, "learning_rate": 2.0315101003539585e-05, "loss": 0.6705, "step": 10144 }, { "epoch": 11.554985754985754, "grad_norm": 0.16713200509548187, "learning_rate": 2.0310524443718176e-05, "loss": 0.9225, "step": 10145 }, { "epoch": 11.556125356125357, "grad_norm": 0.2773817479610443, "learning_rate": 2.0305948046775154e-05, "loss": 0.4484, "step": 10146 }, { "epoch": 11.557264957264957, "grad_norm": 0.1994384378194809, "learning_rate": 2.0301371812869458e-05, "loss": 0.8305, "step": 10147 }, { "epoch": 11.558404558404558, "grad_norm": 0.15961970388889313, "learning_rate": 2.0296795742160045e-05, "loss": 0.713, "step": 10148 }, { "epoch": 11.55954415954416, "grad_norm": 0.19760511815547943, "learning_rate": 2.0292219834805844e-05, "loss": 0.8293, "step": 10149 }, { "epoch": 11.56068376068376, "grad_norm": 0.23703165352344513, "learning_rate": 2.02876440909658e-05, "loss": 0.7275, "step": 10150 }, { "epoch": 11.561823361823361, "grad_norm": 0.2209303081035614, "learning_rate": 2.0283068510798833e-05, "loss": 0.6158, "step": 10151 }, { "epoch": 11.562962962962963, "grad_norm": 0.2222437858581543, "learning_rate": 2.0278493094463872e-05, "loss": 0.5919, "step": 10152 }, { "epoch": 11.564102564102564, "grad_norm": 0.2404080182313919, "learning_rate": 2.0273917842119823e-05, "loss": 0.4473, "step": 10153 }, { "epoch": 11.565242165242164, "grad_norm": 0.20224551856517792, "learning_rate": 2.0269342753925613e-05, "loss": 0.7044, "step": 10154 }, { "epoch": 11.566381766381767, "grad_norm": 0.21260076761245728, "learning_rate": 2.0264767830040127e-05, "loss": 0.8415, "step": 10155 }, { "epoch": 11.567521367521367, "grad_norm": 0.2516237795352936, "learning_rate": 2.026019307062228e-05, "loss": 0.6052, "step": 10156 }, { "epoch": 11.568660968660968, "grad_norm": 0.24627673625946045, "learning_rate": 2.025561847583097e-05, "loss": 0.6294, "step": 10157 }, { "epoch": 11.56980056980057, "grad_norm": 0.23037806153297424, "learning_rate": 2.025104404582507e-05, "loss": 0.4694, "step": 10158 }, { "epoch": 11.57094017094017, "grad_norm": 0.23542878031730652, "learning_rate": 2.0246469780763473e-05, "loss": 0.635, "step": 10159 }, { "epoch": 11.572079772079771, "grad_norm": 0.17926007509231567, "learning_rate": 2.0241895680805052e-05, "loss": 0.794, "step": 10160 }, { "epoch": 11.573219373219374, "grad_norm": 0.19894307851791382, "learning_rate": 2.0237321746108678e-05, "loss": 0.7871, "step": 10161 }, { "epoch": 11.574358974358974, "grad_norm": 0.20547333359718323, "learning_rate": 2.0232747976833217e-05, "loss": 0.7193, "step": 10162 }, { "epoch": 11.575498575498575, "grad_norm": 0.2085610032081604, "learning_rate": 2.0228174373137535e-05, "loss": 0.6491, "step": 10163 }, { "epoch": 11.576638176638177, "grad_norm": 0.2668093740940094, "learning_rate": 2.0223600935180472e-05, "loss": 0.5799, "step": 10164 }, { "epoch": 11.577777777777778, "grad_norm": 0.20587384700775146, "learning_rate": 2.0219027663120886e-05, "loss": 0.8061, "step": 10165 }, { "epoch": 11.578917378917378, "grad_norm": 0.20204438269138336, "learning_rate": 2.0214454557117613e-05, "loss": 0.7189, "step": 10166 }, { "epoch": 11.58005698005698, "grad_norm": 0.1639975756406784, "learning_rate": 2.0209881617329497e-05, "loss": 0.8037, "step": 10167 }, { "epoch": 11.581196581196581, "grad_norm": 0.22345274686813354, "learning_rate": 2.020530884391536e-05, "loss": 0.5246, "step": 10168 }, { "epoch": 11.582336182336181, "grad_norm": 0.18317808210849762, "learning_rate": 2.0200736237034042e-05, "loss": 0.7078, "step": 10169 }, { "epoch": 11.583475783475784, "grad_norm": 0.18807291984558105, "learning_rate": 2.0196163796844348e-05, "loss": 0.8574, "step": 10170 }, { "epoch": 11.584615384615384, "grad_norm": 0.2099536657333374, "learning_rate": 2.0191591523505098e-05, "loss": 0.6878, "step": 10171 }, { "epoch": 11.585754985754985, "grad_norm": 0.22654862701892853, "learning_rate": 2.0187019417175097e-05, "loss": 0.8243, "step": 10172 }, { "epoch": 11.586894586894587, "grad_norm": 0.2792012691497803, "learning_rate": 2.0182447478013147e-05, "loss": 0.4596, "step": 10173 }, { "epoch": 11.588034188034188, "grad_norm": 0.2196110039949417, "learning_rate": 2.0177875706178043e-05, "loss": 0.7237, "step": 10174 }, { "epoch": 11.589173789173788, "grad_norm": 0.22355572879314423, "learning_rate": 2.017330410182858e-05, "loss": 0.5842, "step": 10175 }, { "epoch": 11.59031339031339, "grad_norm": 0.2698277533054352, "learning_rate": 2.0168732665123535e-05, "loss": 0.5167, "step": 10176 }, { "epoch": 11.591452991452991, "grad_norm": 0.2722584307193756, "learning_rate": 2.0164161396221683e-05, "loss": 0.6275, "step": 10177 }, { "epoch": 11.592592592592592, "grad_norm": 0.1976955085992813, "learning_rate": 2.015959029528182e-05, "loss": 0.7372, "step": 10178 }, { "epoch": 11.593732193732194, "grad_norm": 0.23019570112228394, "learning_rate": 2.015501936246269e-05, "loss": 0.5574, "step": 10179 }, { "epoch": 11.594871794871795, "grad_norm": 0.17158330976963043, "learning_rate": 2.0150448597923062e-05, "loss": 0.7158, "step": 10180 }, { "epoch": 11.596011396011395, "grad_norm": 0.21241247653961182, "learning_rate": 2.014587800182169e-05, "loss": 0.6826, "step": 10181 }, { "epoch": 11.597150997150997, "grad_norm": 0.17392486333847046, "learning_rate": 2.0141307574317326e-05, "loss": 0.6694, "step": 10182 }, { "epoch": 11.598290598290598, "grad_norm": 0.2758224606513977, "learning_rate": 2.013673731556871e-05, "loss": 0.5387, "step": 10183 }, { "epoch": 11.5994301994302, "grad_norm": 0.25378984212875366, "learning_rate": 2.0132167225734582e-05, "loss": 0.5874, "step": 10184 }, { "epoch": 11.6005698005698, "grad_norm": 0.2129170149564743, "learning_rate": 2.012759730497367e-05, "loss": 0.7835, "step": 10185 }, { "epoch": 11.601709401709401, "grad_norm": 0.2610325217247009, "learning_rate": 2.0123027553444708e-05, "loss": 0.5777, "step": 10186 }, { "epoch": 11.602849002849004, "grad_norm": 0.19810867309570312, "learning_rate": 2.0118457971306402e-05, "loss": 0.6316, "step": 10187 }, { "epoch": 11.603988603988604, "grad_norm": 0.17435985803604126, "learning_rate": 2.0113888558717475e-05, "loss": 0.8116, "step": 10188 }, { "epoch": 11.605128205128205, "grad_norm": 0.24193398654460907, "learning_rate": 2.0109319315836644e-05, "loss": 0.6601, "step": 10189 }, { "epoch": 11.606267806267807, "grad_norm": 0.2750731408596039, "learning_rate": 2.01047502428226e-05, "loss": 0.7409, "step": 10190 }, { "epoch": 11.607407407407408, "grad_norm": 0.22689788043498993, "learning_rate": 2.010018133983404e-05, "loss": 0.6837, "step": 10191 }, { "epoch": 11.608547008547008, "grad_norm": 0.1787591278553009, "learning_rate": 2.0095612607029655e-05, "loss": 0.8582, "step": 10192 }, { "epoch": 11.60968660968661, "grad_norm": 0.22188825905323029, "learning_rate": 2.0091044044568138e-05, "loss": 0.7676, "step": 10193 }, { "epoch": 11.610826210826211, "grad_norm": 0.2448352873325348, "learning_rate": 2.0086475652608155e-05, "loss": 0.6001, "step": 10194 }, { "epoch": 11.611965811965812, "grad_norm": 0.22144730389118195, "learning_rate": 2.008190743130839e-05, "loss": 0.742, "step": 10195 }, { "epoch": 11.613105413105414, "grad_norm": 0.21111179888248444, "learning_rate": 2.0077339380827502e-05, "loss": 0.6029, "step": 10196 }, { "epoch": 11.614245014245014, "grad_norm": 0.25137585401535034, "learning_rate": 2.0072771501324157e-05, "loss": 0.6591, "step": 10197 }, { "epoch": 11.615384615384615, "grad_norm": 0.2582457363605499, "learning_rate": 2.0068203792957002e-05, "loss": 0.5815, "step": 10198 }, { "epoch": 11.616524216524217, "grad_norm": 0.2855578362941742, "learning_rate": 2.0063636255884686e-05, "loss": 0.8637, "step": 10199 }, { "epoch": 11.617663817663818, "grad_norm": 0.24564269185066223, "learning_rate": 2.005906889026587e-05, "loss": 0.602, "step": 10200 }, { "epoch": 11.618803418803418, "grad_norm": 0.22456574440002441, "learning_rate": 2.0054501696259177e-05, "loss": 0.6701, "step": 10201 }, { "epoch": 11.61994301994302, "grad_norm": 0.24417828023433685, "learning_rate": 2.004993467402324e-05, "loss": 0.7006, "step": 10202 }, { "epoch": 11.621082621082621, "grad_norm": 0.22325687110424042, "learning_rate": 2.004536782371668e-05, "loss": 0.7052, "step": 10203 }, { "epoch": 11.622222222222222, "grad_norm": 0.1909397542476654, "learning_rate": 2.0040801145498128e-05, "loss": 0.7895, "step": 10204 }, { "epoch": 11.623361823361824, "grad_norm": 0.21828581392765045, "learning_rate": 2.0036234639526185e-05, "loss": 0.7841, "step": 10205 }, { "epoch": 11.624501424501425, "grad_norm": 0.2113490253686905, "learning_rate": 2.0031668305959467e-05, "loss": 0.5941, "step": 10206 }, { "epoch": 11.625641025641025, "grad_norm": 0.26541098952293396, "learning_rate": 2.0027102144956566e-05, "loss": 0.414, "step": 10207 }, { "epoch": 11.626780626780628, "grad_norm": 0.21522648632526398, "learning_rate": 2.0022536156676093e-05, "loss": 0.5383, "step": 10208 }, { "epoch": 11.627920227920228, "grad_norm": 0.2644941806793213, "learning_rate": 2.0017970341276617e-05, "loss": 0.5464, "step": 10209 }, { "epoch": 11.629059829059829, "grad_norm": 0.23860087990760803, "learning_rate": 2.0013404698916726e-05, "loss": 0.6907, "step": 10210 }, { "epoch": 11.630199430199431, "grad_norm": 0.20339278876781464, "learning_rate": 2.000883922975501e-05, "loss": 0.7949, "step": 10211 }, { "epoch": 11.631339031339031, "grad_norm": 0.22736825048923492, "learning_rate": 2.000427393395004e-05, "loss": 0.6281, "step": 10212 }, { "epoch": 11.632478632478632, "grad_norm": 0.19720183312892914, "learning_rate": 1.9999708811660366e-05, "loss": 0.6793, "step": 10213 }, { "epoch": 11.633618233618234, "grad_norm": 0.19661399722099304, "learning_rate": 1.9995143863044566e-05, "loss": 0.8378, "step": 10214 }, { "epoch": 11.634757834757835, "grad_norm": 0.2816072404384613, "learning_rate": 1.9990579088261176e-05, "loss": 0.8704, "step": 10215 }, { "epoch": 11.635897435897435, "grad_norm": 0.27159765362739563, "learning_rate": 1.9986014487468752e-05, "loss": 0.5556, "step": 10216 }, { "epoch": 11.637037037037038, "grad_norm": 0.19535863399505615, "learning_rate": 1.998145006082583e-05, "loss": 0.6989, "step": 10217 }, { "epoch": 11.638176638176638, "grad_norm": 0.24935664236545563, "learning_rate": 1.9976885808490957e-05, "loss": 0.5877, "step": 10218 }, { "epoch": 11.639316239316239, "grad_norm": 0.3039929270744324, "learning_rate": 1.9972321730622644e-05, "loss": 0.5341, "step": 10219 }, { "epoch": 11.640455840455841, "grad_norm": 0.2003876268863678, "learning_rate": 1.996775782737943e-05, "loss": 0.7675, "step": 10220 }, { "epoch": 11.641595441595442, "grad_norm": 0.20660299062728882, "learning_rate": 1.9963194098919817e-05, "loss": 0.8653, "step": 10221 }, { "epoch": 11.642735042735042, "grad_norm": 0.18956273794174194, "learning_rate": 1.9958630545402327e-05, "loss": 0.6686, "step": 10222 }, { "epoch": 11.643874643874645, "grad_norm": 0.2787805497646332, "learning_rate": 1.9954067166985467e-05, "loss": 0.4714, "step": 10223 }, { "epoch": 11.645014245014245, "grad_norm": 0.21823909878730774, "learning_rate": 1.9949503963827726e-05, "loss": 0.4356, "step": 10224 }, { "epoch": 11.646153846153846, "grad_norm": 0.2522222697734833, "learning_rate": 1.9944940936087608e-05, "loss": 0.6116, "step": 10225 }, { "epoch": 11.647293447293448, "grad_norm": 0.16310052573680878, "learning_rate": 1.9940378083923587e-05, "loss": 0.8189, "step": 10226 }, { "epoch": 11.648433048433048, "grad_norm": 0.21357674896717072, "learning_rate": 1.993581540749415e-05, "loss": 0.7482, "step": 10227 }, { "epoch": 11.649572649572649, "grad_norm": 0.24183881282806396, "learning_rate": 1.993125290695777e-05, "loss": 0.5574, "step": 10228 }, { "epoch": 11.650712250712251, "grad_norm": 0.2169681191444397, "learning_rate": 1.992669058247292e-05, "loss": 0.6125, "step": 10229 }, { "epoch": 11.651851851851852, "grad_norm": 0.23740357160568237, "learning_rate": 1.9922128434198054e-05, "loss": 0.6825, "step": 10230 }, { "epoch": 11.652991452991452, "grad_norm": 0.2695971131324768, "learning_rate": 1.9917566462291634e-05, "loss": 0.4581, "step": 10231 }, { "epoch": 11.654131054131055, "grad_norm": 0.31167930364608765, "learning_rate": 1.99130046669121e-05, "loss": 0.4455, "step": 10232 }, { "epoch": 11.655270655270655, "grad_norm": 0.21581199765205383, "learning_rate": 1.9908443048217908e-05, "loss": 0.6783, "step": 10233 }, { "epoch": 11.656410256410256, "grad_norm": 0.2644268572330475, "learning_rate": 1.990388160636749e-05, "loss": 0.5075, "step": 10234 }, { "epoch": 11.657549857549858, "grad_norm": 0.177691251039505, "learning_rate": 1.989932034151928e-05, "loss": 0.6829, "step": 10235 }, { "epoch": 11.658689458689459, "grad_norm": 0.3367776572704315, "learning_rate": 1.98947592538317e-05, "loss": 0.3184, "step": 10236 }, { "epoch": 11.65982905982906, "grad_norm": 0.17489689588546753, "learning_rate": 1.9890198343463172e-05, "loss": 0.8122, "step": 10237 }, { "epoch": 11.660968660968662, "grad_norm": 0.18993866443634033, "learning_rate": 1.9885637610572112e-05, "loss": 0.6634, "step": 10238 }, { "epoch": 11.662108262108262, "grad_norm": 0.2165469080209732, "learning_rate": 1.9881077055316915e-05, "loss": 0.6498, "step": 10239 }, { "epoch": 11.663247863247863, "grad_norm": 0.2665802538394928, "learning_rate": 1.9876516677855994e-05, "loss": 0.4635, "step": 10240 }, { "epoch": 11.664387464387465, "grad_norm": 0.23495157063007355, "learning_rate": 1.9871956478347737e-05, "loss": 0.6152, "step": 10241 }, { "epoch": 11.665527065527066, "grad_norm": 0.2646816670894623, "learning_rate": 1.9867396456950532e-05, "loss": 0.6383, "step": 10242 }, { "epoch": 11.666666666666666, "grad_norm": 0.26022160053253174, "learning_rate": 1.9862836613822758e-05, "loss": 0.7183, "step": 10243 }, { "epoch": 11.667806267806268, "grad_norm": 0.2543501555919647, "learning_rate": 1.9858276949122796e-05, "loss": 0.5911, "step": 10244 }, { "epoch": 11.668945868945869, "grad_norm": 0.2152557224035263, "learning_rate": 1.9853717463009025e-05, "loss": 0.5946, "step": 10245 }, { "epoch": 11.67008547008547, "grad_norm": 0.18171733617782593, "learning_rate": 1.9849158155639795e-05, "loss": 0.573, "step": 10246 }, { "epoch": 11.671225071225072, "grad_norm": 0.2071221023797989, "learning_rate": 1.9844599027173464e-05, "loss": 0.6361, "step": 10247 }, { "epoch": 11.672364672364672, "grad_norm": 0.21655473113059998, "learning_rate": 1.984004007776839e-05, "loss": 0.5684, "step": 10248 }, { "epoch": 11.673504273504273, "grad_norm": 0.2225070297718048, "learning_rate": 1.983548130758291e-05, "loss": 0.5682, "step": 10249 }, { "epoch": 11.674643874643875, "grad_norm": 0.22800558805465698, "learning_rate": 1.9830922716775374e-05, "loss": 0.6468, "step": 10250 }, { "epoch": 11.675783475783476, "grad_norm": 0.1755601316690445, "learning_rate": 1.98263643055041e-05, "loss": 0.8017, "step": 10251 }, { "epoch": 11.676923076923076, "grad_norm": 0.19407087564468384, "learning_rate": 1.9821806073927427e-05, "loss": 0.7635, "step": 10252 }, { "epoch": 11.678062678062679, "grad_norm": 0.17562982439994812, "learning_rate": 1.9817248022203663e-05, "loss": 0.8464, "step": 10253 }, { "epoch": 11.67920227920228, "grad_norm": 0.20420879125595093, "learning_rate": 1.9812690150491124e-05, "loss": 0.6981, "step": 10254 }, { "epoch": 11.68034188034188, "grad_norm": 0.2194565385580063, "learning_rate": 1.980813245894813e-05, "loss": 0.661, "step": 10255 }, { "epoch": 11.681481481481482, "grad_norm": 0.2306593805551529, "learning_rate": 1.980357494773297e-05, "loss": 0.6342, "step": 10256 }, { "epoch": 11.682621082621083, "grad_norm": 0.20485879480838776, "learning_rate": 1.9799017617003947e-05, "loss": 0.6642, "step": 10257 }, { "epoch": 11.683760683760683, "grad_norm": 0.19167086482048035, "learning_rate": 1.979446046691934e-05, "loss": 0.565, "step": 10258 }, { "epoch": 11.684900284900285, "grad_norm": 0.19284793734550476, "learning_rate": 1.9789903497637437e-05, "loss": 0.8176, "step": 10259 }, { "epoch": 11.686039886039886, "grad_norm": 0.17451520264148712, "learning_rate": 1.9785346709316514e-05, "loss": 0.7541, "step": 10260 }, { "epoch": 11.687179487179487, "grad_norm": 0.2519378662109375, "learning_rate": 1.9780790102114844e-05, "loss": 0.5352, "step": 10261 }, { "epoch": 11.688319088319089, "grad_norm": 0.20037703216075897, "learning_rate": 1.977623367619068e-05, "loss": 0.8032, "step": 10262 }, { "epoch": 11.68945868945869, "grad_norm": 0.24198654294013977, "learning_rate": 1.977167743170229e-05, "loss": 0.5516, "step": 10263 }, { "epoch": 11.69059829059829, "grad_norm": 0.24944379925727844, "learning_rate": 1.9767121368807917e-05, "loss": 0.5552, "step": 10264 }, { "epoch": 11.691737891737892, "grad_norm": 0.18780256807804108, "learning_rate": 1.9762565487665806e-05, "loss": 0.6696, "step": 10265 }, { "epoch": 11.692877492877493, "grad_norm": 0.1785256266593933, "learning_rate": 1.9758009788434206e-05, "loss": 0.7848, "step": 10266 }, { "epoch": 11.694017094017093, "grad_norm": 0.24251900613307953, "learning_rate": 1.9753454271271338e-05, "loss": 0.6627, "step": 10267 }, { "epoch": 11.695156695156696, "grad_norm": 0.24134206771850586, "learning_rate": 1.9748898936335436e-05, "loss": 0.7947, "step": 10268 }, { "epoch": 11.696296296296296, "grad_norm": 0.2050948590040207, "learning_rate": 1.974434378378471e-05, "loss": 0.6494, "step": 10269 }, { "epoch": 11.697435897435897, "grad_norm": 0.2239166498184204, "learning_rate": 1.973978881377738e-05, "loss": 0.7998, "step": 10270 }, { "epoch": 11.698575498575499, "grad_norm": 0.21804192662239075, "learning_rate": 1.973523402647165e-05, "loss": 0.7006, "step": 10271 }, { "epoch": 11.6997150997151, "grad_norm": 0.21212780475616455, "learning_rate": 1.9730679422025723e-05, "loss": 0.6098, "step": 10272 }, { "epoch": 11.7008547008547, "grad_norm": 0.26690980792045593, "learning_rate": 1.9726125000597784e-05, "loss": 0.4963, "step": 10273 }, { "epoch": 11.701994301994302, "grad_norm": 0.2465326339006424, "learning_rate": 1.9721570762346033e-05, "loss": 0.4316, "step": 10274 }, { "epoch": 11.703133903133903, "grad_norm": 0.19193251430988312, "learning_rate": 1.971701670742864e-05, "loss": 0.5735, "step": 10275 }, { "epoch": 11.704273504273504, "grad_norm": 0.21202270686626434, "learning_rate": 1.971246283600378e-05, "loss": 0.6318, "step": 10276 }, { "epoch": 11.705413105413106, "grad_norm": 0.21323411166667938, "learning_rate": 1.9707909148229637e-05, "loss": 0.7567, "step": 10277 }, { "epoch": 11.706552706552706, "grad_norm": 0.22046038508415222, "learning_rate": 1.9703355644264358e-05, "loss": 0.767, "step": 10278 }, { "epoch": 11.707692307692307, "grad_norm": 0.23657524585723877, "learning_rate": 1.9698802324266107e-05, "loss": 0.8051, "step": 10279 }, { "epoch": 11.70883190883191, "grad_norm": 0.18003609776496887, "learning_rate": 1.9694249188393025e-05, "loss": 0.8554, "step": 10280 }, { "epoch": 11.70997150997151, "grad_norm": 0.1774459034204483, "learning_rate": 1.9689696236803264e-05, "loss": 0.9601, "step": 10281 }, { "epoch": 11.71111111111111, "grad_norm": 0.1597072333097458, "learning_rate": 1.9685143469654954e-05, "loss": 0.7435, "step": 10282 }, { "epoch": 11.712250712250713, "grad_norm": 0.2522660791873932, "learning_rate": 1.968059088710623e-05, "loss": 0.4176, "step": 10283 }, { "epoch": 11.713390313390313, "grad_norm": 0.20774319767951965, "learning_rate": 1.9676038489315206e-05, "loss": 0.8479, "step": 10284 }, { "epoch": 11.714529914529914, "grad_norm": 0.2235976606607437, "learning_rate": 1.9671486276440015e-05, "loss": 0.8855, "step": 10285 }, { "epoch": 11.715669515669516, "grad_norm": 0.2282378375530243, "learning_rate": 1.9666934248638752e-05, "loss": 0.6014, "step": 10286 }, { "epoch": 11.716809116809117, "grad_norm": 0.2331165373325348, "learning_rate": 1.9662382406069524e-05, "loss": 0.6405, "step": 10287 }, { "epoch": 11.717948717948717, "grad_norm": 0.2122265249490738, "learning_rate": 1.9657830748890437e-05, "loss": 0.785, "step": 10288 }, { "epoch": 11.71908831908832, "grad_norm": 0.18670682609081268, "learning_rate": 1.9653279277259586e-05, "loss": 0.7666, "step": 10289 }, { "epoch": 11.72022792022792, "grad_norm": 0.19317886233329773, "learning_rate": 1.9648727991335043e-05, "loss": 0.7951, "step": 10290 }, { "epoch": 11.72136752136752, "grad_norm": 0.19641445577144623, "learning_rate": 1.96441768912749e-05, "loss": 0.6064, "step": 10291 }, { "epoch": 11.722507122507123, "grad_norm": 0.19501538574695587, "learning_rate": 1.9639625977237215e-05, "loss": 0.5847, "step": 10292 }, { "epoch": 11.723646723646723, "grad_norm": 0.23385095596313477, "learning_rate": 1.9635075249380065e-05, "loss": 0.6066, "step": 10293 }, { "epoch": 11.724786324786324, "grad_norm": 0.20832736790180206, "learning_rate": 1.9630524707861504e-05, "loss": 0.6865, "step": 10294 }, { "epoch": 11.725925925925926, "grad_norm": 0.1870364248752594, "learning_rate": 1.9625974352839587e-05, "loss": 0.7703, "step": 10295 }, { "epoch": 11.727065527065527, "grad_norm": 0.24797910451889038, "learning_rate": 1.9621424184472355e-05, "loss": 0.6411, "step": 10296 }, { "epoch": 11.728205128205127, "grad_norm": 0.21023236215114594, "learning_rate": 1.9616874202917863e-05, "loss": 0.6611, "step": 10297 }, { "epoch": 11.72934472934473, "grad_norm": 0.23132221400737762, "learning_rate": 1.9612324408334114e-05, "loss": 0.7455, "step": 10298 }, { "epoch": 11.73048433048433, "grad_norm": 0.16348059475421906, "learning_rate": 1.9607774800879167e-05, "loss": 0.6869, "step": 10299 }, { "epoch": 11.73162393162393, "grad_norm": 0.2313406616449356, "learning_rate": 1.960322538071103e-05, "loss": 0.4654, "step": 10300 }, { "epoch": 11.732763532763533, "grad_norm": 0.18180276453495026, "learning_rate": 1.9598676147987715e-05, "loss": 0.707, "step": 10301 }, { "epoch": 11.733903133903134, "grad_norm": 0.2248227596282959, "learning_rate": 1.9594127102867235e-05, "loss": 0.7222, "step": 10302 }, { "epoch": 11.735042735042736, "grad_norm": 0.21386340260505676, "learning_rate": 1.9589578245507583e-05, "loss": 0.7127, "step": 10303 }, { "epoch": 11.736182336182337, "grad_norm": 0.22880034148693085, "learning_rate": 1.958502957606676e-05, "loss": 0.6575, "step": 10304 }, { "epoch": 11.737321937321937, "grad_norm": 0.2314443439245224, "learning_rate": 1.958048109470275e-05, "loss": 0.6544, "step": 10305 }, { "epoch": 11.73846153846154, "grad_norm": 0.22057729959487915, "learning_rate": 1.9575932801573537e-05, "loss": 0.5983, "step": 10306 }, { "epoch": 11.73960113960114, "grad_norm": 0.24679039418697357, "learning_rate": 1.957138469683709e-05, "loss": 0.4857, "step": 10307 }, { "epoch": 11.74074074074074, "grad_norm": 0.21340249478816986, "learning_rate": 1.9566836780651384e-05, "loss": 0.7037, "step": 10308 }, { "epoch": 11.741880341880343, "grad_norm": 0.2492295503616333, "learning_rate": 1.956228905317437e-05, "loss": 0.7971, "step": 10309 }, { "epoch": 11.743019943019943, "grad_norm": 0.21592360734939575, "learning_rate": 1.9557741514564014e-05, "loss": 0.8354, "step": 10310 }, { "epoch": 11.744159544159544, "grad_norm": 0.2611568570137024, "learning_rate": 1.955319416497827e-05, "loss": 0.6689, "step": 10311 }, { "epoch": 11.745299145299146, "grad_norm": 0.22354160249233246, "learning_rate": 1.9548647004575065e-05, "loss": 0.6798, "step": 10312 }, { "epoch": 11.746438746438747, "grad_norm": 0.20190760493278503, "learning_rate": 1.9544100033512345e-05, "loss": 0.7719, "step": 10313 }, { "epoch": 11.747578347578347, "grad_norm": 0.19497324526309967, "learning_rate": 1.953955325194803e-05, "loss": 0.9254, "step": 10314 }, { "epoch": 11.74871794871795, "grad_norm": 0.2249239683151245, "learning_rate": 1.953500666004005e-05, "loss": 0.7229, "step": 10315 }, { "epoch": 11.74985754985755, "grad_norm": 0.19481399655342102, "learning_rate": 1.9530460257946316e-05, "loss": 0.75, "step": 10316 }, { "epoch": 11.75099715099715, "grad_norm": 0.24393950402736664, "learning_rate": 1.952591404582474e-05, "loss": 0.7221, "step": 10317 }, { "epoch": 11.752136752136753, "grad_norm": 0.26911473274230957, "learning_rate": 1.9521368023833222e-05, "loss": 0.6141, "step": 10318 }, { "epoch": 11.753276353276354, "grad_norm": 0.21604038774967194, "learning_rate": 1.9516822192129664e-05, "loss": 0.8449, "step": 10319 }, { "epoch": 11.754415954415954, "grad_norm": 0.18873000144958496, "learning_rate": 1.951227655087194e-05, "loss": 0.9222, "step": 10320 }, { "epoch": 11.755555555555556, "grad_norm": 0.24527999758720398, "learning_rate": 1.950773110021794e-05, "loss": 0.5285, "step": 10321 }, { "epoch": 11.756695156695157, "grad_norm": 0.20278817415237427, "learning_rate": 1.9503185840325556e-05, "loss": 0.8539, "step": 10322 }, { "epoch": 11.757834757834758, "grad_norm": 0.20328575372695923, "learning_rate": 1.9498640771352633e-05, "loss": 0.765, "step": 10323 }, { "epoch": 11.75897435897436, "grad_norm": 0.18511201441287994, "learning_rate": 1.9494095893457054e-05, "loss": 0.5335, "step": 10324 }, { "epoch": 11.76011396011396, "grad_norm": 0.2065373957157135, "learning_rate": 1.948955120679666e-05, "loss": 0.6451, "step": 10325 }, { "epoch": 11.761253561253561, "grad_norm": 0.20976349711418152, "learning_rate": 1.9485006711529306e-05, "loss": 0.71, "step": 10326 }, { "epoch": 11.762393162393163, "grad_norm": 0.22290416061878204, "learning_rate": 1.9480462407812842e-05, "loss": 0.6158, "step": 10327 }, { "epoch": 11.763532763532764, "grad_norm": 0.2827098071575165, "learning_rate": 1.947591829580509e-05, "loss": 0.4498, "step": 10328 }, { "epoch": 11.764672364672364, "grad_norm": 0.17713472247123718, "learning_rate": 1.9471374375663893e-05, "loss": 0.6951, "step": 10329 }, { "epoch": 11.765811965811967, "grad_norm": 0.20703105628490448, "learning_rate": 1.9466830647547063e-05, "loss": 0.533, "step": 10330 }, { "epoch": 11.766951566951567, "grad_norm": 0.21153715252876282, "learning_rate": 1.9462287111612423e-05, "loss": 0.8177, "step": 10331 }, { "epoch": 11.768091168091168, "grad_norm": 0.21457445621490479, "learning_rate": 1.9457743768017773e-05, "loss": 0.6023, "step": 10332 }, { "epoch": 11.76923076923077, "grad_norm": 0.20348374545574188, "learning_rate": 1.9453200616920928e-05, "loss": 0.6315, "step": 10333 }, { "epoch": 11.77037037037037, "grad_norm": 0.24021980166435242, "learning_rate": 1.9448657658479684e-05, "loss": 0.6871, "step": 10334 }, { "epoch": 11.771509971509971, "grad_norm": 0.20378951728343964, "learning_rate": 1.9444114892851823e-05, "loss": 0.5951, "step": 10335 }, { "epoch": 11.772649572649573, "grad_norm": 0.21462382376194, "learning_rate": 1.9439572320195133e-05, "loss": 0.7688, "step": 10336 }, { "epoch": 11.773789173789174, "grad_norm": 0.25255388021469116, "learning_rate": 1.9435029940667383e-05, "loss": 0.554, "step": 10337 }, { "epoch": 11.774928774928775, "grad_norm": 0.23222294449806213, "learning_rate": 1.943048775442635e-05, "loss": 0.7733, "step": 10338 }, { "epoch": 11.776068376068377, "grad_norm": 0.22443172335624695, "learning_rate": 1.942594576162979e-05, "loss": 0.6834, "step": 10339 }, { "epoch": 11.777207977207977, "grad_norm": 0.23955731093883514, "learning_rate": 1.942140396243547e-05, "loss": 0.67, "step": 10340 }, { "epoch": 11.778347578347578, "grad_norm": 0.1832430064678192, "learning_rate": 1.9416862357001124e-05, "loss": 0.6956, "step": 10341 }, { "epoch": 11.77948717948718, "grad_norm": 0.24881421029567719, "learning_rate": 1.9412320945484507e-05, "loss": 0.6896, "step": 10342 }, { "epoch": 11.78062678062678, "grad_norm": 0.22075805068016052, "learning_rate": 1.940777972804334e-05, "loss": 0.8574, "step": 10343 }, { "epoch": 11.781766381766381, "grad_norm": 0.16448967158794403, "learning_rate": 1.9403238704835365e-05, "loss": 0.8845, "step": 10344 }, { "epoch": 11.782905982905984, "grad_norm": 0.24849064648151398, "learning_rate": 1.939869787601831e-05, "loss": 0.7614, "step": 10345 }, { "epoch": 11.784045584045584, "grad_norm": 0.1989343762397766, "learning_rate": 1.9394157241749875e-05, "loss": 0.6648, "step": 10346 }, { "epoch": 11.785185185185185, "grad_norm": 0.18648910522460938, "learning_rate": 1.938961680218778e-05, "loss": 0.7701, "step": 10347 }, { "epoch": 11.786324786324787, "grad_norm": 0.28118088841438293, "learning_rate": 1.9385076557489716e-05, "loss": 0.6901, "step": 10348 }, { "epoch": 11.787464387464388, "grad_norm": 0.27307435870170593, "learning_rate": 1.938053650781339e-05, "loss": 0.5536, "step": 10349 }, { "epoch": 11.788603988603988, "grad_norm": 0.24502988159656525, "learning_rate": 1.9375996653316476e-05, "loss": 0.7454, "step": 10350 }, { "epoch": 11.78974358974359, "grad_norm": 0.19854822754859924, "learning_rate": 1.9371456994156677e-05, "loss": 0.6989, "step": 10351 }, { "epoch": 11.790883190883191, "grad_norm": 0.21826986968517303, "learning_rate": 1.9366917530491645e-05, "loss": 0.7409, "step": 10352 }, { "epoch": 11.792022792022792, "grad_norm": 0.19717830419540405, "learning_rate": 1.9362378262479064e-05, "loss": 0.7393, "step": 10353 }, { "epoch": 11.793162393162394, "grad_norm": 0.2504718601703644, "learning_rate": 1.9357839190276576e-05, "loss": 0.6111, "step": 10354 }, { "epoch": 11.794301994301994, "grad_norm": 0.21012093126773834, "learning_rate": 1.9353300314041863e-05, "loss": 0.751, "step": 10355 }, { "epoch": 11.795441595441595, "grad_norm": 0.23705442249774933, "learning_rate": 1.9348761633932556e-05, "loss": 0.653, "step": 10356 }, { "epoch": 11.796581196581197, "grad_norm": 0.1830354630947113, "learning_rate": 1.9344223150106293e-05, "loss": 0.6232, "step": 10357 }, { "epoch": 11.797720797720798, "grad_norm": 0.307734876871109, "learning_rate": 1.9339684862720723e-05, "loss": 0.5568, "step": 10358 }, { "epoch": 11.798860398860398, "grad_norm": 0.2636401951313019, "learning_rate": 1.9335146771933454e-05, "loss": 0.6103, "step": 10359 }, { "epoch": 11.8, "grad_norm": 0.2028345763683319, "learning_rate": 1.9330608877902127e-05, "loss": 0.5552, "step": 10360 }, { "epoch": 11.801139601139601, "grad_norm": 0.21922144293785095, "learning_rate": 1.9326071180784334e-05, "loss": 0.6134, "step": 10361 }, { "epoch": 11.802279202279202, "grad_norm": 0.17650465667247772, "learning_rate": 1.93215336807377e-05, "loss": 0.7208, "step": 10362 }, { "epoch": 11.803418803418804, "grad_norm": 0.23563839495182037, "learning_rate": 1.931699637791981e-05, "loss": 0.6306, "step": 10363 }, { "epoch": 11.804558404558405, "grad_norm": 0.1969102919101715, "learning_rate": 1.9312459272488273e-05, "loss": 0.5486, "step": 10364 }, { "epoch": 11.805698005698005, "grad_norm": 0.16801908612251282, "learning_rate": 1.930792236460065e-05, "loss": 0.8046, "step": 10365 }, { "epoch": 11.806837606837608, "grad_norm": 0.220314621925354, "learning_rate": 1.9303385654414546e-05, "loss": 0.824, "step": 10366 }, { "epoch": 11.807977207977208, "grad_norm": 0.21237602829933167, "learning_rate": 1.9298849142087523e-05, "loss": 0.668, "step": 10367 }, { "epoch": 11.809116809116809, "grad_norm": 0.2324405461549759, "learning_rate": 1.9294312827777156e-05, "loss": 0.6865, "step": 10368 }, { "epoch": 11.810256410256411, "grad_norm": 0.1946413516998291, "learning_rate": 1.9289776711640986e-05, "loss": 0.6935, "step": 10369 }, { "epoch": 11.811396011396011, "grad_norm": 0.2359287589788437, "learning_rate": 1.928524079383658e-05, "loss": 0.57, "step": 10370 }, { "epoch": 11.812535612535612, "grad_norm": 0.19522544741630554, "learning_rate": 1.928070507452147e-05, "loss": 0.709, "step": 10371 }, { "epoch": 11.813675213675214, "grad_norm": 0.20918940007686615, "learning_rate": 1.927616955385321e-05, "loss": 0.5174, "step": 10372 }, { "epoch": 11.814814814814815, "grad_norm": 0.2223028987646103, "learning_rate": 1.9271634231989317e-05, "loss": 0.8102, "step": 10373 }, { "epoch": 11.815954415954415, "grad_norm": 0.2308153510093689, "learning_rate": 1.926709910908732e-05, "loss": 0.7818, "step": 10374 }, { "epoch": 11.817094017094018, "grad_norm": 0.2559540867805481, "learning_rate": 1.9262564185304736e-05, "loss": 0.6785, "step": 10375 }, { "epoch": 11.818233618233618, "grad_norm": 0.24020728468894958, "learning_rate": 1.925802946079907e-05, "loss": 0.7562, "step": 10376 }, { "epoch": 11.819373219373219, "grad_norm": 0.1708139330148697, "learning_rate": 1.925349493572784e-05, "loss": 0.8199, "step": 10377 }, { "epoch": 11.820512820512821, "grad_norm": 0.198536679148674, "learning_rate": 1.9248960610248528e-05, "loss": 0.6193, "step": 10378 }, { "epoch": 11.821652421652422, "grad_norm": 0.23113545775413513, "learning_rate": 1.9244426484518634e-05, "loss": 0.7066, "step": 10379 }, { "epoch": 11.822792022792022, "grad_norm": 0.16276973485946655, "learning_rate": 1.9239892558695634e-05, "loss": 0.8879, "step": 10380 }, { "epoch": 11.823931623931625, "grad_norm": 0.21287094056606293, "learning_rate": 1.9235358832937012e-05, "loss": 0.8354, "step": 10381 }, { "epoch": 11.825071225071225, "grad_norm": 0.2622362971305847, "learning_rate": 1.923082530740022e-05, "loss": 0.6175, "step": 10382 }, { "epoch": 11.826210826210826, "grad_norm": 0.2562221884727478, "learning_rate": 1.922629198224274e-05, "loss": 0.8062, "step": 10383 }, { "epoch": 11.827350427350428, "grad_norm": 0.24836982786655426, "learning_rate": 1.9221758857622012e-05, "loss": 0.5357, "step": 10384 }, { "epoch": 11.828490028490029, "grad_norm": 0.1780504584312439, "learning_rate": 1.921722593369549e-05, "loss": 0.6529, "step": 10385 }, { "epoch": 11.829629629629629, "grad_norm": 0.23514041304588318, "learning_rate": 1.9212693210620615e-05, "loss": 0.7813, "step": 10386 }, { "epoch": 11.830769230769231, "grad_norm": 0.2199113517999649, "learning_rate": 1.920816068855481e-05, "loss": 0.629, "step": 10387 }, { "epoch": 11.831908831908832, "grad_norm": 0.25433075428009033, "learning_rate": 1.9203628367655525e-05, "loss": 0.6047, "step": 10388 }, { "epoch": 11.833048433048432, "grad_norm": 0.20595094561576843, "learning_rate": 1.919909624808016e-05, "loss": 0.7213, "step": 10389 }, { "epoch": 11.834188034188035, "grad_norm": 0.24765519797801971, "learning_rate": 1.9194564329986143e-05, "loss": 0.6399, "step": 10390 }, { "epoch": 11.835327635327635, "grad_norm": 0.2323462814092636, "learning_rate": 1.919003261353086e-05, "loss": 0.6372, "step": 10391 }, { "epoch": 11.836467236467236, "grad_norm": 0.19704553484916687, "learning_rate": 1.9185501098871734e-05, "loss": 0.7029, "step": 10392 }, { "epoch": 11.837606837606838, "grad_norm": 0.2212490439414978, "learning_rate": 1.9180969786166135e-05, "loss": 0.5455, "step": 10393 }, { "epoch": 11.838746438746439, "grad_norm": 0.22143539786338806, "learning_rate": 1.9176438675571468e-05, "loss": 0.6438, "step": 10394 }, { "epoch": 11.83988603988604, "grad_norm": 0.18880659341812134, "learning_rate": 1.9171907767245094e-05, "loss": 0.6493, "step": 10395 }, { "epoch": 11.841025641025642, "grad_norm": 0.23644597828388214, "learning_rate": 1.9167377061344392e-05, "loss": 0.6113, "step": 10396 }, { "epoch": 11.842165242165242, "grad_norm": 0.18689411878585815, "learning_rate": 1.916284655802672e-05, "loss": 0.9227, "step": 10397 }, { "epoch": 11.843304843304843, "grad_norm": 0.2455085664987564, "learning_rate": 1.9158316257449437e-05, "loss": 0.6263, "step": 10398 }, { "epoch": 11.844444444444445, "grad_norm": 0.23546573519706726, "learning_rate": 1.9153786159769904e-05, "loss": 0.7906, "step": 10399 }, { "epoch": 11.845584045584046, "grad_norm": 0.27554669976234436, "learning_rate": 1.914925626514545e-05, "loss": 0.4858, "step": 10400 }, { "epoch": 11.846723646723646, "grad_norm": 0.26409924030303955, "learning_rate": 1.9144726573733417e-05, "loss": 0.5765, "step": 10401 }, { "epoch": 11.847863247863248, "grad_norm": 0.24652262032032013, "learning_rate": 1.914019708569113e-05, "loss": 0.4755, "step": 10402 }, { "epoch": 11.849002849002849, "grad_norm": 0.2093600332736969, "learning_rate": 1.9135667801175922e-05, "loss": 0.6372, "step": 10403 }, { "epoch": 11.85014245014245, "grad_norm": 0.24542714655399323, "learning_rate": 1.9131138720345087e-05, "loss": 0.6961, "step": 10404 }, { "epoch": 11.851282051282052, "grad_norm": 0.19561639428138733, "learning_rate": 1.9126609843355945e-05, "loss": 0.7853, "step": 10405 }, { "epoch": 11.852421652421652, "grad_norm": 0.19290103018283844, "learning_rate": 1.9122081170365803e-05, "loss": 0.7083, "step": 10406 }, { "epoch": 11.853561253561253, "grad_norm": 0.23284444212913513, "learning_rate": 1.911755270153194e-05, "loss": 0.5735, "step": 10407 }, { "epoch": 11.854700854700855, "grad_norm": 0.19906462728977203, "learning_rate": 1.911302443701165e-05, "loss": 0.7989, "step": 10408 }, { "epoch": 11.855840455840456, "grad_norm": 0.1934279501438141, "learning_rate": 1.9108496376962205e-05, "loss": 1.0198, "step": 10409 }, { "epoch": 11.856980056980056, "grad_norm": 0.22066517174243927, "learning_rate": 1.9103968521540884e-05, "loss": 0.5452, "step": 10410 }, { "epoch": 11.858119658119659, "grad_norm": 0.2257375568151474, "learning_rate": 1.909944087090496e-05, "loss": 0.6269, "step": 10411 }, { "epoch": 11.85925925925926, "grad_norm": 0.22910627722740173, "learning_rate": 1.909491342521167e-05, "loss": 0.5943, "step": 10412 }, { "epoch": 11.86039886039886, "grad_norm": 0.1854284405708313, "learning_rate": 1.9090386184618286e-05, "loss": 0.7018, "step": 10413 }, { "epoch": 11.861538461538462, "grad_norm": 0.22698117792606354, "learning_rate": 1.9085859149282033e-05, "loss": 0.6579, "step": 10414 }, { "epoch": 11.862678062678063, "grad_norm": 0.20717768371105194, "learning_rate": 1.9081332319360163e-05, "loss": 0.6729, "step": 10415 }, { "epoch": 11.863817663817663, "grad_norm": 0.21381470561027527, "learning_rate": 1.907680569500989e-05, "loss": 0.7774, "step": 10416 }, { "epoch": 11.864957264957265, "grad_norm": 0.18799078464508057, "learning_rate": 1.9072279276388453e-05, "loss": 0.7892, "step": 10417 }, { "epoch": 11.866096866096866, "grad_norm": 0.22165992856025696, "learning_rate": 1.9067753063653047e-05, "loss": 0.7188, "step": 10418 }, { "epoch": 11.867236467236467, "grad_norm": 0.23121723532676697, "learning_rate": 1.9063227056960904e-05, "loss": 0.5877, "step": 10419 }, { "epoch": 11.868376068376069, "grad_norm": 0.22566385567188263, "learning_rate": 1.9058701256469197e-05, "loss": 0.6566, "step": 10420 }, { "epoch": 11.86951566951567, "grad_norm": 0.18033069372177124, "learning_rate": 1.905417566233514e-05, "loss": 0.8203, "step": 10421 }, { "epoch": 11.87065527065527, "grad_norm": 0.22708441317081451, "learning_rate": 1.9049650274715917e-05, "loss": 0.732, "step": 10422 }, { "epoch": 11.871794871794872, "grad_norm": 0.22872836887836456, "learning_rate": 1.90451250937687e-05, "loss": 0.6609, "step": 10423 }, { "epoch": 11.872934472934473, "grad_norm": 0.212729349732399, "learning_rate": 1.9040600119650667e-05, "loss": 0.6221, "step": 10424 }, { "epoch": 11.874074074074073, "grad_norm": 0.2345046103000641, "learning_rate": 1.903607535251898e-05, "loss": 0.6607, "step": 10425 }, { "epoch": 11.875213675213676, "grad_norm": 0.2296135574579239, "learning_rate": 1.9031550792530798e-05, "loss": 0.6457, "step": 10426 }, { "epoch": 11.876353276353276, "grad_norm": 0.19891051948070526, "learning_rate": 1.9027026439843264e-05, "loss": 0.665, "step": 10427 }, { "epoch": 11.877492877492877, "grad_norm": 0.22334159910678864, "learning_rate": 1.9022502294613538e-05, "loss": 0.642, "step": 10428 }, { "epoch": 11.878632478632479, "grad_norm": 0.21290633082389832, "learning_rate": 1.9017978356998732e-05, "loss": 0.581, "step": 10429 }, { "epoch": 11.87977207977208, "grad_norm": 0.20970702171325684, "learning_rate": 1.9013454627156002e-05, "loss": 0.6994, "step": 10430 }, { "epoch": 11.88091168091168, "grad_norm": 0.20367108285427094, "learning_rate": 1.900893110524244e-05, "loss": 0.6247, "step": 10431 }, { "epoch": 11.882051282051282, "grad_norm": 0.17370279133319855, "learning_rate": 1.9004407791415185e-05, "loss": 0.8054, "step": 10432 }, { "epoch": 11.883190883190883, "grad_norm": 0.17330661416053772, "learning_rate": 1.8999884685831336e-05, "loss": 0.892, "step": 10433 }, { "epoch": 11.884330484330484, "grad_norm": 0.2370873987674713, "learning_rate": 1.899536178864799e-05, "loss": 0.6162, "step": 10434 }, { "epoch": 11.885470085470086, "grad_norm": 0.2574000358581543, "learning_rate": 1.899083910002225e-05, "loss": 0.6886, "step": 10435 }, { "epoch": 11.886609686609686, "grad_norm": 0.23008638620376587, "learning_rate": 1.8986316620111183e-05, "loss": 0.6253, "step": 10436 }, { "epoch": 11.887749287749287, "grad_norm": 0.23110231757164001, "learning_rate": 1.8981794349071885e-05, "loss": 0.7098, "step": 10437 }, { "epoch": 11.88888888888889, "grad_norm": 0.17536407709121704, "learning_rate": 1.8977272287061416e-05, "loss": 0.7131, "step": 10438 }, { "epoch": 11.89002849002849, "grad_norm": 0.1807669997215271, "learning_rate": 1.8972750434236847e-05, "loss": 0.6549, "step": 10439 }, { "epoch": 11.89116809116809, "grad_norm": 0.1426200121641159, "learning_rate": 1.896822879075522e-05, "loss": 0.7707, "step": 10440 }, { "epoch": 11.892307692307693, "grad_norm": 0.21230009198188782, "learning_rate": 1.8963707356773606e-05, "loss": 0.802, "step": 10441 }, { "epoch": 11.893447293447293, "grad_norm": 0.19961129128932953, "learning_rate": 1.895918613244902e-05, "loss": 0.7292, "step": 10442 }, { "epoch": 11.894586894586894, "grad_norm": 0.20966428518295288, "learning_rate": 1.8954665117938524e-05, "loss": 0.8312, "step": 10443 }, { "epoch": 11.895726495726496, "grad_norm": 0.2418104112148285, "learning_rate": 1.8950144313399128e-05, "loss": 0.645, "step": 10444 }, { "epoch": 11.896866096866097, "grad_norm": 0.21057556569576263, "learning_rate": 1.8945623718987864e-05, "loss": 0.6216, "step": 10445 }, { "epoch": 11.898005698005697, "grad_norm": 0.25361284613609314, "learning_rate": 1.8941103334861735e-05, "loss": 0.5863, "step": 10446 }, { "epoch": 11.8991452991453, "grad_norm": 0.23236331343650818, "learning_rate": 1.893658316117775e-05, "loss": 0.5719, "step": 10447 }, { "epoch": 11.9002849002849, "grad_norm": 0.23758509755134583, "learning_rate": 1.8932063198092902e-05, "loss": 0.709, "step": 10448 }, { "epoch": 11.9014245014245, "grad_norm": 0.21403639018535614, "learning_rate": 1.8927543445764196e-05, "loss": 0.715, "step": 10449 }, { "epoch": 11.902564102564103, "grad_norm": 0.23863375186920166, "learning_rate": 1.8923023904348595e-05, "loss": 0.5046, "step": 10450 }, { "epoch": 11.903703703703703, "grad_norm": 0.24822209775447845, "learning_rate": 1.8918504574003094e-05, "loss": 0.5895, "step": 10451 }, { "epoch": 11.904843304843304, "grad_norm": 0.235719233751297, "learning_rate": 1.8913985454884646e-05, "loss": 0.6689, "step": 10452 }, { "epoch": 11.905982905982906, "grad_norm": 0.20428641140460968, "learning_rate": 1.8909466547150212e-05, "loss": 0.7232, "step": 10453 }, { "epoch": 11.907122507122507, "grad_norm": 0.19499380886554718, "learning_rate": 1.8904947850956768e-05, "loss": 0.6505, "step": 10454 }, { "epoch": 11.908262108262107, "grad_norm": 0.1709848791360855, "learning_rate": 1.890042936646124e-05, "loss": 0.6629, "step": 10455 }, { "epoch": 11.90940170940171, "grad_norm": 0.23522630333900452, "learning_rate": 1.889591109382058e-05, "loss": 0.6986, "step": 10456 }, { "epoch": 11.91054131054131, "grad_norm": 0.245496466755867, "learning_rate": 1.889139303319171e-05, "loss": 0.6002, "step": 10457 }, { "epoch": 11.91168091168091, "grad_norm": 0.24911637604236603, "learning_rate": 1.8886875184731562e-05, "loss": 0.6298, "step": 10458 }, { "epoch": 11.912820512820513, "grad_norm": 0.2072620391845703, "learning_rate": 1.8882357548597046e-05, "loss": 0.7191, "step": 10459 }, { "epoch": 11.913960113960114, "grad_norm": 0.171847864985466, "learning_rate": 1.8877840124945078e-05, "loss": 0.9428, "step": 10460 }, { "epoch": 11.915099715099714, "grad_norm": 0.20700885355472565, "learning_rate": 1.8873322913932556e-05, "loss": 0.4979, "step": 10461 }, { "epoch": 11.916239316239317, "grad_norm": 0.1964074671268463, "learning_rate": 1.8868805915716384e-05, "loss": 0.8944, "step": 10462 }, { "epoch": 11.917378917378917, "grad_norm": 0.17745475471019745, "learning_rate": 1.8864289130453436e-05, "loss": 0.9082, "step": 10463 }, { "epoch": 11.918518518518518, "grad_norm": 0.18889325857162476, "learning_rate": 1.8859772558300597e-05, "loss": 0.6178, "step": 10464 }, { "epoch": 11.91965811965812, "grad_norm": 0.2223418802022934, "learning_rate": 1.885525619941475e-05, "loss": 0.5792, "step": 10465 }, { "epoch": 11.92079772079772, "grad_norm": 0.2345338761806488, "learning_rate": 1.8850740053952748e-05, "loss": 0.6999, "step": 10466 }, { "epoch": 11.921937321937321, "grad_norm": 0.1917179822921753, "learning_rate": 1.8846224122071463e-05, "loss": 0.7388, "step": 10467 }, { "epoch": 11.923076923076923, "grad_norm": 0.21479369699954987, "learning_rate": 1.8841708403927727e-05, "loss": 0.65, "step": 10468 }, { "epoch": 11.924216524216524, "grad_norm": 0.2258853018283844, "learning_rate": 1.88371928996784e-05, "loss": 0.7087, "step": 10469 }, { "epoch": 11.925356125356124, "grad_norm": 0.27944377064704895, "learning_rate": 1.883267760948031e-05, "loss": 0.4203, "step": 10470 }, { "epoch": 11.926495726495727, "grad_norm": 0.2511250674724579, "learning_rate": 1.882816253349029e-05, "loss": 0.6207, "step": 10471 }, { "epoch": 11.927635327635327, "grad_norm": 0.2124982476234436, "learning_rate": 1.8823647671865154e-05, "loss": 0.6641, "step": 10472 }, { "epoch": 11.928774928774928, "grad_norm": 0.21275320649147034, "learning_rate": 1.8819133024761722e-05, "loss": 0.9207, "step": 10473 }, { "epoch": 11.92991452991453, "grad_norm": 0.21663400530815125, "learning_rate": 1.8814618592336795e-05, "loss": 0.6812, "step": 10474 }, { "epoch": 11.93105413105413, "grad_norm": 0.19376392662525177, "learning_rate": 1.8810104374747168e-05, "loss": 0.6601, "step": 10475 }, { "epoch": 11.932193732193731, "grad_norm": 0.2539920210838318, "learning_rate": 1.880559037214965e-05, "loss": 0.5748, "step": 10476 }, { "epoch": 11.933333333333334, "grad_norm": 0.1883857548236847, "learning_rate": 1.8801076584701002e-05, "loss": 0.9136, "step": 10477 }, { "epoch": 11.934472934472934, "grad_norm": 0.22394411265850067, "learning_rate": 1.879656301255802e-05, "loss": 0.7334, "step": 10478 }, { "epoch": 11.935612535612536, "grad_norm": 0.17984677851200104, "learning_rate": 1.8792049655877462e-05, "loss": 0.7223, "step": 10479 }, { "epoch": 11.936752136752137, "grad_norm": 0.20834968984127045, "learning_rate": 1.8787536514816096e-05, "loss": 0.6667, "step": 10480 }, { "epoch": 11.937891737891738, "grad_norm": 0.23001785576343536, "learning_rate": 1.878302358953066e-05, "loss": 0.4773, "step": 10481 }, { "epoch": 11.93903133903134, "grad_norm": 0.23865705728530884, "learning_rate": 1.8778510880177923e-05, "loss": 0.7573, "step": 10482 }, { "epoch": 11.94017094017094, "grad_norm": 0.24285709857940674, "learning_rate": 1.8773998386914602e-05, "loss": 0.6665, "step": 10483 }, { "epoch": 11.941310541310541, "grad_norm": 0.2388620227575302, "learning_rate": 1.8769486109897445e-05, "loss": 0.5565, "step": 10484 }, { "epoch": 11.942450142450143, "grad_norm": 0.27939802408218384, "learning_rate": 1.8764974049283168e-05, "loss": 0.3106, "step": 10485 }, { "epoch": 11.943589743589744, "grad_norm": 0.24518218636512756, "learning_rate": 1.8760462205228484e-05, "loss": 0.5993, "step": 10486 }, { "epoch": 11.944729344729344, "grad_norm": 0.2060171514749527, "learning_rate": 1.8755950577890107e-05, "loss": 0.7309, "step": 10487 }, { "epoch": 11.945868945868947, "grad_norm": 0.17457851767539978, "learning_rate": 1.8751439167424744e-05, "loss": 0.7887, "step": 10488 }, { "epoch": 11.947008547008547, "grad_norm": 0.277523010969162, "learning_rate": 1.874692797398908e-05, "loss": 0.6043, "step": 10489 }, { "epoch": 11.948148148148148, "grad_norm": 0.20490017533302307, "learning_rate": 1.8742416997739806e-05, "loss": 0.7346, "step": 10490 }, { "epoch": 11.94928774928775, "grad_norm": 0.2505870759487152, "learning_rate": 1.8737906238833593e-05, "loss": 0.4213, "step": 10491 }, { "epoch": 11.95042735042735, "grad_norm": 0.23635120689868927, "learning_rate": 1.8733395697427124e-05, "loss": 0.5514, "step": 10492 }, { "epoch": 11.951566951566951, "grad_norm": 0.22870029509067535, "learning_rate": 1.8728885373677047e-05, "loss": 0.6564, "step": 10493 }, { "epoch": 11.952706552706553, "grad_norm": 0.20036360621452332, "learning_rate": 1.8724375267740035e-05, "loss": 0.7173, "step": 10494 }, { "epoch": 11.953846153846154, "grad_norm": 0.21915395557880402, "learning_rate": 1.8719865379772722e-05, "loss": 0.5279, "step": 10495 }, { "epoch": 11.954985754985755, "grad_norm": 0.21548305451869965, "learning_rate": 1.8715355709931758e-05, "loss": 0.6115, "step": 10496 }, { "epoch": 11.956125356125357, "grad_norm": 0.21113137900829315, "learning_rate": 1.8710846258373766e-05, "loss": 0.6758, "step": 10497 }, { "epoch": 11.957264957264957, "grad_norm": 0.20005397498607635, "learning_rate": 1.8706337025255384e-05, "loss": 0.8024, "step": 10498 }, { "epoch": 11.958404558404558, "grad_norm": 0.2555124759674072, "learning_rate": 1.8701828010733223e-05, "loss": 0.6329, "step": 10499 }, { "epoch": 11.95954415954416, "grad_norm": 0.2077946662902832, "learning_rate": 1.8697319214963897e-05, "loss": 0.5798, "step": 10500 }, { "epoch": 11.96068376068376, "grad_norm": 0.16201630234718323, "learning_rate": 1.8692810638104013e-05, "loss": 0.9224, "step": 10501 }, { "epoch": 11.961823361823361, "grad_norm": 0.19255182147026062, "learning_rate": 1.8688302280310152e-05, "loss": 0.7875, "step": 10502 }, { "epoch": 11.962962962962964, "grad_norm": 0.1905059814453125, "learning_rate": 1.8683794141738915e-05, "loss": 0.8075, "step": 10503 }, { "epoch": 11.964102564102564, "grad_norm": 0.220254048705101, "learning_rate": 1.8679286222546873e-05, "loss": 0.7947, "step": 10504 }, { "epoch": 11.965242165242165, "grad_norm": 0.20022335648536682, "learning_rate": 1.8674778522890605e-05, "loss": 0.7204, "step": 10505 }, { "epoch": 11.966381766381767, "grad_norm": 0.2103656828403473, "learning_rate": 1.8670271042926675e-05, "loss": 0.6555, "step": 10506 }, { "epoch": 11.967521367521368, "grad_norm": 0.21049273014068604, "learning_rate": 1.866576378281164e-05, "loss": 0.6235, "step": 10507 }, { "epoch": 11.968660968660968, "grad_norm": 0.23613597452640533, "learning_rate": 1.8661256742702036e-05, "loss": 0.6448, "step": 10508 }, { "epoch": 11.96980056980057, "grad_norm": 0.21678756177425385, "learning_rate": 1.8656749922754424e-05, "loss": 0.8768, "step": 10509 }, { "epoch": 11.970940170940171, "grad_norm": 0.20622332394123077, "learning_rate": 1.8652243323125334e-05, "loss": 0.7321, "step": 10510 }, { "epoch": 11.972079772079772, "grad_norm": 0.22001869976520538, "learning_rate": 1.8647736943971287e-05, "loss": 0.6975, "step": 10511 }, { "epoch": 11.973219373219374, "grad_norm": 0.23506364226341248, "learning_rate": 1.864323078544881e-05, "loss": 0.7675, "step": 10512 }, { "epoch": 11.974358974358974, "grad_norm": 0.23605220019817352, "learning_rate": 1.86387248477144e-05, "loss": 0.7852, "step": 10513 }, { "epoch": 11.975498575498575, "grad_norm": 0.22302347421646118, "learning_rate": 1.863421913092458e-05, "loss": 0.7394, "step": 10514 }, { "epoch": 11.976638176638177, "grad_norm": 0.17315006256103516, "learning_rate": 1.862971363523583e-05, "loss": 0.6575, "step": 10515 }, { "epoch": 11.977777777777778, "grad_norm": 0.22734105587005615, "learning_rate": 1.8625208360804646e-05, "loss": 0.7262, "step": 10516 }, { "epoch": 11.978917378917378, "grad_norm": 0.2036786675453186, "learning_rate": 1.86207033077875e-05, "loss": 0.6624, "step": 10517 }, { "epoch": 11.98005698005698, "grad_norm": 0.24453534185886383, "learning_rate": 1.8616198476340878e-05, "loss": 0.7948, "step": 10518 }, { "epoch": 11.981196581196581, "grad_norm": 0.19842353463172913, "learning_rate": 1.8611693866621225e-05, "loss": 0.849, "step": 10519 }, { "epoch": 11.982336182336182, "grad_norm": 0.22061681747436523, "learning_rate": 1.8607189478785025e-05, "loss": 0.5082, "step": 10520 }, { "epoch": 11.983475783475784, "grad_norm": 0.25918304920196533, "learning_rate": 1.860268531298871e-05, "loss": 0.5882, "step": 10521 }, { "epoch": 11.984615384615385, "grad_norm": 0.26322653889656067, "learning_rate": 1.8598181369388733e-05, "loss": 0.3666, "step": 10522 }, { "epoch": 11.985754985754985, "grad_norm": 0.24528221786022186, "learning_rate": 1.8593677648141515e-05, "loss": 0.6525, "step": 10523 }, { "epoch": 11.986894586894588, "grad_norm": 0.24048669636249542, "learning_rate": 1.85891741494035e-05, "loss": 0.7327, "step": 10524 }, { "epoch": 11.988034188034188, "grad_norm": 0.2024715095758438, "learning_rate": 1.8584670873331087e-05, "loss": 0.6911, "step": 10525 }, { "epoch": 11.989173789173789, "grad_norm": 0.21110621094703674, "learning_rate": 1.8580167820080703e-05, "loss": 0.5711, "step": 10526 }, { "epoch": 11.990313390313391, "grad_norm": 0.19894184172153473, "learning_rate": 1.857566498980874e-05, "loss": 0.8353, "step": 10527 }, { "epoch": 11.991452991452991, "grad_norm": 0.24942967295646667, "learning_rate": 1.8571162382671602e-05, "loss": 0.56, "step": 10528 }, { "epoch": 11.992592592592592, "grad_norm": 0.23511634767055511, "learning_rate": 1.8566659998825674e-05, "loss": 0.6542, "step": 10529 }, { "epoch": 11.993732193732194, "grad_norm": 0.2347230464220047, "learning_rate": 1.8562157838427325e-05, "loss": 0.6325, "step": 10530 }, { "epoch": 11.994871794871795, "grad_norm": 0.2472597360610962, "learning_rate": 1.8557655901632952e-05, "loss": 0.696, "step": 10531 }, { "epoch": 11.996011396011395, "grad_norm": 0.19690778851509094, "learning_rate": 1.8553154188598897e-05, "loss": 0.7692, "step": 10532 }, { "epoch": 11.997150997150998, "grad_norm": 0.1885654628276825, "learning_rate": 1.8548652699481535e-05, "loss": 0.728, "step": 10533 }, { "epoch": 11.998290598290598, "grad_norm": 0.19377920031547546, "learning_rate": 1.8544151434437202e-05, "loss": 0.824, "step": 10534 }, { "epoch": 11.999430199430199, "grad_norm": 0.2881132662296295, "learning_rate": 1.8539650393622246e-05, "loss": 0.5325, "step": 10535 }, { "epoch": 12.0, "grad_norm": 0.30600935220718384, "learning_rate": 1.8535149577192993e-05, "loss": 0.5586, "step": 10536 }, { "epoch": 12.0011396011396, "grad_norm": 0.23088836669921875, "learning_rate": 1.853064898530578e-05, "loss": 0.6768, "step": 10537 }, { "epoch": 12.002279202279203, "grad_norm": 0.1733100265264511, "learning_rate": 1.8526148618116913e-05, "loss": 0.8816, "step": 10538 }, { "epoch": 12.003418803418803, "grad_norm": 0.21051400899887085, "learning_rate": 1.852164847578271e-05, "loss": 0.732, "step": 10539 }, { "epoch": 12.004558404558404, "grad_norm": 0.17774631083011627, "learning_rate": 1.8517148558459468e-05, "loss": 0.8163, "step": 10540 }, { "epoch": 12.005698005698006, "grad_norm": 0.17382650077342987, "learning_rate": 1.851264886630348e-05, "loss": 0.9052, "step": 10541 }, { "epoch": 12.006837606837607, "grad_norm": 0.2258656919002533, "learning_rate": 1.850814939947105e-05, "loss": 0.5748, "step": 10542 }, { "epoch": 12.007977207977207, "grad_norm": 0.19269326329231262, "learning_rate": 1.850365015811843e-05, "loss": 0.6872, "step": 10543 }, { "epoch": 12.00911680911681, "grad_norm": 0.20680245757102966, "learning_rate": 1.8499151142401915e-05, "loss": 0.6299, "step": 10544 }, { "epoch": 12.01025641025641, "grad_norm": 0.16280394792556763, "learning_rate": 1.8494652352477755e-05, "loss": 0.711, "step": 10545 }, { "epoch": 12.01139601139601, "grad_norm": 0.19511796534061432, "learning_rate": 1.849015378850221e-05, "loss": 0.6957, "step": 10546 }, { "epoch": 12.012535612535613, "grad_norm": 0.18030062317848206, "learning_rate": 1.8485655450631528e-05, "loss": 0.5629, "step": 10547 }, { "epoch": 12.013675213675214, "grad_norm": 0.2140948325395584, "learning_rate": 1.8481157339021946e-05, "loss": 0.724, "step": 10548 }, { "epoch": 12.014814814814814, "grad_norm": 0.17993023991584778, "learning_rate": 1.8476659453829692e-05, "loss": 0.778, "step": 10549 }, { "epoch": 12.015954415954416, "grad_norm": 0.26871827244758606, "learning_rate": 1.8472161795211003e-05, "loss": 0.5918, "step": 10550 }, { "epoch": 12.017094017094017, "grad_norm": 0.2065512090921402, "learning_rate": 1.8467664363322075e-05, "loss": 0.6549, "step": 10551 }, { "epoch": 12.018233618233618, "grad_norm": 0.1633397936820984, "learning_rate": 1.8463167158319127e-05, "loss": 0.8051, "step": 10552 }, { "epoch": 12.01937321937322, "grad_norm": 0.1748124659061432, "learning_rate": 1.845867018035837e-05, "loss": 0.7225, "step": 10553 }, { "epoch": 12.02051282051282, "grad_norm": 0.21973411738872528, "learning_rate": 1.845417342959598e-05, "loss": 0.6511, "step": 10554 }, { "epoch": 12.021652421652421, "grad_norm": 0.20660950243473053, "learning_rate": 1.8449676906188154e-05, "loss": 0.6641, "step": 10555 }, { "epoch": 12.022792022792023, "grad_norm": 0.18575088679790497, "learning_rate": 1.8445180610291056e-05, "loss": 0.7962, "step": 10556 }, { "epoch": 12.023931623931624, "grad_norm": 0.19864317774772644, "learning_rate": 1.844068454206087e-05, "loss": 0.6913, "step": 10557 }, { "epoch": 12.025071225071224, "grad_norm": 0.1899966448545456, "learning_rate": 1.843618870165374e-05, "loss": 0.746, "step": 10558 }, { "epoch": 12.026210826210827, "grad_norm": 0.21713949739933014, "learning_rate": 1.8431693089225833e-05, "loss": 0.5305, "step": 10559 }, { "epoch": 12.027350427350427, "grad_norm": 0.23974689841270447, "learning_rate": 1.842719770493328e-05, "loss": 0.5182, "step": 10560 }, { "epoch": 12.028490028490028, "grad_norm": 0.22211728990077972, "learning_rate": 1.8422702548932232e-05, "loss": 0.6479, "step": 10561 }, { "epoch": 12.02962962962963, "grad_norm": 0.18979617953300476, "learning_rate": 1.8418207621378808e-05, "loss": 0.7081, "step": 10562 }, { "epoch": 12.03076923076923, "grad_norm": 0.286356121301651, "learning_rate": 1.841371292242913e-05, "loss": 0.3239, "step": 10563 }, { "epoch": 12.031908831908831, "grad_norm": 0.22852590680122375, "learning_rate": 1.840921845223932e-05, "loss": 0.5668, "step": 10564 }, { "epoch": 12.033048433048434, "grad_norm": 0.1991380751132965, "learning_rate": 1.840472421096548e-05, "loss": 0.6964, "step": 10565 }, { "epoch": 12.034188034188034, "grad_norm": 0.18954479694366455, "learning_rate": 1.84002301987637e-05, "loss": 0.6792, "step": 10566 }, { "epoch": 12.035327635327635, "grad_norm": 0.26388251781463623, "learning_rate": 1.839573641579008e-05, "loss": 0.4905, "step": 10567 }, { "epoch": 12.036467236467237, "grad_norm": 0.19882208108901978, "learning_rate": 1.839124286220069e-05, "loss": 0.7282, "step": 10568 }, { "epoch": 12.037606837606837, "grad_norm": 0.21259771287441254, "learning_rate": 1.8386749538151618e-05, "loss": 0.6895, "step": 10569 }, { "epoch": 12.038746438746438, "grad_norm": 0.2104126363992691, "learning_rate": 1.8382256443798916e-05, "loss": 0.721, "step": 10570 }, { "epoch": 12.03988603988604, "grad_norm": 0.20652472972869873, "learning_rate": 1.8377763579298647e-05, "loss": 0.8174, "step": 10571 }, { "epoch": 12.04102564102564, "grad_norm": 0.1936299055814743, "learning_rate": 1.837327094480686e-05, "loss": 0.6497, "step": 10572 }, { "epoch": 12.042165242165241, "grad_norm": 0.17510229349136353, "learning_rate": 1.83687785404796e-05, "loss": 0.7237, "step": 10573 }, { "epoch": 12.043304843304844, "grad_norm": 0.2524392008781433, "learning_rate": 1.8364286366472892e-05, "loss": 0.6238, "step": 10574 }, { "epoch": 12.044444444444444, "grad_norm": 0.15890449285507202, "learning_rate": 1.835979442294276e-05, "loss": 0.6447, "step": 10575 }, { "epoch": 12.045584045584045, "grad_norm": 0.2702021896839142, "learning_rate": 1.8355302710045242e-05, "loss": 0.5137, "step": 10576 }, { "epoch": 12.046723646723647, "grad_norm": 0.23867960274219513, "learning_rate": 1.8350811227936326e-05, "loss": 0.6878, "step": 10577 }, { "epoch": 12.047863247863248, "grad_norm": 0.2099626511335373, "learning_rate": 1.8346319976772028e-05, "loss": 0.6631, "step": 10578 }, { "epoch": 12.049002849002848, "grad_norm": 0.22138890624046326, "learning_rate": 1.834182895670833e-05, "loss": 0.6583, "step": 10579 }, { "epoch": 12.05014245014245, "grad_norm": 0.25138336420059204, "learning_rate": 1.8337338167901224e-05, "loss": 0.4966, "step": 10580 }, { "epoch": 12.051282051282051, "grad_norm": 0.1850442737340927, "learning_rate": 1.8332847610506688e-05, "loss": 0.6409, "step": 10581 }, { "epoch": 12.052421652421652, "grad_norm": 0.1915789395570755, "learning_rate": 1.8328357284680687e-05, "loss": 0.7341, "step": 10582 }, { "epoch": 12.053561253561254, "grad_norm": 0.19199860095977783, "learning_rate": 1.8323867190579183e-05, "loss": 0.8708, "step": 10583 }, { "epoch": 12.054700854700855, "grad_norm": 0.19183555245399475, "learning_rate": 1.8319377328358133e-05, "loss": 0.8864, "step": 10584 }, { "epoch": 12.055840455840455, "grad_norm": 0.1983659416437149, "learning_rate": 1.831488769817348e-05, "loss": 0.6469, "step": 10585 }, { "epoch": 12.056980056980057, "grad_norm": 0.21458525955677032, "learning_rate": 1.8310398300181153e-05, "loss": 0.6234, "step": 10586 }, { "epoch": 12.058119658119658, "grad_norm": 0.2103472352027893, "learning_rate": 1.83059091345371e-05, "loss": 0.8083, "step": 10587 }, { "epoch": 12.059259259259258, "grad_norm": 0.19039538502693176, "learning_rate": 1.830142020139723e-05, "loss": 0.79, "step": 10588 }, { "epoch": 12.06039886039886, "grad_norm": 0.2611573040485382, "learning_rate": 1.829693150091746e-05, "loss": 0.5305, "step": 10589 }, { "epoch": 12.061538461538461, "grad_norm": 0.25013938546180725, "learning_rate": 1.8292443033253682e-05, "loss": 0.6069, "step": 10590 }, { "epoch": 12.062678062678062, "grad_norm": 0.2411351501941681, "learning_rate": 1.8287954798561815e-05, "loss": 0.5709, "step": 10591 }, { "epoch": 12.063817663817664, "grad_norm": 0.20945434272289276, "learning_rate": 1.8283466796997728e-05, "loss": 0.6245, "step": 10592 }, { "epoch": 12.064957264957265, "grad_norm": 0.22989006340503693, "learning_rate": 1.827897902871732e-05, "loss": 0.53, "step": 10593 }, { "epoch": 12.066096866096865, "grad_norm": 0.19135035574436188, "learning_rate": 1.8274491493876442e-05, "loss": 0.6933, "step": 10594 }, { "epoch": 12.067236467236468, "grad_norm": 0.2087395340204239, "learning_rate": 1.8270004192630976e-05, "loss": 0.8001, "step": 10595 }, { "epoch": 12.068376068376068, "grad_norm": 0.24208243191242218, "learning_rate": 1.8265517125136768e-05, "loss": 0.6004, "step": 10596 }, { "epoch": 12.069515669515669, "grad_norm": 0.21831801533699036, "learning_rate": 1.8261030291549664e-05, "loss": 0.7293, "step": 10597 }, { "epoch": 12.070655270655271, "grad_norm": 0.22201023995876312, "learning_rate": 1.825654369202552e-05, "loss": 0.7607, "step": 10598 }, { "epoch": 12.071794871794872, "grad_norm": 0.19370655715465546, "learning_rate": 1.8252057326720157e-05, "loss": 0.6662, "step": 10599 }, { "epoch": 12.072934472934472, "grad_norm": 0.21607200801372528, "learning_rate": 1.8247571195789398e-05, "loss": 0.6671, "step": 10600 }, { "epoch": 12.074074074074074, "grad_norm": 0.18963831663131714, "learning_rate": 1.824308529938906e-05, "loss": 0.6537, "step": 10601 }, { "epoch": 12.075213675213675, "grad_norm": 0.22693610191345215, "learning_rate": 1.8238599637674954e-05, "loss": 0.5216, "step": 10602 }, { "epoch": 12.076353276353275, "grad_norm": 0.20524661242961884, "learning_rate": 1.823411421080288e-05, "loss": 0.8214, "step": 10603 }, { "epoch": 12.077492877492878, "grad_norm": 0.205473855137825, "learning_rate": 1.8229629018928617e-05, "loss": 0.553, "step": 10604 }, { "epoch": 12.078632478632478, "grad_norm": 0.1943778395652771, "learning_rate": 1.8225144062207966e-05, "loss": 0.681, "step": 10605 }, { "epoch": 12.079772079772079, "grad_norm": 0.20552857220172882, "learning_rate": 1.8220659340796684e-05, "loss": 0.797, "step": 10606 }, { "epoch": 12.080911680911681, "grad_norm": 0.21533159911632538, "learning_rate": 1.8216174854850556e-05, "loss": 0.6133, "step": 10607 }, { "epoch": 12.082051282051282, "grad_norm": 0.17289423942565918, "learning_rate": 1.8211690604525317e-05, "loss": 0.8102, "step": 10608 }, { "epoch": 12.083190883190884, "grad_norm": 0.21243703365325928, "learning_rate": 1.8207206589976734e-05, "loss": 0.8116, "step": 10609 }, { "epoch": 12.084330484330485, "grad_norm": 0.19925470650196075, "learning_rate": 1.8202722811360556e-05, "loss": 0.8322, "step": 10610 }, { "epoch": 12.085470085470085, "grad_norm": 0.18746793270111084, "learning_rate": 1.81982392688325e-05, "loss": 0.8362, "step": 10611 }, { "epoch": 12.086609686609687, "grad_norm": 0.2454654425382614, "learning_rate": 1.8193755962548304e-05, "loss": 0.5467, "step": 10612 }, { "epoch": 12.087749287749288, "grad_norm": 0.21298150718212128, "learning_rate": 1.8189272892663678e-05, "loss": 0.7309, "step": 10613 }, { "epoch": 12.088888888888889, "grad_norm": 0.21078455448150635, "learning_rate": 1.8184790059334338e-05, "loss": 0.6125, "step": 10614 }, { "epoch": 12.090028490028491, "grad_norm": 0.2133013904094696, "learning_rate": 1.8180307462715974e-05, "loss": 0.6139, "step": 10615 }, { "epoch": 12.091168091168091, "grad_norm": 0.2306240051984787, "learning_rate": 1.8175825102964293e-05, "loss": 0.5439, "step": 10616 }, { "epoch": 12.092307692307692, "grad_norm": 0.25794631242752075, "learning_rate": 1.8171342980234968e-05, "loss": 0.551, "step": 10617 }, { "epoch": 12.093447293447294, "grad_norm": 0.16845683753490448, "learning_rate": 1.8166861094683685e-05, "loss": 0.7664, "step": 10618 }, { "epoch": 12.094586894586895, "grad_norm": 0.21890786290168762, "learning_rate": 1.8162379446466096e-05, "loss": 0.7552, "step": 10619 }, { "epoch": 12.095726495726495, "grad_norm": 0.2112084925174713, "learning_rate": 1.815789803573788e-05, "loss": 0.6463, "step": 10620 }, { "epoch": 12.096866096866098, "grad_norm": 0.2195863425731659, "learning_rate": 1.8153416862654683e-05, "loss": 0.6289, "step": 10621 }, { "epoch": 12.098005698005698, "grad_norm": 0.18846504390239716, "learning_rate": 1.8148935927372143e-05, "loss": 0.8093, "step": 10622 }, { "epoch": 12.099145299145299, "grad_norm": 0.22838300466537476, "learning_rate": 1.8144455230045904e-05, "loss": 0.8178, "step": 10623 }, { "epoch": 12.100284900284901, "grad_norm": 0.2297404259443283, "learning_rate": 1.8139974770831584e-05, "loss": 0.5972, "step": 10624 }, { "epoch": 12.101424501424502, "grad_norm": 0.20052561163902283, "learning_rate": 1.8135494549884807e-05, "loss": 0.7274, "step": 10625 }, { "epoch": 12.102564102564102, "grad_norm": 0.19281281530857086, "learning_rate": 1.813101456736118e-05, "loss": 0.8087, "step": 10626 }, { "epoch": 12.103703703703705, "grad_norm": 0.18218734860420227, "learning_rate": 1.8126534823416306e-05, "loss": 0.6348, "step": 10627 }, { "epoch": 12.104843304843305, "grad_norm": 0.1917387694120407, "learning_rate": 1.8122055318205783e-05, "loss": 0.5225, "step": 10628 }, { "epoch": 12.105982905982906, "grad_norm": 0.19612638652324677, "learning_rate": 1.8117576051885195e-05, "loss": 0.4922, "step": 10629 }, { "epoch": 12.107122507122508, "grad_norm": 0.18411149084568024, "learning_rate": 1.8113097024610105e-05, "loss": 0.7932, "step": 10630 }, { "epoch": 12.108262108262108, "grad_norm": 0.19481295347213745, "learning_rate": 1.8108618236536103e-05, "loss": 0.6664, "step": 10631 }, { "epoch": 12.109401709401709, "grad_norm": 0.20142966508865356, "learning_rate": 1.8104139687818744e-05, "loss": 0.7496, "step": 10632 }, { "epoch": 12.110541310541311, "grad_norm": 0.20151075720787048, "learning_rate": 1.809966137861357e-05, "loss": 0.7564, "step": 10633 }, { "epoch": 12.111680911680912, "grad_norm": 0.22322115302085876, "learning_rate": 1.8095183309076142e-05, "loss": 0.6657, "step": 10634 }, { "epoch": 12.112820512820512, "grad_norm": 0.25068724155426025, "learning_rate": 1.809070547936198e-05, "loss": 0.7064, "step": 10635 }, { "epoch": 12.113960113960115, "grad_norm": 0.18898867070674896, "learning_rate": 1.808622788962662e-05, "loss": 0.7723, "step": 10636 }, { "epoch": 12.115099715099715, "grad_norm": 0.2050749659538269, "learning_rate": 1.8081750540025574e-05, "loss": 0.6587, "step": 10637 }, { "epoch": 12.116239316239316, "grad_norm": 0.2101127654314041, "learning_rate": 1.8077273430714363e-05, "loss": 0.7324, "step": 10638 }, { "epoch": 12.117378917378918, "grad_norm": 0.2138822227716446, "learning_rate": 1.8072796561848477e-05, "loss": 0.5844, "step": 10639 }, { "epoch": 12.118518518518519, "grad_norm": 0.21547774970531464, "learning_rate": 1.806831993358342e-05, "loss": 0.7531, "step": 10640 }, { "epoch": 12.11965811965812, "grad_norm": 0.17853891849517822, "learning_rate": 1.8063843546074663e-05, "loss": 0.743, "step": 10641 }, { "epoch": 12.120797720797722, "grad_norm": 0.16555222868919373, "learning_rate": 1.8059367399477707e-05, "loss": 0.7745, "step": 10642 }, { "epoch": 12.121937321937322, "grad_norm": 0.22953662276268005, "learning_rate": 1.8054891493948003e-05, "loss": 0.7267, "step": 10643 }, { "epoch": 12.123076923076923, "grad_norm": 0.16581910848617554, "learning_rate": 1.805041582964102e-05, "loss": 0.968, "step": 10644 }, { "epoch": 12.124216524216525, "grad_norm": 0.2142912894487381, "learning_rate": 1.8045940406712206e-05, "loss": 0.7086, "step": 10645 }, { "epoch": 12.125356125356126, "grad_norm": 0.21221700310707092, "learning_rate": 1.8041465225317006e-05, "loss": 0.4142, "step": 10646 }, { "epoch": 12.126495726495726, "grad_norm": 0.1971280425786972, "learning_rate": 1.8036990285610853e-05, "loss": 0.5309, "step": 10647 }, { "epoch": 12.127635327635328, "grad_norm": 0.24733866751194, "learning_rate": 1.803251558774918e-05, "loss": 0.5366, "step": 10648 }, { "epoch": 12.128774928774929, "grad_norm": 0.22367709875106812, "learning_rate": 1.8028041131887397e-05, "loss": 0.7881, "step": 10649 }, { "epoch": 12.12991452991453, "grad_norm": 0.18362602591514587, "learning_rate": 1.802356691818092e-05, "loss": 0.5632, "step": 10650 }, { "epoch": 12.131054131054132, "grad_norm": 0.23956923186779022, "learning_rate": 1.8019092946785147e-05, "loss": 0.5352, "step": 10651 }, { "epoch": 12.132193732193732, "grad_norm": 0.2240896075963974, "learning_rate": 1.8014619217855465e-05, "loss": 0.6053, "step": 10652 }, { "epoch": 12.133333333333333, "grad_norm": 0.19277696311473846, "learning_rate": 1.801014573154728e-05, "loss": 0.7914, "step": 10653 }, { "epoch": 12.134472934472935, "grad_norm": 0.21935196220874786, "learning_rate": 1.8005672488015952e-05, "loss": 0.6107, "step": 10654 }, { "epoch": 12.135612535612536, "grad_norm": 0.1736196130514145, "learning_rate": 1.800119948741686e-05, "loss": 0.6358, "step": 10655 }, { "epoch": 12.136752136752136, "grad_norm": 0.1652590036392212, "learning_rate": 1.7996726729905348e-05, "loss": 0.6313, "step": 10656 }, { "epoch": 12.137891737891739, "grad_norm": 0.2022676318883896, "learning_rate": 1.7992254215636782e-05, "loss": 0.5496, "step": 10657 }, { "epoch": 12.13903133903134, "grad_norm": 0.16930244863033295, "learning_rate": 1.798778194476649e-05, "loss": 0.8777, "step": 10658 }, { "epoch": 12.14017094017094, "grad_norm": 0.22526149451732635, "learning_rate": 1.7983309917449824e-05, "loss": 0.6807, "step": 10659 }, { "epoch": 12.141310541310542, "grad_norm": 0.18134568631649017, "learning_rate": 1.7978838133842094e-05, "loss": 0.5725, "step": 10660 }, { "epoch": 12.142450142450143, "grad_norm": 0.23000963032245636, "learning_rate": 1.797436659409863e-05, "loss": 0.7334, "step": 10661 }, { "epoch": 12.143589743589743, "grad_norm": 0.1757991462945938, "learning_rate": 1.7969895298374727e-05, "loss": 0.7157, "step": 10662 }, { "epoch": 12.144729344729345, "grad_norm": 0.21834725141525269, "learning_rate": 1.796542424682569e-05, "loss": 0.5902, "step": 10663 }, { "epoch": 12.145868945868946, "grad_norm": 0.1912463903427124, "learning_rate": 1.7960953439606824e-05, "loss": 0.7046, "step": 10664 }, { "epoch": 12.147008547008546, "grad_norm": 0.2624971270561218, "learning_rate": 1.7956482876873397e-05, "loss": 0.6282, "step": 10665 }, { "epoch": 12.148148148148149, "grad_norm": 0.20026636123657227, "learning_rate": 1.7952012558780696e-05, "loss": 0.6087, "step": 10666 }, { "epoch": 12.14928774928775, "grad_norm": 0.29310810565948486, "learning_rate": 1.7947542485483976e-05, "loss": 0.2775, "step": 10667 }, { "epoch": 12.15042735042735, "grad_norm": 0.18996065855026245, "learning_rate": 1.7943072657138506e-05, "loss": 0.6303, "step": 10668 }, { "epoch": 12.151566951566952, "grad_norm": 0.24515536427497864, "learning_rate": 1.7938603073899518e-05, "loss": 0.5606, "step": 10669 }, { "epoch": 12.152706552706553, "grad_norm": 0.22369220852851868, "learning_rate": 1.7934133735922274e-05, "loss": 0.5143, "step": 10670 }, { "epoch": 12.153846153846153, "grad_norm": 0.2261112481355667, "learning_rate": 1.792966464336199e-05, "loss": 0.6097, "step": 10671 }, { "epoch": 12.154985754985756, "grad_norm": 0.20036981999874115, "learning_rate": 1.79251957963739e-05, "loss": 0.6883, "step": 10672 }, { "epoch": 12.156125356125356, "grad_norm": 0.17598126828670502, "learning_rate": 1.7920727195113213e-05, "loss": 0.8462, "step": 10673 }, { "epoch": 12.157264957264957, "grad_norm": 0.2102530598640442, "learning_rate": 1.7916258839735133e-05, "loss": 0.5979, "step": 10674 }, { "epoch": 12.158404558404559, "grad_norm": 0.21557404100894928, "learning_rate": 1.7911790730394877e-05, "loss": 0.4804, "step": 10675 }, { "epoch": 12.15954415954416, "grad_norm": 0.15740369260311127, "learning_rate": 1.7907322867247613e-05, "loss": 0.6584, "step": 10676 }, { "epoch": 12.16068376068376, "grad_norm": 0.23590488731861115, "learning_rate": 1.790285525044853e-05, "loss": 0.5028, "step": 10677 }, { "epoch": 12.161823361823362, "grad_norm": 0.207161083817482, "learning_rate": 1.789838788015281e-05, "loss": 0.6426, "step": 10678 }, { "epoch": 12.162962962962963, "grad_norm": 0.19174276292324066, "learning_rate": 1.78939207565156e-05, "loss": 0.7633, "step": 10679 }, { "epoch": 12.164102564102564, "grad_norm": 0.20363642275333405, "learning_rate": 1.788945387969207e-05, "loss": 0.6449, "step": 10680 }, { "epoch": 12.165242165242166, "grad_norm": 0.217159241437912, "learning_rate": 1.7884987249837355e-05, "loss": 0.5507, "step": 10681 }, { "epoch": 12.166381766381766, "grad_norm": 0.16143184900283813, "learning_rate": 1.788052086710661e-05, "loss": 0.663, "step": 10682 }, { "epoch": 12.167521367521367, "grad_norm": 0.19331926107406616, "learning_rate": 1.787605473165494e-05, "loss": 0.6403, "step": 10683 }, { "epoch": 12.16866096866097, "grad_norm": 0.2204113006591797, "learning_rate": 1.787158884363749e-05, "loss": 0.5189, "step": 10684 }, { "epoch": 12.16980056980057, "grad_norm": 0.19390377402305603, "learning_rate": 1.786712320320935e-05, "loss": 0.6982, "step": 10685 }, { "epoch": 12.17094017094017, "grad_norm": 0.1998767852783203, "learning_rate": 1.7862657810525644e-05, "loss": 0.6942, "step": 10686 }, { "epoch": 12.172079772079773, "grad_norm": 0.2072298377752304, "learning_rate": 1.785819266574147e-05, "loss": 0.8303, "step": 10687 }, { "epoch": 12.173219373219373, "grad_norm": 0.20465919375419617, "learning_rate": 1.785372776901189e-05, "loss": 0.6879, "step": 10688 }, { "epoch": 12.174358974358974, "grad_norm": 0.2517319917678833, "learning_rate": 1.7849263120492006e-05, "loss": 0.3552, "step": 10689 }, { "epoch": 12.175498575498576, "grad_norm": 0.20104607939720154, "learning_rate": 1.7844798720336876e-05, "loss": 0.7055, "step": 10690 }, { "epoch": 12.176638176638177, "grad_norm": 0.2130606472492218, "learning_rate": 1.7840334568701565e-05, "loss": 0.8386, "step": 10691 }, { "epoch": 12.177777777777777, "grad_norm": 0.1965247541666031, "learning_rate": 1.783587066574112e-05, "loss": 0.7466, "step": 10692 }, { "epoch": 12.17891737891738, "grad_norm": 0.18730469048023224, "learning_rate": 1.783140701161059e-05, "loss": 0.5807, "step": 10693 }, { "epoch": 12.18005698005698, "grad_norm": 0.24426794052124023, "learning_rate": 1.7826943606465007e-05, "loss": 0.5245, "step": 10694 }, { "epoch": 12.18119658119658, "grad_norm": 0.19502083957195282, "learning_rate": 1.7822480450459405e-05, "loss": 0.7117, "step": 10695 }, { "epoch": 12.182336182336183, "grad_norm": 0.20117051899433136, "learning_rate": 1.7818017543748783e-05, "loss": 0.7298, "step": 10696 }, { "epoch": 12.183475783475783, "grad_norm": 0.19388437271118164, "learning_rate": 1.7813554886488165e-05, "loss": 0.8489, "step": 10697 }, { "epoch": 12.184615384615384, "grad_norm": 0.15525661408901215, "learning_rate": 1.7809092478832557e-05, "loss": 0.9988, "step": 10698 }, { "epoch": 12.185754985754986, "grad_norm": 0.20999334752559662, "learning_rate": 1.780463032093694e-05, "loss": 0.7525, "step": 10699 }, { "epoch": 12.186894586894587, "grad_norm": 0.2047741711139679, "learning_rate": 1.78001684129563e-05, "loss": 0.7616, "step": 10700 }, { "epoch": 12.188034188034187, "grad_norm": 0.18242231011390686, "learning_rate": 1.7795706755045602e-05, "loss": 0.7656, "step": 10701 }, { "epoch": 12.18917378917379, "grad_norm": 0.17030739784240723, "learning_rate": 1.779124534735983e-05, "loss": 0.788, "step": 10702 }, { "epoch": 12.19031339031339, "grad_norm": 0.2390817552804947, "learning_rate": 1.7786784190053922e-05, "loss": 0.4655, "step": 10703 }, { "epoch": 12.19145299145299, "grad_norm": 0.24233390390872955, "learning_rate": 1.7782323283282848e-05, "loss": 0.7488, "step": 10704 }, { "epoch": 12.192592592592593, "grad_norm": 0.25977823138237, "learning_rate": 1.7777862627201525e-05, "loss": 0.5433, "step": 10705 }, { "epoch": 12.193732193732194, "grad_norm": 0.21289107203483582, "learning_rate": 1.77734022219649e-05, "loss": 0.7498, "step": 10706 }, { "epoch": 12.194871794871794, "grad_norm": 0.21533629298210144, "learning_rate": 1.7768942067727877e-05, "loss": 0.6468, "step": 10707 }, { "epoch": 12.196011396011396, "grad_norm": 0.2645060122013092, "learning_rate": 1.776448216464539e-05, "loss": 0.5044, "step": 10708 }, { "epoch": 12.197150997150997, "grad_norm": 0.19389687478542328, "learning_rate": 1.7760022512872337e-05, "loss": 0.6738, "step": 10709 }, { "epoch": 12.198290598290598, "grad_norm": 0.24795565009117126, "learning_rate": 1.775556311256361e-05, "loss": 0.6541, "step": 10710 }, { "epoch": 12.1994301994302, "grad_norm": 0.2238483428955078, "learning_rate": 1.7751103963874104e-05, "loss": 0.564, "step": 10711 }, { "epoch": 12.2005698005698, "grad_norm": 0.20205016434192657, "learning_rate": 1.7746645066958683e-05, "loss": 0.7781, "step": 10712 }, { "epoch": 12.201709401709401, "grad_norm": 0.19284003973007202, "learning_rate": 1.7742186421972234e-05, "loss": 0.6534, "step": 10713 }, { "epoch": 12.202849002849003, "grad_norm": 0.21559850871562958, "learning_rate": 1.77377280290696e-05, "loss": 0.6882, "step": 10714 }, { "epoch": 12.203988603988604, "grad_norm": 0.3090205192565918, "learning_rate": 1.7733269888405652e-05, "loss": 0.432, "step": 10715 }, { "epoch": 12.205128205128204, "grad_norm": 0.17751125991344452, "learning_rate": 1.772881200013522e-05, "loss": 0.9052, "step": 10716 }, { "epoch": 12.206267806267807, "grad_norm": 0.22450704872608185, "learning_rate": 1.7724354364413144e-05, "loss": 0.5813, "step": 10717 }, { "epoch": 12.207407407407407, "grad_norm": 0.22953973710536957, "learning_rate": 1.7719896981394238e-05, "loss": 0.6634, "step": 10718 }, { "epoch": 12.208547008547008, "grad_norm": 0.19475506246089935, "learning_rate": 1.7715439851233346e-05, "loss": 0.7875, "step": 10719 }, { "epoch": 12.20968660968661, "grad_norm": 0.2490365356206894, "learning_rate": 1.771098297408525e-05, "loss": 0.7025, "step": 10720 }, { "epoch": 12.21082621082621, "grad_norm": 0.23320597410202026, "learning_rate": 1.7706526350104767e-05, "loss": 0.5894, "step": 10721 }, { "epoch": 12.211965811965811, "grad_norm": 0.19566142559051514, "learning_rate": 1.7702069979446677e-05, "loss": 0.554, "step": 10722 }, { "epoch": 12.213105413105414, "grad_norm": 0.2419247329235077, "learning_rate": 1.7697613862265775e-05, "loss": 0.5138, "step": 10723 }, { "epoch": 12.214245014245014, "grad_norm": 0.2230897694826126, "learning_rate": 1.7693157998716813e-05, "loss": 0.6378, "step": 10724 }, { "epoch": 12.215384615384615, "grad_norm": 0.20686529576778412, "learning_rate": 1.7688702388954583e-05, "loss": 0.7026, "step": 10725 }, { "epoch": 12.216524216524217, "grad_norm": 0.2121909260749817, "learning_rate": 1.768424703313381e-05, "loss": 0.8471, "step": 10726 }, { "epoch": 12.217663817663817, "grad_norm": 0.2286602407693863, "learning_rate": 1.7679791931409265e-05, "loss": 0.5637, "step": 10727 }, { "epoch": 12.218803418803418, "grad_norm": 0.20954394340515137, "learning_rate": 1.7675337083935674e-05, "loss": 0.7747, "step": 10728 }, { "epoch": 12.21994301994302, "grad_norm": 0.1756397783756256, "learning_rate": 1.7670882490867758e-05, "loss": 0.859, "step": 10729 }, { "epoch": 12.221082621082621, "grad_norm": 0.2088949978351593, "learning_rate": 1.766642815236026e-05, "loss": 0.7268, "step": 10730 }, { "epoch": 12.222222222222221, "grad_norm": 0.17804178595542908, "learning_rate": 1.7661974068567876e-05, "loss": 0.7313, "step": 10731 }, { "epoch": 12.223361823361824, "grad_norm": 0.19555315375328064, "learning_rate": 1.765752023964532e-05, "loss": 0.6368, "step": 10732 }, { "epoch": 12.224501424501424, "grad_norm": 0.18006379902362823, "learning_rate": 1.765306666574727e-05, "loss": 0.7287, "step": 10733 }, { "epoch": 12.225641025641025, "grad_norm": 0.21815355122089386, "learning_rate": 1.7648613347028424e-05, "loss": 0.656, "step": 10734 }, { "epoch": 12.226780626780627, "grad_norm": 0.2285873293876648, "learning_rate": 1.7644160283643446e-05, "loss": 0.6461, "step": 10735 }, { "epoch": 12.227920227920228, "grad_norm": 0.24663476645946503, "learning_rate": 1.7639707475747018e-05, "loss": 0.5885, "step": 10736 }, { "epoch": 12.229059829059828, "grad_norm": 0.22460223734378815, "learning_rate": 1.7635254923493783e-05, "loss": 0.5982, "step": 10737 }, { "epoch": 12.23019943019943, "grad_norm": 0.21745523810386658, "learning_rate": 1.7630802627038405e-05, "loss": 0.6841, "step": 10738 }, { "epoch": 12.231339031339031, "grad_norm": 0.20803306996822357, "learning_rate": 1.762635058653551e-05, "loss": 0.9838, "step": 10739 }, { "epoch": 12.232478632478632, "grad_norm": 0.1870354264974594, "learning_rate": 1.7621898802139736e-05, "loss": 0.7261, "step": 10740 }, { "epoch": 12.233618233618234, "grad_norm": 0.18768827617168427, "learning_rate": 1.761744727400571e-05, "loss": 0.7957, "step": 10741 }, { "epoch": 12.234757834757835, "grad_norm": 0.24852044880390167, "learning_rate": 1.761299600228804e-05, "loss": 0.7304, "step": 10742 }, { "epoch": 12.235897435897435, "grad_norm": 0.19052305817604065, "learning_rate": 1.760854498714134e-05, "loss": 0.6378, "step": 10743 }, { "epoch": 12.237037037037037, "grad_norm": 0.21809840202331543, "learning_rate": 1.7604094228720194e-05, "loss": 0.7439, "step": 10744 }, { "epoch": 12.238176638176638, "grad_norm": 0.20084500312805176, "learning_rate": 1.75996437271792e-05, "loss": 0.6188, "step": 10745 }, { "epoch": 12.239316239316238, "grad_norm": 0.199592724442482, "learning_rate": 1.7595193482672924e-05, "loss": 0.8747, "step": 10746 }, { "epoch": 12.24045584045584, "grad_norm": 0.19660794734954834, "learning_rate": 1.7590743495355945e-05, "loss": 0.639, "step": 10747 }, { "epoch": 12.241595441595441, "grad_norm": 0.2428426593542099, "learning_rate": 1.7586293765382815e-05, "loss": 0.5828, "step": 10748 }, { "epoch": 12.242735042735042, "grad_norm": 0.21701796352863312, "learning_rate": 1.7581844292908095e-05, "loss": 0.6053, "step": 10749 }, { "epoch": 12.243874643874644, "grad_norm": 0.22236555814743042, "learning_rate": 1.757739507808632e-05, "loss": 0.6185, "step": 10750 }, { "epoch": 12.245014245014245, "grad_norm": 0.24517688155174255, "learning_rate": 1.757294612107202e-05, "loss": 0.6609, "step": 10751 }, { "epoch": 12.246153846153845, "grad_norm": 0.19971169531345367, "learning_rate": 1.7568497422019737e-05, "loss": 0.6233, "step": 10752 }, { "epoch": 12.247293447293448, "grad_norm": 0.22031354904174805, "learning_rate": 1.756404898108397e-05, "loss": 0.7641, "step": 10753 }, { "epoch": 12.248433048433048, "grad_norm": 0.20710423588752747, "learning_rate": 1.7559600798419233e-05, "loss": 0.4968, "step": 10754 }, { "epoch": 12.249572649572649, "grad_norm": 0.23224996030330658, "learning_rate": 1.755515287418002e-05, "loss": 0.7139, "step": 10755 }, { "epoch": 12.250712250712251, "grad_norm": 0.21826957166194916, "learning_rate": 1.7550705208520817e-05, "loss": 0.6562, "step": 10756 }, { "epoch": 12.251851851851852, "grad_norm": 0.23644167184829712, "learning_rate": 1.7546257801596115e-05, "loss": 0.7466, "step": 10757 }, { "epoch": 12.252991452991452, "grad_norm": 0.22611716389656067, "learning_rate": 1.7541810653560376e-05, "loss": 0.5025, "step": 10758 }, { "epoch": 12.254131054131054, "grad_norm": 0.25908517837524414, "learning_rate": 1.7537363764568067e-05, "loss": 0.6044, "step": 10759 }, { "epoch": 12.255270655270655, "grad_norm": 0.24921810626983643, "learning_rate": 1.7532917134773627e-05, "loss": 0.6788, "step": 10760 }, { "epoch": 12.256410256410255, "grad_norm": 0.21592481434345245, "learning_rate": 1.7528470764331518e-05, "loss": 0.5031, "step": 10761 }, { "epoch": 12.257549857549858, "grad_norm": 0.23294512927532196, "learning_rate": 1.7524024653396153e-05, "loss": 0.5266, "step": 10762 }, { "epoch": 12.258689458689458, "grad_norm": 0.1757514774799347, "learning_rate": 1.7519578802121977e-05, "loss": 0.7011, "step": 10763 }, { "epoch": 12.25982905982906, "grad_norm": 0.17363764345645905, "learning_rate": 1.751513321066341e-05, "loss": 0.7961, "step": 10764 }, { "epoch": 12.260968660968661, "grad_norm": 0.23321864008903503, "learning_rate": 1.751068787917484e-05, "loss": 0.7313, "step": 10765 }, { "epoch": 12.262108262108262, "grad_norm": 0.17034848034381866, "learning_rate": 1.7506242807810684e-05, "loss": 0.8433, "step": 10766 }, { "epoch": 12.263247863247864, "grad_norm": 0.23170049488544464, "learning_rate": 1.7501797996725317e-05, "loss": 0.5912, "step": 10767 }, { "epoch": 12.264387464387465, "grad_norm": 0.2036147117614746, "learning_rate": 1.7497353446073128e-05, "loss": 0.7455, "step": 10768 }, { "epoch": 12.265527065527065, "grad_norm": 0.24861375987529755, "learning_rate": 1.7492909156008484e-05, "loss": 0.6507, "step": 10769 }, { "epoch": 12.266666666666667, "grad_norm": 0.26345381140708923, "learning_rate": 1.7488465126685756e-05, "loss": 0.8334, "step": 10770 }, { "epoch": 12.267806267806268, "grad_norm": 0.21005362272262573, "learning_rate": 1.7484021358259285e-05, "loss": 0.6203, "step": 10771 }, { "epoch": 12.268945868945869, "grad_norm": 0.21710240840911865, "learning_rate": 1.7479577850883422e-05, "loss": 0.6095, "step": 10772 }, { "epoch": 12.270085470085471, "grad_norm": 0.22123247385025024, "learning_rate": 1.7475134604712496e-05, "loss": 0.6514, "step": 10773 }, { "epoch": 12.271225071225071, "grad_norm": 0.21868394315242767, "learning_rate": 1.7470691619900843e-05, "loss": 0.6075, "step": 10774 }, { "epoch": 12.272364672364672, "grad_norm": 0.1895763874053955, "learning_rate": 1.7466248896602776e-05, "loss": 0.8371, "step": 10775 }, { "epoch": 12.273504273504274, "grad_norm": 0.22064198553562164, "learning_rate": 1.7461806434972604e-05, "loss": 0.6574, "step": 10776 }, { "epoch": 12.274643874643875, "grad_norm": 0.2188696265220642, "learning_rate": 1.7457364235164625e-05, "loss": 0.5647, "step": 10777 }, { "epoch": 12.275783475783475, "grad_norm": 0.26082390546798706, "learning_rate": 1.7452922297333125e-05, "loss": 0.5591, "step": 10778 }, { "epoch": 12.276923076923078, "grad_norm": 0.21436484158039093, "learning_rate": 1.7448480621632397e-05, "loss": 0.7713, "step": 10779 }, { "epoch": 12.278062678062678, "grad_norm": 0.1950216293334961, "learning_rate": 1.7444039208216695e-05, "loss": 0.6866, "step": 10780 }, { "epoch": 12.279202279202279, "grad_norm": 0.1890411376953125, "learning_rate": 1.7439598057240297e-05, "loss": 0.8617, "step": 10781 }, { "epoch": 12.280341880341881, "grad_norm": 0.19841855764389038, "learning_rate": 1.7435157168857442e-05, "loss": 0.6381, "step": 10782 }, { "epoch": 12.281481481481482, "grad_norm": 0.22863084077835083, "learning_rate": 1.7430716543222388e-05, "loss": 0.4503, "step": 10783 }, { "epoch": 12.282621082621082, "grad_norm": 0.1743665486574173, "learning_rate": 1.7426276180489355e-05, "loss": 0.6095, "step": 10784 }, { "epoch": 12.283760683760685, "grad_norm": 0.2586623430252075, "learning_rate": 1.7421836080812583e-05, "loss": 0.4805, "step": 10785 }, { "epoch": 12.284900284900285, "grad_norm": 0.21026919782161713, "learning_rate": 1.741739624434629e-05, "loss": 0.5799, "step": 10786 }, { "epoch": 12.286039886039886, "grad_norm": 0.21374911069869995, "learning_rate": 1.741295667124467e-05, "loss": 0.7512, "step": 10787 }, { "epoch": 12.287179487179488, "grad_norm": 0.21112634241580963, "learning_rate": 1.7408517361661934e-05, "loss": 0.7029, "step": 10788 }, { "epoch": 12.288319088319088, "grad_norm": 0.20958742499351501, "learning_rate": 1.7404078315752265e-05, "loss": 0.6727, "step": 10789 }, { "epoch": 12.289458689458689, "grad_norm": 0.27278652787208557, "learning_rate": 1.7399639533669848e-05, "loss": 0.6567, "step": 10790 }, { "epoch": 12.290598290598291, "grad_norm": 0.20039872825145721, "learning_rate": 1.7395201015568847e-05, "loss": 0.7497, "step": 10791 }, { "epoch": 12.291737891737892, "grad_norm": 0.1941027045249939, "learning_rate": 1.7390762761603433e-05, "loss": 0.5699, "step": 10792 }, { "epoch": 12.292877492877492, "grad_norm": 0.27816611528396606, "learning_rate": 1.7386324771927748e-05, "loss": 0.5251, "step": 10793 }, { "epoch": 12.294017094017095, "grad_norm": 0.20026326179504395, "learning_rate": 1.7381887046695945e-05, "loss": 0.7609, "step": 10794 }, { "epoch": 12.295156695156695, "grad_norm": 0.23853638768196106, "learning_rate": 1.737744958606215e-05, "loss": 0.6327, "step": 10795 }, { "epoch": 12.296296296296296, "grad_norm": 0.21739494800567627, "learning_rate": 1.737301239018049e-05, "loss": 0.5109, "step": 10796 }, { "epoch": 12.297435897435898, "grad_norm": 0.2554258406162262, "learning_rate": 1.736857545920509e-05, "loss": 0.351, "step": 10797 }, { "epoch": 12.298575498575499, "grad_norm": 0.20038418471813202, "learning_rate": 1.736413879329005e-05, "loss": 0.6367, "step": 10798 }, { "epoch": 12.2997150997151, "grad_norm": 0.19856251776218414, "learning_rate": 1.735970239258947e-05, "loss": 0.8322, "step": 10799 }, { "epoch": 12.300854700854702, "grad_norm": 0.2430545538663864, "learning_rate": 1.7355266257257438e-05, "loss": 0.5878, "step": 10800 }, { "epoch": 12.301994301994302, "grad_norm": 0.25265562534332275, "learning_rate": 1.735083038744803e-05, "loss": 0.5441, "step": 10801 }, { "epoch": 12.303133903133903, "grad_norm": 0.20412509143352509, "learning_rate": 1.734639478331532e-05, "loss": 0.7165, "step": 10802 }, { "epoch": 12.304273504273505, "grad_norm": 0.2196875661611557, "learning_rate": 1.7341959445013362e-05, "loss": 0.8689, "step": 10803 }, { "epoch": 12.305413105413106, "grad_norm": 0.206462100148201, "learning_rate": 1.7337524372696218e-05, "loss": 0.6361, "step": 10804 }, { "epoch": 12.306552706552706, "grad_norm": 0.1997363120317459, "learning_rate": 1.7333089566517917e-05, "loss": 0.7908, "step": 10805 }, { "epoch": 12.307692307692308, "grad_norm": 0.20651085674762726, "learning_rate": 1.732865502663251e-05, "loss": 0.641, "step": 10806 }, { "epoch": 12.308831908831909, "grad_norm": 0.23688238859176636, "learning_rate": 1.7324220753194e-05, "loss": 0.5492, "step": 10807 }, { "epoch": 12.30997150997151, "grad_norm": 0.23155272006988525, "learning_rate": 1.731978674635641e-05, "loss": 0.6126, "step": 10808 }, { "epoch": 12.311111111111112, "grad_norm": 0.188265860080719, "learning_rate": 1.7315353006273755e-05, "loss": 0.7488, "step": 10809 }, { "epoch": 12.312250712250712, "grad_norm": 0.2498866468667984, "learning_rate": 1.731091953310002e-05, "loss": 0.6749, "step": 10810 }, { "epoch": 12.313390313390313, "grad_norm": 0.20068980753421783, "learning_rate": 1.7306486326989195e-05, "loss": 0.7336, "step": 10811 }, { "epoch": 12.314529914529915, "grad_norm": 0.2250603288412094, "learning_rate": 1.7302053388095256e-05, "loss": 0.6995, "step": 10812 }, { "epoch": 12.315669515669516, "grad_norm": 0.21724343299865723, "learning_rate": 1.7297620716572177e-05, "loss": 0.7542, "step": 10813 }, { "epoch": 12.316809116809116, "grad_norm": 0.3206590414047241, "learning_rate": 1.729318831257391e-05, "loss": 0.4611, "step": 10814 }, { "epoch": 12.317948717948719, "grad_norm": 0.20177413523197174, "learning_rate": 1.7288756176254406e-05, "loss": 0.6522, "step": 10815 }, { "epoch": 12.31908831908832, "grad_norm": 0.2285015881061554, "learning_rate": 1.7284324307767604e-05, "loss": 0.5431, "step": 10816 }, { "epoch": 12.32022792022792, "grad_norm": 0.17258432507514954, "learning_rate": 1.727989270726744e-05, "loss": 0.7312, "step": 10817 }, { "epoch": 12.321367521367522, "grad_norm": 0.21014466881752014, "learning_rate": 1.7275461374907825e-05, "loss": 0.7903, "step": 10818 }, { "epoch": 12.322507122507123, "grad_norm": 0.1585046947002411, "learning_rate": 1.7271030310842684e-05, "loss": 0.7555, "step": 10819 }, { "epoch": 12.323646723646723, "grad_norm": 0.19881057739257812, "learning_rate": 1.726659951522592e-05, "loss": 0.8694, "step": 10820 }, { "epoch": 12.324786324786325, "grad_norm": 0.2570485472679138, "learning_rate": 1.7262168988211413e-05, "loss": 0.5388, "step": 10821 }, { "epoch": 12.325925925925926, "grad_norm": 0.20987975597381592, "learning_rate": 1.7257738729953065e-05, "loss": 0.5459, "step": 10822 }, { "epoch": 12.327065527065526, "grad_norm": 0.24118657410144806, "learning_rate": 1.7253308740604736e-05, "loss": 0.4905, "step": 10823 }, { "epoch": 12.328205128205129, "grad_norm": 0.20755544304847717, "learning_rate": 1.7248879020320306e-05, "loss": 0.6382, "step": 10824 }, { "epoch": 12.32934472934473, "grad_norm": 0.19893032312393188, "learning_rate": 1.7244449569253616e-05, "loss": 0.8554, "step": 10825 }, { "epoch": 12.33048433048433, "grad_norm": 0.2536832094192505, "learning_rate": 1.7240020387558525e-05, "loss": 0.3889, "step": 10826 }, { "epoch": 12.331623931623932, "grad_norm": 0.20230743288993835, "learning_rate": 1.723559147538886e-05, "loss": 0.6262, "step": 10827 }, { "epoch": 12.332763532763533, "grad_norm": 0.19835615158081055, "learning_rate": 1.7231162832898462e-05, "loss": 0.7368, "step": 10828 }, { "epoch": 12.333903133903133, "grad_norm": 0.19229477643966675, "learning_rate": 1.7226734460241127e-05, "loss": 0.6124, "step": 10829 }, { "epoch": 12.335042735042736, "grad_norm": 0.20986317098140717, "learning_rate": 1.7222306357570696e-05, "loss": 0.8819, "step": 10830 }, { "epoch": 12.336182336182336, "grad_norm": 0.2248592972755432, "learning_rate": 1.7217878525040955e-05, "loss": 0.6465, "step": 10831 }, { "epoch": 12.337321937321937, "grad_norm": 0.21292966604232788, "learning_rate": 1.7213450962805685e-05, "loss": 0.7351, "step": 10832 }, { "epoch": 12.338461538461539, "grad_norm": 0.17942236363887787, "learning_rate": 1.7209023671018685e-05, "loss": 0.8832, "step": 10833 }, { "epoch": 12.33960113960114, "grad_norm": 0.1823773980140686, "learning_rate": 1.720459664983371e-05, "loss": 0.5213, "step": 10834 }, { "epoch": 12.34074074074074, "grad_norm": 0.2529001533985138, "learning_rate": 1.7200169899404534e-05, "loss": 0.6295, "step": 10835 }, { "epoch": 12.341880341880342, "grad_norm": 0.17539896070957184, "learning_rate": 1.719574341988491e-05, "loss": 0.7827, "step": 10836 }, { "epoch": 12.343019943019943, "grad_norm": 0.1981101632118225, "learning_rate": 1.7191317211428577e-05, "loss": 0.669, "step": 10837 }, { "epoch": 12.344159544159544, "grad_norm": 0.22740918397903442, "learning_rate": 1.718689127418927e-05, "loss": 0.6029, "step": 10838 }, { "epoch": 12.345299145299146, "grad_norm": 0.18608032166957855, "learning_rate": 1.7182465608320717e-05, "loss": 0.8558, "step": 10839 }, { "epoch": 12.346438746438746, "grad_norm": 0.22309254109859467, "learning_rate": 1.7178040213976623e-05, "loss": 0.6231, "step": 10840 }, { "epoch": 12.347578347578347, "grad_norm": 0.21621108055114746, "learning_rate": 1.7173615091310713e-05, "loss": 0.7582, "step": 10841 }, { "epoch": 12.34871794871795, "grad_norm": 0.18133021891117096, "learning_rate": 1.716919024047667e-05, "loss": 0.7135, "step": 10842 }, { "epoch": 12.34985754985755, "grad_norm": 0.22286663949489594, "learning_rate": 1.7164765661628192e-05, "loss": 0.6465, "step": 10843 }, { "epoch": 12.35099715099715, "grad_norm": 0.20079916715621948, "learning_rate": 1.716034135491894e-05, "loss": 0.6421, "step": 10844 }, { "epoch": 12.352136752136753, "grad_norm": 0.21749894320964813, "learning_rate": 1.7155917320502596e-05, "loss": 0.7368, "step": 10845 }, { "epoch": 12.353276353276353, "grad_norm": 0.21311548352241516, "learning_rate": 1.715149355853281e-05, "loss": 0.6545, "step": 10846 }, { "epoch": 12.354415954415954, "grad_norm": 0.1735820770263672, "learning_rate": 1.7147070069163247e-05, "loss": 0.9553, "step": 10847 }, { "epoch": 12.355555555555556, "grad_norm": 0.18377651274204254, "learning_rate": 1.7142646852547528e-05, "loss": 0.7713, "step": 10848 }, { "epoch": 12.356695156695157, "grad_norm": 0.2323378026485443, "learning_rate": 1.7138223908839296e-05, "loss": 0.851, "step": 10849 }, { "epoch": 12.357834757834757, "grad_norm": 0.21971707046031952, "learning_rate": 1.713380123819216e-05, "loss": 0.6503, "step": 10850 }, { "epoch": 12.35897435897436, "grad_norm": 0.22638419270515442, "learning_rate": 1.7129378840759735e-05, "loss": 0.6714, "step": 10851 }, { "epoch": 12.36011396011396, "grad_norm": 0.202363520860672, "learning_rate": 1.7124956716695643e-05, "loss": 0.7194, "step": 10852 }, { "epoch": 12.36125356125356, "grad_norm": 0.19767224788665771, "learning_rate": 1.712053486615345e-05, "loss": 0.6844, "step": 10853 }, { "epoch": 12.362393162393163, "grad_norm": 0.19901210069656372, "learning_rate": 1.7116113289286755e-05, "loss": 0.7594, "step": 10854 }, { "epoch": 12.363532763532763, "grad_norm": 0.22945459187030792, "learning_rate": 1.7111691986249124e-05, "loss": 0.7578, "step": 10855 }, { "epoch": 12.364672364672364, "grad_norm": 0.20821332931518555, "learning_rate": 1.7107270957194128e-05, "loss": 0.6723, "step": 10856 }, { "epoch": 12.365811965811966, "grad_norm": 0.18176351487636566, "learning_rate": 1.7102850202275312e-05, "loss": 0.7668, "step": 10857 }, { "epoch": 12.366951566951567, "grad_norm": 0.1840147227048874, "learning_rate": 1.7098429721646224e-05, "loss": 0.9512, "step": 10858 }, { "epoch": 12.368091168091167, "grad_norm": 0.20341096818447113, "learning_rate": 1.7094009515460403e-05, "loss": 0.7151, "step": 10859 }, { "epoch": 12.36923076923077, "grad_norm": 0.21524856984615326, "learning_rate": 1.708958958387138e-05, "loss": 0.6924, "step": 10860 }, { "epoch": 12.37037037037037, "grad_norm": 0.26379674673080444, "learning_rate": 1.7085169927032653e-05, "loss": 0.6872, "step": 10861 }, { "epoch": 12.37150997150997, "grad_norm": 0.1969965249300003, "learning_rate": 1.708075054509774e-05, "loss": 0.6073, "step": 10862 }, { "epoch": 12.372649572649573, "grad_norm": 0.18986299633979797, "learning_rate": 1.7076331438220145e-05, "loss": 0.6506, "step": 10863 }, { "epoch": 12.373789173789174, "grad_norm": 0.2116694152355194, "learning_rate": 1.707191260655335e-05, "loss": 0.5803, "step": 10864 }, { "epoch": 12.374928774928774, "grad_norm": 0.1978309452533722, "learning_rate": 1.7067494050250832e-05, "loss": 0.5744, "step": 10865 }, { "epoch": 12.376068376068377, "grad_norm": 0.22155092656612396, "learning_rate": 1.7063075769466055e-05, "loss": 0.568, "step": 10866 }, { "epoch": 12.377207977207977, "grad_norm": 0.21958346664905548, "learning_rate": 1.705865776435249e-05, "loss": 0.6179, "step": 10867 }, { "epoch": 12.378347578347578, "grad_norm": 0.24003711342811584, "learning_rate": 1.7054240035063572e-05, "loss": 0.6299, "step": 10868 }, { "epoch": 12.37948717948718, "grad_norm": 0.23949706554412842, "learning_rate": 1.7049822581752763e-05, "loss": 0.6282, "step": 10869 }, { "epoch": 12.38062678062678, "grad_norm": 0.20721937716007233, "learning_rate": 1.7045405404573464e-05, "loss": 0.7627, "step": 10870 }, { "epoch": 12.381766381766381, "grad_norm": 0.2312813550233841, "learning_rate": 1.7040988503679118e-05, "loss": 0.7129, "step": 10871 }, { "epoch": 12.382905982905983, "grad_norm": 0.22972968220710754, "learning_rate": 1.7036571879223123e-05, "loss": 0.5168, "step": 10872 }, { "epoch": 12.384045584045584, "grad_norm": 0.2056453377008438, "learning_rate": 1.7032155531358885e-05, "loss": 0.5596, "step": 10873 }, { "epoch": 12.385185185185184, "grad_norm": 0.23936963081359863, "learning_rate": 1.7027739460239797e-05, "loss": 0.6441, "step": 10874 }, { "epoch": 12.386324786324787, "grad_norm": 0.22788862884044647, "learning_rate": 1.7023323666019248e-05, "loss": 0.6808, "step": 10875 }, { "epoch": 12.387464387464387, "grad_norm": 0.17753151059150696, "learning_rate": 1.70189081488506e-05, "loss": 0.8624, "step": 10876 }, { "epoch": 12.388603988603988, "grad_norm": 0.2086041420698166, "learning_rate": 1.7014492908887226e-05, "loss": 0.496, "step": 10877 }, { "epoch": 12.38974358974359, "grad_norm": 0.29431894421577454, "learning_rate": 1.7010077946282466e-05, "loss": 0.4777, "step": 10878 }, { "epoch": 12.39088319088319, "grad_norm": 0.22070807218551636, "learning_rate": 1.7005663261189678e-05, "loss": 0.8056, "step": 10879 }, { "epoch": 12.392022792022791, "grad_norm": 0.1758912056684494, "learning_rate": 1.7001248853762184e-05, "loss": 0.7873, "step": 10880 }, { "epoch": 12.393162393162394, "grad_norm": 0.18746311962604523, "learning_rate": 1.6996834724153318e-05, "loss": 0.5918, "step": 10881 }, { "epoch": 12.394301994301994, "grad_norm": 0.2081279754638672, "learning_rate": 1.699242087251639e-05, "loss": 0.6617, "step": 10882 }, { "epoch": 12.395441595441595, "grad_norm": 0.17694391310214996, "learning_rate": 1.6988007299004704e-05, "loss": 0.8219, "step": 10883 }, { "epoch": 12.396581196581197, "grad_norm": 0.21463613212108612, "learning_rate": 1.698359400377155e-05, "loss": 0.6713, "step": 10884 }, { "epoch": 12.397720797720797, "grad_norm": 0.2681838274002075, "learning_rate": 1.697918098697023e-05, "loss": 0.4294, "step": 10885 }, { "epoch": 12.398860398860398, "grad_norm": 0.24642108380794525, "learning_rate": 1.6974768248754015e-05, "loss": 0.801, "step": 10886 }, { "epoch": 12.4, "grad_norm": 0.23168188333511353, "learning_rate": 1.6970355789276162e-05, "loss": 0.6824, "step": 10887 }, { "epoch": 12.401139601139601, "grad_norm": 0.19611448049545288, "learning_rate": 1.6965943608689938e-05, "loss": 0.7616, "step": 10888 }, { "epoch": 12.402279202279201, "grad_norm": 0.2129056453704834, "learning_rate": 1.6961531707148588e-05, "loss": 0.5648, "step": 10889 }, { "epoch": 12.403418803418804, "grad_norm": 0.2036651223897934, "learning_rate": 1.6957120084805344e-05, "loss": 0.6716, "step": 10890 }, { "epoch": 12.404558404558404, "grad_norm": 0.22893372178077698, "learning_rate": 1.6952708741813437e-05, "loss": 0.6018, "step": 10891 }, { "epoch": 12.405698005698005, "grad_norm": 0.1946319192647934, "learning_rate": 1.694829767832609e-05, "loss": 0.8137, "step": 10892 }, { "epoch": 12.406837606837607, "grad_norm": 0.2172234207391739, "learning_rate": 1.6943886894496503e-05, "loss": 0.7576, "step": 10893 }, { "epoch": 12.407977207977208, "grad_norm": 0.23911713063716888, "learning_rate": 1.6939476390477883e-05, "loss": 0.74, "step": 10894 }, { "epoch": 12.40911680911681, "grad_norm": 0.2226768434047699, "learning_rate": 1.69350661664234e-05, "loss": 0.6618, "step": 10895 }, { "epoch": 12.41025641025641, "grad_norm": 0.21871010959148407, "learning_rate": 1.6930656222486258e-05, "loss": 0.7467, "step": 10896 }, { "epoch": 12.411396011396011, "grad_norm": 0.16431699693202972, "learning_rate": 1.692624655881962e-05, "loss": 0.7324, "step": 10897 }, { "epoch": 12.412535612535613, "grad_norm": 0.20297163724899292, "learning_rate": 1.6921837175576634e-05, "loss": 0.6638, "step": 10898 }, { "epoch": 12.413675213675214, "grad_norm": 0.2928679287433624, "learning_rate": 1.6917428072910467e-05, "loss": 0.7598, "step": 10899 }, { "epoch": 12.414814814814815, "grad_norm": 0.18867497146129608, "learning_rate": 1.6913019250974244e-05, "loss": 0.7946, "step": 10900 }, { "epoch": 12.415954415954417, "grad_norm": 0.2605903446674347, "learning_rate": 1.6908610709921102e-05, "loss": 0.3366, "step": 10901 }, { "epoch": 12.417094017094017, "grad_norm": 0.18068283796310425, "learning_rate": 1.690420244990416e-05, "loss": 0.741, "step": 10902 }, { "epoch": 12.418233618233618, "grad_norm": 0.26920533180236816, "learning_rate": 1.6899794471076537e-05, "loss": 0.4441, "step": 10903 }, { "epoch": 12.41937321937322, "grad_norm": 0.26625603437423706, "learning_rate": 1.6895386773591324e-05, "loss": 0.7136, "step": 10904 }, { "epoch": 12.42051282051282, "grad_norm": 0.2134181559085846, "learning_rate": 1.6890979357601617e-05, "loss": 0.8128, "step": 10905 }, { "epoch": 12.421652421652421, "grad_norm": 0.18584464490413666, "learning_rate": 1.688657222326049e-05, "loss": 0.7526, "step": 10906 }, { "epoch": 12.422792022792024, "grad_norm": 0.19565482437610626, "learning_rate": 1.6882165370721025e-05, "loss": 0.8005, "step": 10907 }, { "epoch": 12.423931623931624, "grad_norm": 0.22352029383182526, "learning_rate": 1.6877758800136285e-05, "loss": 0.5997, "step": 10908 }, { "epoch": 12.425071225071225, "grad_norm": 0.18467660248279572, "learning_rate": 1.6873352511659317e-05, "loss": 0.7041, "step": 10909 }, { "epoch": 12.426210826210827, "grad_norm": 0.21925725042819977, "learning_rate": 1.6868946505443163e-05, "loss": 0.6993, "step": 10910 }, { "epoch": 12.427350427350428, "grad_norm": 0.18253867328166962, "learning_rate": 1.686454078164086e-05, "loss": 0.8133, "step": 10911 }, { "epoch": 12.428490028490028, "grad_norm": 0.2522498071193695, "learning_rate": 1.686013534040543e-05, "loss": 0.5022, "step": 10912 }, { "epoch": 12.42962962962963, "grad_norm": 0.20844027400016785, "learning_rate": 1.6855730181889877e-05, "loss": 0.7698, "step": 10913 }, { "epoch": 12.430769230769231, "grad_norm": 0.24319635331630707, "learning_rate": 1.6851325306247213e-05, "loss": 0.4736, "step": 10914 }, { "epoch": 12.431908831908832, "grad_norm": 0.20779003202915192, "learning_rate": 1.6846920713630436e-05, "loss": 0.608, "step": 10915 }, { "epoch": 12.433048433048434, "grad_norm": 0.273119181394577, "learning_rate": 1.6842516404192515e-05, "loss": 0.5014, "step": 10916 }, { "epoch": 12.434188034188034, "grad_norm": 0.20731210708618164, "learning_rate": 1.683811237808643e-05, "loss": 0.4557, "step": 10917 }, { "epoch": 12.435327635327635, "grad_norm": 0.1937895119190216, "learning_rate": 1.683370863546516e-05, "loss": 0.7014, "step": 10918 }, { "epoch": 12.436467236467237, "grad_norm": 0.2328297644853592, "learning_rate": 1.682930517648164e-05, "loss": 0.6301, "step": 10919 }, { "epoch": 12.437606837606838, "grad_norm": 0.21277906000614166, "learning_rate": 1.6824902001288826e-05, "loss": 0.585, "step": 10920 }, { "epoch": 12.438746438746438, "grad_norm": 0.22262638807296753, "learning_rate": 1.682049911003964e-05, "loss": 0.6298, "step": 10921 }, { "epoch": 12.43988603988604, "grad_norm": 0.21188397705554962, "learning_rate": 1.6816096502887017e-05, "loss": 0.6325, "step": 10922 }, { "epoch": 12.441025641025641, "grad_norm": 0.22162698209285736, "learning_rate": 1.681169417998387e-05, "loss": 0.6926, "step": 10923 }, { "epoch": 12.442165242165242, "grad_norm": 0.20816735923290253, "learning_rate": 1.6807292141483104e-05, "loss": 0.7027, "step": 10924 }, { "epoch": 12.443304843304844, "grad_norm": 0.24191802740097046, "learning_rate": 1.6802890387537608e-05, "loss": 0.6088, "step": 10925 }, { "epoch": 12.444444444444445, "grad_norm": 0.2472567856311798, "learning_rate": 1.679848891830027e-05, "loss": 0.5705, "step": 10926 }, { "epoch": 12.445584045584045, "grad_norm": 0.19793660938739777, "learning_rate": 1.6794087733923966e-05, "loss": 0.7335, "step": 10927 }, { "epoch": 12.446723646723648, "grad_norm": 0.2164766937494278, "learning_rate": 1.6789686834561557e-05, "loss": 0.8562, "step": 10928 }, { "epoch": 12.447863247863248, "grad_norm": 0.2467564195394516, "learning_rate": 1.678528622036591e-05, "loss": 0.5478, "step": 10929 }, { "epoch": 12.449002849002849, "grad_norm": 0.19778405129909515, "learning_rate": 1.678088589148986e-05, "loss": 0.7746, "step": 10930 }, { "epoch": 12.450142450142451, "grad_norm": 0.24205471575260162, "learning_rate": 1.677648584808625e-05, "loss": 0.4983, "step": 10931 }, { "epoch": 12.451282051282051, "grad_norm": 0.23273898661136627, "learning_rate": 1.6772086090307898e-05, "loss": 0.5497, "step": 10932 }, { "epoch": 12.452421652421652, "grad_norm": 0.24793747067451477, "learning_rate": 1.6767686618307628e-05, "loss": 0.5743, "step": 10933 }, { "epoch": 12.453561253561254, "grad_norm": 0.19684715569019318, "learning_rate": 1.676328743223823e-05, "loss": 0.8132, "step": 10934 }, { "epoch": 12.454700854700855, "grad_norm": 0.22554780542850494, "learning_rate": 1.675888853225252e-05, "loss": 0.623, "step": 10935 }, { "epoch": 12.455840455840455, "grad_norm": 0.2464742809534073, "learning_rate": 1.675448991850327e-05, "loss": 0.6751, "step": 10936 }, { "epoch": 12.456980056980058, "grad_norm": 0.19820618629455566, "learning_rate": 1.6750091591143263e-05, "loss": 0.6132, "step": 10937 }, { "epoch": 12.458119658119658, "grad_norm": 0.2199823409318924, "learning_rate": 1.674569355032526e-05, "loss": 0.7071, "step": 10938 }, { "epoch": 12.459259259259259, "grad_norm": 0.19806915521621704, "learning_rate": 1.674129579620201e-05, "loss": 0.6505, "step": 10939 }, { "epoch": 12.460398860398861, "grad_norm": 0.19808705151081085, "learning_rate": 1.6736898328926282e-05, "loss": 0.8088, "step": 10940 }, { "epoch": 12.461538461538462, "grad_norm": 0.21985428035259247, "learning_rate": 1.6732501148650795e-05, "loss": 0.6154, "step": 10941 }, { "epoch": 12.462678062678062, "grad_norm": 0.2526129484176636, "learning_rate": 1.6728104255528282e-05, "loss": 0.6772, "step": 10942 }, { "epoch": 12.463817663817665, "grad_norm": 0.28405681252479553, "learning_rate": 1.672370764971145e-05, "loss": 0.5673, "step": 10943 }, { "epoch": 12.464957264957265, "grad_norm": 0.1978614330291748, "learning_rate": 1.671931133135302e-05, "loss": 0.8093, "step": 10944 }, { "epoch": 12.466096866096866, "grad_norm": 0.20405744016170502, "learning_rate": 1.671491530060567e-05, "loss": 0.6331, "step": 10945 }, { "epoch": 12.467236467236468, "grad_norm": 0.17509637773036957, "learning_rate": 1.67105195576221e-05, "loss": 0.8965, "step": 10946 }, { "epoch": 12.468376068376068, "grad_norm": 0.2595173418521881, "learning_rate": 1.6706124102554983e-05, "loss": 0.5936, "step": 10947 }, { "epoch": 12.469515669515669, "grad_norm": 0.1783231645822525, "learning_rate": 1.6701728935556984e-05, "loss": 0.6488, "step": 10948 }, { "epoch": 12.470655270655271, "grad_norm": 0.18801869451999664, "learning_rate": 1.6697334056780755e-05, "loss": 0.6566, "step": 10949 }, { "epoch": 12.471794871794872, "grad_norm": 0.20684203505516052, "learning_rate": 1.6692939466378944e-05, "loss": 0.7693, "step": 10950 }, { "epoch": 12.472934472934472, "grad_norm": 0.2421393096446991, "learning_rate": 1.6688545164504194e-05, "loss": 0.5507, "step": 10951 }, { "epoch": 12.474074074074075, "grad_norm": 0.2938486337661743, "learning_rate": 1.668415115130913e-05, "loss": 0.4418, "step": 10952 }, { "epoch": 12.475213675213675, "grad_norm": 0.20215126872062683, "learning_rate": 1.6679757426946362e-05, "loss": 0.7922, "step": 10953 }, { "epoch": 12.476353276353276, "grad_norm": 0.19147877395153046, "learning_rate": 1.6675363991568503e-05, "loss": 0.5436, "step": 10954 }, { "epoch": 12.477492877492878, "grad_norm": 0.22713878750801086, "learning_rate": 1.6670970845328142e-05, "loss": 0.7034, "step": 10955 }, { "epoch": 12.478632478632479, "grad_norm": 0.2423946112394333, "learning_rate": 1.6666577988377875e-05, "loss": 0.525, "step": 10956 }, { "epoch": 12.47977207977208, "grad_norm": 0.2133038491010666, "learning_rate": 1.6662185420870265e-05, "loss": 0.5818, "step": 10957 }, { "epoch": 12.480911680911682, "grad_norm": 0.23854395747184753, "learning_rate": 1.6657793142957883e-05, "loss": 0.4953, "step": 10958 }, { "epoch": 12.482051282051282, "grad_norm": 0.24194027483463287, "learning_rate": 1.665340115479329e-05, "loss": 0.8131, "step": 10959 }, { "epoch": 12.483190883190883, "grad_norm": 0.190907284617424, "learning_rate": 1.664900945652903e-05, "loss": 0.9208, "step": 10960 }, { "epoch": 12.484330484330485, "grad_norm": 0.22213807702064514, "learning_rate": 1.6644618048317625e-05, "loss": 0.8089, "step": 10961 }, { "epoch": 12.485470085470086, "grad_norm": 0.20240817964076996, "learning_rate": 1.664022693031162e-05, "loss": 0.9218, "step": 10962 }, { "epoch": 12.486609686609686, "grad_norm": 0.22596926987171173, "learning_rate": 1.6635836102663527e-05, "loss": 0.6796, "step": 10963 }, { "epoch": 12.487749287749288, "grad_norm": 0.21198280155658722, "learning_rate": 1.6631445565525845e-05, "loss": 0.5697, "step": 10964 }, { "epoch": 12.488888888888889, "grad_norm": 0.19748011231422424, "learning_rate": 1.6627055319051076e-05, "loss": 0.5982, "step": 10965 }, { "epoch": 12.49002849002849, "grad_norm": 0.3053680956363678, "learning_rate": 1.6622665363391697e-05, "loss": 0.5573, "step": 10966 }, { "epoch": 12.491168091168092, "grad_norm": 0.22078709304332733, "learning_rate": 1.6618275698700193e-05, "loss": 0.9135, "step": 10967 }, { "epoch": 12.492307692307692, "grad_norm": 0.18783444166183472, "learning_rate": 1.6613886325129015e-05, "loss": 0.7731, "step": 10968 }, { "epoch": 12.493447293447293, "grad_norm": 0.2591036260128021, "learning_rate": 1.6609497242830633e-05, "loss": 0.5407, "step": 10969 }, { "epoch": 12.494586894586895, "grad_norm": 0.2199113816022873, "learning_rate": 1.6605108451957485e-05, "loss": 0.7096, "step": 10970 }, { "epoch": 12.495726495726496, "grad_norm": 0.2580206096172333, "learning_rate": 1.660071995266201e-05, "loss": 0.7711, "step": 10971 }, { "epoch": 12.496866096866096, "grad_norm": 0.2157939374446869, "learning_rate": 1.6596331745096615e-05, "loss": 0.7494, "step": 10972 }, { "epoch": 12.498005698005699, "grad_norm": 0.18810850381851196, "learning_rate": 1.6591943829413737e-05, "loss": 0.5819, "step": 10973 }, { "epoch": 12.4991452991453, "grad_norm": 0.23743487894535065, "learning_rate": 1.6587556205765777e-05, "loss": 0.5564, "step": 10974 }, { "epoch": 12.5002849002849, "grad_norm": 0.22389283776283264, "learning_rate": 1.6583168874305118e-05, "loss": 0.6437, "step": 10975 }, { "epoch": 12.501424501424502, "grad_norm": 0.19681458175182343, "learning_rate": 1.6578781835184155e-05, "loss": 0.7081, "step": 10976 }, { "epoch": 12.502564102564103, "grad_norm": 0.1927095651626587, "learning_rate": 1.6574395088555253e-05, "loss": 0.6656, "step": 10977 }, { "epoch": 12.503703703703703, "grad_norm": 0.17713747918605804, "learning_rate": 1.6570008634570786e-05, "loss": 0.7476, "step": 10978 }, { "epoch": 12.504843304843305, "grad_norm": 0.24266201257705688, "learning_rate": 1.6565622473383095e-05, "loss": 0.6614, "step": 10979 }, { "epoch": 12.505982905982906, "grad_norm": 0.1891140341758728, "learning_rate": 1.6561236605144537e-05, "loss": 0.8354, "step": 10980 }, { "epoch": 12.507122507122507, "grad_norm": 0.17886357009410858, "learning_rate": 1.655685103000743e-05, "loss": 0.5899, "step": 10981 }, { "epoch": 12.508262108262109, "grad_norm": 0.1950901299715042, "learning_rate": 1.6552465748124116e-05, "loss": 0.8008, "step": 10982 }, { "epoch": 12.50940170940171, "grad_norm": 0.22581149637699127, "learning_rate": 1.6548080759646883e-05, "loss": 0.5845, "step": 10983 }, { "epoch": 12.51054131054131, "grad_norm": 0.16622164845466614, "learning_rate": 1.6543696064728053e-05, "loss": 0.7195, "step": 10984 }, { "epoch": 12.511680911680912, "grad_norm": 0.1869547814130783, "learning_rate": 1.6539311663519925e-05, "loss": 0.8042, "step": 10985 }, { "epoch": 12.512820512820513, "grad_norm": 0.2045227736234665, "learning_rate": 1.6534927556174756e-05, "loss": 0.6094, "step": 10986 }, { "epoch": 12.513960113960113, "grad_norm": 0.2639092803001404, "learning_rate": 1.6530543742844843e-05, "loss": 0.4522, "step": 10987 }, { "epoch": 12.515099715099716, "grad_norm": 0.20233054459095, "learning_rate": 1.652616022368243e-05, "loss": 0.8569, "step": 10988 }, { "epoch": 12.516239316239316, "grad_norm": 0.19802075624465942, "learning_rate": 1.652177699883978e-05, "loss": 0.6387, "step": 10989 }, { "epoch": 12.517378917378917, "grad_norm": 0.20674096047878265, "learning_rate": 1.6517394068469128e-05, "loss": 0.8587, "step": 10990 }, { "epoch": 12.518518518518519, "grad_norm": 0.20811493694782257, "learning_rate": 1.6513011432722708e-05, "loss": 0.7051, "step": 10991 }, { "epoch": 12.51965811965812, "grad_norm": 0.18526801466941833, "learning_rate": 1.6508629091752736e-05, "loss": 0.6825, "step": 10992 }, { "epoch": 12.52079772079772, "grad_norm": 0.2324620634317398, "learning_rate": 1.650424704571143e-05, "loss": 0.6098, "step": 10993 }, { "epoch": 12.521937321937322, "grad_norm": 0.22030244767665863, "learning_rate": 1.649986529475098e-05, "loss": 0.5876, "step": 10994 }, { "epoch": 12.523076923076923, "grad_norm": 0.20725619792938232, "learning_rate": 1.6495483839023595e-05, "loss": 0.6561, "step": 10995 }, { "epoch": 12.524216524216524, "grad_norm": 0.19436527788639069, "learning_rate": 1.6491102678681436e-05, "loss": 0.6815, "step": 10996 }, { "epoch": 12.525356125356126, "grad_norm": 0.2555645704269409, "learning_rate": 1.6486721813876686e-05, "loss": 0.4067, "step": 10997 }, { "epoch": 12.526495726495726, "grad_norm": 0.2308678776025772, "learning_rate": 1.6482341244761502e-05, "loss": 0.7071, "step": 10998 }, { "epoch": 12.527635327635327, "grad_norm": 0.2093668431043625, "learning_rate": 1.6477960971488028e-05, "loss": 0.6203, "step": 10999 }, { "epoch": 12.52877492877493, "grad_norm": 0.26428475975990295, "learning_rate": 1.6473580994208398e-05, "loss": 0.3758, "step": 11000 }, { "epoch": 12.52991452991453, "grad_norm": 0.22192619740962982, "learning_rate": 1.6469201313074756e-05, "loss": 0.5943, "step": 11001 }, { "epoch": 12.53105413105413, "grad_norm": 0.2347877025604248, "learning_rate": 1.646482192823921e-05, "loss": 0.7203, "step": 11002 }, { "epoch": 12.532193732193733, "grad_norm": 0.20047219097614288, "learning_rate": 1.646044283985387e-05, "loss": 0.6711, "step": 11003 }, { "epoch": 12.533333333333333, "grad_norm": 0.2244284749031067, "learning_rate": 1.6456064048070832e-05, "loss": 0.4248, "step": 11004 }, { "epoch": 12.534472934472934, "grad_norm": 0.1928756833076477, "learning_rate": 1.645168555304218e-05, "loss": 0.8309, "step": 11005 }, { "epoch": 12.535612535612536, "grad_norm": 0.24166139960289001, "learning_rate": 1.6447307354920005e-05, "loss": 0.49, "step": 11006 }, { "epoch": 12.536752136752137, "grad_norm": 0.19183756411075592, "learning_rate": 1.6442929453856364e-05, "loss": 0.7716, "step": 11007 }, { "epoch": 12.537891737891737, "grad_norm": 0.21705985069274902, "learning_rate": 1.643855185000332e-05, "loss": 0.7428, "step": 11008 }, { "epoch": 12.53903133903134, "grad_norm": 0.1837446242570877, "learning_rate": 1.6434174543512905e-05, "loss": 0.6945, "step": 11009 }, { "epoch": 12.54017094017094, "grad_norm": 0.17365996539592743, "learning_rate": 1.6429797534537172e-05, "loss": 0.8055, "step": 11010 }, { "epoch": 12.54131054131054, "grad_norm": 0.24148063361644745, "learning_rate": 1.642542082322813e-05, "loss": 0.83, "step": 11011 }, { "epoch": 12.542450142450143, "grad_norm": 0.24556422233581543, "learning_rate": 1.6421044409737817e-05, "loss": 0.5919, "step": 11012 }, { "epoch": 12.543589743589743, "grad_norm": 0.2238049954175949, "learning_rate": 1.6416668294218213e-05, "loss": 0.6238, "step": 11013 }, { "epoch": 12.544729344729344, "grad_norm": 0.2636539340019226, "learning_rate": 1.6412292476821328e-05, "loss": 0.4099, "step": 11014 }, { "epoch": 12.545868945868946, "grad_norm": 0.17827825248241425, "learning_rate": 1.6407916957699136e-05, "loss": 0.8987, "step": 11015 }, { "epoch": 12.547008547008547, "grad_norm": 0.2063855081796646, "learning_rate": 1.6403541737003613e-05, "loss": 0.6717, "step": 11016 }, { "epoch": 12.548148148148147, "grad_norm": 0.2015405148267746, "learning_rate": 1.6399166814886736e-05, "loss": 0.6795, "step": 11017 }, { "epoch": 12.54928774928775, "grad_norm": 0.1882464587688446, "learning_rate": 1.6394792191500443e-05, "loss": 0.6898, "step": 11018 }, { "epoch": 12.55042735042735, "grad_norm": 0.2405157834291458, "learning_rate": 1.639041786699669e-05, "loss": 0.738, "step": 11019 }, { "epoch": 12.55156695156695, "grad_norm": 0.2269430160522461, "learning_rate": 1.638604384152739e-05, "loss": 0.66, "step": 11020 }, { "epoch": 12.552706552706553, "grad_norm": 0.24743977189064026, "learning_rate": 1.638167011524448e-05, "loss": 0.3647, "step": 11021 }, { "epoch": 12.553846153846154, "grad_norm": 0.2229391634464264, "learning_rate": 1.6377296688299866e-05, "loss": 0.8568, "step": 11022 }, { "epoch": 12.554985754985754, "grad_norm": 0.2505655884742737, "learning_rate": 1.6372923560845454e-05, "loss": 0.5591, "step": 11023 }, { "epoch": 12.556125356125357, "grad_norm": 0.23933634161949158, "learning_rate": 1.636855073303313e-05, "loss": 0.7009, "step": 11024 }, { "epoch": 12.557264957264957, "grad_norm": 0.2654150128364563, "learning_rate": 1.636417820501478e-05, "loss": 0.5882, "step": 11025 }, { "epoch": 12.558404558404558, "grad_norm": 0.1851956993341446, "learning_rate": 1.635980597694226e-05, "loss": 0.8961, "step": 11026 }, { "epoch": 12.55954415954416, "grad_norm": 0.2490128129720688, "learning_rate": 1.6355434048967434e-05, "loss": 0.6622, "step": 11027 }, { "epoch": 12.56068376068376, "grad_norm": 0.19830764830112457, "learning_rate": 1.6351062421242162e-05, "loss": 0.631, "step": 11028 }, { "epoch": 12.561823361823361, "grad_norm": 0.17523930966854095, "learning_rate": 1.6346691093918287e-05, "loss": 0.8467, "step": 11029 }, { "epoch": 12.562962962962963, "grad_norm": 0.19710446894168854, "learning_rate": 1.6342320067147618e-05, "loss": 0.7908, "step": 11030 }, { "epoch": 12.564102564102564, "grad_norm": 0.17316338419914246, "learning_rate": 1.6337949341081986e-05, "loss": 0.7468, "step": 11031 }, { "epoch": 12.565242165242164, "grad_norm": 0.21457502245903015, "learning_rate": 1.633357891587319e-05, "loss": 0.9224, "step": 11032 }, { "epoch": 12.566381766381767, "grad_norm": 0.24812400341033936, "learning_rate": 1.6329208791673037e-05, "loss": 0.519, "step": 11033 }, { "epoch": 12.567521367521367, "grad_norm": 0.22866606712341309, "learning_rate": 1.6324838968633303e-05, "loss": 0.5053, "step": 11034 }, { "epoch": 12.568660968660968, "grad_norm": 0.18968430161476135, "learning_rate": 1.6320469446905773e-05, "loss": 0.9831, "step": 11035 }, { "epoch": 12.56980056980057, "grad_norm": 0.20741485059261322, "learning_rate": 1.63161002266422e-05, "loss": 0.8396, "step": 11036 }, { "epoch": 12.57094017094017, "grad_norm": 0.16728100180625916, "learning_rate": 1.6311731307994356e-05, "loss": 0.8375, "step": 11037 }, { "epoch": 12.572079772079771, "grad_norm": 0.21869726479053497, "learning_rate": 1.6307362691113967e-05, "loss": 0.7689, "step": 11038 }, { "epoch": 12.573219373219374, "grad_norm": 0.2434314489364624, "learning_rate": 1.6302994376152776e-05, "loss": 0.7189, "step": 11039 }, { "epoch": 12.574358974358974, "grad_norm": 0.18562611937522888, "learning_rate": 1.6298626363262515e-05, "loss": 0.6075, "step": 11040 }, { "epoch": 12.575498575498575, "grad_norm": 0.2555478811264038, "learning_rate": 1.6294258652594883e-05, "loss": 0.5631, "step": 11041 }, { "epoch": 12.576638176638177, "grad_norm": 0.2179694026708603, "learning_rate": 1.6289891244301595e-05, "loss": 0.7161, "step": 11042 }, { "epoch": 12.577777777777778, "grad_norm": 0.250118613243103, "learning_rate": 1.628552413853433e-05, "loss": 0.7754, "step": 11043 }, { "epoch": 12.578917378917378, "grad_norm": 0.20918738842010498, "learning_rate": 1.6281157335444785e-05, "loss": 0.6769, "step": 11044 }, { "epoch": 12.58005698005698, "grad_norm": 0.2173689305782318, "learning_rate": 1.627679083518462e-05, "loss": 0.8834, "step": 11045 }, { "epoch": 12.581196581196581, "grad_norm": 0.252636581659317, "learning_rate": 1.62724246379055e-05, "loss": 0.6731, "step": 11046 }, { "epoch": 12.582336182336181, "grad_norm": 0.2242475003004074, "learning_rate": 1.6268058743759064e-05, "loss": 0.6637, "step": 11047 }, { "epoch": 12.583475783475784, "grad_norm": 0.21343784034252167, "learning_rate": 1.626369315289697e-05, "loss": 0.6821, "step": 11048 }, { "epoch": 12.584615384615384, "grad_norm": 0.20597362518310547, "learning_rate": 1.6259327865470834e-05, "loss": 0.7599, "step": 11049 }, { "epoch": 12.585754985754985, "grad_norm": 0.19783805310726166, "learning_rate": 1.625496288163227e-05, "loss": 0.8005, "step": 11050 }, { "epoch": 12.586894586894587, "grad_norm": 0.20493733882904053, "learning_rate": 1.6250598201532907e-05, "loss": 0.8026, "step": 11051 }, { "epoch": 12.588034188034188, "grad_norm": 0.20619259774684906, "learning_rate": 1.6246233825324324e-05, "loss": 0.6866, "step": 11052 }, { "epoch": 12.589173789173788, "grad_norm": 0.16386501491069794, "learning_rate": 1.624186975315812e-05, "loss": 0.6641, "step": 11053 }, { "epoch": 12.59031339031339, "grad_norm": 0.1730891913175583, "learning_rate": 1.6237505985185853e-05, "loss": 0.7881, "step": 11054 }, { "epoch": 12.591452991452991, "grad_norm": 0.19879373908042908, "learning_rate": 1.6233142521559108e-05, "loss": 0.7469, "step": 11055 }, { "epoch": 12.592592592592592, "grad_norm": 0.27814745903015137, "learning_rate": 1.622877936242943e-05, "loss": 0.6045, "step": 11056 }, { "epoch": 12.593732193732194, "grad_norm": 0.16927435994148254, "learning_rate": 1.6224416507948366e-05, "loss": 0.8278, "step": 11057 }, { "epoch": 12.594871794871795, "grad_norm": 0.16074690222740173, "learning_rate": 1.6220053958267447e-05, "loss": 0.8925, "step": 11058 }, { "epoch": 12.596011396011395, "grad_norm": 0.23557724058628082, "learning_rate": 1.6215691713538207e-05, "loss": 0.5533, "step": 11059 }, { "epoch": 12.597150997150997, "grad_norm": 0.18923786282539368, "learning_rate": 1.621132977391214e-05, "loss": 0.7038, "step": 11060 }, { "epoch": 12.598290598290598, "grad_norm": 0.2361970990896225, "learning_rate": 1.6206968139540758e-05, "loss": 0.6842, "step": 11061 }, { "epoch": 12.5994301994302, "grad_norm": 0.23810768127441406, "learning_rate": 1.6202606810575564e-05, "loss": 0.5675, "step": 11062 }, { "epoch": 12.6005698005698, "grad_norm": 0.1956935077905655, "learning_rate": 1.619824578716802e-05, "loss": 0.885, "step": 11063 }, { "epoch": 12.601709401709401, "grad_norm": 0.2303309291601181, "learning_rate": 1.619388506946961e-05, "loss": 0.6753, "step": 11064 }, { "epoch": 12.602849002849004, "grad_norm": 0.2104651927947998, "learning_rate": 1.618952465763179e-05, "loss": 0.622, "step": 11065 }, { "epoch": 12.603988603988604, "grad_norm": 0.20536163449287415, "learning_rate": 1.6185164551806004e-05, "loss": 0.6924, "step": 11066 }, { "epoch": 12.605128205128205, "grad_norm": 0.2689109146595001, "learning_rate": 1.6180804752143692e-05, "loss": 0.4751, "step": 11067 }, { "epoch": 12.606267806267807, "grad_norm": 0.17425256967544556, "learning_rate": 1.617644525879629e-05, "loss": 0.7078, "step": 11068 }, { "epoch": 12.607407407407408, "grad_norm": 0.22629356384277344, "learning_rate": 1.6172086071915204e-05, "loss": 0.7829, "step": 11069 }, { "epoch": 12.608547008547008, "grad_norm": 0.2518870234489441, "learning_rate": 1.6167727191651848e-05, "loss": 0.577, "step": 11070 }, { "epoch": 12.60968660968661, "grad_norm": 0.17481626570224762, "learning_rate": 1.6163368618157615e-05, "loss": 0.6537, "step": 11071 }, { "epoch": 12.610826210826211, "grad_norm": 0.2701930105686188, "learning_rate": 1.615901035158388e-05, "loss": 0.5066, "step": 11072 }, { "epoch": 12.611965811965812, "grad_norm": 0.17483319342136383, "learning_rate": 1.6154652392082036e-05, "loss": 0.7244, "step": 11073 }, { "epoch": 12.613105413105414, "grad_norm": 0.21778984367847443, "learning_rate": 1.6150294739803445e-05, "loss": 0.6299, "step": 11074 }, { "epoch": 12.614245014245014, "grad_norm": 0.22433669865131378, "learning_rate": 1.614593739489945e-05, "loss": 0.7286, "step": 11075 }, { "epoch": 12.615384615384615, "grad_norm": 0.22760696709156036, "learning_rate": 1.6141580357521403e-05, "loss": 0.7473, "step": 11076 }, { "epoch": 12.616524216524217, "grad_norm": 0.1692415028810501, "learning_rate": 1.6137223627820626e-05, "loss": 0.8738, "step": 11077 }, { "epoch": 12.617663817663818, "grad_norm": 0.48261013627052307, "learning_rate": 1.6132867205948447e-05, "loss": 0.5273, "step": 11078 }, { "epoch": 12.618803418803418, "grad_norm": 0.21189595758914948, "learning_rate": 1.6128511092056174e-05, "loss": 0.779, "step": 11079 }, { "epoch": 12.61994301994302, "grad_norm": 0.24352779984474182, "learning_rate": 1.6124155286295107e-05, "loss": 0.6357, "step": 11080 }, { "epoch": 12.621082621082621, "grad_norm": 0.1993314027786255, "learning_rate": 1.6119799788816537e-05, "loss": 0.6663, "step": 11081 }, { "epoch": 12.622222222222222, "grad_norm": 0.23018746078014374, "learning_rate": 1.6115444599771743e-05, "loss": 0.5983, "step": 11082 }, { "epoch": 12.623361823361824, "grad_norm": 0.17441576719284058, "learning_rate": 1.611108971931198e-05, "loss": 0.7592, "step": 11083 }, { "epoch": 12.624501424501425, "grad_norm": 0.23152019083499908, "learning_rate": 1.610673514758852e-05, "loss": 0.8389, "step": 11084 }, { "epoch": 12.625641025641025, "grad_norm": 0.28077030181884766, "learning_rate": 1.610238088475261e-05, "loss": 0.5543, "step": 11085 }, { "epoch": 12.626780626780628, "grad_norm": 0.22574378550052643, "learning_rate": 1.609802693095548e-05, "loss": 0.7816, "step": 11086 }, { "epoch": 12.627920227920228, "grad_norm": 0.23004484176635742, "learning_rate": 1.6093673286348355e-05, "loss": 0.5654, "step": 11087 }, { "epoch": 12.629059829059829, "grad_norm": 0.2010558694601059, "learning_rate": 1.6089319951082445e-05, "loss": 0.6681, "step": 11088 }, { "epoch": 12.630199430199431, "grad_norm": 0.15654054284095764, "learning_rate": 1.6084966925308966e-05, "loss": 0.9109, "step": 11089 }, { "epoch": 12.631339031339031, "grad_norm": 0.2031426727771759, "learning_rate": 1.6080614209179094e-05, "loss": 0.8188, "step": 11090 }, { "epoch": 12.632478632478632, "grad_norm": 0.17334313690662384, "learning_rate": 1.607626180284402e-05, "loss": 0.8053, "step": 11091 }, { "epoch": 12.633618233618234, "grad_norm": 0.18525288999080658, "learning_rate": 1.607190970645492e-05, "loss": 0.784, "step": 11092 }, { "epoch": 12.634757834757835, "grad_norm": 0.21956297755241394, "learning_rate": 1.6067557920162947e-05, "loss": 0.6042, "step": 11093 }, { "epoch": 12.635897435897435, "grad_norm": 0.26183709502220154, "learning_rate": 1.6063206444119238e-05, "loss": 0.535, "step": 11094 }, { "epoch": 12.637037037037038, "grad_norm": 0.23704814910888672, "learning_rate": 1.6058855278474956e-05, "loss": 0.655, "step": 11095 }, { "epoch": 12.638176638176638, "grad_norm": 0.27600836753845215, "learning_rate": 1.605450442338122e-05, "loss": 0.5203, "step": 11096 }, { "epoch": 12.639316239316239, "grad_norm": 0.22738400101661682, "learning_rate": 1.605015387898914e-05, "loss": 0.5225, "step": 11097 }, { "epoch": 12.640455840455841, "grad_norm": 0.2282770872116089, "learning_rate": 1.6045803645449836e-05, "loss": 0.5768, "step": 11098 }, { "epoch": 12.641595441595442, "grad_norm": 0.23350057005882263, "learning_rate": 1.604145372291439e-05, "loss": 0.6944, "step": 11099 }, { "epoch": 12.642735042735042, "grad_norm": 0.22004786133766174, "learning_rate": 1.6037104111533897e-05, "loss": 0.7505, "step": 11100 }, { "epoch": 12.643874643874645, "grad_norm": 0.2057197391986847, "learning_rate": 1.603275481145942e-05, "loss": 0.6726, "step": 11101 }, { "epoch": 12.645014245014245, "grad_norm": 0.2566360831260681, "learning_rate": 1.6028405822842036e-05, "loss": 0.5354, "step": 11102 }, { "epoch": 12.646153846153846, "grad_norm": 0.19007746875286102, "learning_rate": 1.6024057145832782e-05, "loss": 0.9406, "step": 11103 }, { "epoch": 12.647293447293448, "grad_norm": 0.17702710628509521, "learning_rate": 1.6019708780582715e-05, "loss": 0.8956, "step": 11104 }, { "epoch": 12.648433048433048, "grad_norm": 0.22386471927165985, "learning_rate": 1.6015360727242844e-05, "loss": 0.4807, "step": 11105 }, { "epoch": 12.649572649572649, "grad_norm": 0.23359254002571106, "learning_rate": 1.6011012985964212e-05, "loss": 0.5094, "step": 11106 }, { "epoch": 12.650712250712251, "grad_norm": 0.17404289543628693, "learning_rate": 1.6006665556897815e-05, "loss": 0.7101, "step": 11107 }, { "epoch": 12.651851851851852, "grad_norm": 0.22402898967266083, "learning_rate": 1.6002318440194664e-05, "loss": 0.7611, "step": 11108 }, { "epoch": 12.652991452991452, "grad_norm": 0.16277088224887848, "learning_rate": 1.599797163600573e-05, "loss": 0.7209, "step": 11109 }, { "epoch": 12.654131054131055, "grad_norm": 0.22703644633293152, "learning_rate": 1.5993625144482e-05, "loss": 0.5067, "step": 11110 }, { "epoch": 12.655270655270655, "grad_norm": 0.27813565731048584, "learning_rate": 1.5989278965774433e-05, "loss": 0.6124, "step": 11111 }, { "epoch": 12.656410256410256, "grad_norm": 0.2167578786611557, "learning_rate": 1.5984933100033995e-05, "loss": 0.6381, "step": 11112 }, { "epoch": 12.657549857549858, "grad_norm": 0.19387587904930115, "learning_rate": 1.5980587547411612e-05, "loss": 0.6903, "step": 11113 }, { "epoch": 12.658689458689459, "grad_norm": 0.24054613709449768, "learning_rate": 1.5976242308058235e-05, "loss": 0.7076, "step": 11114 }, { "epoch": 12.65982905982906, "grad_norm": 0.2635744512081146, "learning_rate": 1.5971897382124773e-05, "loss": 0.5763, "step": 11115 }, { "epoch": 12.660968660968662, "grad_norm": 0.18814826011657715, "learning_rate": 1.5967552769762138e-05, "loss": 0.5222, "step": 11116 }, { "epoch": 12.662108262108262, "grad_norm": 0.2089758962392807, "learning_rate": 1.5963208471121245e-05, "loss": 0.6821, "step": 11117 }, { "epoch": 12.663247863247863, "grad_norm": 0.23014183342456818, "learning_rate": 1.5958864486352964e-05, "loss": 0.6499, "step": 11118 }, { "epoch": 12.664387464387465, "grad_norm": 0.31964734196662903, "learning_rate": 1.595452081560819e-05, "loss": 0.5052, "step": 11119 }, { "epoch": 12.665527065527066, "grad_norm": 0.22384202480316162, "learning_rate": 1.595017745903778e-05, "loss": 0.7366, "step": 11120 }, { "epoch": 12.666666666666666, "grad_norm": 0.18546050786972046, "learning_rate": 1.5945834416792596e-05, "loss": 0.7342, "step": 11121 }, { "epoch": 12.667806267806268, "grad_norm": 0.22354067862033844, "learning_rate": 1.594149168902348e-05, "loss": 0.8117, "step": 11122 }, { "epoch": 12.668945868945869, "grad_norm": 0.2659115791320801, "learning_rate": 1.5937149275881268e-05, "loss": 0.5371, "step": 11123 }, { "epoch": 12.67008547008547, "grad_norm": 0.22612543404102325, "learning_rate": 1.5932807177516784e-05, "loss": 0.673, "step": 11124 }, { "epoch": 12.671225071225072, "grad_norm": 0.21471258997917175, "learning_rate": 1.5928465394080848e-05, "loss": 0.722, "step": 11125 }, { "epoch": 12.672364672364672, "grad_norm": 0.2103831171989441, "learning_rate": 1.5924123925724248e-05, "loss": 0.838, "step": 11126 }, { "epoch": 12.673504273504273, "grad_norm": 0.2037634402513504, "learning_rate": 1.5919782772597776e-05, "loss": 0.4347, "step": 11127 }, { "epoch": 12.674643874643875, "grad_norm": 0.18816131353378296, "learning_rate": 1.591544193485223e-05, "loss": 0.8471, "step": 11128 }, { "epoch": 12.675783475783476, "grad_norm": 0.19635280966758728, "learning_rate": 1.591110141263837e-05, "loss": 0.6587, "step": 11129 }, { "epoch": 12.676923076923076, "grad_norm": 0.19508196413516998, "learning_rate": 1.5906761206106953e-05, "loss": 0.7512, "step": 11130 }, { "epoch": 12.678062678062679, "grad_norm": 0.2978247106075287, "learning_rate": 1.5902421315408716e-05, "loss": 0.6487, "step": 11131 }, { "epoch": 12.67920227920228, "grad_norm": 0.2564646601676941, "learning_rate": 1.5898081740694416e-05, "loss": 0.6217, "step": 11132 }, { "epoch": 12.68034188034188, "grad_norm": 0.23250678181648254, "learning_rate": 1.589374248211476e-05, "loss": 0.6793, "step": 11133 }, { "epoch": 12.681481481481482, "grad_norm": 0.2524375319480896, "learning_rate": 1.5889403539820476e-05, "loss": 0.4932, "step": 11134 }, { "epoch": 12.682621082621083, "grad_norm": 0.18259340524673462, "learning_rate": 1.5885064913962255e-05, "loss": 0.6859, "step": 11135 }, { "epoch": 12.683760683760683, "grad_norm": 0.1866711527109146, "learning_rate": 1.58807266046908e-05, "loss": 0.688, "step": 11136 }, { "epoch": 12.684900284900285, "grad_norm": 0.22629667818546295, "learning_rate": 1.5876388612156782e-05, "loss": 0.588, "step": 11137 }, { "epoch": 12.686039886039886, "grad_norm": 0.19245080649852753, "learning_rate": 1.587205093651088e-05, "loss": 0.8236, "step": 11138 }, { "epoch": 12.687179487179487, "grad_norm": 0.21643824875354767, "learning_rate": 1.5867713577903752e-05, "loss": 0.5575, "step": 11139 }, { "epoch": 12.688319088319089, "grad_norm": 0.18831786513328552, "learning_rate": 1.5863376536486046e-05, "loss": 0.6612, "step": 11140 }, { "epoch": 12.68945868945869, "grad_norm": 0.1758168339729309, "learning_rate": 1.5859039812408404e-05, "loss": 0.6423, "step": 11141 }, { "epoch": 12.69059829059829, "grad_norm": 0.2479313611984253, "learning_rate": 1.5854703405821438e-05, "loss": 0.5712, "step": 11142 }, { "epoch": 12.691737891737892, "grad_norm": 0.1954648196697235, "learning_rate": 1.585036731687578e-05, "loss": 0.7463, "step": 11143 }, { "epoch": 12.692877492877493, "grad_norm": 0.22821049392223358, "learning_rate": 1.5846031545722022e-05, "loss": 0.6038, "step": 11144 }, { "epoch": 12.694017094017093, "grad_norm": 0.17145420610904694, "learning_rate": 1.5841696092510767e-05, "loss": 0.7862, "step": 11145 }, { "epoch": 12.695156695156696, "grad_norm": 0.2066497951745987, "learning_rate": 1.5837360957392584e-05, "loss": 0.7199, "step": 11146 }, { "epoch": 12.696296296296296, "grad_norm": 0.2200264185667038, "learning_rate": 1.5833026140518063e-05, "loss": 0.7711, "step": 11147 }, { "epoch": 12.697435897435897, "grad_norm": 0.2565844655036926, "learning_rate": 1.5828691642037742e-05, "loss": 0.4629, "step": 11148 }, { "epoch": 12.698575498575499, "grad_norm": 0.16445843875408173, "learning_rate": 1.5824357462102178e-05, "loss": 0.8913, "step": 11149 }, { "epoch": 12.6997150997151, "grad_norm": 0.22528819739818573, "learning_rate": 1.582002360086192e-05, "loss": 0.6493, "step": 11150 }, { "epoch": 12.7008547008547, "grad_norm": 0.19773760437965393, "learning_rate": 1.581569005846749e-05, "loss": 0.7771, "step": 11151 }, { "epoch": 12.701994301994302, "grad_norm": 0.25532928109169006, "learning_rate": 1.5811356835069402e-05, "loss": 0.7498, "step": 11152 }, { "epoch": 12.703133903133903, "grad_norm": 0.22465664148330688, "learning_rate": 1.580702393081816e-05, "loss": 0.5811, "step": 11153 }, { "epoch": 12.704273504273504, "grad_norm": 0.2509215772151947, "learning_rate": 1.580269134586425e-05, "loss": 0.5316, "step": 11154 }, { "epoch": 12.705413105413106, "grad_norm": 0.22902412712574005, "learning_rate": 1.579835908035817e-05, "loss": 0.3799, "step": 11155 }, { "epoch": 12.706552706552706, "grad_norm": 0.2320164144039154, "learning_rate": 1.5794027134450382e-05, "loss": 0.627, "step": 11156 }, { "epoch": 12.707692307692307, "grad_norm": 0.2339378446340561, "learning_rate": 1.5789695508291348e-05, "loss": 0.5747, "step": 11157 }, { "epoch": 12.70883190883191, "grad_norm": 0.2223222702741623, "learning_rate": 1.5785364202031515e-05, "loss": 0.6875, "step": 11158 }, { "epoch": 12.70997150997151, "grad_norm": 0.1851043403148651, "learning_rate": 1.578103321582133e-05, "loss": 0.6541, "step": 11159 }, { "epoch": 12.71111111111111, "grad_norm": 0.2115449160337448, "learning_rate": 1.57767025498112e-05, "loss": 0.6917, "step": 11160 }, { "epoch": 12.712250712250713, "grad_norm": 0.21514242887496948, "learning_rate": 1.5772372204151564e-05, "loss": 0.535, "step": 11161 }, { "epoch": 12.713390313390313, "grad_norm": 0.2880719304084778, "learning_rate": 1.576804217899282e-05, "loss": 0.6128, "step": 11162 }, { "epoch": 12.714529914529914, "grad_norm": 0.1779347062110901, "learning_rate": 1.5763712474485352e-05, "loss": 1.077, "step": 11163 }, { "epoch": 12.715669515669516, "grad_norm": 0.196516215801239, "learning_rate": 1.5759383090779557e-05, "loss": 0.7063, "step": 11164 }, { "epoch": 12.716809116809117, "grad_norm": 0.2461429238319397, "learning_rate": 1.5755054028025795e-05, "loss": 0.5388, "step": 11165 }, { "epoch": 12.717948717948717, "grad_norm": 0.19045525789260864, "learning_rate": 1.5750725286374436e-05, "loss": 0.7982, "step": 11166 }, { "epoch": 12.71908831908832, "grad_norm": 0.22819633781909943, "learning_rate": 1.5746396865975814e-05, "loss": 0.6243, "step": 11167 }, { "epoch": 12.72022792022792, "grad_norm": 0.20307184755802155, "learning_rate": 1.5742068766980282e-05, "loss": 0.5656, "step": 11168 }, { "epoch": 12.72136752136752, "grad_norm": 0.2208218276500702, "learning_rate": 1.573774098953816e-05, "loss": 0.6779, "step": 11169 }, { "epoch": 12.722507122507123, "grad_norm": 0.20758362114429474, "learning_rate": 1.5733413533799767e-05, "loss": 0.8041, "step": 11170 }, { "epoch": 12.723646723646723, "grad_norm": 0.17610397934913635, "learning_rate": 1.5729086399915398e-05, "loss": 0.6553, "step": 11171 }, { "epoch": 12.724786324786324, "grad_norm": 0.201230987906456, "learning_rate": 1.5724759588035358e-05, "loss": 0.6433, "step": 11172 }, { "epoch": 12.725925925925926, "grad_norm": 0.23075005412101746, "learning_rate": 1.5720433098309927e-05, "loss": 0.6756, "step": 11173 }, { "epoch": 12.727065527065527, "grad_norm": 0.19387048482894897, "learning_rate": 1.5716106930889368e-05, "loss": 0.6994, "step": 11174 }, { "epoch": 12.728205128205127, "grad_norm": 0.2190813422203064, "learning_rate": 1.5711781085923953e-05, "loss": 0.481, "step": 11175 }, { "epoch": 12.72934472934473, "grad_norm": 0.24127015471458435, "learning_rate": 1.5707455563563917e-05, "loss": 0.6465, "step": 11176 }, { "epoch": 12.73048433048433, "grad_norm": 0.25211644172668457, "learning_rate": 1.570313036395951e-05, "loss": 0.5271, "step": 11177 }, { "epoch": 12.73162393162393, "grad_norm": 0.2326471209526062, "learning_rate": 1.5698805487260948e-05, "loss": 0.5954, "step": 11178 }, { "epoch": 12.732763532763533, "grad_norm": 0.21010422706604004, "learning_rate": 1.5694480933618456e-05, "loss": 0.7784, "step": 11179 }, { "epoch": 12.733903133903134, "grad_norm": 0.15468095242977142, "learning_rate": 1.569015670318222e-05, "loss": 0.7947, "step": 11180 }, { "epoch": 12.735042735042736, "grad_norm": 0.25469398498535156, "learning_rate": 1.5685832796102458e-05, "loss": 0.4579, "step": 11181 }, { "epoch": 12.736182336182337, "grad_norm": 0.2417183816432953, "learning_rate": 1.5681509212529318e-05, "loss": 0.6263, "step": 11182 }, { "epoch": 12.737321937321937, "grad_norm": 0.2280004471540451, "learning_rate": 1.5677185952613e-05, "loss": 0.6585, "step": 11183 }, { "epoch": 12.73846153846154, "grad_norm": 0.20425523817539215, "learning_rate": 1.5672863016503653e-05, "loss": 0.6748, "step": 11184 }, { "epoch": 12.73960113960114, "grad_norm": 0.2509705424308777, "learning_rate": 1.566854040435142e-05, "loss": 0.5541, "step": 11185 }, { "epoch": 12.74074074074074, "grad_norm": 0.31750181317329407, "learning_rate": 1.566421811630644e-05, "loss": 0.7052, "step": 11186 }, { "epoch": 12.741880341880343, "grad_norm": 0.24409219622612, "learning_rate": 1.5659896152518844e-05, "loss": 0.7305, "step": 11187 }, { "epoch": 12.743019943019943, "grad_norm": 0.1938214898109436, "learning_rate": 1.5655574513138735e-05, "loss": 0.7939, "step": 11188 }, { "epoch": 12.744159544159544, "grad_norm": 0.25239941477775574, "learning_rate": 1.5651253198316222e-05, "loss": 0.6208, "step": 11189 }, { "epoch": 12.745299145299146, "grad_norm": 0.22138601541519165, "learning_rate": 1.5646932208201395e-05, "loss": 0.5108, "step": 11190 }, { "epoch": 12.746438746438747, "grad_norm": 0.18638557195663452, "learning_rate": 1.5642611542944336e-05, "loss": 0.8004, "step": 11191 }, { "epoch": 12.747578347578347, "grad_norm": 0.21830163896083832, "learning_rate": 1.5638291202695104e-05, "loss": 0.656, "step": 11192 }, { "epoch": 12.74871794871795, "grad_norm": 0.19808819890022278, "learning_rate": 1.5633971187603765e-05, "loss": 0.7945, "step": 11193 }, { "epoch": 12.74985754985755, "grad_norm": 0.20925697684288025, "learning_rate": 1.5629651497820365e-05, "loss": 0.6595, "step": 11194 }, { "epoch": 12.75099715099715, "grad_norm": 0.24728427827358246, "learning_rate": 1.5625332133494935e-05, "loss": 0.5493, "step": 11195 }, { "epoch": 12.752136752136753, "grad_norm": 0.2341899275779724, "learning_rate": 1.562101309477751e-05, "loss": 0.6311, "step": 11196 }, { "epoch": 12.753276353276354, "grad_norm": 0.20414113998413086, "learning_rate": 1.561669438181808e-05, "loss": 0.6863, "step": 11197 }, { "epoch": 12.754415954415954, "grad_norm": 0.21995875239372253, "learning_rate": 1.561237599476667e-05, "loss": 0.8367, "step": 11198 }, { "epoch": 12.755555555555556, "grad_norm": 0.1657383143901825, "learning_rate": 1.5608057933773247e-05, "loss": 0.727, "step": 11199 }, { "epoch": 12.756695156695157, "grad_norm": 0.23210634291172028, "learning_rate": 1.560374019898781e-05, "loss": 0.6924, "step": 11200 }, { "epoch": 12.757834757834758, "grad_norm": 0.19804133474826813, "learning_rate": 1.5599422790560307e-05, "loss": 0.7299, "step": 11201 }, { "epoch": 12.75897435897436, "grad_norm": 0.268680602312088, "learning_rate": 1.559510570864071e-05, "loss": 0.4928, "step": 11202 }, { "epoch": 12.76011396011396, "grad_norm": 0.21380242705345154, "learning_rate": 1.5590788953378947e-05, "loss": 0.5975, "step": 11203 }, { "epoch": 12.761253561253561, "grad_norm": 0.24435050785541534, "learning_rate": 1.5586472524924956e-05, "loss": 0.6826, "step": 11204 }, { "epoch": 12.762393162393163, "grad_norm": 0.2538256347179413, "learning_rate": 1.5582156423428675e-05, "loss": 0.5048, "step": 11205 }, { "epoch": 12.763532763532764, "grad_norm": 0.2266242355108261, "learning_rate": 1.557784064903999e-05, "loss": 0.6112, "step": 11206 }, { "epoch": 12.764672364672364, "grad_norm": 0.22259734570980072, "learning_rate": 1.5573525201908817e-05, "loss": 0.7573, "step": 11207 }, { "epoch": 12.765811965811967, "grad_norm": 0.24566206336021423, "learning_rate": 1.5569210082185033e-05, "loss": 0.7427, "step": 11208 }, { "epoch": 12.766951566951567, "grad_norm": 0.18628554046154022, "learning_rate": 1.556489529001852e-05, "loss": 0.8772, "step": 11209 }, { "epoch": 12.768091168091168, "grad_norm": 0.2506822645664215, "learning_rate": 1.5560580825559137e-05, "loss": 0.6094, "step": 11210 }, { "epoch": 12.76923076923077, "grad_norm": 0.18677690625190735, "learning_rate": 1.5556266688956744e-05, "loss": 0.6967, "step": 11211 }, { "epoch": 12.77037037037037, "grad_norm": 0.15620164573192596, "learning_rate": 1.5551952880361175e-05, "loss": 0.7795, "step": 11212 }, { "epoch": 12.771509971509971, "grad_norm": 0.21999280154705048, "learning_rate": 1.5547639399922273e-05, "loss": 0.4862, "step": 11213 }, { "epoch": 12.772649572649573, "grad_norm": 0.20681433379650116, "learning_rate": 1.554332624778984e-05, "loss": 0.5806, "step": 11214 }, { "epoch": 12.773789173789174, "grad_norm": 0.18957693874835968, "learning_rate": 1.5539013424113686e-05, "loss": 0.7632, "step": 11215 }, { "epoch": 12.774928774928775, "grad_norm": 0.19347313046455383, "learning_rate": 1.5534700929043628e-05, "loss": 0.7254, "step": 11216 }, { "epoch": 12.776068376068377, "grad_norm": 0.21261726319789886, "learning_rate": 1.5530388762729432e-05, "loss": 0.6183, "step": 11217 }, { "epoch": 12.777207977207977, "grad_norm": 0.2281065732240677, "learning_rate": 1.5526076925320877e-05, "loss": 0.6127, "step": 11218 }, { "epoch": 12.778347578347578, "grad_norm": 0.2227533459663391, "learning_rate": 1.552176541696772e-05, "loss": 0.7665, "step": 11219 }, { "epoch": 12.77948717948718, "grad_norm": 0.21940748393535614, "learning_rate": 1.551745423781972e-05, "loss": 0.6665, "step": 11220 }, { "epoch": 12.78062678062678, "grad_norm": 0.22539184987545013, "learning_rate": 1.5513143388026606e-05, "loss": 0.5846, "step": 11221 }, { "epoch": 12.781766381766381, "grad_norm": 0.2524380683898926, "learning_rate": 1.5508832867738116e-05, "loss": 0.5339, "step": 11222 }, { "epoch": 12.782905982905984, "grad_norm": 0.1836635023355484, "learning_rate": 1.550452267710396e-05, "loss": 0.7841, "step": 11223 }, { "epoch": 12.784045584045584, "grad_norm": 0.16528676450252533, "learning_rate": 1.5500212816273844e-05, "loss": 0.7765, "step": 11224 }, { "epoch": 12.785185185185185, "grad_norm": 0.1646662801504135, "learning_rate": 1.549590328539746e-05, "loss": 0.7165, "step": 11225 }, { "epoch": 12.786324786324787, "grad_norm": 0.16733065247535706, "learning_rate": 1.5491594084624483e-05, "loss": 0.7258, "step": 11226 }, { "epoch": 12.787464387464388, "grad_norm": 0.21924377977848053, "learning_rate": 1.5487285214104595e-05, "loss": 0.794, "step": 11227 }, { "epoch": 12.788603988603988, "grad_norm": 0.19709335267543793, "learning_rate": 1.5482976673987465e-05, "loss": 0.5388, "step": 11228 }, { "epoch": 12.78974358974359, "grad_norm": 0.21844550967216492, "learning_rate": 1.5478668464422717e-05, "loss": 0.8007, "step": 11229 }, { "epoch": 12.790883190883191, "grad_norm": 0.19894273579120636, "learning_rate": 1.5474360585559998e-05, "loss": 0.675, "step": 11230 }, { "epoch": 12.792022792022792, "grad_norm": 0.2822384536266327, "learning_rate": 1.547005303754893e-05, "loss": 0.4806, "step": 11231 }, { "epoch": 12.793162393162394, "grad_norm": 0.27862998843193054, "learning_rate": 1.5465745820539135e-05, "loss": 0.5472, "step": 11232 }, { "epoch": 12.794301994301994, "grad_norm": 0.21956251561641693, "learning_rate": 1.54614389346802e-05, "loss": 0.5191, "step": 11233 }, { "epoch": 12.795441595441595, "grad_norm": 0.19542093575000763, "learning_rate": 1.5457132380121728e-05, "loss": 0.7289, "step": 11234 }, { "epoch": 12.796581196581197, "grad_norm": 0.24643944203853607, "learning_rate": 1.5452826157013284e-05, "loss": 0.6039, "step": 11235 }, { "epoch": 12.797720797720798, "grad_norm": 0.2070317417383194, "learning_rate": 1.544852026550445e-05, "loss": 0.581, "step": 11236 }, { "epoch": 12.798860398860398, "grad_norm": 0.18230903148651123, "learning_rate": 1.5444214705744764e-05, "loss": 0.7172, "step": 11237 }, { "epoch": 12.8, "grad_norm": 0.2501848042011261, "learning_rate": 1.5439909477883784e-05, "loss": 0.643, "step": 11238 }, { "epoch": 12.801139601139601, "grad_norm": 0.23969808220863342, "learning_rate": 1.543560458207104e-05, "loss": 0.674, "step": 11239 }, { "epoch": 12.802279202279202, "grad_norm": 0.24644148349761963, "learning_rate": 1.5431300018456047e-05, "loss": 0.465, "step": 11240 }, { "epoch": 12.803418803418804, "grad_norm": 0.2110668420791626, "learning_rate": 1.5426995787188324e-05, "loss": 0.5697, "step": 11241 }, { "epoch": 12.804558404558405, "grad_norm": 0.1825806349515915, "learning_rate": 1.5422691888417364e-05, "loss": 0.7538, "step": 11242 }, { "epoch": 12.805698005698005, "grad_norm": 0.24073870480060577, "learning_rate": 1.541838832229265e-05, "loss": 0.612, "step": 11243 }, { "epoch": 12.806837606837608, "grad_norm": 0.19120050966739655, "learning_rate": 1.5414085088963658e-05, "loss": 0.638, "step": 11244 }, { "epoch": 12.807977207977208, "grad_norm": 0.23519383370876312, "learning_rate": 1.5409782188579855e-05, "loss": 0.5826, "step": 11245 }, { "epoch": 12.809116809116809, "grad_norm": 0.31809619069099426, "learning_rate": 1.5405479621290687e-05, "loss": 0.3012, "step": 11246 }, { "epoch": 12.810256410256411, "grad_norm": 0.21087875962257385, "learning_rate": 1.54011773872456e-05, "loss": 0.7106, "step": 11247 }, { "epoch": 12.811396011396011, "grad_norm": 0.2201055884361267, "learning_rate": 1.5396875486594007e-05, "loss": 0.6285, "step": 11248 }, { "epoch": 12.812535612535612, "grad_norm": 0.18184322118759155, "learning_rate": 1.539257391948535e-05, "loss": 0.778, "step": 11249 }, { "epoch": 12.813675213675214, "grad_norm": 0.21316279470920563, "learning_rate": 1.5388272686069016e-05, "loss": 0.7233, "step": 11250 }, { "epoch": 12.814814814814815, "grad_norm": 0.21728692948818207, "learning_rate": 1.5383971786494406e-05, "loss": 0.6832, "step": 11251 }, { "epoch": 12.815954415954415, "grad_norm": 0.2114868015050888, "learning_rate": 1.53796712209109e-05, "loss": 0.6524, "step": 11252 }, { "epoch": 12.817094017094018, "grad_norm": 0.2465014010667801, "learning_rate": 1.537537098946787e-05, "loss": 0.8676, "step": 11253 }, { "epoch": 12.818233618233618, "grad_norm": 0.2124163955450058, "learning_rate": 1.5371071092314672e-05, "loss": 0.7978, "step": 11254 }, { "epoch": 12.819373219373219, "grad_norm": 0.19976741075515747, "learning_rate": 1.5366771529600653e-05, "loss": 0.7583, "step": 11255 }, { "epoch": 12.820512820512821, "grad_norm": 0.19268165528774261, "learning_rate": 1.5362472301475158e-05, "loss": 0.8674, "step": 11256 }, { "epoch": 12.821652421652422, "grad_norm": 0.19234377145767212, "learning_rate": 1.5358173408087495e-05, "loss": 0.6675, "step": 11257 }, { "epoch": 12.822792022792022, "grad_norm": 0.21339838206768036, "learning_rate": 1.535387484958699e-05, "loss": 0.6563, "step": 11258 }, { "epoch": 12.823931623931625, "grad_norm": 0.24457897245883942, "learning_rate": 1.5349576626122923e-05, "loss": 0.6577, "step": 11259 }, { "epoch": 12.825071225071225, "grad_norm": 0.27925947308540344, "learning_rate": 1.5345278737844614e-05, "loss": 0.5595, "step": 11260 }, { "epoch": 12.826210826210826, "grad_norm": 0.1818474531173706, "learning_rate": 1.534098118490132e-05, "loss": 0.6588, "step": 11261 }, { "epoch": 12.827350427350428, "grad_norm": 0.16830523312091827, "learning_rate": 1.5336683967442315e-05, "loss": 0.9107, "step": 11262 }, { "epoch": 12.828490028490029, "grad_norm": 0.18106761574745178, "learning_rate": 1.533238708561685e-05, "loss": 0.7089, "step": 11263 }, { "epoch": 12.829629629629629, "grad_norm": 0.16657721996307373, "learning_rate": 1.5328090539574162e-05, "loss": 0.7726, "step": 11264 }, { "epoch": 12.830769230769231, "grad_norm": 0.21090742945671082, "learning_rate": 1.532379432946349e-05, "loss": 0.6567, "step": 11265 }, { "epoch": 12.831908831908832, "grad_norm": 0.25384172797203064, "learning_rate": 1.5319498455434057e-05, "loss": 0.4562, "step": 11266 }, { "epoch": 12.833048433048432, "grad_norm": 0.20707079768180847, "learning_rate": 1.531520291763506e-05, "loss": 0.8442, "step": 11267 }, { "epoch": 12.834188034188035, "grad_norm": 0.20802339911460876, "learning_rate": 1.53109077162157e-05, "loss": 0.6555, "step": 11268 }, { "epoch": 12.835327635327635, "grad_norm": 0.21156306564807892, "learning_rate": 1.5306612851325154e-05, "loss": 0.6527, "step": 11269 }, { "epoch": 12.836467236467236, "grad_norm": 2.3743772506713867, "learning_rate": 1.5302318323112607e-05, "loss": 0.4502, "step": 11270 }, { "epoch": 12.837606837606838, "grad_norm": 0.25030121207237244, "learning_rate": 1.5298024131727207e-05, "loss": 0.5459, "step": 11271 }, { "epoch": 12.838746438746439, "grad_norm": 0.20316855609416962, "learning_rate": 1.529373027731811e-05, "loss": 0.8273, "step": 11272 }, { "epoch": 12.83988603988604, "grad_norm": 0.18483185768127441, "learning_rate": 1.528943676003446e-05, "loss": 0.8948, "step": 11273 }, { "epoch": 12.841025641025642, "grad_norm": 0.23908449709415436, "learning_rate": 1.528514358002537e-05, "loss": 0.7057, "step": 11274 }, { "epoch": 12.842165242165242, "grad_norm": 0.20082879066467285, "learning_rate": 1.5280850737439967e-05, "loss": 0.7533, "step": 11275 }, { "epoch": 12.843304843304843, "grad_norm": 0.20539212226867676, "learning_rate": 1.5276558232427335e-05, "loss": 0.7364, "step": 11276 }, { "epoch": 12.844444444444445, "grad_norm": 0.23212577402591705, "learning_rate": 1.5272266065136583e-05, "loss": 0.6454, "step": 11277 }, { "epoch": 12.845584045584046, "grad_norm": 0.21794946491718292, "learning_rate": 1.526797423571678e-05, "loss": 0.7836, "step": 11278 }, { "epoch": 12.846723646723646, "grad_norm": 0.23278498649597168, "learning_rate": 1.5263682744316997e-05, "loss": 0.4949, "step": 11279 }, { "epoch": 12.847863247863248, "grad_norm": 0.19228476285934448, "learning_rate": 1.525939159108628e-05, "loss": 0.8375, "step": 11280 }, { "epoch": 12.849002849002849, "grad_norm": 0.1980063021183014, "learning_rate": 1.5255100776173688e-05, "loss": 0.7407, "step": 11281 }, { "epoch": 12.85014245014245, "grad_norm": 0.21237735450267792, "learning_rate": 1.5250810299728235e-05, "loss": 0.5179, "step": 11282 }, { "epoch": 12.851282051282052, "grad_norm": 0.25003233551979065, "learning_rate": 1.5246520161898952e-05, "loss": 0.6853, "step": 11283 }, { "epoch": 12.852421652421652, "grad_norm": 0.2409413456916809, "learning_rate": 1.5242230362834855e-05, "loss": 0.6818, "step": 11284 }, { "epoch": 12.853561253561253, "grad_norm": 0.23972086608409882, "learning_rate": 1.5237940902684924e-05, "loss": 0.5063, "step": 11285 }, { "epoch": 12.854700854700855, "grad_norm": 0.2771264910697937, "learning_rate": 1.5233651781598157e-05, "loss": 0.5091, "step": 11286 }, { "epoch": 12.855840455840456, "grad_norm": 0.23623840510845184, "learning_rate": 1.5229362999723517e-05, "loss": 0.7786, "step": 11287 }, { "epoch": 12.856980056980056, "grad_norm": 0.23032218217849731, "learning_rate": 1.5225074557209974e-05, "loss": 0.5387, "step": 11288 }, { "epoch": 12.858119658119659, "grad_norm": 0.26485055685043335, "learning_rate": 1.5220786454206468e-05, "loss": 0.5621, "step": 11289 }, { "epoch": 12.85925925925926, "grad_norm": 0.21657170355319977, "learning_rate": 1.5216498690861946e-05, "loss": 0.6269, "step": 11290 }, { "epoch": 12.86039886039886, "grad_norm": 0.367741197347641, "learning_rate": 1.5212211267325324e-05, "loss": 0.3114, "step": 11291 }, { "epoch": 12.861538461538462, "grad_norm": 0.24216917157173157, "learning_rate": 1.5207924183745525e-05, "loss": 0.57, "step": 11292 }, { "epoch": 12.862678062678063, "grad_norm": 0.24328188598155975, "learning_rate": 1.520363744027144e-05, "loss": 0.7571, "step": 11293 }, { "epoch": 12.863817663817663, "grad_norm": 0.17762377858161926, "learning_rate": 1.519935103705197e-05, "loss": 0.7394, "step": 11294 }, { "epoch": 12.864957264957265, "grad_norm": 0.2241375893354416, "learning_rate": 1.5195064974235996e-05, "loss": 0.711, "step": 11295 }, { "epoch": 12.866096866096866, "grad_norm": 0.3043307363986969, "learning_rate": 1.5190779251972376e-05, "loss": 0.4974, "step": 11296 }, { "epoch": 12.867236467236467, "grad_norm": 0.21094325184822083, "learning_rate": 1.5186493870409968e-05, "loss": 0.6314, "step": 11297 }, { "epoch": 12.868376068376069, "grad_norm": 0.22727924585342407, "learning_rate": 1.5182208829697614e-05, "loss": 0.7082, "step": 11298 }, { "epoch": 12.86951566951567, "grad_norm": 0.17573437094688416, "learning_rate": 1.517792412998415e-05, "loss": 0.8817, "step": 11299 }, { "epoch": 12.87065527065527, "grad_norm": 0.2225823700428009, "learning_rate": 1.5173639771418389e-05, "loss": 0.8804, "step": 11300 }, { "epoch": 12.871794871794872, "grad_norm": 0.21569858491420746, "learning_rate": 1.5169355754149145e-05, "loss": 0.5543, "step": 11301 }, { "epoch": 12.872934472934473, "grad_norm": 0.19905759394168854, "learning_rate": 1.5165072078325204e-05, "loss": 0.8759, "step": 11302 }, { "epoch": 12.874074074074073, "grad_norm": 0.20556959509849548, "learning_rate": 1.5160788744095362e-05, "loss": 0.6941, "step": 11303 }, { "epoch": 12.875213675213676, "grad_norm": 0.19223438203334808, "learning_rate": 1.5156505751608369e-05, "loss": 0.7244, "step": 11304 }, { "epoch": 12.876353276353276, "grad_norm": 0.18705134093761444, "learning_rate": 1.5152223101013016e-05, "loss": 0.7124, "step": 11305 }, { "epoch": 12.877492877492877, "grad_norm": 0.19291657209396362, "learning_rate": 1.5147940792458032e-05, "loss": 0.7488, "step": 11306 }, { "epoch": 12.878632478632479, "grad_norm": 0.24266314506530762, "learning_rate": 1.514365882609216e-05, "loss": 0.5949, "step": 11307 }, { "epoch": 12.87977207977208, "grad_norm": 0.24872809648513794, "learning_rate": 1.513937720206412e-05, "loss": 0.649, "step": 11308 }, { "epoch": 12.88091168091168, "grad_norm": 0.22760015726089478, "learning_rate": 1.5135095920522632e-05, "loss": 0.6703, "step": 11309 }, { "epoch": 12.882051282051282, "grad_norm": 0.19884894788265228, "learning_rate": 1.5130814981616383e-05, "loss": 0.7886, "step": 11310 }, { "epoch": 12.883190883190883, "grad_norm": 0.25892016291618347, "learning_rate": 1.5126534385494079e-05, "loss": 0.7138, "step": 11311 }, { "epoch": 12.884330484330484, "grad_norm": 0.3058724105358124, "learning_rate": 1.5122254132304381e-05, "loss": 0.3287, "step": 11312 }, { "epoch": 12.885470085470086, "grad_norm": 0.2279716581106186, "learning_rate": 1.5117974222195965e-05, "loss": 0.6315, "step": 11313 }, { "epoch": 12.886609686609686, "grad_norm": 0.2113867700099945, "learning_rate": 1.5113694655317476e-05, "loss": 0.5906, "step": 11314 }, { "epoch": 12.887749287749287, "grad_norm": 0.2106524407863617, "learning_rate": 1.5109415431817553e-05, "loss": 0.5456, "step": 11315 }, { "epoch": 12.88888888888889, "grad_norm": 0.21508152782917023, "learning_rate": 1.5105136551844846e-05, "loss": 0.7473, "step": 11316 }, { "epoch": 12.89002849002849, "grad_norm": 0.22165998816490173, "learning_rate": 1.5100858015547947e-05, "loss": 0.5849, "step": 11317 }, { "epoch": 12.89116809116809, "grad_norm": 0.1639089435338974, "learning_rate": 1.5096579823075479e-05, "loss": 0.6897, "step": 11318 }, { "epoch": 12.892307692307693, "grad_norm": 0.17655792832374573, "learning_rate": 1.5092301974576023e-05, "loss": 0.9143, "step": 11319 }, { "epoch": 12.893447293447293, "grad_norm": 0.22830168902873993, "learning_rate": 1.508802447019817e-05, "loss": 0.6356, "step": 11320 }, { "epoch": 12.894586894586894, "grad_norm": 0.2639561891555786, "learning_rate": 1.508374731009048e-05, "loss": 0.6211, "step": 11321 }, { "epoch": 12.895726495726496, "grad_norm": 0.24511019885540009, "learning_rate": 1.507947049440152e-05, "loss": 0.5841, "step": 11322 }, { "epoch": 12.896866096866097, "grad_norm": 0.23563529551029205, "learning_rate": 1.507519402327983e-05, "loss": 0.5909, "step": 11323 }, { "epoch": 12.898005698005697, "grad_norm": 0.2009643316268921, "learning_rate": 1.5070917896873946e-05, "loss": 0.8266, "step": 11324 }, { "epoch": 12.8991452991453, "grad_norm": 0.15463519096374512, "learning_rate": 1.5066642115332386e-05, "loss": 0.8298, "step": 11325 }, { "epoch": 12.9002849002849, "grad_norm": 0.18611261248588562, "learning_rate": 1.5062366678803654e-05, "loss": 0.6996, "step": 11326 }, { "epoch": 12.9014245014245, "grad_norm": 0.19349269568920135, "learning_rate": 1.5058091587436268e-05, "loss": 0.7205, "step": 11327 }, { "epoch": 12.902564102564103, "grad_norm": 0.22040440142154694, "learning_rate": 1.5053816841378698e-05, "loss": 0.7988, "step": 11328 }, { "epoch": 12.903703703703703, "grad_norm": 0.23214900493621826, "learning_rate": 1.5049542440779424e-05, "loss": 0.732, "step": 11329 }, { "epoch": 12.904843304843304, "grad_norm": 0.2591022551059723, "learning_rate": 1.5045268385786904e-05, "loss": 0.7225, "step": 11330 }, { "epoch": 12.905982905982906, "grad_norm": 0.20048879086971283, "learning_rate": 1.5040994676549588e-05, "loss": 0.8738, "step": 11331 }, { "epoch": 12.907122507122507, "grad_norm": 0.20053917169570923, "learning_rate": 1.5036721313215913e-05, "loss": 0.6706, "step": 11332 }, { "epoch": 12.908262108262107, "grad_norm": 0.22289910912513733, "learning_rate": 1.503244829593431e-05, "loss": 0.674, "step": 11333 }, { "epoch": 12.90940170940171, "grad_norm": 0.24713099002838135, "learning_rate": 1.502817562485318e-05, "loss": 0.6585, "step": 11334 }, { "epoch": 12.91054131054131, "grad_norm": 0.1970605105161667, "learning_rate": 1.5023903300120943e-05, "loss": 0.8098, "step": 11335 }, { "epoch": 12.91168091168091, "grad_norm": 0.18536637723445892, "learning_rate": 1.5019631321885972e-05, "loss": 0.6711, "step": 11336 }, { "epoch": 12.912820512820513, "grad_norm": 0.1956309825181961, "learning_rate": 1.5015359690296646e-05, "loss": 0.5207, "step": 11337 }, { "epoch": 12.913960113960114, "grad_norm": 0.21517540514469147, "learning_rate": 1.5011088405501345e-05, "loss": 0.7251, "step": 11338 }, { "epoch": 12.915099715099714, "grad_norm": 0.2148135006427765, "learning_rate": 1.500681746764841e-05, "loss": 0.6951, "step": 11339 }, { "epoch": 12.916239316239317, "grad_norm": 0.20857606828212738, "learning_rate": 1.5002546876886191e-05, "loss": 0.6369, "step": 11340 }, { "epoch": 12.917378917378917, "grad_norm": 0.2005983144044876, "learning_rate": 1.4998276633363007e-05, "loss": 0.7484, "step": 11341 }, { "epoch": 12.918518518518518, "grad_norm": 0.17464596033096313, "learning_rate": 1.4994006737227185e-05, "loss": 0.9722, "step": 11342 }, { "epoch": 12.91965811965812, "grad_norm": 0.21768459677696228, "learning_rate": 1.4989737188627018e-05, "loss": 0.6091, "step": 11343 }, { "epoch": 12.92079772079772, "grad_norm": 0.262990802526474, "learning_rate": 1.498546798771081e-05, "loss": 0.6419, "step": 11344 }, { "epoch": 12.921937321937321, "grad_norm": 0.22202444076538086, "learning_rate": 1.4981199134626841e-05, "loss": 0.764, "step": 11345 }, { "epoch": 12.923076923076923, "grad_norm": 0.18325243890285492, "learning_rate": 1.4976930629523375e-05, "loss": 0.6889, "step": 11346 }, { "epoch": 12.924216524216524, "grad_norm": 0.2587796151638031, "learning_rate": 1.4972662472548676e-05, "loss": 0.4527, "step": 11347 }, { "epoch": 12.925356125356124, "grad_norm": 0.2007748782634735, "learning_rate": 1.4968394663850977e-05, "loss": 0.7945, "step": 11348 }, { "epoch": 12.926495726495727, "grad_norm": 0.21909686923027039, "learning_rate": 1.4964127203578521e-05, "loss": 0.8434, "step": 11349 }, { "epoch": 12.927635327635327, "grad_norm": 0.2630300521850586, "learning_rate": 1.495986009187953e-05, "loss": 0.619, "step": 11350 }, { "epoch": 12.928774928774928, "grad_norm": 0.20543648302555084, "learning_rate": 1.4955593328902206e-05, "loss": 0.8312, "step": 11351 }, { "epoch": 12.92991452991453, "grad_norm": 0.21905991435050964, "learning_rate": 1.495132691479475e-05, "loss": 0.5108, "step": 11352 }, { "epoch": 12.93105413105413, "grad_norm": 0.22271057963371277, "learning_rate": 1.4947060849705341e-05, "loss": 0.4384, "step": 11353 }, { "epoch": 12.932193732193731, "grad_norm": 0.25240451097488403, "learning_rate": 1.494279513378216e-05, "loss": 0.685, "step": 11354 }, { "epoch": 12.933333333333334, "grad_norm": 0.18054066598415375, "learning_rate": 1.4938529767173357e-05, "loss": 0.8588, "step": 11355 }, { "epoch": 12.934472934472934, "grad_norm": 0.16919507086277008, "learning_rate": 1.4934264750027089e-05, "loss": 0.6914, "step": 11356 }, { "epoch": 12.935612535612536, "grad_norm": 0.22787116467952728, "learning_rate": 1.493000008249148e-05, "loss": 0.6032, "step": 11357 }, { "epoch": 12.936752136752137, "grad_norm": 0.24769218266010284, "learning_rate": 1.4925735764714666e-05, "loss": 0.6556, "step": 11358 }, { "epoch": 12.937891737891738, "grad_norm": 0.23653510212898254, "learning_rate": 1.4921471796844744e-05, "loss": 0.7298, "step": 11359 }, { "epoch": 12.93903133903134, "grad_norm": 0.23376649618148804, "learning_rate": 1.491720817902983e-05, "loss": 0.703, "step": 11360 }, { "epoch": 12.94017094017094, "grad_norm": 0.22891533374786377, "learning_rate": 1.4912944911418006e-05, "loss": 0.6003, "step": 11361 }, { "epoch": 12.941310541310541, "grad_norm": 0.22133411467075348, "learning_rate": 1.4908681994157339e-05, "loss": 0.6826, "step": 11362 }, { "epoch": 12.942450142450143, "grad_norm": 0.25973889231681824, "learning_rate": 1.4904419427395904e-05, "loss": 0.5391, "step": 11363 }, { "epoch": 12.943589743589744, "grad_norm": 0.18987728655338287, "learning_rate": 1.490015721128174e-05, "loss": 0.7936, "step": 11364 }, { "epoch": 12.944729344729344, "grad_norm": 0.20876216888427734, "learning_rate": 1.4895895345962896e-05, "loss": 0.6467, "step": 11365 }, { "epoch": 12.945868945868947, "grad_norm": 0.22567686438560486, "learning_rate": 1.4891633831587386e-05, "loss": 0.582, "step": 11366 }, { "epoch": 12.947008547008547, "grad_norm": 0.17486126720905304, "learning_rate": 1.488737266830324e-05, "loss": 0.6805, "step": 11367 }, { "epoch": 12.948148148148148, "grad_norm": 0.249233216047287, "learning_rate": 1.4883111856258444e-05, "loss": 0.4735, "step": 11368 }, { "epoch": 12.94928774928775, "grad_norm": 0.2411205917596817, "learning_rate": 1.4878851395601002e-05, "loss": 0.7448, "step": 11369 }, { "epoch": 12.95042735042735, "grad_norm": 0.15878437459468842, "learning_rate": 1.487459128647887e-05, "loss": 0.7648, "step": 11370 }, { "epoch": 12.951566951566951, "grad_norm": 0.1965569108724594, "learning_rate": 1.4870331529040032e-05, "loss": 0.7249, "step": 11371 }, { "epoch": 12.952706552706553, "grad_norm": 0.20828257501125336, "learning_rate": 1.4866072123432445e-05, "loss": 0.7939, "step": 11372 }, { "epoch": 12.953846153846154, "grad_norm": 0.2108020931482315, "learning_rate": 1.4861813069804037e-05, "loss": 0.5208, "step": 11373 }, { "epoch": 12.954985754985755, "grad_norm": 0.26978325843811035, "learning_rate": 1.4857554368302745e-05, "loss": 0.5179, "step": 11374 }, { "epoch": 12.956125356125357, "grad_norm": 0.29760831594467163, "learning_rate": 1.485329601907648e-05, "loss": 0.3318, "step": 11375 }, { "epoch": 12.957264957264957, "grad_norm": 0.18968529999256134, "learning_rate": 1.4849038022273148e-05, "loss": 0.5812, "step": 11376 }, { "epoch": 12.958404558404558, "grad_norm": 0.23169593513011932, "learning_rate": 1.4844780378040634e-05, "loss": 0.5563, "step": 11377 }, { "epoch": 12.95954415954416, "grad_norm": 0.17449063062667847, "learning_rate": 1.4840523086526836e-05, "loss": 0.8254, "step": 11378 }, { "epoch": 12.96068376068376, "grad_norm": 0.23026247322559357, "learning_rate": 1.4836266147879602e-05, "loss": 0.584, "step": 11379 }, { "epoch": 12.961823361823361, "grad_norm": 0.20057043433189392, "learning_rate": 1.4832009562246799e-05, "loss": 0.6018, "step": 11380 }, { "epoch": 12.962962962962964, "grad_norm": 0.256772518157959, "learning_rate": 1.482775332977625e-05, "loss": 0.8504, "step": 11381 }, { "epoch": 12.964102564102564, "grad_norm": 0.2533794939517975, "learning_rate": 1.482349745061582e-05, "loss": 0.7321, "step": 11382 }, { "epoch": 12.965242165242165, "grad_norm": 0.23897698521614075, "learning_rate": 1.4819241924913303e-05, "loss": 0.5534, "step": 11383 }, { "epoch": 12.966381766381767, "grad_norm": 0.19721534848213196, "learning_rate": 1.4814986752816512e-05, "loss": 0.7591, "step": 11384 }, { "epoch": 12.967521367521368, "grad_norm": 0.19526709616184235, "learning_rate": 1.4810731934473241e-05, "loss": 0.6045, "step": 11385 }, { "epoch": 12.968660968660968, "grad_norm": 0.15430134534835815, "learning_rate": 1.4806477470031271e-05, "loss": 0.8702, "step": 11386 }, { "epoch": 12.96980056980057, "grad_norm": 0.255127489566803, "learning_rate": 1.4802223359638368e-05, "loss": 0.553, "step": 11387 }, { "epoch": 12.970940170940171, "grad_norm": 0.26086923480033875, "learning_rate": 1.4797969603442297e-05, "loss": 0.5594, "step": 11388 }, { "epoch": 12.972079772079772, "grad_norm": 0.20795606076717377, "learning_rate": 1.4793716201590791e-05, "loss": 0.8878, "step": 11389 }, { "epoch": 12.973219373219374, "grad_norm": 0.20088131725788116, "learning_rate": 1.4789463154231598e-05, "loss": 0.7535, "step": 11390 }, { "epoch": 12.974358974358974, "grad_norm": 0.19947673380374908, "learning_rate": 1.4785210461512416e-05, "loss": 0.5852, "step": 11391 }, { "epoch": 12.975498575498575, "grad_norm": 0.19736547768115997, "learning_rate": 1.4780958123580968e-05, "loss": 0.5457, "step": 11392 }, { "epoch": 12.976638176638177, "grad_norm": 0.20003557205200195, "learning_rate": 1.477670614058495e-05, "loss": 0.6882, "step": 11393 }, { "epoch": 12.977777777777778, "grad_norm": 0.23446090519428253, "learning_rate": 1.477245451267204e-05, "loss": 0.6767, "step": 11394 }, { "epoch": 12.978917378917378, "grad_norm": 0.26469114422798157, "learning_rate": 1.476820323998992e-05, "loss": 0.4988, "step": 11395 }, { "epoch": 12.98005698005698, "grad_norm": 0.1809329390525818, "learning_rate": 1.4763952322686229e-05, "loss": 0.9165, "step": 11396 }, { "epoch": 12.981196581196581, "grad_norm": 0.17014767229557037, "learning_rate": 1.4759701760908629e-05, "loss": 0.766, "step": 11397 }, { "epoch": 12.982336182336182, "grad_norm": 0.2265402227640152, "learning_rate": 1.4755451554804745e-05, "loss": 0.6505, "step": 11398 }, { "epoch": 12.983475783475784, "grad_norm": 0.2196301966905594, "learning_rate": 1.4751201704522201e-05, "loss": 0.7872, "step": 11399 }, { "epoch": 12.984615384615385, "grad_norm": 0.26781708002090454, "learning_rate": 1.4746952210208604e-05, "loss": 0.7257, "step": 11400 }, { "epoch": 12.985754985754985, "grad_norm": 0.2548161447048187, "learning_rate": 1.474270307201156e-05, "loss": 0.5794, "step": 11401 }, { "epoch": 12.986894586894588, "grad_norm": 0.23451417684555054, "learning_rate": 1.4738454290078637e-05, "loss": 0.5691, "step": 11402 }, { "epoch": 12.988034188034188, "grad_norm": 0.24653545022010803, "learning_rate": 1.4734205864557412e-05, "loss": 0.63, "step": 11403 }, { "epoch": 12.989173789173789, "grad_norm": 0.2059410810470581, "learning_rate": 1.472995779559546e-05, "loss": 0.8472, "step": 11404 }, { "epoch": 12.990313390313391, "grad_norm": 0.24840520322322845, "learning_rate": 1.4725710083340311e-05, "loss": 0.7161, "step": 11405 }, { "epoch": 12.991452991452991, "grad_norm": 0.19748952984809875, "learning_rate": 1.472146272793951e-05, "loss": 0.6585, "step": 11406 }, { "epoch": 12.992592592592592, "grad_norm": 0.19665081799030304, "learning_rate": 1.4717215729540568e-05, "loss": 0.863, "step": 11407 }, { "epoch": 12.993732193732194, "grad_norm": 0.23738694190979004, "learning_rate": 1.471296908829101e-05, "loss": 0.5315, "step": 11408 }, { "epoch": 12.994871794871795, "grad_norm": 0.22886839509010315, "learning_rate": 1.4708722804338315e-05, "loss": 0.6378, "step": 11409 }, { "epoch": 12.996011396011395, "grad_norm": 0.20925356447696686, "learning_rate": 1.4704476877829987e-05, "loss": 0.7843, "step": 11410 }, { "epoch": 12.997150997150998, "grad_norm": 0.29935958981513977, "learning_rate": 1.4700231308913483e-05, "loss": 0.5282, "step": 11411 }, { "epoch": 12.998290598290598, "grad_norm": 0.2766572833061218, "learning_rate": 1.4695986097736279e-05, "loss": 0.5435, "step": 11412 }, { "epoch": 12.999430199430199, "grad_norm": 0.2087930589914322, "learning_rate": 1.4691741244445807e-05, "loss": 0.901, "step": 11413 }, { "epoch": 13.0, "grad_norm": 0.2996289134025574, "learning_rate": 1.46874967491895e-05, "loss": 0.9528, "step": 11414 }, { "epoch": 13.0011396011396, "grad_norm": 0.21519289910793304, "learning_rate": 1.4683252612114806e-05, "loss": 0.7741, "step": 11415 }, { "epoch": 13.002279202279203, "grad_norm": 0.2124188244342804, "learning_rate": 1.4679008833369113e-05, "loss": 0.7446, "step": 11416 }, { "epoch": 13.003418803418803, "grad_norm": 0.17165492475032806, "learning_rate": 1.4674765413099828e-05, "loss": 0.7705, "step": 11417 }, { "epoch": 13.004558404558404, "grad_norm": 0.15280818939208984, "learning_rate": 1.4670522351454332e-05, "loss": 0.6471, "step": 11418 }, { "epoch": 13.005698005698006, "grad_norm": 0.17679402232170105, "learning_rate": 1.4666279648580005e-05, "loss": 0.7578, "step": 11419 }, { "epoch": 13.006837606837607, "grad_norm": 0.21650585532188416, "learning_rate": 1.46620373046242e-05, "loss": 0.6234, "step": 11420 }, { "epoch": 13.007977207977207, "grad_norm": 0.18113601207733154, "learning_rate": 1.4657795319734274e-05, "loss": 0.5055, "step": 11421 }, { "epoch": 13.00911680911681, "grad_norm": 0.23416933417320251, "learning_rate": 1.4653553694057553e-05, "loss": 0.5932, "step": 11422 }, { "epoch": 13.01025641025641, "grad_norm": 0.18027664721012115, "learning_rate": 1.4649312427741366e-05, "loss": 0.6091, "step": 11423 }, { "epoch": 13.01139601139601, "grad_norm": 0.17871034145355225, "learning_rate": 1.4645071520933027e-05, "loss": 0.8529, "step": 11424 }, { "epoch": 13.012535612535613, "grad_norm": 0.1923864185810089, "learning_rate": 1.4640830973779818e-05, "loss": 0.8046, "step": 11425 }, { "epoch": 13.013675213675214, "grad_norm": 0.2389516979455948, "learning_rate": 1.4636590786429043e-05, "loss": 0.5703, "step": 11426 }, { "epoch": 13.014814814814814, "grad_norm": 0.24037589132785797, "learning_rate": 1.4632350959027976e-05, "loss": 0.5333, "step": 11427 }, { "epoch": 13.015954415954416, "grad_norm": 0.16341760754585266, "learning_rate": 1.4628111491723868e-05, "loss": 0.8563, "step": 11428 }, { "epoch": 13.017094017094017, "grad_norm": 0.21553219854831696, "learning_rate": 1.4623872384663973e-05, "loss": 0.6649, "step": 11429 }, { "epoch": 13.018233618233618, "grad_norm": 0.2237376719713211, "learning_rate": 1.461963363799552e-05, "loss": 0.5996, "step": 11430 }, { "epoch": 13.01937321937322, "grad_norm": 0.19841177761554718, "learning_rate": 1.461539525186574e-05, "loss": 0.8854, "step": 11431 }, { "epoch": 13.02051282051282, "grad_norm": 0.22623202204704285, "learning_rate": 1.461115722642184e-05, "loss": 0.6766, "step": 11432 }, { "epoch": 13.021652421652421, "grad_norm": 0.22455154359340668, "learning_rate": 1.4606919561811019e-05, "loss": 0.6496, "step": 11433 }, { "epoch": 13.022792022792023, "grad_norm": 0.24419784545898438, "learning_rate": 1.4602682258180461e-05, "loss": 0.4354, "step": 11434 }, { "epoch": 13.023931623931624, "grad_norm": 0.200229212641716, "learning_rate": 1.4598445315677345e-05, "loss": 0.7056, "step": 11435 }, { "epoch": 13.025071225071224, "grad_norm": 0.18830811977386475, "learning_rate": 1.4594208734448811e-05, "loss": 0.4923, "step": 11436 }, { "epoch": 13.026210826210827, "grad_norm": 0.1918337345123291, "learning_rate": 1.4589972514642048e-05, "loss": 0.597, "step": 11437 }, { "epoch": 13.027350427350427, "grad_norm": 0.1658741682767868, "learning_rate": 1.4585736656404152e-05, "loss": 0.7869, "step": 11438 }, { "epoch": 13.028490028490028, "grad_norm": 0.2372065633535385, "learning_rate": 1.4581501159882267e-05, "loss": 0.5721, "step": 11439 }, { "epoch": 13.02962962962963, "grad_norm": 0.21426714956760406, "learning_rate": 1.457726602522349e-05, "loss": 0.6547, "step": 11440 }, { "epoch": 13.03076923076923, "grad_norm": 0.18508793413639069, "learning_rate": 1.4573031252574943e-05, "loss": 0.8496, "step": 11441 }, { "epoch": 13.031908831908831, "grad_norm": 0.19948071241378784, "learning_rate": 1.4568796842083682e-05, "loss": 0.7428, "step": 11442 }, { "epoch": 13.033048433048434, "grad_norm": 0.2422424852848053, "learning_rate": 1.4564562793896797e-05, "loss": 0.7462, "step": 11443 }, { "epoch": 13.034188034188034, "grad_norm": 0.1996956318616867, "learning_rate": 1.4560329108161338e-05, "loss": 0.4969, "step": 11444 }, { "epoch": 13.035327635327635, "grad_norm": 0.23708593845367432, "learning_rate": 1.4556095785024371e-05, "loss": 0.7164, "step": 11445 }, { "epoch": 13.036467236467237, "grad_norm": 0.21722626686096191, "learning_rate": 1.4551862824632907e-05, "loss": 0.5624, "step": 11446 }, { "epoch": 13.037606837606837, "grad_norm": 0.3021351099014282, "learning_rate": 1.4547630227133972e-05, "loss": 0.4409, "step": 11447 }, { "epoch": 13.038746438746438, "grad_norm": 0.22942470014095306, "learning_rate": 1.4543397992674601e-05, "loss": 0.5466, "step": 11448 }, { "epoch": 13.03988603988604, "grad_norm": 0.2294406145811081, "learning_rate": 1.4539166121401765e-05, "loss": 0.6643, "step": 11449 }, { "epoch": 13.04102564102564, "grad_norm": 0.2203741818666458, "learning_rate": 1.4534934613462453e-05, "loss": 0.5618, "step": 11450 }, { "epoch": 13.042165242165241, "grad_norm": 0.21978648006916046, "learning_rate": 1.4530703469003645e-05, "loss": 0.5467, "step": 11451 }, { "epoch": 13.043304843304844, "grad_norm": 0.17470145225524902, "learning_rate": 1.4526472688172305e-05, "loss": 0.704, "step": 11452 }, { "epoch": 13.044444444444444, "grad_norm": 0.23567743599414825, "learning_rate": 1.4522242271115361e-05, "loss": 0.5294, "step": 11453 }, { "epoch": 13.045584045584045, "grad_norm": 0.191808819770813, "learning_rate": 1.451801221797976e-05, "loss": 0.696, "step": 11454 }, { "epoch": 13.046723646723647, "grad_norm": 0.21488399803638458, "learning_rate": 1.4513782528912418e-05, "loss": 0.575, "step": 11455 }, { "epoch": 13.047863247863248, "grad_norm": 0.2066238671541214, "learning_rate": 1.4509553204060253e-05, "loss": 0.7023, "step": 11456 }, { "epoch": 13.049002849002848, "grad_norm": 0.20625387132167816, "learning_rate": 1.4505324243570145e-05, "loss": 0.7823, "step": 11457 }, { "epoch": 13.05014245014245, "grad_norm": 0.18600556254386902, "learning_rate": 1.4501095647588981e-05, "loss": 0.69, "step": 11458 }, { "epoch": 13.051282051282051, "grad_norm": 0.21689251065254211, "learning_rate": 1.4496867416263644e-05, "loss": 0.5559, "step": 11459 }, { "epoch": 13.052421652421652, "grad_norm": 0.23860739171504974, "learning_rate": 1.4492639549740994e-05, "loss": 0.4703, "step": 11460 }, { "epoch": 13.053561253561254, "grad_norm": 0.23685714602470398, "learning_rate": 1.448841204816786e-05, "loss": 0.7486, "step": 11461 }, { "epoch": 13.054700854700855, "grad_norm": 0.23470570147037506, "learning_rate": 1.4484184911691079e-05, "loss": 0.7676, "step": 11462 }, { "epoch": 13.055840455840455, "grad_norm": 0.2473776936531067, "learning_rate": 1.4479958140457479e-05, "loss": 0.5993, "step": 11463 }, { "epoch": 13.056980056980057, "grad_norm": 0.1980896145105362, "learning_rate": 1.447573173461387e-05, "loss": 0.6698, "step": 11464 }, { "epoch": 13.058119658119658, "grad_norm": 0.21760006248950958, "learning_rate": 1.447150569430703e-05, "loss": 0.644, "step": 11465 }, { "epoch": 13.059259259259258, "grad_norm": 0.21926675736904144, "learning_rate": 1.4467280019683755e-05, "loss": 0.5746, "step": 11466 }, { "epoch": 13.06039886039886, "grad_norm": 0.2303035706281662, "learning_rate": 1.4463054710890805e-05, "loss": 0.5843, "step": 11467 }, { "epoch": 13.061538461538461, "grad_norm": 0.22751690447330475, "learning_rate": 1.4458829768074956e-05, "loss": 0.5984, "step": 11468 }, { "epoch": 13.062678062678062, "grad_norm": 0.1822330802679062, "learning_rate": 1.4454605191382914e-05, "loss": 0.7939, "step": 11469 }, { "epoch": 13.063817663817664, "grad_norm": 0.23645171523094177, "learning_rate": 1.4450380980961441e-05, "loss": 0.5233, "step": 11470 }, { "epoch": 13.064957264957265, "grad_norm": 0.16785389184951782, "learning_rate": 1.4446157136957263e-05, "loss": 0.7826, "step": 11471 }, { "epoch": 13.066096866096865, "grad_norm": 0.20599152147769928, "learning_rate": 1.444193365951706e-05, "loss": 0.9462, "step": 11472 }, { "epoch": 13.067236467236468, "grad_norm": 0.17982584238052368, "learning_rate": 1.4437710548787537e-05, "loss": 0.8866, "step": 11473 }, { "epoch": 13.068376068376068, "grad_norm": 0.20019139349460602, "learning_rate": 1.4433487804915368e-05, "loss": 0.6199, "step": 11474 }, { "epoch": 13.069515669515669, "grad_norm": 0.2203243225812912, "learning_rate": 1.442926542804724e-05, "loss": 0.515, "step": 11475 }, { "epoch": 13.070655270655271, "grad_norm": 0.23264262080192566, "learning_rate": 1.4425043418329782e-05, "loss": 0.6896, "step": 11476 }, { "epoch": 13.071794871794872, "grad_norm": 0.20378360152244568, "learning_rate": 1.4420821775909649e-05, "loss": 0.6089, "step": 11477 }, { "epoch": 13.072934472934472, "grad_norm": 0.19463470578193665, "learning_rate": 1.4416600500933466e-05, "loss": 0.6047, "step": 11478 }, { "epoch": 13.074074074074074, "grad_norm": 0.19807323813438416, "learning_rate": 1.4412379593547864e-05, "loss": 0.7472, "step": 11479 }, { "epoch": 13.075213675213675, "grad_norm": 0.17974790930747986, "learning_rate": 1.4408159053899412e-05, "loss": 0.8, "step": 11480 }, { "epoch": 13.076353276353275, "grad_norm": 0.19811110198497772, "learning_rate": 1.4403938882134738e-05, "loss": 0.6622, "step": 11481 }, { "epoch": 13.077492877492878, "grad_norm": 0.21380451321601868, "learning_rate": 1.4399719078400415e-05, "loss": 0.6306, "step": 11482 }, { "epoch": 13.078632478632478, "grad_norm": 0.2382928878068924, "learning_rate": 1.4395499642842986e-05, "loss": 0.6166, "step": 11483 }, { "epoch": 13.079772079772079, "grad_norm": 0.23316870629787445, "learning_rate": 1.4391280575609018e-05, "loss": 0.518, "step": 11484 }, { "epoch": 13.080911680911681, "grad_norm": 0.21876977384090424, "learning_rate": 1.4387061876845051e-05, "loss": 0.5918, "step": 11485 }, { "epoch": 13.082051282051282, "grad_norm": 0.2160615772008896, "learning_rate": 1.4382843546697619e-05, "loss": 0.666, "step": 11486 }, { "epoch": 13.083190883190884, "grad_norm": 0.21719148755073547, "learning_rate": 1.437862558531322e-05, "loss": 0.7148, "step": 11487 }, { "epoch": 13.084330484330485, "grad_norm": 0.2050594538450241, "learning_rate": 1.4374407992838363e-05, "loss": 0.4238, "step": 11488 }, { "epoch": 13.085470085470085, "grad_norm": 0.21120205521583557, "learning_rate": 1.4370190769419535e-05, "loss": 0.7231, "step": 11489 }, { "epoch": 13.086609686609687, "grad_norm": 0.207522451877594, "learning_rate": 1.4365973915203223e-05, "loss": 0.6889, "step": 11490 }, { "epoch": 13.087749287749288, "grad_norm": 0.19271396100521088, "learning_rate": 1.4361757430335864e-05, "loss": 0.8723, "step": 11491 }, { "epoch": 13.088888888888889, "grad_norm": 0.19991078972816467, "learning_rate": 1.4357541314963934e-05, "loss": 0.6813, "step": 11492 }, { "epoch": 13.090028490028491, "grad_norm": 0.21781806647777557, "learning_rate": 1.4353325569233873e-05, "loss": 0.6675, "step": 11493 }, { "epoch": 13.091168091168091, "grad_norm": 0.20519177615642548, "learning_rate": 1.4349110193292079e-05, "loss": 0.78, "step": 11494 }, { "epoch": 13.092307692307692, "grad_norm": 0.19230076670646667, "learning_rate": 1.4344895187284978e-05, "loss": 0.6574, "step": 11495 }, { "epoch": 13.093447293447294, "grad_norm": 0.18990492820739746, "learning_rate": 1.434068055135897e-05, "loss": 0.7134, "step": 11496 }, { "epoch": 13.094586894586895, "grad_norm": 0.19751523435115814, "learning_rate": 1.4336466285660451e-05, "loss": 0.6944, "step": 11497 }, { "epoch": 13.095726495726495, "grad_norm": 0.1749780774116516, "learning_rate": 1.4332252390335771e-05, "loss": 0.6119, "step": 11498 }, { "epoch": 13.096866096866098, "grad_norm": 0.21589112281799316, "learning_rate": 1.4328038865531305e-05, "loss": 0.7532, "step": 11499 }, { "epoch": 13.098005698005698, "grad_norm": 0.22464151680469513, "learning_rate": 1.4323825711393396e-05, "loss": 0.7189, "step": 11500 }, { "epoch": 13.099145299145299, "grad_norm": 0.23879218101501465, "learning_rate": 1.4319612928068388e-05, "loss": 0.5596, "step": 11501 }, { "epoch": 13.100284900284901, "grad_norm": 0.1787513941526413, "learning_rate": 1.4315400515702576e-05, "loss": 0.8427, "step": 11502 }, { "epoch": 13.101424501424502, "grad_norm": 0.2210531383752823, "learning_rate": 1.4311188474442295e-05, "loss": 0.5092, "step": 11503 }, { "epoch": 13.102564102564102, "grad_norm": 0.3780690133571625, "learning_rate": 1.430697680443383e-05, "loss": 0.815, "step": 11504 }, { "epoch": 13.103703703703705, "grad_norm": 0.2050284892320633, "learning_rate": 1.4302765505823482e-05, "loss": 0.8659, "step": 11505 }, { "epoch": 13.104843304843305, "grad_norm": 0.19268076121807098, "learning_rate": 1.4298554578757496e-05, "loss": 0.6517, "step": 11506 }, { "epoch": 13.105982905982906, "grad_norm": 0.18988528847694397, "learning_rate": 1.4294344023382133e-05, "loss": 0.568, "step": 11507 }, { "epoch": 13.107122507122508, "grad_norm": 0.14905154705047607, "learning_rate": 1.4290133839843644e-05, "loss": 0.7074, "step": 11508 }, { "epoch": 13.108262108262108, "grad_norm": 0.2254278063774109, "learning_rate": 1.4285924028288272e-05, "loss": 0.5758, "step": 11509 }, { "epoch": 13.109401709401709, "grad_norm": 0.2876284420490265, "learning_rate": 1.428171458886221e-05, "loss": 0.3755, "step": 11510 }, { "epoch": 13.110541310541311, "grad_norm": 0.2649424076080322, "learning_rate": 1.4277505521711679e-05, "loss": 0.7619, "step": 11511 }, { "epoch": 13.111680911680912, "grad_norm": 0.22738780081272125, "learning_rate": 1.4273296826982866e-05, "loss": 0.647, "step": 11512 }, { "epoch": 13.112820512820512, "grad_norm": 0.2185848206281662, "learning_rate": 1.4269088504821963e-05, "loss": 0.5954, "step": 11513 }, { "epoch": 13.113960113960115, "grad_norm": 0.2230282723903656, "learning_rate": 1.4264880555375104e-05, "loss": 0.4998, "step": 11514 }, { "epoch": 13.115099715099715, "grad_norm": 0.2161315530538559, "learning_rate": 1.4260672978788477e-05, "loss": 0.6973, "step": 11515 }, { "epoch": 13.116239316239316, "grad_norm": 0.24731765687465668, "learning_rate": 1.4256465775208222e-05, "loss": 0.5941, "step": 11516 }, { "epoch": 13.117378917378918, "grad_norm": 0.2210257649421692, "learning_rate": 1.4252258944780445e-05, "loss": 0.5394, "step": 11517 }, { "epoch": 13.118518518518519, "grad_norm": 0.18674232065677643, "learning_rate": 1.424805248765127e-05, "loss": 0.7684, "step": 11518 }, { "epoch": 13.11965811965812, "grad_norm": 0.19959892332553864, "learning_rate": 1.4243846403966799e-05, "loss": 0.7202, "step": 11519 }, { "epoch": 13.120797720797722, "grad_norm": 0.1804495006799698, "learning_rate": 1.4239640693873135e-05, "loss": 0.9416, "step": 11520 }, { "epoch": 13.121937321937322, "grad_norm": 0.22852128744125366, "learning_rate": 1.423543535751633e-05, "loss": 0.5582, "step": 11521 }, { "epoch": 13.123076923076923, "grad_norm": 0.23781459033489227, "learning_rate": 1.4231230395042455e-05, "loss": 0.5017, "step": 11522 }, { "epoch": 13.124216524216525, "grad_norm": 0.18285304307937622, "learning_rate": 1.4227025806597566e-05, "loss": 0.5944, "step": 11523 }, { "epoch": 13.125356125356126, "grad_norm": 0.22835831344127655, "learning_rate": 1.4222821592327707e-05, "loss": 0.7405, "step": 11524 }, { "epoch": 13.126495726495726, "grad_norm": 0.20321674644947052, "learning_rate": 1.4218617752378865e-05, "loss": 0.3524, "step": 11525 }, { "epoch": 13.127635327635328, "grad_norm": 0.2231002002954483, "learning_rate": 1.4214414286897094e-05, "loss": 0.8003, "step": 11526 }, { "epoch": 13.128774928774929, "grad_norm": 0.18399591743946075, "learning_rate": 1.4210211196028383e-05, "loss": 0.7674, "step": 11527 }, { "epoch": 13.12991452991453, "grad_norm": 0.18497677147388458, "learning_rate": 1.4206008479918701e-05, "loss": 0.6735, "step": 11528 }, { "epoch": 13.131054131054132, "grad_norm": 0.24679137766361237, "learning_rate": 1.4201806138714025e-05, "loss": 0.5597, "step": 11529 }, { "epoch": 13.132193732193732, "grad_norm": 0.23271405696868896, "learning_rate": 1.4197604172560319e-05, "loss": 0.605, "step": 11530 }, { "epoch": 13.133333333333333, "grad_norm": 0.20882228016853333, "learning_rate": 1.4193402581603538e-05, "loss": 0.7401, "step": 11531 }, { "epoch": 13.134472934472935, "grad_norm": 0.15231657028198242, "learning_rate": 1.4189201365989591e-05, "loss": 0.7368, "step": 11532 }, { "epoch": 13.135612535612536, "grad_norm": 0.17907041311264038, "learning_rate": 1.4185000525864412e-05, "loss": 0.8759, "step": 11533 }, { "epoch": 13.136752136752136, "grad_norm": 0.17270059883594513, "learning_rate": 1.4180800061373903e-05, "loss": 0.7135, "step": 11534 }, { "epoch": 13.137891737891739, "grad_norm": 0.20590460300445557, "learning_rate": 1.4176599972663972e-05, "loss": 0.6557, "step": 11535 }, { "epoch": 13.13903133903134, "grad_norm": 0.1744016408920288, "learning_rate": 1.4172400259880469e-05, "loss": 0.8199, "step": 11536 }, { "epoch": 13.14017094017094, "grad_norm": 0.190405011177063, "learning_rate": 1.4168200923169294e-05, "loss": 0.6, "step": 11537 }, { "epoch": 13.141310541310542, "grad_norm": 0.20549336075782776, "learning_rate": 1.4164001962676285e-05, "loss": 0.7205, "step": 11538 }, { "epoch": 13.142450142450143, "grad_norm": 0.18362568318843842, "learning_rate": 1.4159803378547299e-05, "loss": 0.9011, "step": 11539 }, { "epoch": 13.143589743589743, "grad_norm": 0.19862878322601318, "learning_rate": 1.4155605170928141e-05, "loss": 0.715, "step": 11540 }, { "epoch": 13.144729344729345, "grad_norm": 0.2534286081790924, "learning_rate": 1.415140733996464e-05, "loss": 0.727, "step": 11541 }, { "epoch": 13.145868945868946, "grad_norm": 0.19345135986804962, "learning_rate": 1.4147209885802593e-05, "loss": 0.4794, "step": 11542 }, { "epoch": 13.147008547008546, "grad_norm": 0.21531400084495544, "learning_rate": 1.4143012808587802e-05, "loss": 0.4436, "step": 11543 }, { "epoch": 13.148148148148149, "grad_norm": 0.21644629538059235, "learning_rate": 1.4138816108466024e-05, "loss": 0.4643, "step": 11544 }, { "epoch": 13.14928774928775, "grad_norm": 0.182317852973938, "learning_rate": 1.4134619785583032e-05, "loss": 0.7638, "step": 11545 }, { "epoch": 13.15042735042735, "grad_norm": 0.25152501463890076, "learning_rate": 1.4130423840084572e-05, "loss": 0.4686, "step": 11546 }, { "epoch": 13.151566951566952, "grad_norm": 0.2344987988471985, "learning_rate": 1.4126228272116382e-05, "loss": 0.4872, "step": 11547 }, { "epoch": 13.152706552706553, "grad_norm": 0.2786315381526947, "learning_rate": 1.4122033081824192e-05, "loss": 0.3465, "step": 11548 }, { "epoch": 13.153846153846153, "grad_norm": 0.19070546329021454, "learning_rate": 1.4117838269353705e-05, "loss": 0.6071, "step": 11549 }, { "epoch": 13.154985754985756, "grad_norm": 0.23821833729743958, "learning_rate": 1.4113643834850632e-05, "loss": 0.6572, "step": 11550 }, { "epoch": 13.156125356125356, "grad_norm": 0.20713569223880768, "learning_rate": 1.4109449778460634e-05, "loss": 0.5809, "step": 11551 }, { "epoch": 13.157264957264957, "grad_norm": 0.19106636941432953, "learning_rate": 1.4105256100329397e-05, "loss": 0.9459, "step": 11552 }, { "epoch": 13.158404558404559, "grad_norm": 0.22153536975383759, "learning_rate": 1.4101062800602578e-05, "loss": 0.8085, "step": 11553 }, { "epoch": 13.15954415954416, "grad_norm": 0.24223750829696655, "learning_rate": 1.4096869879425826e-05, "loss": 0.56, "step": 11554 }, { "epoch": 13.16068376068376, "grad_norm": 0.18808935582637787, "learning_rate": 1.4092677336944757e-05, "loss": 0.8308, "step": 11555 }, { "epoch": 13.161823361823362, "grad_norm": 0.24668224155902863, "learning_rate": 1.4088485173304999e-05, "loss": 0.4897, "step": 11556 }, { "epoch": 13.162962962962963, "grad_norm": 0.20698751509189606, "learning_rate": 1.408429338865216e-05, "loss": 0.6295, "step": 11557 }, { "epoch": 13.164102564102564, "grad_norm": 0.2100403904914856, "learning_rate": 1.408010198313183e-05, "loss": 0.7904, "step": 11558 }, { "epoch": 13.165242165242166, "grad_norm": 0.23687000572681427, "learning_rate": 1.4075910956889588e-05, "loss": 0.5746, "step": 11559 }, { "epoch": 13.166381766381766, "grad_norm": 0.2089221477508545, "learning_rate": 1.4071720310070999e-05, "loss": 0.4951, "step": 11560 }, { "epoch": 13.167521367521367, "grad_norm": 0.20603562891483307, "learning_rate": 1.4067530042821624e-05, "loss": 0.6948, "step": 11561 }, { "epoch": 13.16866096866097, "grad_norm": 0.18587876856327057, "learning_rate": 1.4063340155286988e-05, "loss": 0.7913, "step": 11562 }, { "epoch": 13.16980056980057, "grad_norm": 0.22771002352237701, "learning_rate": 1.4059150647612624e-05, "loss": 0.6707, "step": 11563 }, { "epoch": 13.17094017094017, "grad_norm": 0.17967014014720917, "learning_rate": 1.4054961519944046e-05, "loss": 0.7811, "step": 11564 }, { "epoch": 13.172079772079773, "grad_norm": 0.20775778591632843, "learning_rate": 1.4050772772426762e-05, "loss": 0.6791, "step": 11565 }, { "epoch": 13.173219373219373, "grad_norm": 0.18925908207893372, "learning_rate": 1.4046584405206242e-05, "loss": 0.7388, "step": 11566 }, { "epoch": 13.174358974358974, "grad_norm": 0.2153882533311844, "learning_rate": 1.4042396418427966e-05, "loss": 0.6407, "step": 11567 }, { "epoch": 13.175498575498576, "grad_norm": 0.20188970863819122, "learning_rate": 1.4038208812237397e-05, "loss": 0.635, "step": 11568 }, { "epoch": 13.176638176638177, "grad_norm": 0.21654903888702393, "learning_rate": 1.4034021586779981e-05, "loss": 0.6413, "step": 11569 }, { "epoch": 13.177777777777777, "grad_norm": 0.18344613909721375, "learning_rate": 1.402983474220115e-05, "loss": 0.7784, "step": 11570 }, { "epoch": 13.17891737891738, "grad_norm": 0.18774652481079102, "learning_rate": 1.4025648278646328e-05, "loss": 0.6506, "step": 11571 }, { "epoch": 13.18005698005698, "grad_norm": 0.2177322953939438, "learning_rate": 1.4021462196260931e-05, "loss": 0.6103, "step": 11572 }, { "epoch": 13.18119658119658, "grad_norm": 0.23880016803741455, "learning_rate": 1.4017276495190331e-05, "loss": 0.4566, "step": 11573 }, { "epoch": 13.182336182336183, "grad_norm": 0.20434992015361786, "learning_rate": 1.4013091175579923e-05, "loss": 0.7283, "step": 11574 }, { "epoch": 13.183475783475783, "grad_norm": 0.19964273273944855, "learning_rate": 1.4008906237575073e-05, "loss": 0.6593, "step": 11575 }, { "epoch": 13.184615384615384, "grad_norm": 0.23219776153564453, "learning_rate": 1.4004721681321142e-05, "loss": 0.511, "step": 11576 }, { "epoch": 13.185754985754986, "grad_norm": 0.21062827110290527, "learning_rate": 1.4000537506963456e-05, "loss": 0.6972, "step": 11577 }, { "epoch": 13.186894586894587, "grad_norm": 0.19703301787376404, "learning_rate": 1.3996353714647353e-05, "loss": 0.5974, "step": 11578 }, { "epoch": 13.188034188034187, "grad_norm": 0.19937114417552948, "learning_rate": 1.3992170304518142e-05, "loss": 0.7851, "step": 11579 }, { "epoch": 13.18917378917379, "grad_norm": 0.21336285769939423, "learning_rate": 1.3987987276721132e-05, "loss": 0.7405, "step": 11580 }, { "epoch": 13.19031339031339, "grad_norm": 0.20813515782356262, "learning_rate": 1.3983804631401604e-05, "loss": 0.7038, "step": 11581 }, { "epoch": 13.19145299145299, "grad_norm": 0.1741720587015152, "learning_rate": 1.3979622368704837e-05, "loss": 0.6571, "step": 11582 }, { "epoch": 13.192592592592593, "grad_norm": 0.17021405696868896, "learning_rate": 1.3975440488776093e-05, "loss": 0.5768, "step": 11583 }, { "epoch": 13.193732193732194, "grad_norm": 0.1774616241455078, "learning_rate": 1.3971258991760625e-05, "loss": 0.6787, "step": 11584 }, { "epoch": 13.194871794871794, "grad_norm": 0.20216669142246246, "learning_rate": 1.3967077877803653e-05, "loss": 0.8463, "step": 11585 }, { "epoch": 13.196011396011396, "grad_norm": 0.17637847363948822, "learning_rate": 1.3962897147050408e-05, "loss": 0.7966, "step": 11586 }, { "epoch": 13.197150997150997, "grad_norm": 0.280303418636322, "learning_rate": 1.3958716799646093e-05, "loss": 0.3728, "step": 11587 }, { "epoch": 13.198290598290598, "grad_norm": 0.2020619660615921, "learning_rate": 1.3954536835735922e-05, "loss": 0.6231, "step": 11588 }, { "epoch": 13.1994301994302, "grad_norm": 0.2249068021774292, "learning_rate": 1.3950357255465049e-05, "loss": 0.7047, "step": 11589 }, { "epoch": 13.2005698005698, "grad_norm": 0.1925133913755417, "learning_rate": 1.3946178058978657e-05, "loss": 0.7707, "step": 11590 }, { "epoch": 13.201709401709401, "grad_norm": 0.22543399035930634, "learning_rate": 1.39419992464219e-05, "loss": 0.5175, "step": 11591 }, { "epoch": 13.202849002849003, "grad_norm": 0.2181890457868576, "learning_rate": 1.3937820817939917e-05, "loss": 0.6261, "step": 11592 }, { "epoch": 13.203988603988604, "grad_norm": 0.18290166556835175, "learning_rate": 1.3933642773677836e-05, "loss": 0.7239, "step": 11593 }, { "epoch": 13.205128205128204, "grad_norm": 0.1689101755619049, "learning_rate": 1.3929465113780776e-05, "loss": 0.7861, "step": 11594 }, { "epoch": 13.206267806267807, "grad_norm": 0.1998259276151657, "learning_rate": 1.3925287838393846e-05, "loss": 0.5791, "step": 11595 }, { "epoch": 13.207407407407407, "grad_norm": 0.19786402583122253, "learning_rate": 1.392111094766212e-05, "loss": 0.2011, "step": 11596 }, { "epoch": 13.208547008547008, "grad_norm": 0.20233529806137085, "learning_rate": 1.3916934441730672e-05, "loss": 0.7028, "step": 11597 }, { "epoch": 13.20968660968661, "grad_norm": 0.19939535856246948, "learning_rate": 1.391275832074457e-05, "loss": 0.727, "step": 11598 }, { "epoch": 13.21082621082621, "grad_norm": 0.2780725955963135, "learning_rate": 1.3908582584848872e-05, "loss": 0.3274, "step": 11599 }, { "epoch": 13.211965811965811, "grad_norm": 0.23216119408607483, "learning_rate": 1.3904407234188594e-05, "loss": 0.6895, "step": 11600 }, { "epoch": 13.213105413105414, "grad_norm": 0.24538558721542358, "learning_rate": 1.3900232268908763e-05, "loss": 0.5244, "step": 11601 }, { "epoch": 13.214245014245014, "grad_norm": 0.2178601175546646, "learning_rate": 1.3896057689154396e-05, "loss": 0.7729, "step": 11602 }, { "epoch": 13.215384615384615, "grad_norm": 0.25279882550239563, "learning_rate": 1.3891883495070473e-05, "loss": 0.4163, "step": 11603 }, { "epoch": 13.216524216524217, "grad_norm": 0.18254736065864563, "learning_rate": 1.3887709686801992e-05, "loss": 0.7246, "step": 11604 }, { "epoch": 13.217663817663817, "grad_norm": 0.19879871606826782, "learning_rate": 1.3883536264493907e-05, "loss": 0.4122, "step": 11605 }, { "epoch": 13.218803418803418, "grad_norm": 0.22978079319000244, "learning_rate": 1.3879363228291187e-05, "loss": 0.5717, "step": 11606 }, { "epoch": 13.21994301994302, "grad_norm": 0.18433459103107452, "learning_rate": 1.3875190578338756e-05, "loss": 0.8107, "step": 11607 }, { "epoch": 13.221082621082621, "grad_norm": 0.1832319051027298, "learning_rate": 1.387101831478155e-05, "loss": 0.666, "step": 11608 }, { "epoch": 13.222222222222221, "grad_norm": 0.20097021758556366, "learning_rate": 1.3866846437764483e-05, "loss": 0.6026, "step": 11609 }, { "epoch": 13.223361823361824, "grad_norm": 0.19776402413845062, "learning_rate": 1.3862674947432463e-05, "loss": 0.6121, "step": 11610 }, { "epoch": 13.224501424501424, "grad_norm": 0.287895143032074, "learning_rate": 1.3858503843930365e-05, "loss": 0.5031, "step": 11611 }, { "epoch": 13.225641025641025, "grad_norm": 0.2561202049255371, "learning_rate": 1.3854333127403063e-05, "loss": 0.478, "step": 11612 }, { "epoch": 13.226780626780627, "grad_norm": 0.21733428537845612, "learning_rate": 1.3850162797995426e-05, "loss": 0.8118, "step": 11613 }, { "epoch": 13.227920227920228, "grad_norm": 0.2504943311214447, "learning_rate": 1.384599285585229e-05, "loss": 0.6361, "step": 11614 }, { "epoch": 13.229059829059828, "grad_norm": 0.243933767080307, "learning_rate": 1.3841823301118503e-05, "loss": 0.7141, "step": 11615 }, { "epoch": 13.23019943019943, "grad_norm": 0.22382576763629913, "learning_rate": 1.3837654133938876e-05, "loss": 0.4717, "step": 11616 }, { "epoch": 13.231339031339031, "grad_norm": 0.2521352469921112, "learning_rate": 1.3833485354458214e-05, "loss": 0.6739, "step": 11617 }, { "epoch": 13.232478632478632, "grad_norm": 0.24256514012813568, "learning_rate": 1.3829316962821328e-05, "loss": 0.6928, "step": 11618 }, { "epoch": 13.233618233618234, "grad_norm": 0.23227514326572418, "learning_rate": 1.3825148959172973e-05, "loss": 0.518, "step": 11619 }, { "epoch": 13.234757834757835, "grad_norm": 0.2246641367673874, "learning_rate": 1.3820981343657924e-05, "loss": 0.5972, "step": 11620 }, { "epoch": 13.235897435897435, "grad_norm": 0.20608285069465637, "learning_rate": 1.3816814116420938e-05, "loss": 0.5933, "step": 11621 }, { "epoch": 13.237037037037037, "grad_norm": 0.23809665441513062, "learning_rate": 1.3812647277606758e-05, "loss": 0.69, "step": 11622 }, { "epoch": 13.238176638176638, "grad_norm": 0.23365621268749237, "learning_rate": 1.3808480827360092e-05, "loss": 0.6051, "step": 11623 }, { "epoch": 13.239316239316238, "grad_norm": 0.18482758104801178, "learning_rate": 1.380431476582566e-05, "loss": 0.6293, "step": 11624 }, { "epoch": 13.24045584045584, "grad_norm": 0.2261316329240799, "learning_rate": 1.3800149093148179e-05, "loss": 0.8503, "step": 11625 }, { "epoch": 13.241595441595441, "grad_norm": 0.2567899525165558, "learning_rate": 1.3795983809472313e-05, "loss": 0.4726, "step": 11626 }, { "epoch": 13.242735042735042, "grad_norm": 0.20920339226722717, "learning_rate": 1.379181891494274e-05, "loss": 0.5162, "step": 11627 }, { "epoch": 13.243874643874644, "grad_norm": 0.1641882061958313, "learning_rate": 1.3787654409704118e-05, "loss": 0.6192, "step": 11628 }, { "epoch": 13.245014245014245, "grad_norm": 0.20723962783813477, "learning_rate": 1.3783490293901102e-05, "loss": 0.6878, "step": 11629 }, { "epoch": 13.246153846153845, "grad_norm": 0.22747716307640076, "learning_rate": 1.3779326567678303e-05, "loss": 0.6497, "step": 11630 }, { "epoch": 13.247293447293448, "grad_norm": 0.27678728103637695, "learning_rate": 1.3775163231180349e-05, "loss": 0.5673, "step": 11631 }, { "epoch": 13.248433048433048, "grad_norm": 0.2180687040090561, "learning_rate": 1.377100028455185e-05, "loss": 0.7948, "step": 11632 }, { "epoch": 13.249572649572649, "grad_norm": 0.23409755527973175, "learning_rate": 1.3766837727937392e-05, "loss": 0.7267, "step": 11633 }, { "epoch": 13.250712250712251, "grad_norm": 0.18900719285011292, "learning_rate": 1.3762675561481547e-05, "loss": 0.6484, "step": 11634 }, { "epoch": 13.251851851851852, "grad_norm": 0.23243440687656403, "learning_rate": 1.3758513785328872e-05, "loss": 0.5069, "step": 11635 }, { "epoch": 13.252991452991452, "grad_norm": 0.18046048283576965, "learning_rate": 1.3754352399623946e-05, "loss": 0.8074, "step": 11636 }, { "epoch": 13.254131054131054, "grad_norm": 0.19376908242702484, "learning_rate": 1.3750191404511278e-05, "loss": 0.6343, "step": 11637 }, { "epoch": 13.255270655270655, "grad_norm": 0.23857325315475464, "learning_rate": 1.3746030800135398e-05, "loss": 0.5676, "step": 11638 }, { "epoch": 13.256410256410255, "grad_norm": 0.20566202700138092, "learning_rate": 1.374187058664082e-05, "loss": 0.4449, "step": 11639 }, { "epoch": 13.257549857549858, "grad_norm": 0.16072019934654236, "learning_rate": 1.3737710764172044e-05, "loss": 0.7824, "step": 11640 }, { "epoch": 13.258689458689458, "grad_norm": 0.17833319306373596, "learning_rate": 1.3733551332873534e-05, "loss": 0.7401, "step": 11641 }, { "epoch": 13.25982905982906, "grad_norm": 0.20904269814491272, "learning_rate": 1.372939229288977e-05, "loss": 0.6093, "step": 11642 }, { "epoch": 13.260968660968661, "grad_norm": 0.19811105728149414, "learning_rate": 1.3725233644365204e-05, "loss": 0.6425, "step": 11643 }, { "epoch": 13.262108262108262, "grad_norm": 0.2192060351371765, "learning_rate": 1.3721075387444293e-05, "loss": 0.7129, "step": 11644 }, { "epoch": 13.263247863247864, "grad_norm": 0.19724543392658234, "learning_rate": 1.3716917522271439e-05, "loss": 0.7193, "step": 11645 }, { "epoch": 13.264387464387465, "grad_norm": 0.20687972009181976, "learning_rate": 1.3712760048991058e-05, "loss": 0.7468, "step": 11646 }, { "epoch": 13.265527065527065, "grad_norm": 0.24772727489471436, "learning_rate": 1.370860296774758e-05, "loss": 0.5926, "step": 11647 }, { "epoch": 13.266666666666667, "grad_norm": 0.20096513628959656, "learning_rate": 1.3704446278685362e-05, "loss": 0.6094, "step": 11648 }, { "epoch": 13.267806267806268, "grad_norm": 0.2793840765953064, "learning_rate": 1.3700289981948792e-05, "loss": 0.5776, "step": 11649 }, { "epoch": 13.268945868945869, "grad_norm": 0.2074030041694641, "learning_rate": 1.369613407768222e-05, "loss": 0.6481, "step": 11650 }, { "epoch": 13.270085470085471, "grad_norm": 0.25681355595588684, "learning_rate": 1.3691978566030011e-05, "loss": 0.8151, "step": 11651 }, { "epoch": 13.271225071225071, "grad_norm": 0.21756432950496674, "learning_rate": 1.3687823447136471e-05, "loss": 0.7279, "step": 11652 }, { "epoch": 13.272364672364672, "grad_norm": 0.25154349207878113, "learning_rate": 1.3683668721145931e-05, "loss": 0.6369, "step": 11653 }, { "epoch": 13.273504273504274, "grad_norm": 0.1960681527853012, "learning_rate": 1.3679514388202696e-05, "loss": 0.7955, "step": 11654 }, { "epoch": 13.274643874643875, "grad_norm": 0.2003762274980545, "learning_rate": 1.3675360448451072e-05, "loss": 0.7032, "step": 11655 }, { "epoch": 13.275783475783475, "grad_norm": 0.2773532271385193, "learning_rate": 1.3671206902035311e-05, "loss": 0.5724, "step": 11656 }, { "epoch": 13.276923076923078, "grad_norm": 0.22172974050045013, "learning_rate": 1.3667053749099675e-05, "loss": 0.9324, "step": 11657 }, { "epoch": 13.278062678062678, "grad_norm": 0.15701836347579956, "learning_rate": 1.366290098978844e-05, "loss": 0.8745, "step": 11658 }, { "epoch": 13.279202279202279, "grad_norm": 0.24879899621009827, "learning_rate": 1.3658748624245843e-05, "loss": 0.5656, "step": 11659 }, { "epoch": 13.280341880341881, "grad_norm": 0.19226278364658356, "learning_rate": 1.365459665261608e-05, "loss": 0.7775, "step": 11660 }, { "epoch": 13.281481481481482, "grad_norm": 0.2153489738702774, "learning_rate": 1.365044507504338e-05, "loss": 0.8125, "step": 11661 }, { "epoch": 13.282621082621082, "grad_norm": 0.22412502765655518, "learning_rate": 1.3646293891671932e-05, "loss": 0.6856, "step": 11662 }, { "epoch": 13.283760683760685, "grad_norm": 0.2342052310705185, "learning_rate": 1.3642143102645933e-05, "loss": 0.589, "step": 11663 }, { "epoch": 13.284900284900285, "grad_norm": 0.22180594503879547, "learning_rate": 1.3637992708109524e-05, "loss": 0.5019, "step": 11664 }, { "epoch": 13.286039886039886, "grad_norm": 0.1808266043663025, "learning_rate": 1.3633842708206873e-05, "loss": 0.6718, "step": 11665 }, { "epoch": 13.287179487179488, "grad_norm": 0.22990697622299194, "learning_rate": 1.3629693103082124e-05, "loss": 0.5513, "step": 11666 }, { "epoch": 13.288319088319088, "grad_norm": 0.1747976541519165, "learning_rate": 1.3625543892879411e-05, "loss": 0.7103, "step": 11667 }, { "epoch": 13.289458689458689, "grad_norm": 0.30305853486061096, "learning_rate": 1.3621395077742821e-05, "loss": 0.3237, "step": 11668 }, { "epoch": 13.290598290598291, "grad_norm": 0.17665137350559235, "learning_rate": 1.361724665781648e-05, "loss": 0.7229, "step": 11669 }, { "epoch": 13.291737891737892, "grad_norm": 0.252114862203598, "learning_rate": 1.3613098633244475e-05, "loss": 0.5299, "step": 11670 }, { "epoch": 13.292877492877492, "grad_norm": 0.1885116994380951, "learning_rate": 1.3608951004170859e-05, "loss": 0.791, "step": 11671 }, { "epoch": 13.294017094017095, "grad_norm": 0.25263312458992004, "learning_rate": 1.3604803770739699e-05, "loss": 0.5148, "step": 11672 }, { "epoch": 13.295156695156695, "grad_norm": 0.1892181932926178, "learning_rate": 1.360065693309504e-05, "loss": 0.6974, "step": 11673 }, { "epoch": 13.296296296296296, "grad_norm": 0.22764094173908234, "learning_rate": 1.3596510491380926e-05, "loss": 0.4453, "step": 11674 }, { "epoch": 13.297435897435898, "grad_norm": 0.1909138262271881, "learning_rate": 1.3592364445741355e-05, "loss": 0.7589, "step": 11675 }, { "epoch": 13.298575498575499, "grad_norm": 0.20127205550670624, "learning_rate": 1.3588218796320335e-05, "loss": 0.7262, "step": 11676 }, { "epoch": 13.2997150997151, "grad_norm": 0.18755756318569183, "learning_rate": 1.3584073543261857e-05, "loss": 0.7694, "step": 11677 }, { "epoch": 13.300854700854702, "grad_norm": 0.1816464513540268, "learning_rate": 1.357992868670991e-05, "loss": 0.7881, "step": 11678 }, { "epoch": 13.301994301994302, "grad_norm": 0.2296556532382965, "learning_rate": 1.3575784226808424e-05, "loss": 0.5905, "step": 11679 }, { "epoch": 13.303133903133903, "grad_norm": 0.21964482963085175, "learning_rate": 1.3571640163701382e-05, "loss": 0.6905, "step": 11680 }, { "epoch": 13.304273504273505, "grad_norm": 0.19504858553409576, "learning_rate": 1.3567496497532714e-05, "loss": 0.7416, "step": 11681 }, { "epoch": 13.305413105413106, "grad_norm": 0.1960165798664093, "learning_rate": 1.3563353228446322e-05, "loss": 0.8115, "step": 11682 }, { "epoch": 13.306552706552706, "grad_norm": 0.1789308488368988, "learning_rate": 1.3559210356586122e-05, "loss": 0.9033, "step": 11683 }, { "epoch": 13.307692307692308, "grad_norm": 0.20353779196739197, "learning_rate": 1.355506788209601e-05, "loss": 0.6144, "step": 11684 }, { "epoch": 13.308831908831909, "grad_norm": 0.22314287722110748, "learning_rate": 1.3550925805119874e-05, "loss": 0.7909, "step": 11685 }, { "epoch": 13.30997150997151, "grad_norm": 0.24087511003017426, "learning_rate": 1.3546784125801559e-05, "loss": 0.5125, "step": 11686 }, { "epoch": 13.311111111111112, "grad_norm": 0.24155688285827637, "learning_rate": 1.3542642844284925e-05, "loss": 0.6512, "step": 11687 }, { "epoch": 13.312250712250712, "grad_norm": 0.2127622663974762, "learning_rate": 1.3538501960713817e-05, "loss": 0.6409, "step": 11688 }, { "epoch": 13.313390313390313, "grad_norm": 0.19076725840568542, "learning_rate": 1.3534361475232066e-05, "loss": 0.5577, "step": 11689 }, { "epoch": 13.314529914529915, "grad_norm": 0.2386363446712494, "learning_rate": 1.3530221387983449e-05, "loss": 0.6635, "step": 11690 }, { "epoch": 13.315669515669516, "grad_norm": 0.27781039476394653, "learning_rate": 1.3526081699111795e-05, "loss": 0.4627, "step": 11691 }, { "epoch": 13.316809116809116, "grad_norm": 0.2276037037372589, "learning_rate": 1.3521942408760887e-05, "loss": 0.5927, "step": 11692 }, { "epoch": 13.317948717948719, "grad_norm": 0.1818503886461258, "learning_rate": 1.3517803517074476e-05, "loss": 0.7698, "step": 11693 }, { "epoch": 13.31908831908832, "grad_norm": 0.22993409633636475, "learning_rate": 1.3513665024196323e-05, "loss": 0.5875, "step": 11694 }, { "epoch": 13.32022792022792, "grad_norm": 0.18451379239559174, "learning_rate": 1.3509526930270171e-05, "loss": 0.7612, "step": 11695 }, { "epoch": 13.321367521367522, "grad_norm": 0.2384960800409317, "learning_rate": 1.350538923543975e-05, "loss": 0.6317, "step": 11696 }, { "epoch": 13.322507122507123, "grad_norm": 0.2107928842306137, "learning_rate": 1.350125193984878e-05, "loss": 0.4457, "step": 11697 }, { "epoch": 13.323646723646723, "grad_norm": 0.24160587787628174, "learning_rate": 1.349711504364094e-05, "loss": 0.5347, "step": 11698 }, { "epoch": 13.324786324786325, "grad_norm": 0.1711517721414566, "learning_rate": 1.349297854695993e-05, "loss": 0.7233, "step": 11699 }, { "epoch": 13.325925925925926, "grad_norm": 0.20796626806259155, "learning_rate": 1.3488842449949418e-05, "loss": 0.6381, "step": 11700 }, { "epoch": 13.327065527065526, "grad_norm": 0.23498138785362244, "learning_rate": 1.3484706752753065e-05, "loss": 0.7115, "step": 11701 }, { "epoch": 13.328205128205129, "grad_norm": 0.26574021577835083, "learning_rate": 1.3480571455514512e-05, "loss": 0.618, "step": 11702 }, { "epoch": 13.32934472934473, "grad_norm": 0.2008192092180252, "learning_rate": 1.3476436558377392e-05, "loss": 0.8789, "step": 11703 }, { "epoch": 13.33048433048433, "grad_norm": 0.19755418598651886, "learning_rate": 1.347230206148533e-05, "loss": 0.7185, "step": 11704 }, { "epoch": 13.331623931623932, "grad_norm": 0.17471033334732056, "learning_rate": 1.346816796498191e-05, "loss": 0.8852, "step": 11705 }, { "epoch": 13.332763532763533, "grad_norm": 0.8952785134315491, "learning_rate": 1.3464034269010728e-05, "loss": 0.7773, "step": 11706 }, { "epoch": 13.333903133903133, "grad_norm": 0.17916469275951385, "learning_rate": 1.3459900973715362e-05, "loss": 0.7367, "step": 11707 }, { "epoch": 13.335042735042736, "grad_norm": 0.21020612120628357, "learning_rate": 1.3455768079239375e-05, "loss": 0.7739, "step": 11708 }, { "epoch": 13.336182336182336, "grad_norm": 0.15315940976142883, "learning_rate": 1.3451635585726303e-05, "loss": 0.6165, "step": 11709 }, { "epoch": 13.337321937321937, "grad_norm": 0.21983729302883148, "learning_rate": 1.3447503493319686e-05, "loss": 0.5992, "step": 11710 }, { "epoch": 13.338461538461539, "grad_norm": 0.23087336122989655, "learning_rate": 1.344337180216304e-05, "loss": 0.4929, "step": 11711 }, { "epoch": 13.33960113960114, "grad_norm": 0.19386158883571625, "learning_rate": 1.3439240512399875e-05, "loss": 0.6294, "step": 11712 }, { "epoch": 13.34074074074074, "grad_norm": 0.213854119181633, "learning_rate": 1.3435109624173675e-05, "loss": 0.3712, "step": 11713 }, { "epoch": 13.341880341880342, "grad_norm": 0.20322713255882263, "learning_rate": 1.3430979137627924e-05, "loss": 0.7028, "step": 11714 }, { "epoch": 13.343019943019943, "grad_norm": 0.25743451714515686, "learning_rate": 1.3426849052906094e-05, "loss": 0.5265, "step": 11715 }, { "epoch": 13.344159544159544, "grad_norm": 0.15938548743724823, "learning_rate": 1.342271937015161e-05, "loss": 0.7474, "step": 11716 }, { "epoch": 13.345299145299146, "grad_norm": 0.2237337827682495, "learning_rate": 1.3418590089507921e-05, "loss": 0.6901, "step": 11717 }, { "epoch": 13.346438746438746, "grad_norm": 0.20207485556602478, "learning_rate": 1.3414461211118447e-05, "loss": 0.6515, "step": 11718 }, { "epoch": 13.347578347578347, "grad_norm": 0.1879969835281372, "learning_rate": 1.3410332735126607e-05, "loss": 0.6928, "step": 11719 }, { "epoch": 13.34871794871795, "grad_norm": 0.18650564551353455, "learning_rate": 1.340620466167577e-05, "loss": 0.8326, "step": 11720 }, { "epoch": 13.34985754985755, "grad_norm": 0.21093356609344482, "learning_rate": 1.340207699090933e-05, "loss": 0.6828, "step": 11721 }, { "epoch": 13.35099715099715, "grad_norm": 0.23285485804080963, "learning_rate": 1.3397949722970649e-05, "loss": 0.5843, "step": 11722 }, { "epoch": 13.352136752136753, "grad_norm": 0.25672730803489685, "learning_rate": 1.3393822858003083e-05, "loss": 0.6111, "step": 11723 }, { "epoch": 13.353276353276353, "grad_norm": 0.2889954447746277, "learning_rate": 1.3389696396149965e-05, "loss": 0.4769, "step": 11724 }, { "epoch": 13.354415954415954, "grad_norm": 0.2774520516395569, "learning_rate": 1.3385570337554617e-05, "loss": 0.739, "step": 11725 }, { "epoch": 13.355555555555556, "grad_norm": 0.2050815224647522, "learning_rate": 1.3381444682360365e-05, "loss": 0.6947, "step": 11726 }, { "epoch": 13.356695156695157, "grad_norm": 0.2372182309627533, "learning_rate": 1.3377319430710477e-05, "loss": 0.6744, "step": 11727 }, { "epoch": 13.357834757834757, "grad_norm": 0.1729559451341629, "learning_rate": 1.337319458274825e-05, "loss": 0.5814, "step": 11728 }, { "epoch": 13.35897435897436, "grad_norm": 0.2525213062763214, "learning_rate": 1.3369070138616945e-05, "loss": 0.5186, "step": 11729 }, { "epoch": 13.36011396011396, "grad_norm": 0.24681463837623596, "learning_rate": 1.3364946098459835e-05, "loss": 0.5953, "step": 11730 }, { "epoch": 13.36125356125356, "grad_norm": 0.2084857076406479, "learning_rate": 1.336082246242013e-05, "loss": 0.6919, "step": 11731 }, { "epoch": 13.362393162393163, "grad_norm": 0.1615460366010666, "learning_rate": 1.3356699230641067e-05, "loss": 0.9097, "step": 11732 }, { "epoch": 13.363532763532763, "grad_norm": 0.200116828083992, "learning_rate": 1.3352576403265862e-05, "loss": 0.8498, "step": 11733 }, { "epoch": 13.364672364672364, "grad_norm": 0.22976773977279663, "learning_rate": 1.3348453980437708e-05, "loss": 0.6947, "step": 11734 }, { "epoch": 13.365811965811966, "grad_norm": 0.21923796832561493, "learning_rate": 1.334433196229979e-05, "loss": 0.676, "step": 11735 }, { "epoch": 13.366951566951567, "grad_norm": 0.21256579458713531, "learning_rate": 1.3340210348995274e-05, "loss": 0.7264, "step": 11736 }, { "epoch": 13.368091168091167, "grad_norm": 0.1874372810125351, "learning_rate": 1.3336089140667321e-05, "loss": 0.7775, "step": 11737 }, { "epoch": 13.36923076923077, "grad_norm": 0.1841961294412613, "learning_rate": 1.3331968337459078e-05, "loss": 0.5427, "step": 11738 }, { "epoch": 13.37037037037037, "grad_norm": 0.2150958627462387, "learning_rate": 1.332784793951365e-05, "loss": 0.5171, "step": 11739 }, { "epoch": 13.37150997150997, "grad_norm": 0.21831750869750977, "learning_rate": 1.3323727946974158e-05, "loss": 0.6129, "step": 11740 }, { "epoch": 13.372649572649573, "grad_norm": 0.22264201939105988, "learning_rate": 1.331960835998371e-05, "loss": 0.5797, "step": 11741 }, { "epoch": 13.373789173789174, "grad_norm": 0.22739334404468536, "learning_rate": 1.3315489178685391e-05, "loss": 0.626, "step": 11742 }, { "epoch": 13.374928774928774, "grad_norm": 0.26020103693008423, "learning_rate": 1.3311370403222256e-05, "loss": 0.5647, "step": 11743 }, { "epoch": 13.376068376068377, "grad_norm": 0.27008765935897827, "learning_rate": 1.3307252033737372e-05, "loss": 0.545, "step": 11744 }, { "epoch": 13.377207977207977, "grad_norm": 0.2193966954946518, "learning_rate": 1.330313407037378e-05, "loss": 0.8023, "step": 11745 }, { "epoch": 13.378347578347578, "grad_norm": 0.1801786571741104, "learning_rate": 1.3299016513274507e-05, "loss": 0.7572, "step": 11746 }, { "epoch": 13.37948717948718, "grad_norm": 0.20420002937316895, "learning_rate": 1.329489936258257e-05, "loss": 0.6427, "step": 11747 }, { "epoch": 13.38062678062678, "grad_norm": 0.2570120394229889, "learning_rate": 1.3290782618440966e-05, "loss": 0.4259, "step": 11748 }, { "epoch": 13.381766381766381, "grad_norm": 0.18940019607543945, "learning_rate": 1.3286666280992694e-05, "loss": 0.694, "step": 11749 }, { "epoch": 13.382905982905983, "grad_norm": 0.20675398409366608, "learning_rate": 1.3282550350380702e-05, "loss": 0.5802, "step": 11750 }, { "epoch": 13.384045584045584, "grad_norm": 0.2542724907398224, "learning_rate": 1.327843482674796e-05, "loss": 0.7778, "step": 11751 }, { "epoch": 13.385185185185184, "grad_norm": 0.19304198026657104, "learning_rate": 1.3274319710237409e-05, "loss": 0.7178, "step": 11752 }, { "epoch": 13.386324786324787, "grad_norm": 0.2127370536327362, "learning_rate": 1.3270205000991992e-05, "loss": 0.4982, "step": 11753 }, { "epoch": 13.387464387464387, "grad_norm": 0.21912941336631775, "learning_rate": 1.32660906991546e-05, "loss": 0.6523, "step": 11754 }, { "epoch": 13.388603988603988, "grad_norm": 0.22796490788459778, "learning_rate": 1.326197680486815e-05, "loss": 0.8008, "step": 11755 }, { "epoch": 13.38974358974359, "grad_norm": 0.2455299347639084, "learning_rate": 1.3257863318275526e-05, "loss": 0.6711, "step": 11756 }, { "epoch": 13.39088319088319, "grad_norm": 0.21852083504199982, "learning_rate": 1.3253750239519596e-05, "loss": 0.4091, "step": 11757 }, { "epoch": 13.392022792022791, "grad_norm": 0.18607379496097565, "learning_rate": 1.3249637568743226e-05, "loss": 0.6097, "step": 11758 }, { "epoch": 13.393162393162394, "grad_norm": 0.25612539052963257, "learning_rate": 1.3245525306089254e-05, "loss": 0.6545, "step": 11759 }, { "epoch": 13.394301994301994, "grad_norm": 0.2174026370048523, "learning_rate": 1.3241413451700523e-05, "loss": 0.6602, "step": 11760 }, { "epoch": 13.395441595441595, "grad_norm": 0.18675318360328674, "learning_rate": 1.3237302005719832e-05, "loss": 0.5977, "step": 11761 }, { "epoch": 13.396581196581197, "grad_norm": 0.19634675979614258, "learning_rate": 1.3233190968289988e-05, "loss": 0.8069, "step": 11762 }, { "epoch": 13.397720797720797, "grad_norm": 0.24026522040367126, "learning_rate": 1.322908033955378e-05, "loss": 0.6805, "step": 11763 }, { "epoch": 13.398860398860398, "grad_norm": 0.16847091913223267, "learning_rate": 1.3224970119653993e-05, "loss": 0.9449, "step": 11764 }, { "epoch": 13.4, "grad_norm": 0.1692194938659668, "learning_rate": 1.3220860308733365e-05, "loss": 0.7029, "step": 11765 }, { "epoch": 13.401139601139601, "grad_norm": 0.22036419808864594, "learning_rate": 1.321675090693465e-05, "loss": 0.8559, "step": 11766 }, { "epoch": 13.402279202279201, "grad_norm": 0.21671216189861298, "learning_rate": 1.3212641914400581e-05, "loss": 0.6499, "step": 11767 }, { "epoch": 13.403418803418804, "grad_norm": 0.2525615096092224, "learning_rate": 1.3208533331273875e-05, "loss": 0.4299, "step": 11768 }, { "epoch": 13.404558404558404, "grad_norm": 0.24309319257736206, "learning_rate": 1.3204425157697232e-05, "loss": 0.7435, "step": 11769 }, { "epoch": 13.405698005698005, "grad_norm": 0.2587674856185913, "learning_rate": 1.3200317393813342e-05, "loss": 0.5602, "step": 11770 }, { "epoch": 13.406837606837607, "grad_norm": 0.22008532285690308, "learning_rate": 1.3196210039764886e-05, "loss": 0.6589, "step": 11771 }, { "epoch": 13.407977207977208, "grad_norm": 0.22189222276210785, "learning_rate": 1.3192103095694508e-05, "loss": 0.6874, "step": 11772 }, { "epoch": 13.40911680911681, "grad_norm": 0.17690376937389374, "learning_rate": 1.3187996561744862e-05, "loss": 0.7136, "step": 11773 }, { "epoch": 13.41025641025641, "grad_norm": 0.20434725284576416, "learning_rate": 1.3183890438058578e-05, "loss": 0.6152, "step": 11774 }, { "epoch": 13.411396011396011, "grad_norm": 0.18098989129066467, "learning_rate": 1.3179784724778271e-05, "loss": 0.6763, "step": 11775 }, { "epoch": 13.412535612535613, "grad_norm": 0.2281097024679184, "learning_rate": 1.3175679422046557e-05, "loss": 0.4836, "step": 11776 }, { "epoch": 13.413675213675214, "grad_norm": 0.23284488916397095, "learning_rate": 1.3171574530006003e-05, "loss": 0.7279, "step": 11777 }, { "epoch": 13.414814814814815, "grad_norm": 0.23388147354125977, "learning_rate": 1.3167470048799196e-05, "loss": 0.6055, "step": 11778 }, { "epoch": 13.415954415954417, "grad_norm": 0.24649496376514435, "learning_rate": 1.3163365978568693e-05, "loss": 0.6982, "step": 11779 }, { "epoch": 13.417094017094017, "grad_norm": 0.2163296341896057, "learning_rate": 1.3159262319457043e-05, "loss": 0.6715, "step": 11780 }, { "epoch": 13.418233618233618, "grad_norm": 0.25945591926574707, "learning_rate": 1.3155159071606771e-05, "loss": 0.3567, "step": 11781 }, { "epoch": 13.41937321937322, "grad_norm": 0.20381131768226624, "learning_rate": 1.31510562351604e-05, "loss": 0.5506, "step": 11782 }, { "epoch": 13.42051282051282, "grad_norm": 0.2540411651134491, "learning_rate": 1.314695381026044e-05, "loss": 0.5988, "step": 11783 }, { "epoch": 13.421652421652421, "grad_norm": 0.16043323278427124, "learning_rate": 1.314285179704936e-05, "loss": 0.7278, "step": 11784 }, { "epoch": 13.422792022792024, "grad_norm": 0.18081732094287872, "learning_rate": 1.3138750195669644e-05, "loss": 0.6862, "step": 11785 }, { "epoch": 13.423931623931624, "grad_norm": 0.1803949624300003, "learning_rate": 1.3134649006263755e-05, "loss": 0.8005, "step": 11786 }, { "epoch": 13.425071225071225, "grad_norm": 0.16201768815517426, "learning_rate": 1.3130548228974143e-05, "loss": 0.766, "step": 11787 }, { "epoch": 13.426210826210827, "grad_norm": 0.22643521428108215, "learning_rate": 1.312644786394322e-05, "loss": 0.8755, "step": 11788 }, { "epoch": 13.427350427350428, "grad_norm": 0.207927405834198, "learning_rate": 1.312234791131342e-05, "loss": 0.7667, "step": 11789 }, { "epoch": 13.428490028490028, "grad_norm": 0.21223682165145874, "learning_rate": 1.3118248371227138e-05, "loss": 0.5855, "step": 11790 }, { "epoch": 13.42962962962963, "grad_norm": 0.22653348743915558, "learning_rate": 1.3114149243826765e-05, "loss": 0.7142, "step": 11791 }, { "epoch": 13.430769230769231, "grad_norm": 0.24808557331562042, "learning_rate": 1.3110050529254675e-05, "loss": 0.6013, "step": 11792 }, { "epoch": 13.431908831908832, "grad_norm": 0.15929973125457764, "learning_rate": 1.3105952227653228e-05, "loss": 0.8077, "step": 11793 }, { "epoch": 13.433048433048434, "grad_norm": 0.18594641983509064, "learning_rate": 1.3101854339164782e-05, "loss": 0.7346, "step": 11794 }, { "epoch": 13.434188034188034, "grad_norm": 0.2075185775756836, "learning_rate": 1.309775686393164e-05, "loss": 0.671, "step": 11795 }, { "epoch": 13.435327635327635, "grad_norm": 0.2374790608882904, "learning_rate": 1.3093659802096136e-05, "loss": 0.6618, "step": 11796 }, { "epoch": 13.436467236467237, "grad_norm": 0.25639960169792175, "learning_rate": 1.308956315380057e-05, "loss": 0.4616, "step": 11797 }, { "epoch": 13.437606837606838, "grad_norm": 0.2293757051229477, "learning_rate": 1.3085466919187236e-05, "loss": 0.5088, "step": 11798 }, { "epoch": 13.438746438746438, "grad_norm": 0.171857088804245, "learning_rate": 1.3081371098398393e-05, "loss": 0.8437, "step": 11799 }, { "epoch": 13.43988603988604, "grad_norm": 0.18606480956077576, "learning_rate": 1.3077275691576307e-05, "loss": 0.5405, "step": 11800 }, { "epoch": 13.441025641025641, "grad_norm": 0.2071981281042099, "learning_rate": 1.3073180698863228e-05, "loss": 0.6098, "step": 11801 }, { "epoch": 13.442165242165242, "grad_norm": 0.2265680581331253, "learning_rate": 1.306908612040138e-05, "loss": 0.7493, "step": 11802 }, { "epoch": 13.443304843304844, "grad_norm": 0.21974751353263855, "learning_rate": 1.3064991956332977e-05, "loss": 0.514, "step": 11803 }, { "epoch": 13.444444444444445, "grad_norm": 0.1616891324520111, "learning_rate": 1.3060898206800231e-05, "loss": 1.0072, "step": 11804 }, { "epoch": 13.445584045584045, "grad_norm": 0.192517951130867, "learning_rate": 1.305680487194533e-05, "loss": 0.7794, "step": 11805 }, { "epoch": 13.446723646723648, "grad_norm": 0.1810740977525711, "learning_rate": 1.3052711951910429e-05, "loss": 0.767, "step": 11806 }, { "epoch": 13.447863247863248, "grad_norm": 0.17321431636810303, "learning_rate": 1.30486194468377e-05, "loss": 0.8547, "step": 11807 }, { "epoch": 13.449002849002849, "grad_norm": 0.17997126281261444, "learning_rate": 1.3044527356869283e-05, "loss": 0.7351, "step": 11808 }, { "epoch": 13.450142450142451, "grad_norm": 0.22150816023349762, "learning_rate": 1.3040435682147317e-05, "loss": 0.7567, "step": 11809 }, { "epoch": 13.451282051282051, "grad_norm": 0.20686297118663788, "learning_rate": 1.3036344422813902e-05, "loss": 0.6055, "step": 11810 }, { "epoch": 13.452421652421652, "grad_norm": 0.16225579380989075, "learning_rate": 1.3032253579011138e-05, "loss": 0.7329, "step": 11811 }, { "epoch": 13.453561253561254, "grad_norm": 0.25011882185935974, "learning_rate": 1.3028163150881117e-05, "loss": 0.5926, "step": 11812 }, { "epoch": 13.454700854700855, "grad_norm": 0.2048470824956894, "learning_rate": 1.3024073138565929e-05, "loss": 0.6826, "step": 11813 }, { "epoch": 13.455840455840455, "grad_norm": 0.18196135759353638, "learning_rate": 1.3019983542207603e-05, "loss": 0.8431, "step": 11814 }, { "epoch": 13.456980056980058, "grad_norm": 0.19810007512569427, "learning_rate": 1.3015894361948197e-05, "loss": 0.7068, "step": 11815 }, { "epoch": 13.458119658119658, "grad_norm": 0.16278612613677979, "learning_rate": 1.3011805597929733e-05, "loss": 0.6735, "step": 11816 }, { "epoch": 13.459259259259259, "grad_norm": 0.23491507768630981, "learning_rate": 1.300771725029424e-05, "loss": 0.6551, "step": 11817 }, { "epoch": 13.460398860398861, "grad_norm": 0.2680200934410095, "learning_rate": 1.3003629319183697e-05, "loss": 0.6771, "step": 11818 }, { "epoch": 13.461538461538462, "grad_norm": 0.21170486509799957, "learning_rate": 1.2999541804740094e-05, "loss": 0.78, "step": 11819 }, { "epoch": 13.462678062678062, "grad_norm": 0.182427316904068, "learning_rate": 1.2995454707105409e-05, "loss": 0.5947, "step": 11820 }, { "epoch": 13.463817663817665, "grad_norm": 0.19571854174137115, "learning_rate": 1.2991368026421607e-05, "loss": 0.4616, "step": 11821 }, { "epoch": 13.464957264957265, "grad_norm": 0.202408105134964, "learning_rate": 1.2987281762830603e-05, "loss": 0.8652, "step": 11822 }, { "epoch": 13.466096866096866, "grad_norm": 0.2505589723587036, "learning_rate": 1.2983195916474327e-05, "loss": 0.7532, "step": 11823 }, { "epoch": 13.467236467236468, "grad_norm": 0.2218238115310669, "learning_rate": 1.2979110487494729e-05, "loss": 0.2192, "step": 11824 }, { "epoch": 13.468376068376068, "grad_norm": 0.20187053084373474, "learning_rate": 1.2975025476033667e-05, "loss": 0.6784, "step": 11825 }, { "epoch": 13.469515669515669, "grad_norm": 0.2052537202835083, "learning_rate": 1.297094088223304e-05, "loss": 0.7748, "step": 11826 }, { "epoch": 13.470655270655271, "grad_norm": 0.22300845384597778, "learning_rate": 1.2966856706234717e-05, "loss": 0.5471, "step": 11827 }, { "epoch": 13.471794871794872, "grad_norm": 0.16977225244045258, "learning_rate": 1.2962772948180562e-05, "loss": 0.7442, "step": 11828 }, { "epoch": 13.472934472934472, "grad_norm": 0.21880266070365906, "learning_rate": 1.2958689608212392e-05, "loss": 0.8785, "step": 11829 }, { "epoch": 13.474074074074075, "grad_norm": 0.2288815826177597, "learning_rate": 1.2954606686472048e-05, "loss": 0.5701, "step": 11830 }, { "epoch": 13.475213675213675, "grad_norm": 0.1787015199661255, "learning_rate": 1.295052418310134e-05, "loss": 0.6599, "step": 11831 }, { "epoch": 13.476353276353276, "grad_norm": 0.18069544434547424, "learning_rate": 1.2946442098242073e-05, "loss": 0.8786, "step": 11832 }, { "epoch": 13.477492877492878, "grad_norm": 0.15547680854797363, "learning_rate": 1.2942360432036008e-05, "loss": 0.7813, "step": 11833 }, { "epoch": 13.478632478632479, "grad_norm": 0.26197972893714905, "learning_rate": 1.2938279184624912e-05, "loss": 0.5559, "step": 11834 }, { "epoch": 13.47977207977208, "grad_norm": 0.24245873093605042, "learning_rate": 1.2934198356150573e-05, "loss": 0.5394, "step": 11835 }, { "epoch": 13.480911680911682, "grad_norm": 0.20734834671020508, "learning_rate": 1.2930117946754693e-05, "loss": 0.3662, "step": 11836 }, { "epoch": 13.482051282051282, "grad_norm": 0.22213418781757355, "learning_rate": 1.2926037956579013e-05, "loss": 0.4171, "step": 11837 }, { "epoch": 13.483190883190883, "grad_norm": 0.18396912515163422, "learning_rate": 1.2921958385765238e-05, "loss": 0.8424, "step": 11838 }, { "epoch": 13.484330484330485, "grad_norm": 0.2207166850566864, "learning_rate": 1.2917879234455072e-05, "loss": 0.6613, "step": 11839 }, { "epoch": 13.485470085470086, "grad_norm": 0.3201916515827179, "learning_rate": 1.2913800502790174e-05, "loss": 0.5973, "step": 11840 }, { "epoch": 13.486609686609686, "grad_norm": 0.2347378432750702, "learning_rate": 1.2909722190912222e-05, "loss": 0.7864, "step": 11841 }, { "epoch": 13.487749287749288, "grad_norm": 0.21414105594158173, "learning_rate": 1.2905644298962868e-05, "loss": 0.7357, "step": 11842 }, { "epoch": 13.488888888888889, "grad_norm": 0.1687665730714798, "learning_rate": 1.2901566827083758e-05, "loss": 0.6911, "step": 11843 }, { "epoch": 13.49002849002849, "grad_norm": 0.19328096508979797, "learning_rate": 1.2897489775416494e-05, "loss": 0.6803, "step": 11844 }, { "epoch": 13.491168091168092, "grad_norm": 0.2298462986946106, "learning_rate": 1.2893413144102678e-05, "loss": 0.6843, "step": 11845 }, { "epoch": 13.492307692307692, "grad_norm": 0.23529808223247528, "learning_rate": 1.2889336933283938e-05, "loss": 0.6027, "step": 11846 }, { "epoch": 13.493447293447293, "grad_norm": 0.21353775262832642, "learning_rate": 1.2885261143101824e-05, "loss": 0.7785, "step": 11847 }, { "epoch": 13.494586894586895, "grad_norm": 0.24823594093322754, "learning_rate": 1.2881185773697902e-05, "loss": 0.4019, "step": 11848 }, { "epoch": 13.495726495726496, "grad_norm": 0.19683465361595154, "learning_rate": 1.2877110825213729e-05, "loss": 0.6757, "step": 11849 }, { "epoch": 13.496866096866096, "grad_norm": 0.21291722357273102, "learning_rate": 1.2873036297790842e-05, "loss": 0.5452, "step": 11850 }, { "epoch": 13.498005698005699, "grad_norm": 0.20584151148796082, "learning_rate": 1.2868962191570744e-05, "loss": 0.6438, "step": 11851 }, { "epoch": 13.4991452991453, "grad_norm": 0.1875271201133728, "learning_rate": 1.286488850669495e-05, "loss": 0.6734, "step": 11852 }, { "epoch": 13.5002849002849, "grad_norm": 0.26178422570228577, "learning_rate": 1.2860815243304949e-05, "loss": 0.5763, "step": 11853 }, { "epoch": 13.501424501424502, "grad_norm": 0.19995729625225067, "learning_rate": 1.2856742401542218e-05, "loss": 0.9195, "step": 11854 }, { "epoch": 13.502564102564103, "grad_norm": 0.2521830201148987, "learning_rate": 1.2852669981548227e-05, "loss": 0.7433, "step": 11855 }, { "epoch": 13.503703703703703, "grad_norm": 0.23248237371444702, "learning_rate": 1.2848597983464394e-05, "loss": 0.5707, "step": 11856 }, { "epoch": 13.504843304843305, "grad_norm": 0.18612810969352722, "learning_rate": 1.284452640743218e-05, "loss": 0.7798, "step": 11857 }, { "epoch": 13.505982905982906, "grad_norm": 0.22991402447223663, "learning_rate": 1.2840455253593001e-05, "loss": 0.6549, "step": 11858 }, { "epoch": 13.507122507122507, "grad_norm": 0.24886555969715118, "learning_rate": 1.2836384522088241e-05, "loss": 0.5469, "step": 11859 }, { "epoch": 13.508262108262109, "grad_norm": 0.18751516938209534, "learning_rate": 1.2832314213059294e-05, "loss": 0.567, "step": 11860 }, { "epoch": 13.50940170940171, "grad_norm": 0.25968310236930847, "learning_rate": 1.2828244326647537e-05, "loss": 0.5357, "step": 11861 }, { "epoch": 13.51054131054131, "grad_norm": 0.21097686886787415, "learning_rate": 1.2824174862994337e-05, "loss": 0.7197, "step": 11862 }, { "epoch": 13.511680911680912, "grad_norm": 0.15692934393882751, "learning_rate": 1.282010582224102e-05, "loss": 0.8723, "step": 11863 }, { "epoch": 13.512820512820513, "grad_norm": 0.18831391632556915, "learning_rate": 1.281603720452892e-05, "loss": 0.6201, "step": 11864 }, { "epoch": 13.513960113960113, "grad_norm": 0.21477819979190826, "learning_rate": 1.2811969009999353e-05, "loss": 0.7348, "step": 11865 }, { "epoch": 13.515099715099716, "grad_norm": 0.18301048874855042, "learning_rate": 1.2807901238793629e-05, "loss": 0.8579, "step": 11866 }, { "epoch": 13.516239316239316, "grad_norm": 0.24501535296440125, "learning_rate": 1.2803833891053005e-05, "loss": 0.4058, "step": 11867 }, { "epoch": 13.517378917378917, "grad_norm": 0.21062730252742767, "learning_rate": 1.279976696691878e-05, "loss": 0.887, "step": 11868 }, { "epoch": 13.518518518518519, "grad_norm": 0.2024749517440796, "learning_rate": 1.2795700466532207e-05, "loss": 0.637, "step": 11869 }, { "epoch": 13.51965811965812, "grad_norm": 0.24022068083286285, "learning_rate": 1.2791634390034512e-05, "loss": 0.5854, "step": 11870 }, { "epoch": 13.52079772079772, "grad_norm": 0.2260151356458664, "learning_rate": 1.2787568737566924e-05, "loss": 0.6486, "step": 11871 }, { "epoch": 13.521937321937322, "grad_norm": 0.2462989091873169, "learning_rate": 1.278350350927066e-05, "loss": 0.6885, "step": 11872 }, { "epoch": 13.523076923076923, "grad_norm": 0.1758878380060196, "learning_rate": 1.2779438705286927e-05, "loss": 0.7826, "step": 11873 }, { "epoch": 13.524216524216524, "grad_norm": 0.2048841118812561, "learning_rate": 1.2775374325756883e-05, "loss": 0.6602, "step": 11874 }, { "epoch": 13.525356125356126, "grad_norm": 0.19689778983592987, "learning_rate": 1.2771310370821703e-05, "loss": 0.6727, "step": 11875 }, { "epoch": 13.526495726495726, "grad_norm": 0.23123781383037567, "learning_rate": 1.2767246840622549e-05, "loss": 0.7002, "step": 11876 }, { "epoch": 13.527635327635327, "grad_norm": 0.1996956318616867, "learning_rate": 1.2763183735300557e-05, "loss": 0.6125, "step": 11877 }, { "epoch": 13.52877492877493, "grad_norm": 0.17822016775608063, "learning_rate": 1.2759121054996825e-05, "loss": 0.6241, "step": 11878 }, { "epoch": 13.52991452991453, "grad_norm": 0.21769826114177704, "learning_rate": 1.2755058799852495e-05, "loss": 0.7486, "step": 11879 }, { "epoch": 13.53105413105413, "grad_norm": 0.19557489454746246, "learning_rate": 1.2750996970008654e-05, "loss": 0.7319, "step": 11880 }, { "epoch": 13.532193732193733, "grad_norm": 0.17155064642429352, "learning_rate": 1.2746935565606366e-05, "loss": 0.6238, "step": 11881 }, { "epoch": 13.533333333333333, "grad_norm": 0.21221791207790375, "learning_rate": 1.27428745867867e-05, "loss": 0.6154, "step": 11882 }, { "epoch": 13.534472934472934, "grad_norm": 0.2187795341014862, "learning_rate": 1.2738814033690707e-05, "loss": 0.7899, "step": 11883 }, { "epoch": 13.535612535612536, "grad_norm": 0.2055298388004303, "learning_rate": 1.2734753906459432e-05, "loss": 0.7316, "step": 11884 }, { "epoch": 13.536752136752137, "grad_norm": 0.1997203379869461, "learning_rate": 1.2730694205233873e-05, "loss": 0.9362, "step": 11885 }, { "epoch": 13.537891737891737, "grad_norm": 0.2273198515176773, "learning_rate": 1.2726634930155042e-05, "loss": 0.6449, "step": 11886 }, { "epoch": 13.53903133903134, "grad_norm": 0.2319500744342804, "learning_rate": 1.2722576081363935e-05, "loss": 0.6131, "step": 11887 }, { "epoch": 13.54017094017094, "grad_norm": 0.19878043234348297, "learning_rate": 1.2718517659001533e-05, "loss": 0.7458, "step": 11888 }, { "epoch": 13.54131054131054, "grad_norm": 0.2034902125597, "learning_rate": 1.2714459663208767e-05, "loss": 0.622, "step": 11889 }, { "epoch": 13.542450142450143, "grad_norm": 0.23946736752986908, "learning_rate": 1.271040209412661e-05, "loss": 0.4904, "step": 11890 }, { "epoch": 13.543589743589743, "grad_norm": 0.2752933204174042, "learning_rate": 1.2706344951895988e-05, "loss": 0.7497, "step": 11891 }, { "epoch": 13.544729344729344, "grad_norm": 0.2005884200334549, "learning_rate": 1.2702288236657821e-05, "loss": 0.6766, "step": 11892 }, { "epoch": 13.545868945868946, "grad_norm": 0.1786276251077652, "learning_rate": 1.2698231948552997e-05, "loss": 0.7305, "step": 11893 }, { "epoch": 13.547008547008547, "grad_norm": 0.24155272543430328, "learning_rate": 1.2694176087722404e-05, "loss": 0.721, "step": 11894 }, { "epoch": 13.548148148148147, "grad_norm": 0.23642794787883759, "learning_rate": 1.2690120654306914e-05, "loss": 0.7851, "step": 11895 }, { "epoch": 13.54928774928775, "grad_norm": 0.19893379509449005, "learning_rate": 1.2686065648447399e-05, "loss": 0.5975, "step": 11896 }, { "epoch": 13.55042735042735, "grad_norm": 0.2467699944972992, "learning_rate": 1.2682011070284678e-05, "loss": 0.6753, "step": 11897 }, { "epoch": 13.55156695156695, "grad_norm": 0.22657524049282074, "learning_rate": 1.2677956919959588e-05, "loss": 0.3883, "step": 11898 }, { "epoch": 13.552706552706553, "grad_norm": 0.24406392872333527, "learning_rate": 1.2673903197612935e-05, "loss": 0.4954, "step": 11899 }, { "epoch": 13.553846153846154, "grad_norm": 0.17373217642307281, "learning_rate": 1.2669849903385525e-05, "loss": 0.7426, "step": 11900 }, { "epoch": 13.554985754985754, "grad_norm": 0.16287533938884735, "learning_rate": 1.2665797037418136e-05, "loss": 0.8058, "step": 11901 }, { "epoch": 13.556125356125357, "grad_norm": 0.18679316341876984, "learning_rate": 1.2661744599851533e-05, "loss": 0.7145, "step": 11902 }, { "epoch": 13.557264957264957, "grad_norm": 0.17649011313915253, "learning_rate": 1.2657692590826481e-05, "loss": 0.757, "step": 11903 }, { "epoch": 13.558404558404558, "grad_norm": 0.2134092003107071, "learning_rate": 1.2653641010483697e-05, "loss": 0.7175, "step": 11904 }, { "epoch": 13.55954415954416, "grad_norm": 0.21499109268188477, "learning_rate": 1.2649589858963914e-05, "loss": 0.6921, "step": 11905 }, { "epoch": 13.56068376068376, "grad_norm": 0.21749883890151978, "learning_rate": 1.2645539136407842e-05, "loss": 0.6565, "step": 11906 }, { "epoch": 13.561823361823361, "grad_norm": 0.18136462569236755, "learning_rate": 1.2641488842956172e-05, "loss": 0.7308, "step": 11907 }, { "epoch": 13.562962962962963, "grad_norm": 0.21715889871120453, "learning_rate": 1.2637438978749577e-05, "loss": 0.7452, "step": 11908 }, { "epoch": 13.564102564102564, "grad_norm": 0.20942451059818268, "learning_rate": 1.2633389543928726e-05, "loss": 0.7688, "step": 11909 }, { "epoch": 13.565242165242164, "grad_norm": 0.18523932993412018, "learning_rate": 1.2629340538634257e-05, "loss": 0.7706, "step": 11910 }, { "epoch": 13.566381766381767, "grad_norm": 0.23703047633171082, "learning_rate": 1.2625291963006814e-05, "loss": 0.6041, "step": 11911 }, { "epoch": 13.567521367521367, "grad_norm": 0.21664546430110931, "learning_rate": 1.2621243817187013e-05, "loss": 0.6085, "step": 11912 }, { "epoch": 13.568660968660968, "grad_norm": 0.239175483584404, "learning_rate": 1.2617196101315453e-05, "loss": 0.4718, "step": 11913 }, { "epoch": 13.56980056980057, "grad_norm": 0.2262725681066513, "learning_rate": 1.2613148815532738e-05, "loss": 0.6674, "step": 11914 }, { "epoch": 13.57094017094017, "grad_norm": 0.23112249374389648, "learning_rate": 1.260910195997942e-05, "loss": 0.5022, "step": 11915 }, { "epoch": 13.572079772079771, "grad_norm": 0.22211824357509613, "learning_rate": 1.2605055534796062e-05, "loss": 0.7274, "step": 11916 }, { "epoch": 13.573219373219374, "grad_norm": 0.2197098433971405, "learning_rate": 1.2601009540123213e-05, "loss": 0.4865, "step": 11917 }, { "epoch": 13.574358974358974, "grad_norm": 0.2433994561433792, "learning_rate": 1.259696397610141e-05, "loss": 0.6374, "step": 11918 }, { "epoch": 13.575498575498575, "grad_norm": 0.25643351674079895, "learning_rate": 1.259291884287115e-05, "loss": 0.5213, "step": 11919 }, { "epoch": 13.576638176638177, "grad_norm": 0.1962897628545761, "learning_rate": 1.2588874140572932e-05, "loss": 0.8476, "step": 11920 }, { "epoch": 13.577777777777778, "grad_norm": 0.23479922115802765, "learning_rate": 1.2584829869347247e-05, "loss": 0.6863, "step": 11921 }, { "epoch": 13.578917378917378, "grad_norm": 0.18647314608097076, "learning_rate": 1.2580786029334569e-05, "loss": 0.8135, "step": 11922 }, { "epoch": 13.58005698005698, "grad_norm": 0.19278664886951447, "learning_rate": 1.2576742620675336e-05, "loss": 0.5966, "step": 11923 }, { "epoch": 13.581196581196581, "grad_norm": 0.1958315074443817, "learning_rate": 1.2572699643510003e-05, "loss": 0.5162, "step": 11924 }, { "epoch": 13.582336182336181, "grad_norm": 0.16626198589801788, "learning_rate": 1.2568657097978992e-05, "loss": 0.774, "step": 11925 }, { "epoch": 13.583475783475784, "grad_norm": 0.24117280542850494, "learning_rate": 1.25646149842227e-05, "loss": 0.6517, "step": 11926 }, { "epoch": 13.584615384615384, "grad_norm": 0.17913152277469635, "learning_rate": 1.2560573302381524e-05, "loss": 0.8523, "step": 11927 }, { "epoch": 13.585754985754985, "grad_norm": 0.2595483660697937, "learning_rate": 1.2556532052595848e-05, "loss": 0.5177, "step": 11928 }, { "epoch": 13.586894586894587, "grad_norm": 0.200043186545372, "learning_rate": 1.2552491235006041e-05, "loss": 0.8274, "step": 11929 }, { "epoch": 13.588034188034188, "grad_norm": 0.21147392690181732, "learning_rate": 1.2548450849752435e-05, "loss": 0.7123, "step": 11930 }, { "epoch": 13.589173789173788, "grad_norm": 0.22863133251667023, "learning_rate": 1.2544410896975376e-05, "loss": 0.569, "step": 11931 }, { "epoch": 13.59031339031339, "grad_norm": 0.17246928811073303, "learning_rate": 1.2540371376815177e-05, "loss": 0.7866, "step": 11932 }, { "epoch": 13.591452991452991, "grad_norm": 0.16606873273849487, "learning_rate": 1.2536332289412145e-05, "loss": 0.8087, "step": 11933 }, { "epoch": 13.592592592592592, "grad_norm": 0.21755880117416382, "learning_rate": 1.2532293634906567e-05, "loss": 0.6748, "step": 11934 }, { "epoch": 13.593732193732194, "grad_norm": 0.1909179836511612, "learning_rate": 1.2528255413438716e-05, "loss": 0.7769, "step": 11935 }, { "epoch": 13.594871794871795, "grad_norm": 0.2857014536857605, "learning_rate": 1.2524217625148854e-05, "loss": 0.4679, "step": 11936 }, { "epoch": 13.596011396011395, "grad_norm": 0.2215040922164917, "learning_rate": 1.2520180270177231e-05, "loss": 0.6349, "step": 11937 }, { "epoch": 13.597150997150997, "grad_norm": 0.20099647343158722, "learning_rate": 1.2516143348664058e-05, "loss": 0.7075, "step": 11938 }, { "epoch": 13.598290598290598, "grad_norm": 0.2536945939064026, "learning_rate": 1.2512106860749556e-05, "loss": 0.6181, "step": 11939 }, { "epoch": 13.5994301994302, "grad_norm": 0.19861255586147308, "learning_rate": 1.2508070806573927e-05, "loss": 0.8053, "step": 11940 }, { "epoch": 13.6005698005698, "grad_norm": 0.22227483987808228, "learning_rate": 1.250403518627736e-05, "loss": 0.66, "step": 11941 }, { "epoch": 13.601709401709401, "grad_norm": 0.23981639742851257, "learning_rate": 1.2500000000000006e-05, "loss": 0.6902, "step": 11942 }, { "epoch": 13.602849002849004, "grad_norm": 0.20598097145557404, "learning_rate": 1.2495965247882027e-05, "loss": 0.6442, "step": 11943 }, { "epoch": 13.603988603988604, "grad_norm": 0.18238377571105957, "learning_rate": 1.249193093006356e-05, "loss": 0.7795, "step": 11944 }, { "epoch": 13.605128205128205, "grad_norm": 0.21116027235984802, "learning_rate": 1.2487897046684729e-05, "loss": 0.7298, "step": 11945 }, { "epoch": 13.606267806267807, "grad_norm": 0.23421117663383484, "learning_rate": 1.2483863597885642e-05, "loss": 0.6085, "step": 11946 }, { "epoch": 13.607407407407408, "grad_norm": 0.2317437380552292, "learning_rate": 1.2479830583806392e-05, "loss": 0.7832, "step": 11947 }, { "epoch": 13.608547008547008, "grad_norm": 0.2056443840265274, "learning_rate": 1.247579800458707e-05, "loss": 0.7342, "step": 11948 }, { "epoch": 13.60968660968661, "grad_norm": 0.16647958755493164, "learning_rate": 1.247176586036771e-05, "loss": 0.8492, "step": 11949 }, { "epoch": 13.610826210826211, "grad_norm": 0.18847352266311646, "learning_rate": 1.2467734151288377e-05, "loss": 0.7253, "step": 11950 }, { "epoch": 13.611965811965812, "grad_norm": 0.23761668801307678, "learning_rate": 1.24637028774891e-05, "loss": 0.5955, "step": 11951 }, { "epoch": 13.613105413105414, "grad_norm": 0.1838001161813736, "learning_rate": 1.2459672039109907e-05, "loss": 0.7702, "step": 11952 }, { "epoch": 13.614245014245014, "grad_norm": 0.18846701085567474, "learning_rate": 1.245564163629078e-05, "loss": 0.771, "step": 11953 }, { "epoch": 13.615384615384615, "grad_norm": 0.1988699734210968, "learning_rate": 1.2451611669171717e-05, "loss": 0.6131, "step": 11954 }, { "epoch": 13.616524216524217, "grad_norm": 0.16885283589363098, "learning_rate": 1.2447582137892688e-05, "loss": 0.6178, "step": 11955 }, { "epoch": 13.617663817663818, "grad_norm": 0.2070610374212265, "learning_rate": 1.244355304259365e-05, "loss": 0.3871, "step": 11956 }, { "epoch": 13.618803418803418, "grad_norm": 0.2371179312467575, "learning_rate": 1.2439524383414547e-05, "loss": 0.6418, "step": 11957 }, { "epoch": 13.61994301994302, "grad_norm": 0.2366572618484497, "learning_rate": 1.2435496160495302e-05, "loss": 0.7808, "step": 11958 }, { "epoch": 13.621082621082621, "grad_norm": 0.3098319470882416, "learning_rate": 1.2431468373975841e-05, "loss": 0.4199, "step": 11959 }, { "epoch": 13.622222222222222, "grad_norm": 0.22360925376415253, "learning_rate": 1.2427441023996033e-05, "loss": 0.6718, "step": 11960 }, { "epoch": 13.623361823361824, "grad_norm": 0.19217972457408905, "learning_rate": 1.2423414110695775e-05, "loss": 0.578, "step": 11961 }, { "epoch": 13.624501424501425, "grad_norm": 0.2236679196357727, "learning_rate": 1.241938763421493e-05, "loss": 0.6048, "step": 11962 }, { "epoch": 13.625641025641025, "grad_norm": 0.2347201257944107, "learning_rate": 1.2415361594693358e-05, "loss": 0.5748, "step": 11963 }, { "epoch": 13.626780626780628, "grad_norm": 0.18007099628448486, "learning_rate": 1.2411335992270878e-05, "loss": 0.6316, "step": 11964 }, { "epoch": 13.627920227920228, "grad_norm": 0.241592139005661, "learning_rate": 1.2407310827087318e-05, "loss": 0.5624, "step": 11965 }, { "epoch": 13.629059829059829, "grad_norm": 0.2217334806919098, "learning_rate": 1.2403286099282474e-05, "loss": 0.7087, "step": 11966 }, { "epoch": 13.630199430199431, "grad_norm": 0.19712881743907928, "learning_rate": 1.2399261808996162e-05, "loss": 0.8149, "step": 11967 }, { "epoch": 13.631339031339031, "grad_norm": 0.22485846281051636, "learning_rate": 1.239523795636813e-05, "loss": 0.8697, "step": 11968 }, { "epoch": 13.632478632478632, "grad_norm": 0.19370786845684052, "learning_rate": 1.2391214541538149e-05, "loss": 0.797, "step": 11969 }, { "epoch": 13.633618233618234, "grad_norm": 0.2015693038702011, "learning_rate": 1.2387191564645958e-05, "loss": 0.7652, "step": 11970 }, { "epoch": 13.634757834757835, "grad_norm": 0.3329710364341736, "learning_rate": 1.23831690258313e-05, "loss": 0.3782, "step": 11971 }, { "epoch": 13.635897435897435, "grad_norm": 0.24312107264995575, "learning_rate": 1.237914692523387e-05, "loss": 0.7565, "step": 11972 }, { "epoch": 13.637037037037038, "grad_norm": 0.1755981296300888, "learning_rate": 1.2375125262993376e-05, "loss": 0.7782, "step": 11973 }, { "epoch": 13.638176638176638, "grad_norm": 0.26076728105545044, "learning_rate": 1.2371104039249498e-05, "loss": 0.5249, "step": 11974 }, { "epoch": 13.639316239316239, "grad_norm": 0.19592714309692383, "learning_rate": 1.2367083254141915e-05, "loss": 0.6927, "step": 11975 }, { "epoch": 13.640455840455841, "grad_norm": 0.21158058941364288, "learning_rate": 1.2363062907810263e-05, "loss": 0.6754, "step": 11976 }, { "epoch": 13.641595441595442, "grad_norm": 0.21058164536952972, "learning_rate": 1.2359043000394178e-05, "loss": 0.6716, "step": 11977 }, { "epoch": 13.642735042735042, "grad_norm": 0.1837356686592102, "learning_rate": 1.2355023532033311e-05, "loss": 0.9676, "step": 11978 }, { "epoch": 13.643874643874645, "grad_norm": 0.17448551952838898, "learning_rate": 1.2351004502867242e-05, "loss": 0.8437, "step": 11979 }, { "epoch": 13.645014245014245, "grad_norm": 0.2007696032524109, "learning_rate": 1.2346985913035572e-05, "loss": 0.814, "step": 11980 }, { "epoch": 13.646153846153846, "grad_norm": 0.2008890062570572, "learning_rate": 1.2342967762677876e-05, "loss": 0.7094, "step": 11981 }, { "epoch": 13.647293447293448, "grad_norm": 0.1742667853832245, "learning_rate": 1.2338950051933726e-05, "loss": 0.8423, "step": 11982 }, { "epoch": 13.648433048433048, "grad_norm": 0.22775019705295563, "learning_rate": 1.2334932780942648e-05, "loss": 0.3184, "step": 11983 }, { "epoch": 13.649572649572649, "grad_norm": 0.23534031212329865, "learning_rate": 1.2330915949844187e-05, "loss": 0.7006, "step": 11984 }, { "epoch": 13.650712250712251, "grad_norm": 0.17848096787929535, "learning_rate": 1.2326899558777855e-05, "loss": 0.7268, "step": 11985 }, { "epoch": 13.651851851851852, "grad_norm": 0.2199409306049347, "learning_rate": 1.232288360788316e-05, "loss": 0.6399, "step": 11986 }, { "epoch": 13.652991452991452, "grad_norm": 0.20777131617069244, "learning_rate": 1.2318868097299568e-05, "loss": 0.8304, "step": 11987 }, { "epoch": 13.654131054131055, "grad_norm": 0.20312553644180298, "learning_rate": 1.2314853027166565e-05, "loss": 0.8173, "step": 11988 }, { "epoch": 13.655270655270655, "grad_norm": 0.15953557193279266, "learning_rate": 1.2310838397623598e-05, "loss": 0.7031, "step": 11989 }, { "epoch": 13.656410256410256, "grad_norm": 0.2103070765733719, "learning_rate": 1.2306824208810108e-05, "loss": 0.89, "step": 11990 }, { "epoch": 13.657549857549858, "grad_norm": 0.2311912178993225, "learning_rate": 1.2302810460865522e-05, "loss": 0.4565, "step": 11991 }, { "epoch": 13.658689458689459, "grad_norm": 0.2704470753669739, "learning_rate": 1.2298797153929243e-05, "loss": 0.5362, "step": 11992 }, { "epoch": 13.65982905982906, "grad_norm": 0.23101139068603516, "learning_rate": 1.2294784288140679e-05, "loss": 0.6446, "step": 11993 }, { "epoch": 13.660968660968662, "grad_norm": 0.21261920034885406, "learning_rate": 1.2290771863639186e-05, "loss": 0.8463, "step": 11994 }, { "epoch": 13.662108262108262, "grad_norm": 0.19150252640247345, "learning_rate": 1.2286759880564136e-05, "loss": 0.6388, "step": 11995 }, { "epoch": 13.663247863247863, "grad_norm": 0.18911303579807281, "learning_rate": 1.2282748339054878e-05, "loss": 0.5826, "step": 11996 }, { "epoch": 13.664387464387465, "grad_norm": 0.21570447087287903, "learning_rate": 1.2278737239250754e-05, "loss": 0.6871, "step": 11997 }, { "epoch": 13.665527065527066, "grad_norm": 0.18554680049419403, "learning_rate": 1.2274726581291057e-05, "loss": 0.6245, "step": 11998 }, { "epoch": 13.666666666666666, "grad_norm": 0.20974911749362946, "learning_rate": 1.2270716365315101e-05, "loss": 0.7523, "step": 11999 }, { "epoch": 13.667806267806268, "grad_norm": 0.1708163470029831, "learning_rate": 1.226670659146217e-05, "loss": 0.5978, "step": 12000 }, { "epoch": 13.668945868945869, "grad_norm": 0.21521274745464325, "learning_rate": 1.2262697259871538e-05, "loss": 0.6199, "step": 12001 }, { "epoch": 13.67008547008547, "grad_norm": 0.1693134307861328, "learning_rate": 1.2258688370682459e-05, "loss": 0.6558, "step": 12002 }, { "epoch": 13.671225071225072, "grad_norm": 0.21319220960140228, "learning_rate": 1.2254679924034169e-05, "loss": 0.6246, "step": 12003 }, { "epoch": 13.672364672364672, "grad_norm": 0.23365657031536102, "learning_rate": 1.2250671920065903e-05, "loss": 0.5943, "step": 12004 }, { "epoch": 13.673504273504273, "grad_norm": 0.2138364464044571, "learning_rate": 1.2246664358916856e-05, "loss": 0.6211, "step": 12005 }, { "epoch": 13.674643874643875, "grad_norm": 0.1780523955821991, "learning_rate": 1.2242657240726226e-05, "loss": 0.7917, "step": 12006 }, { "epoch": 13.675783475783476, "grad_norm": 0.2242337018251419, "learning_rate": 1.2238650565633195e-05, "loss": 0.6403, "step": 12007 }, { "epoch": 13.676923076923076, "grad_norm": 0.2050691395998001, "learning_rate": 1.2234644333776931e-05, "loss": 0.5379, "step": 12008 }, { "epoch": 13.678062678062679, "grad_norm": 0.20676282048225403, "learning_rate": 1.2230638545296566e-05, "loss": 0.8157, "step": 12009 }, { "epoch": 13.67920227920228, "grad_norm": 0.23082448542118073, "learning_rate": 1.2226633200331238e-05, "loss": 0.707, "step": 12010 }, { "epoch": 13.68034188034188, "grad_norm": 0.20839092135429382, "learning_rate": 1.222262829902006e-05, "loss": 0.6023, "step": 12011 }, { "epoch": 13.681481481481482, "grad_norm": 0.1955326348543167, "learning_rate": 1.2218623841502156e-05, "loss": 0.7687, "step": 12012 }, { "epoch": 13.682621082621083, "grad_norm": 0.22524051368236542, "learning_rate": 1.2214619827916584e-05, "loss": 0.5829, "step": 12013 }, { "epoch": 13.683760683760683, "grad_norm": 0.1810995638370514, "learning_rate": 1.2210616258402427e-05, "loss": 0.7893, "step": 12014 }, { "epoch": 13.684900284900285, "grad_norm": 0.18191368877887726, "learning_rate": 1.2206613133098737e-05, "loss": 0.7852, "step": 12015 }, { "epoch": 13.686039886039886, "grad_norm": 0.2552942931652069, "learning_rate": 1.2202610452144564e-05, "loss": 0.6092, "step": 12016 }, { "epoch": 13.687179487179487, "grad_norm": 0.2494039535522461, "learning_rate": 1.2198608215678917e-05, "loss": 0.5332, "step": 12017 }, { "epoch": 13.688319088319089, "grad_norm": 0.1996004283428192, "learning_rate": 1.2194606423840807e-05, "loss": 0.6962, "step": 12018 }, { "epoch": 13.68945868945869, "grad_norm": 0.197572261095047, "learning_rate": 1.2190605076769232e-05, "loss": 0.7313, "step": 12019 }, { "epoch": 13.69059829059829, "grad_norm": 0.2840818762779236, "learning_rate": 1.2186604174603178e-05, "loss": 0.4424, "step": 12020 }, { "epoch": 13.691737891737892, "grad_norm": 0.22129279375076294, "learning_rate": 1.2182603717481588e-05, "loss": 0.7447, "step": 12021 }, { "epoch": 13.692877492877493, "grad_norm": 0.18007630109786987, "learning_rate": 1.2178603705543412e-05, "loss": 0.8695, "step": 12022 }, { "epoch": 13.694017094017093, "grad_norm": 0.19652818143367767, "learning_rate": 1.2174604138927603e-05, "loss": 0.6182, "step": 12023 }, { "epoch": 13.695156695156696, "grad_norm": 0.23077695071697235, "learning_rate": 1.2170605017773057e-05, "loss": 0.5186, "step": 12024 }, { "epoch": 13.696296296296296, "grad_norm": 0.16277217864990234, "learning_rate": 1.2166606342218678e-05, "loss": 0.7847, "step": 12025 }, { "epoch": 13.697435897435897, "grad_norm": 0.20688322186470032, "learning_rate": 1.216260811240335e-05, "loss": 0.6436, "step": 12026 }, { "epoch": 13.698575498575499, "grad_norm": 0.19791574776172638, "learning_rate": 1.2158610328465958e-05, "loss": 0.7555, "step": 12027 }, { "epoch": 13.6997150997151, "grad_norm": 0.21570193767547607, "learning_rate": 1.2154612990545333e-05, "loss": 0.534, "step": 12028 }, { "epoch": 13.7008547008547, "grad_norm": 0.17073188722133636, "learning_rate": 1.2150616098780326e-05, "loss": 0.8929, "step": 12029 }, { "epoch": 13.701994301994302, "grad_norm": 0.17871499061584473, "learning_rate": 1.2146619653309755e-05, "loss": 0.5669, "step": 12030 }, { "epoch": 13.703133903133903, "grad_norm": 0.19358180463314056, "learning_rate": 1.2142623654272439e-05, "loss": 0.4399, "step": 12031 }, { "epoch": 13.704273504273504, "grad_norm": 0.19818490743637085, "learning_rate": 1.2138628101807154e-05, "loss": 0.6763, "step": 12032 }, { "epoch": 13.705413105413106, "grad_norm": 0.2800033688545227, "learning_rate": 1.2134632996052675e-05, "loss": 0.4403, "step": 12033 }, { "epoch": 13.706552706552706, "grad_norm": 0.2268080711364746, "learning_rate": 1.213063833714779e-05, "loss": 0.6589, "step": 12034 }, { "epoch": 13.707692307692307, "grad_norm": 0.22550585865974426, "learning_rate": 1.2126644125231215e-05, "loss": 0.6582, "step": 12035 }, { "epoch": 13.70883190883191, "grad_norm": 0.22792108356952667, "learning_rate": 1.2122650360441696e-05, "loss": 0.6455, "step": 12036 }, { "epoch": 13.70997150997151, "grad_norm": 0.15592224895954132, "learning_rate": 1.2118657042917938e-05, "loss": 0.7888, "step": 12037 }, { "epoch": 13.71111111111111, "grad_norm": 0.18949413299560547, "learning_rate": 1.2114664172798656e-05, "loss": 0.6035, "step": 12038 }, { "epoch": 13.712250712250713, "grad_norm": 0.23782742023468018, "learning_rate": 1.2110671750222512e-05, "loss": 0.5429, "step": 12039 }, { "epoch": 13.713390313390313, "grad_norm": 0.19856585562229156, "learning_rate": 1.2106679775328186e-05, "loss": 0.7701, "step": 12040 }, { "epoch": 13.714529914529914, "grad_norm": 0.22010724246501923, "learning_rate": 1.2102688248254326e-05, "loss": 0.6433, "step": 12041 }, { "epoch": 13.715669515669516, "grad_norm": 0.2371382713317871, "learning_rate": 1.2098697169139581e-05, "loss": 0.5825, "step": 12042 }, { "epoch": 13.716809116809117, "grad_norm": 0.2550557255744934, "learning_rate": 1.2094706538122551e-05, "loss": 0.4836, "step": 12043 }, { "epoch": 13.717948717948717, "grad_norm": 0.29449477791786194, "learning_rate": 1.2090716355341848e-05, "loss": 0.6653, "step": 12044 }, { "epoch": 13.71908831908832, "grad_norm": 0.1919989287853241, "learning_rate": 1.2086726620936072e-05, "loss": 0.6437, "step": 12045 }, { "epoch": 13.72022792022792, "grad_norm": 0.20676428079605103, "learning_rate": 1.2082737335043803e-05, "loss": 0.7867, "step": 12046 }, { "epoch": 13.72136752136752, "grad_norm": 0.2550913095474243, "learning_rate": 1.207874849780358e-05, "loss": 0.7347, "step": 12047 }, { "epoch": 13.722507122507123, "grad_norm": 0.2725510895252228, "learning_rate": 1.2074760109353953e-05, "loss": 0.5768, "step": 12048 }, { "epoch": 13.723646723646723, "grad_norm": 0.242890864610672, "learning_rate": 1.2070772169833453e-05, "loss": 0.6288, "step": 12049 }, { "epoch": 13.724786324786324, "grad_norm": 0.21027885377407074, "learning_rate": 1.2066784679380603e-05, "loss": 0.6246, "step": 12050 }, { "epoch": 13.725925925925926, "grad_norm": 0.2023586928844452, "learning_rate": 1.2062797638133874e-05, "loss": 0.7381, "step": 12051 }, { "epoch": 13.727065527065527, "grad_norm": 0.18032431602478027, "learning_rate": 1.2058811046231764e-05, "loss": 0.6457, "step": 12052 }, { "epoch": 13.728205128205127, "grad_norm": 0.2373048961162567, "learning_rate": 1.2054824903812731e-05, "loss": 0.56, "step": 12053 }, { "epoch": 13.72934472934473, "grad_norm": 0.32662925124168396, "learning_rate": 1.2050839211015241e-05, "loss": 0.4491, "step": 12054 }, { "epoch": 13.73048433048433, "grad_norm": 0.21573962271213531, "learning_rate": 1.2046853967977695e-05, "loss": 0.7075, "step": 12055 }, { "epoch": 13.73162393162393, "grad_norm": 0.18163876235485077, "learning_rate": 1.2042869174838542e-05, "loss": 0.7223, "step": 12056 }, { "epoch": 13.732763532763533, "grad_norm": 0.20618174970149994, "learning_rate": 1.2038884831736187e-05, "loss": 0.6681, "step": 12057 }, { "epoch": 13.733903133903134, "grad_norm": 0.24440184235572815, "learning_rate": 1.2034900938808991e-05, "loss": 0.6618, "step": 12058 }, { "epoch": 13.735042735042736, "grad_norm": 0.1526571363210678, "learning_rate": 1.2030917496195343e-05, "loss": 0.8562, "step": 12059 }, { "epoch": 13.736182336182337, "grad_norm": 0.21257290244102478, "learning_rate": 1.2026934504033594e-05, "loss": 0.7117, "step": 12060 }, { "epoch": 13.737321937321937, "grad_norm": 0.22210821509361267, "learning_rate": 1.2022951962462095e-05, "loss": 0.6232, "step": 12061 }, { "epoch": 13.73846153846154, "grad_norm": 0.2265629917383194, "learning_rate": 1.201896987161915e-05, "loss": 0.764, "step": 12062 }, { "epoch": 13.73960113960114, "grad_norm": 0.18448157608509064, "learning_rate": 1.2014988231643082e-05, "loss": 0.928, "step": 12063 }, { "epoch": 13.74074074074074, "grad_norm": 0.19953051209449768, "learning_rate": 1.2011007042672181e-05, "loss": 0.626, "step": 12064 }, { "epoch": 13.741880341880343, "grad_norm": 0.19707120954990387, "learning_rate": 1.2007026304844735e-05, "loss": 0.8638, "step": 12065 }, { "epoch": 13.743019943019943, "grad_norm": 0.21861234307289124, "learning_rate": 1.2003046018298977e-05, "loss": 0.6904, "step": 12066 }, { "epoch": 13.744159544159544, "grad_norm": 0.20723873376846313, "learning_rate": 1.1999066183173183e-05, "loss": 0.9858, "step": 12067 }, { "epoch": 13.745299145299146, "grad_norm": 0.2597009539604187, "learning_rate": 1.1995086799605582e-05, "loss": 0.3995, "step": 12068 }, { "epoch": 13.746438746438747, "grad_norm": 0.23442518711090088, "learning_rate": 1.1991107867734368e-05, "loss": 0.781, "step": 12069 }, { "epoch": 13.747578347578347, "grad_norm": 0.22636261582374573, "learning_rate": 1.1987129387697758e-05, "loss": 0.6951, "step": 12070 }, { "epoch": 13.74871794871795, "grad_norm": 0.26419615745544434, "learning_rate": 1.1983151359633923e-05, "loss": 0.5102, "step": 12071 }, { "epoch": 13.74985754985755, "grad_norm": 0.21060913801193237, "learning_rate": 1.1979173783681052e-05, "loss": 0.6206, "step": 12072 }, { "epoch": 13.75099715099715, "grad_norm": 0.26317015290260315, "learning_rate": 1.1975196659977272e-05, "loss": 0.4829, "step": 12073 }, { "epoch": 13.752136752136753, "grad_norm": 0.19388261437416077, "learning_rate": 1.197121998866073e-05, "loss": 0.4855, "step": 12074 }, { "epoch": 13.753276353276354, "grad_norm": 0.1724197119474411, "learning_rate": 1.1967243769869549e-05, "loss": 0.6627, "step": 12075 }, { "epoch": 13.754415954415954, "grad_norm": 0.19934606552124023, "learning_rate": 1.196326800374184e-05, "loss": 0.8031, "step": 12076 }, { "epoch": 13.755555555555556, "grad_norm": 0.19647754728794098, "learning_rate": 1.1959292690415667e-05, "loss": 0.7513, "step": 12077 }, { "epoch": 13.756695156695157, "grad_norm": 0.1899164766073227, "learning_rate": 1.1955317830029127e-05, "loss": 0.8108, "step": 12078 }, { "epoch": 13.757834757834758, "grad_norm": 0.2037467658519745, "learning_rate": 1.1951343422720284e-05, "loss": 0.7484, "step": 12079 }, { "epoch": 13.75897435897436, "grad_norm": 0.20733106136322021, "learning_rate": 1.1947369468627158e-05, "loss": 0.6196, "step": 12080 }, { "epoch": 13.76011396011396, "grad_norm": 0.18393978476524353, "learning_rate": 1.1943395967887786e-05, "loss": 0.7818, "step": 12081 }, { "epoch": 13.761253561253561, "grad_norm": 0.17892445623874664, "learning_rate": 1.1939422920640174e-05, "loss": 0.9429, "step": 12082 }, { "epoch": 13.762393162393163, "grad_norm": 0.199140265583992, "learning_rate": 1.1935450327022336e-05, "loss": 0.9958, "step": 12083 }, { "epoch": 13.763532763532764, "grad_norm": 0.239736407995224, "learning_rate": 1.1931478187172224e-05, "loss": 0.6854, "step": 12084 }, { "epoch": 13.764672364672364, "grad_norm": 0.20002266764640808, "learning_rate": 1.1927506501227812e-05, "loss": 0.6576, "step": 12085 }, { "epoch": 13.765811965811967, "grad_norm": 0.21758271753787994, "learning_rate": 1.1923535269327049e-05, "loss": 0.6983, "step": 12086 }, { "epoch": 13.766951566951567, "grad_norm": 0.22153815627098083, "learning_rate": 1.1919564491607877e-05, "loss": 0.638, "step": 12087 }, { "epoch": 13.768091168091168, "grad_norm": 0.23503264784812927, "learning_rate": 1.191559416820818e-05, "loss": 0.8276, "step": 12088 }, { "epoch": 13.76923076923077, "grad_norm": 0.20020806789398193, "learning_rate": 1.1911624299265892e-05, "loss": 0.6884, "step": 12089 }, { "epoch": 13.77037037037037, "grad_norm": 0.20131689310073853, "learning_rate": 1.1907654884918885e-05, "loss": 0.5925, "step": 12090 }, { "epoch": 13.771509971509971, "grad_norm": 0.19345976412296295, "learning_rate": 1.1903685925305036e-05, "loss": 0.7759, "step": 12091 }, { "epoch": 13.772649572649573, "grad_norm": 0.16225802898406982, "learning_rate": 1.1899717420562179e-05, "loss": 0.7014, "step": 12092 }, { "epoch": 13.773789173789174, "grad_norm": 0.19633789360523224, "learning_rate": 1.1895749370828166e-05, "loss": 0.7085, "step": 12093 }, { "epoch": 13.774928774928775, "grad_norm": 0.20979763567447662, "learning_rate": 1.189178177624081e-05, "loss": 0.7952, "step": 12094 }, { "epoch": 13.776068376068377, "grad_norm": 0.17385949194431305, "learning_rate": 1.1887814636937932e-05, "loss": 0.937, "step": 12095 }, { "epoch": 13.777207977207977, "grad_norm": 0.21739883720874786, "learning_rate": 1.1883847953057303e-05, "loss": 0.8762, "step": 12096 }, { "epoch": 13.778347578347578, "grad_norm": 0.19551996886730194, "learning_rate": 1.1879881724736702e-05, "loss": 0.7889, "step": 12097 }, { "epoch": 13.77948717948718, "grad_norm": 0.25265413522720337, "learning_rate": 1.187591595211389e-05, "loss": 0.6491, "step": 12098 }, { "epoch": 13.78062678062678, "grad_norm": 0.18349041044712067, "learning_rate": 1.1871950635326606e-05, "loss": 0.7704, "step": 12099 }, { "epoch": 13.781766381766381, "grad_norm": 0.20760832726955414, "learning_rate": 1.186798577451258e-05, "loss": 0.8489, "step": 12100 }, { "epoch": 13.782905982905984, "grad_norm": 0.2054699957370758, "learning_rate": 1.1864021369809522e-05, "loss": 0.547, "step": 12101 }, { "epoch": 13.784045584045584, "grad_norm": 0.32506656646728516, "learning_rate": 1.1860057421355136e-05, "loss": 0.6203, "step": 12102 }, { "epoch": 13.785185185185185, "grad_norm": 0.1898188441991806, "learning_rate": 1.1856093929287079e-05, "loss": 0.8057, "step": 12103 }, { "epoch": 13.786324786324787, "grad_norm": 0.21240024268627167, "learning_rate": 1.1852130893743026e-05, "loss": 0.6552, "step": 12104 }, { "epoch": 13.787464387464388, "grad_norm": 0.21144267916679382, "learning_rate": 1.1848168314860627e-05, "loss": 0.5596, "step": 12105 }, { "epoch": 13.788603988603988, "grad_norm": 0.15825605392456055, "learning_rate": 1.1844206192777515e-05, "loss": 0.9469, "step": 12106 }, { "epoch": 13.78974358974359, "grad_norm": 0.1907399445772171, "learning_rate": 1.1840244527631295e-05, "loss": 0.8039, "step": 12107 }, { "epoch": 13.790883190883191, "grad_norm": 0.20638875663280487, "learning_rate": 1.1836283319559572e-05, "loss": 0.6795, "step": 12108 }, { "epoch": 13.792022792022792, "grad_norm": 0.2701677680015564, "learning_rate": 1.1832322568699927e-05, "loss": 0.603, "step": 12109 }, { "epoch": 13.793162393162394, "grad_norm": 1.2137209177017212, "learning_rate": 1.182836227518993e-05, "loss": 0.5928, "step": 12110 }, { "epoch": 13.794301994301994, "grad_norm": 0.26294225454330444, "learning_rate": 1.1824402439167137e-05, "loss": 0.7734, "step": 12111 }, { "epoch": 13.795441595441595, "grad_norm": 0.21291710436344147, "learning_rate": 1.1820443060769077e-05, "loss": 0.6986, "step": 12112 }, { "epoch": 13.796581196581197, "grad_norm": 0.20568042993545532, "learning_rate": 1.1816484140133286e-05, "loss": 0.7356, "step": 12113 }, { "epoch": 13.797720797720798, "grad_norm": 0.23887597024440765, "learning_rate": 1.1812525677397243e-05, "loss": 0.4924, "step": 12114 }, { "epoch": 13.798860398860398, "grad_norm": 0.1845303177833557, "learning_rate": 1.1808567672698451e-05, "loss": 0.7989, "step": 12115 }, { "epoch": 13.8, "grad_norm": 0.216530442237854, "learning_rate": 1.1804610126174378e-05, "loss": 0.7151, "step": 12116 }, { "epoch": 13.801139601139601, "grad_norm": 0.19970011711120605, "learning_rate": 1.1800653037962494e-05, "loss": 0.5739, "step": 12117 }, { "epoch": 13.802279202279202, "grad_norm": 0.24037083983421326, "learning_rate": 1.179669640820022e-05, "loss": 0.6529, "step": 12118 }, { "epoch": 13.803418803418804, "grad_norm": 0.19288606941699982, "learning_rate": 1.1792740237024988e-05, "loss": 0.8498, "step": 12119 }, { "epoch": 13.804558404558405, "grad_norm": 0.24818424880504608, "learning_rate": 1.1788784524574207e-05, "loss": 0.6215, "step": 12120 }, { "epoch": 13.805698005698005, "grad_norm": 0.1914277821779251, "learning_rate": 1.1784829270985271e-05, "loss": 0.6896, "step": 12121 }, { "epoch": 13.806837606837608, "grad_norm": 0.2206151932477951, "learning_rate": 1.1780874476395553e-05, "loss": 0.774, "step": 12122 }, { "epoch": 13.807977207977208, "grad_norm": 0.21806097030639648, "learning_rate": 1.1776920140942422e-05, "loss": 0.6167, "step": 12123 }, { "epoch": 13.809116809116809, "grad_norm": 0.16567018628120422, "learning_rate": 1.1772966264763217e-05, "loss": 0.6197, "step": 12124 }, { "epoch": 13.810256410256411, "grad_norm": 0.21184101700782776, "learning_rate": 1.1769012847995275e-05, "loss": 0.6112, "step": 12125 }, { "epoch": 13.811396011396011, "grad_norm": 0.25883615016937256, "learning_rate": 1.1765059890775895e-05, "loss": 0.6972, "step": 12126 }, { "epoch": 13.812535612535612, "grad_norm": 0.21643269062042236, "learning_rate": 1.1761107393242382e-05, "loss": 0.7196, "step": 12127 }, { "epoch": 13.813675213675214, "grad_norm": 0.20388276875019073, "learning_rate": 1.1757155355532015e-05, "loss": 0.6601, "step": 12128 }, { "epoch": 13.814814814814815, "grad_norm": 0.207803875207901, "learning_rate": 1.1753203777782067e-05, "loss": 0.5011, "step": 12129 }, { "epoch": 13.815954415954415, "grad_norm": 0.2117311656475067, "learning_rate": 1.1749252660129776e-05, "loss": 0.7493, "step": 12130 }, { "epoch": 13.817094017094018, "grad_norm": 0.22393865883350372, "learning_rate": 1.1745302002712377e-05, "loss": 0.6495, "step": 12131 }, { "epoch": 13.818233618233618, "grad_norm": 0.23343952000141144, "learning_rate": 1.1741351805667092e-05, "loss": 0.6071, "step": 12132 }, { "epoch": 13.819373219373219, "grad_norm": 0.22656363248825073, "learning_rate": 1.173740206913112e-05, "loss": 0.7191, "step": 12133 }, { "epoch": 13.820512820512821, "grad_norm": 0.15526340901851654, "learning_rate": 1.1733452793241645e-05, "loss": 0.6953, "step": 12134 }, { "epoch": 13.821652421652422, "grad_norm": 0.2617109715938568, "learning_rate": 1.172950397813584e-05, "loss": 0.604, "step": 12135 }, { "epoch": 13.822792022792022, "grad_norm": 0.20950858294963837, "learning_rate": 1.1725555623950863e-05, "loss": 0.6931, "step": 12136 }, { "epoch": 13.823931623931625, "grad_norm": 0.22320875525474548, "learning_rate": 1.1721607730823832e-05, "loss": 0.5865, "step": 12137 }, { "epoch": 13.825071225071225, "grad_norm": 0.27012088894844055, "learning_rate": 1.1717660298891886e-05, "loss": 0.4476, "step": 12138 }, { "epoch": 13.826210826210826, "grad_norm": 0.248300701379776, "learning_rate": 1.1713713328292122e-05, "loss": 0.4039, "step": 12139 }, { "epoch": 13.827350427350428, "grad_norm": 0.20293055474758148, "learning_rate": 1.1709766819161641e-05, "loss": 0.7534, "step": 12140 }, { "epoch": 13.828490028490029, "grad_norm": 0.19899849593639374, "learning_rate": 1.1705820771637496e-05, "loss": 0.7671, "step": 12141 }, { "epoch": 13.829629629629629, "grad_norm": 0.20491692423820496, "learning_rate": 1.1701875185856756e-05, "loss": 0.6022, "step": 12142 }, { "epoch": 13.830769230769231, "grad_norm": 0.24299074709415436, "learning_rate": 1.1697930061956461e-05, "loss": 0.5729, "step": 12143 }, { "epoch": 13.831908831908832, "grad_norm": 0.19051791727542877, "learning_rate": 1.1693985400073635e-05, "loss": 0.8498, "step": 12144 }, { "epoch": 13.833048433048432, "grad_norm": 0.2350401133298874, "learning_rate": 1.1690041200345286e-05, "loss": 0.372, "step": 12145 }, { "epoch": 13.834188034188035, "grad_norm": 0.23774166405200958, "learning_rate": 1.1686097462908413e-05, "loss": 0.5306, "step": 12146 }, { "epoch": 13.835327635327635, "grad_norm": 0.21102823317050934, "learning_rate": 1.1682154187899994e-05, "loss": 0.7034, "step": 12147 }, { "epoch": 13.836467236467236, "grad_norm": 0.20242071151733398, "learning_rate": 1.1678211375456977e-05, "loss": 0.627, "step": 12148 }, { "epoch": 13.837606837606838, "grad_norm": 0.21489715576171875, "learning_rate": 1.1674269025716315e-05, "loss": 0.8146, "step": 12149 }, { "epoch": 13.838746438746439, "grad_norm": 0.17829488217830658, "learning_rate": 1.1670327138814932e-05, "loss": 0.6618, "step": 12150 }, { "epoch": 13.83988603988604, "grad_norm": 0.24720202386379242, "learning_rate": 1.1666385714889754e-05, "loss": 0.678, "step": 12151 }, { "epoch": 13.841025641025642, "grad_norm": 0.19597914814949036, "learning_rate": 1.1662444754077662e-05, "loss": 0.6746, "step": 12152 }, { "epoch": 13.842165242165242, "grad_norm": 0.2212129533290863, "learning_rate": 1.165850425651554e-05, "loss": 0.712, "step": 12153 }, { "epoch": 13.843304843304843, "grad_norm": 0.21502304077148438, "learning_rate": 1.1654564222340255e-05, "loss": 0.6935, "step": 12154 }, { "epoch": 13.844444444444445, "grad_norm": 0.2452617734670639, "learning_rate": 1.1650624651688654e-05, "loss": 0.7074, "step": 12155 }, { "epoch": 13.845584045584046, "grad_norm": 0.20556528866291046, "learning_rate": 1.164668554469757e-05, "loss": 0.5669, "step": 12156 }, { "epoch": 13.846723646723646, "grad_norm": 0.2439274787902832, "learning_rate": 1.1642746901503817e-05, "loss": 0.597, "step": 12157 }, { "epoch": 13.847863247863248, "grad_norm": 0.19938474893569946, "learning_rate": 1.163880872224421e-05, "loss": 0.591, "step": 12158 }, { "epoch": 13.849002849002849, "grad_norm": 0.20182687044143677, "learning_rate": 1.1634871007055507e-05, "loss": 0.9088, "step": 12159 }, { "epoch": 13.85014245014245, "grad_norm": 0.20904295146465302, "learning_rate": 1.1630933756074488e-05, "loss": 0.6043, "step": 12160 }, { "epoch": 13.851282051282052, "grad_norm": 0.18662971258163452, "learning_rate": 1.1626996969437907e-05, "loss": 0.8019, "step": 12161 }, { "epoch": 13.852421652421652, "grad_norm": 0.27858710289001465, "learning_rate": 1.1623060647282505e-05, "loss": 0.5142, "step": 12162 }, { "epoch": 13.853561253561253, "grad_norm": 0.21194970607757568, "learning_rate": 1.1619124789744984e-05, "loss": 0.5959, "step": 12163 }, { "epoch": 13.854700854700855, "grad_norm": 0.19565068185329437, "learning_rate": 1.1615189396962057e-05, "loss": 0.7064, "step": 12164 }, { "epoch": 13.855840455840456, "grad_norm": 0.29635757207870483, "learning_rate": 1.1611254469070401e-05, "loss": 0.5085, "step": 12165 }, { "epoch": 13.856980056980056, "grad_norm": 0.2134222537279129, "learning_rate": 1.1607320006206715e-05, "loss": 0.5913, "step": 12166 }, { "epoch": 13.858119658119659, "grad_norm": 0.2340858280658722, "learning_rate": 1.1603386008507627e-05, "loss": 0.5986, "step": 12167 }, { "epoch": 13.85925925925926, "grad_norm": 0.20966479182243347, "learning_rate": 1.1599452476109784e-05, "loss": 0.647, "step": 12168 }, { "epoch": 13.86039886039886, "grad_norm": 0.1754055917263031, "learning_rate": 1.1595519409149807e-05, "loss": 0.6645, "step": 12169 }, { "epoch": 13.861538461538462, "grad_norm": 0.20462289452552795, "learning_rate": 1.1591586807764313e-05, "loss": 0.6546, "step": 12170 }, { "epoch": 13.862678062678063, "grad_norm": 0.17154642939567566, "learning_rate": 1.1587654672089873e-05, "loss": 0.6322, "step": 12171 }, { "epoch": 13.863817663817663, "grad_norm": 0.24959947168827057, "learning_rate": 1.1583723002263069e-05, "loss": 0.5293, "step": 12172 }, { "epoch": 13.864957264957265, "grad_norm": 0.18102394044399261, "learning_rate": 1.1579791798420462e-05, "loss": 0.7882, "step": 12173 }, { "epoch": 13.866096866096866, "grad_norm": 0.25361573696136475, "learning_rate": 1.15758610606986e-05, "loss": 0.4804, "step": 12174 }, { "epoch": 13.867236467236467, "grad_norm": 0.1994975209236145, "learning_rate": 1.1571930789233993e-05, "loss": 0.7014, "step": 12175 }, { "epoch": 13.868376068376069, "grad_norm": 0.1708739995956421, "learning_rate": 1.1568000984163143e-05, "loss": 0.7404, "step": 12176 }, { "epoch": 13.86951566951567, "grad_norm": 0.24408669769763947, "learning_rate": 1.1564071645622579e-05, "loss": 0.4335, "step": 12177 }, { "epoch": 13.87065527065527, "grad_norm": 0.2690698206424713, "learning_rate": 1.1560142773748744e-05, "loss": 0.6219, "step": 12178 }, { "epoch": 13.871794871794872, "grad_norm": 0.3402236998081207, "learning_rate": 1.1556214368678112e-05, "loss": 0.4422, "step": 12179 }, { "epoch": 13.872934472934473, "grad_norm": 0.23265834152698517, "learning_rate": 1.1552286430547126e-05, "loss": 0.6945, "step": 12180 }, { "epoch": 13.874074074074073, "grad_norm": 0.18340229988098145, "learning_rate": 1.1548358959492222e-05, "loss": 0.7958, "step": 12181 }, { "epoch": 13.875213675213676, "grad_norm": 0.2880589962005615, "learning_rate": 1.1544431955649795e-05, "loss": 0.4759, "step": 12182 }, { "epoch": 13.876353276353276, "grad_norm": 0.192237988114357, "learning_rate": 1.1540505419156247e-05, "loss": 0.6922, "step": 12183 }, { "epoch": 13.877492877492877, "grad_norm": 0.22548675537109375, "learning_rate": 1.1536579350147964e-05, "loss": 0.6792, "step": 12184 }, { "epoch": 13.878632478632479, "grad_norm": 0.23923629522323608, "learning_rate": 1.153265374876131e-05, "loss": 0.5638, "step": 12185 }, { "epoch": 13.87977207977208, "grad_norm": 0.18484920263290405, "learning_rate": 1.1528728615132616e-05, "loss": 0.6905, "step": 12186 }, { "epoch": 13.88091168091168, "grad_norm": 0.20133459568023682, "learning_rate": 1.1524803949398219e-05, "loss": 0.7755, "step": 12187 }, { "epoch": 13.882051282051282, "grad_norm": 0.17119839787483215, "learning_rate": 1.1520879751694452e-05, "loss": 0.4494, "step": 12188 }, { "epoch": 13.883190883190883, "grad_norm": 0.2637351453304291, "learning_rate": 1.1516956022157594e-05, "loss": 0.6105, "step": 12189 }, { "epoch": 13.884330484330484, "grad_norm": 0.2123945653438568, "learning_rate": 1.1513032760923927e-05, "loss": 0.5973, "step": 12190 }, { "epoch": 13.885470085470086, "grad_norm": 0.2376355677843094, "learning_rate": 1.1509109968129725e-05, "loss": 0.6014, "step": 12191 }, { "epoch": 13.886609686609686, "grad_norm": 0.2171904295682907, "learning_rate": 1.1505187643911242e-05, "loss": 0.7104, "step": 12192 }, { "epoch": 13.887749287749287, "grad_norm": 0.2625115215778351, "learning_rate": 1.1501265788404695e-05, "loss": 0.5782, "step": 12193 }, { "epoch": 13.88888888888889, "grad_norm": 0.20339708030223846, "learning_rate": 1.1497344401746307e-05, "loss": 0.7304, "step": 12194 }, { "epoch": 13.89002849002849, "grad_norm": 0.20857179164886475, "learning_rate": 1.149342348407228e-05, "loss": 0.7868, "step": 12195 }, { "epoch": 13.89116809116809, "grad_norm": 0.23755505681037903, "learning_rate": 1.148950303551881e-05, "loss": 0.7817, "step": 12196 }, { "epoch": 13.892307692307693, "grad_norm": 0.2457745224237442, "learning_rate": 1.1485583056222043e-05, "loss": 0.7278, "step": 12197 }, { "epoch": 13.893447293447293, "grad_norm": 0.18069693446159363, "learning_rate": 1.148166354631813e-05, "loss": 0.6967, "step": 12198 }, { "epoch": 13.894586894586894, "grad_norm": 0.20564036071300507, "learning_rate": 1.147774450594324e-05, "loss": 0.707, "step": 12199 }, { "epoch": 13.895726495726496, "grad_norm": 0.22701707482337952, "learning_rate": 1.147382593523346e-05, "loss": 0.6321, "step": 12200 }, { "epoch": 13.896866096866097, "grad_norm": 0.1899927258491516, "learning_rate": 1.14699078343249e-05, "loss": 0.7092, "step": 12201 }, { "epoch": 13.898005698005697, "grad_norm": 0.1906367838382721, "learning_rate": 1.146599020335365e-05, "loss": 0.6838, "step": 12202 }, { "epoch": 13.8991452991453, "grad_norm": 0.24131150543689728, "learning_rate": 1.146207304245578e-05, "loss": 0.7082, "step": 12203 }, { "epoch": 13.9002849002849, "grad_norm": 0.21252180635929108, "learning_rate": 1.145815635176735e-05, "loss": 0.6188, "step": 12204 }, { "epoch": 13.9014245014245, "grad_norm": 0.20956584811210632, "learning_rate": 1.1454240131424379e-05, "loss": 0.7428, "step": 12205 }, { "epoch": 13.902564102564103, "grad_norm": 0.1799883395433426, "learning_rate": 1.1450324381562901e-05, "loss": 0.6578, "step": 12206 }, { "epoch": 13.903703703703703, "grad_norm": 0.1994655728340149, "learning_rate": 1.1446409102318923e-05, "loss": 0.7279, "step": 12207 }, { "epoch": 13.904843304843304, "grad_norm": 0.20460699498653412, "learning_rate": 1.1442494293828434e-05, "loss": 0.6784, "step": 12208 }, { "epoch": 13.905982905982906, "grad_norm": 0.17750096321105957, "learning_rate": 1.1438579956227385e-05, "loss": 0.6321, "step": 12209 }, { "epoch": 13.907122507122507, "grad_norm": 0.1773277223110199, "learning_rate": 1.1434666089651757e-05, "loss": 0.7894, "step": 12210 }, { "epoch": 13.908262108262107, "grad_norm": 0.22248679399490356, "learning_rate": 1.143075269423749e-05, "loss": 0.86, "step": 12211 }, { "epoch": 13.90940170940171, "grad_norm": 0.1918819546699524, "learning_rate": 1.1426839770120488e-05, "loss": 0.8217, "step": 12212 }, { "epoch": 13.91054131054131, "grad_norm": 0.19866570830345154, "learning_rate": 1.1422927317436665e-05, "loss": 0.663, "step": 12213 }, { "epoch": 13.91168091168091, "grad_norm": 0.21274980902671814, "learning_rate": 1.1419015336321917e-05, "loss": 0.6106, "step": 12214 }, { "epoch": 13.912820512820513, "grad_norm": 0.21307888627052307, "learning_rate": 1.1415103826912122e-05, "loss": 0.6648, "step": 12215 }, { "epoch": 13.913960113960114, "grad_norm": 0.16856104135513306, "learning_rate": 1.141119278934312e-05, "loss": 0.8613, "step": 12216 }, { "epoch": 13.915099715099714, "grad_norm": 0.19490613043308258, "learning_rate": 1.1407282223750762e-05, "loss": 0.6325, "step": 12217 }, { "epoch": 13.916239316239317, "grad_norm": 0.24189704656600952, "learning_rate": 1.1403372130270872e-05, "loss": 0.8264, "step": 12218 }, { "epoch": 13.917378917378917, "grad_norm": 0.2483488917350769, "learning_rate": 1.1399462509039269e-05, "loss": 0.5555, "step": 12219 }, { "epoch": 13.918518518518518, "grad_norm": 0.21185415983200073, "learning_rate": 1.1395553360191724e-05, "loss": 0.7399, "step": 12220 }, { "epoch": 13.91965811965812, "grad_norm": 0.2353629320859909, "learning_rate": 1.1391644683864013e-05, "loss": 0.7179, "step": 12221 }, { "epoch": 13.92079772079772, "grad_norm": 0.19837498664855957, "learning_rate": 1.1387736480191927e-05, "loss": 0.749, "step": 12222 }, { "epoch": 13.921937321937321, "grad_norm": 0.19528906047344208, "learning_rate": 1.1383828749311176e-05, "loss": 0.7444, "step": 12223 }, { "epoch": 13.923076923076923, "grad_norm": 0.191130131483078, "learning_rate": 1.1379921491357497e-05, "loss": 0.6976, "step": 12224 }, { "epoch": 13.924216524216524, "grad_norm": 0.21918609738349915, "learning_rate": 1.1376014706466604e-05, "loss": 0.6714, "step": 12225 }, { "epoch": 13.925356125356124, "grad_norm": 0.2387215793132782, "learning_rate": 1.1372108394774189e-05, "loss": 0.4853, "step": 12226 }, { "epoch": 13.926495726495727, "grad_norm": 0.20336930453777313, "learning_rate": 1.1368202556415922e-05, "loss": 0.5817, "step": 12227 }, { "epoch": 13.927635327635327, "grad_norm": 0.21663232147693634, "learning_rate": 1.1364297191527465e-05, "loss": 0.5624, "step": 12228 }, { "epoch": 13.928774928774928, "grad_norm": 0.22602929174900055, "learning_rate": 1.1360392300244466e-05, "loss": 0.5956, "step": 12229 }, { "epoch": 13.92991452991453, "grad_norm": 0.2190379500389099, "learning_rate": 1.135648788270256e-05, "loss": 0.6329, "step": 12230 }, { "epoch": 13.93105413105413, "grad_norm": 0.2415732592344284, "learning_rate": 1.1352583939037343e-05, "loss": 0.5511, "step": 12231 }, { "epoch": 13.932193732193731, "grad_norm": 0.2015382945537567, "learning_rate": 1.1348680469384404e-05, "loss": 0.7696, "step": 12232 }, { "epoch": 13.933333333333334, "grad_norm": 0.18540619313716888, "learning_rate": 1.1344777473879352e-05, "loss": 0.7944, "step": 12233 }, { "epoch": 13.934472934472934, "grad_norm": 0.17577524483203888, "learning_rate": 1.1340874952657723e-05, "loss": 0.7502, "step": 12234 }, { "epoch": 13.935612535612536, "grad_norm": 0.1959916353225708, "learning_rate": 1.1336972905855067e-05, "loss": 0.8664, "step": 12235 }, { "epoch": 13.936752136752137, "grad_norm": 0.24582608044147491, "learning_rate": 1.1333071333606917e-05, "loss": 0.6266, "step": 12236 }, { "epoch": 13.937891737891738, "grad_norm": 0.19905447959899902, "learning_rate": 1.1329170236048795e-05, "loss": 0.8772, "step": 12237 }, { "epoch": 13.93903133903134, "grad_norm": 0.25572627782821655, "learning_rate": 1.1325269613316176e-05, "loss": 0.5735, "step": 12238 }, { "epoch": 13.94017094017094, "grad_norm": 0.2504177987575531, "learning_rate": 1.1321369465544546e-05, "loss": 0.6384, "step": 12239 }, { "epoch": 13.941310541310541, "grad_norm": 0.19246777892112732, "learning_rate": 1.1317469792869373e-05, "loss": 0.6691, "step": 12240 }, { "epoch": 13.942450142450143, "grad_norm": 0.25398629903793335, "learning_rate": 1.1313570595426109e-05, "loss": 0.6658, "step": 12241 }, { "epoch": 13.943589743589744, "grad_norm": 0.24399836361408234, "learning_rate": 1.1309671873350166e-05, "loss": 0.6095, "step": 12242 }, { "epoch": 13.944729344729344, "grad_norm": 0.2465384304523468, "learning_rate": 1.1305773626776961e-05, "loss": 0.5077, "step": 12243 }, { "epoch": 13.945868945868947, "grad_norm": 0.22829167544841766, "learning_rate": 1.1301875855841906e-05, "loss": 0.7855, "step": 12244 }, { "epoch": 13.947008547008547, "grad_norm": 0.23919540643692017, "learning_rate": 1.129797856068038e-05, "loss": 0.8215, "step": 12245 }, { "epoch": 13.948148148148148, "grad_norm": 0.19917504489421844, "learning_rate": 1.1294081741427731e-05, "loss": 0.7939, "step": 12246 }, { "epoch": 13.94928774928775, "grad_norm": 0.22689618170261383, "learning_rate": 1.1290185398219316e-05, "loss": 0.6322, "step": 12247 }, { "epoch": 13.95042735042735, "grad_norm": 0.2108716517686844, "learning_rate": 1.1286289531190464e-05, "loss": 0.6106, "step": 12248 }, { "epoch": 13.951566951566951, "grad_norm": 0.2141427993774414, "learning_rate": 1.1282394140476497e-05, "loss": 0.6067, "step": 12249 }, { "epoch": 13.952706552706553, "grad_norm": 0.22058185935020447, "learning_rate": 1.1278499226212696e-05, "loss": 0.7014, "step": 12250 }, { "epoch": 13.953846153846154, "grad_norm": 0.25046995282173157, "learning_rate": 1.1274604788534351e-05, "loss": 0.7122, "step": 12251 }, { "epoch": 13.954985754985755, "grad_norm": 0.22530826926231384, "learning_rate": 1.1270710827576727e-05, "loss": 0.4146, "step": 12252 }, { "epoch": 13.956125356125357, "grad_norm": 0.20750823616981506, "learning_rate": 1.126681734347508e-05, "loss": 0.8046, "step": 12253 }, { "epoch": 13.957264957264957, "grad_norm": 0.2138403207063675, "learning_rate": 1.1262924336364616e-05, "loss": 0.6614, "step": 12254 }, { "epoch": 13.958404558404558, "grad_norm": 0.2613039016723633, "learning_rate": 1.1259031806380577e-05, "loss": 0.5876, "step": 12255 }, { "epoch": 13.95954415954416, "grad_norm": 0.24097444117069244, "learning_rate": 1.125513975365816e-05, "loss": 0.5653, "step": 12256 }, { "epoch": 13.96068376068376, "grad_norm": 0.2530354857444763, "learning_rate": 1.1251248178332527e-05, "loss": 0.5816, "step": 12257 }, { "epoch": 13.961823361823361, "grad_norm": 0.2310188114643097, "learning_rate": 1.1247357080538853e-05, "loss": 0.6506, "step": 12258 }, { "epoch": 13.962962962962964, "grad_norm": 0.17745593190193176, "learning_rate": 1.124346646041229e-05, "loss": 0.8187, "step": 12259 }, { "epoch": 13.964102564102564, "grad_norm": 0.20546174049377441, "learning_rate": 1.1239576318087978e-05, "loss": 0.6235, "step": 12260 }, { "epoch": 13.965242165242165, "grad_norm": 0.20018534362316132, "learning_rate": 1.1235686653701011e-05, "loss": 0.7786, "step": 12261 }, { "epoch": 13.966381766381767, "grad_norm": 0.2011246532201767, "learning_rate": 1.1231797467386495e-05, "loss": 0.5827, "step": 12262 }, { "epoch": 13.967521367521368, "grad_norm": 0.1913696825504303, "learning_rate": 1.122790875927952e-05, "loss": 0.6169, "step": 12263 }, { "epoch": 13.968660968660968, "grad_norm": 0.23348921537399292, "learning_rate": 1.1224020529515155e-05, "loss": 0.6982, "step": 12264 }, { "epoch": 13.96980056980057, "grad_norm": 0.16664430499076843, "learning_rate": 1.1220132778228422e-05, "loss": 0.5185, "step": 12265 }, { "epoch": 13.970940170940171, "grad_norm": 0.309536337852478, "learning_rate": 1.1216245505554385e-05, "loss": 0.3365, "step": 12266 }, { "epoch": 13.972079772079772, "grad_norm": 0.1737096905708313, "learning_rate": 1.1212358711628052e-05, "loss": 0.6641, "step": 12267 }, { "epoch": 13.973219373219374, "grad_norm": 0.1941920965909958, "learning_rate": 1.120847239658441e-05, "loss": 0.6955, "step": 12268 }, { "epoch": 13.974358974358974, "grad_norm": 0.1931382566690445, "learning_rate": 1.1204586560558447e-05, "loss": 0.4182, "step": 12269 }, { "epoch": 13.975498575498575, "grad_norm": 0.26583507657051086, "learning_rate": 1.1200701203685132e-05, "loss": 0.5278, "step": 12270 }, { "epoch": 13.976638176638177, "grad_norm": 0.20485515892505646, "learning_rate": 1.119681632609942e-05, "loss": 0.7461, "step": 12271 }, { "epoch": 13.977777777777778, "grad_norm": 0.22491665184497833, "learning_rate": 1.119293192793623e-05, "loss": 0.7647, "step": 12272 }, { "epoch": 13.978917378917378, "grad_norm": 0.20172721147537231, "learning_rate": 1.118904800933048e-05, "loss": 0.5171, "step": 12273 }, { "epoch": 13.98005698005698, "grad_norm": 0.2544950842857361, "learning_rate": 1.1185164570417075e-05, "loss": 0.646, "step": 12274 }, { "epoch": 13.981196581196581, "grad_norm": 0.24959351122379303, "learning_rate": 1.1181281611330904e-05, "loss": 0.4714, "step": 12275 }, { "epoch": 13.982336182336182, "grad_norm": 0.227168008685112, "learning_rate": 1.1177399132206806e-05, "loss": 0.6064, "step": 12276 }, { "epoch": 13.983475783475784, "grad_norm": 0.21360209584236145, "learning_rate": 1.1173517133179657e-05, "loss": 0.6167, "step": 12277 }, { "epoch": 13.984615384615385, "grad_norm": 0.22920553386211395, "learning_rate": 1.116963561438429e-05, "loss": 0.6415, "step": 12278 }, { "epoch": 13.985754985754985, "grad_norm": 0.192453071475029, "learning_rate": 1.1165754575955504e-05, "loss": 0.5319, "step": 12279 }, { "epoch": 13.986894586894588, "grad_norm": 0.18710856139659882, "learning_rate": 1.1161874018028105e-05, "loss": 0.7048, "step": 12280 }, { "epoch": 13.988034188034188, "grad_norm": 0.19711796939373016, "learning_rate": 1.1157993940736877e-05, "loss": 0.7385, "step": 12281 }, { "epoch": 13.989173789173789, "grad_norm": 0.20384639501571655, "learning_rate": 1.1154114344216584e-05, "loss": 0.544, "step": 12282 }, { "epoch": 13.990313390313391, "grad_norm": 0.19997817277908325, "learning_rate": 1.1150235228601984e-05, "loss": 0.7785, "step": 12283 }, { "epoch": 13.991452991452991, "grad_norm": 0.21891017258167267, "learning_rate": 1.1146356594027796e-05, "loss": 0.6466, "step": 12284 }, { "epoch": 13.992592592592592, "grad_norm": 0.2607383728027344, "learning_rate": 1.114247844062874e-05, "loss": 0.4933, "step": 12285 }, { "epoch": 13.993732193732194, "grad_norm": 0.16678480803966522, "learning_rate": 1.1138600768539514e-05, "loss": 0.7185, "step": 12286 }, { "epoch": 13.994871794871795, "grad_norm": 0.287775456905365, "learning_rate": 1.1134723577894805e-05, "loss": 0.4988, "step": 12287 }, { "epoch": 13.996011396011395, "grad_norm": 0.19296123087406158, "learning_rate": 1.1130846868829273e-05, "loss": 0.6281, "step": 12288 }, { "epoch": 13.997150997150998, "grad_norm": 0.23793011903762817, "learning_rate": 1.1126970641477572e-05, "loss": 0.7806, "step": 12289 }, { "epoch": 13.998290598290598, "grad_norm": 0.19576086103916168, "learning_rate": 1.112309489597434e-05, "loss": 0.5895, "step": 12290 }, { "epoch": 13.999430199430199, "grad_norm": 0.21129938960075378, "learning_rate": 1.1119219632454176e-05, "loss": 0.6374, "step": 12291 }, { "epoch": 14.0, "grad_norm": 0.2696366012096405, "learning_rate": 1.1115344851051684e-05, "loss": 0.7114, "step": 12292 }, { "epoch": 14.0011396011396, "grad_norm": 0.2190292775630951, "learning_rate": 1.1111470551901449e-05, "loss": 0.6586, "step": 12293 }, { "epoch": 14.002279202279203, "grad_norm": 0.1721022129058838, "learning_rate": 1.1107596735138046e-05, "loss": 0.7334, "step": 12294 }, { "epoch": 14.003418803418803, "grad_norm": 0.18997089564800262, "learning_rate": 1.1103723400896001e-05, "loss": 0.8499, "step": 12295 }, { "epoch": 14.004558404558404, "grad_norm": 0.16700685024261475, "learning_rate": 1.1099850549309859e-05, "loss": 0.7764, "step": 12296 }, { "epoch": 14.005698005698006, "grad_norm": 0.19448800384998322, "learning_rate": 1.1095978180514132e-05, "loss": 0.7613, "step": 12297 }, { "epoch": 14.006837606837607, "grad_norm": 0.22701026499271393, "learning_rate": 1.1092106294643321e-05, "loss": 0.6202, "step": 12298 }, { "epoch": 14.007977207977207, "grad_norm": 0.20509253442287445, "learning_rate": 1.1088234891831903e-05, "loss": 0.5375, "step": 12299 }, { "epoch": 14.00911680911681, "grad_norm": 0.20297500491142273, "learning_rate": 1.1084363972214346e-05, "loss": 0.6547, "step": 12300 }, { "epoch": 14.01025641025641, "grad_norm": 0.18465548753738403, "learning_rate": 1.1080493535925104e-05, "loss": 0.7208, "step": 12301 }, { "epoch": 14.01139601139601, "grad_norm": 0.21446870267391205, "learning_rate": 1.1076623583098592e-05, "loss": 0.5847, "step": 12302 }, { "epoch": 14.012535612535613, "grad_norm": 0.25414398312568665, "learning_rate": 1.1072754113869232e-05, "loss": 0.6065, "step": 12303 }, { "epoch": 14.013675213675214, "grad_norm": 0.1874278336763382, "learning_rate": 1.1068885128371423e-05, "loss": 0.9341, "step": 12304 }, { "epoch": 14.014814814814814, "grad_norm": 0.15696552395820618, "learning_rate": 1.1065016626739553e-05, "loss": 0.9722, "step": 12305 }, { "epoch": 14.015954415954416, "grad_norm": 0.21116121113300323, "learning_rate": 1.1061148609107967e-05, "loss": 0.5474, "step": 12306 }, { "epoch": 14.017094017094017, "grad_norm": 0.17223501205444336, "learning_rate": 1.105728107561102e-05, "loss": 0.8862, "step": 12307 }, { "epoch": 14.018233618233618, "grad_norm": 0.17481198906898499, "learning_rate": 1.1053414026383047e-05, "loss": 0.7221, "step": 12308 }, { "epoch": 14.01937321937322, "grad_norm": 0.17188280820846558, "learning_rate": 1.1049547461558356e-05, "loss": 0.6618, "step": 12309 }, { "epoch": 14.02051282051282, "grad_norm": 0.20560085773468018, "learning_rate": 1.1045681381271247e-05, "loss": 0.5322, "step": 12310 }, { "epoch": 14.021652421652421, "grad_norm": 0.23315249383449554, "learning_rate": 1.1041815785655999e-05, "loss": 0.415, "step": 12311 }, { "epoch": 14.022792022792023, "grad_norm": 0.17557953298091888, "learning_rate": 1.103795067484688e-05, "loss": 0.7647, "step": 12312 }, { "epoch": 14.023931623931624, "grad_norm": 0.18308015167713165, "learning_rate": 1.1034086048978123e-05, "loss": 0.7229, "step": 12313 }, { "epoch": 14.025071225071224, "grad_norm": 0.2320505976676941, "learning_rate": 1.1030221908183966e-05, "loss": 0.5286, "step": 12314 }, { "epoch": 14.026210826210827, "grad_norm": 0.19315651059150696, "learning_rate": 1.1026358252598615e-05, "loss": 0.8859, "step": 12315 }, { "epoch": 14.027350427350427, "grad_norm": 0.20136956870555878, "learning_rate": 1.1022495082356283e-05, "loss": 0.7242, "step": 12316 }, { "epoch": 14.028490028490028, "grad_norm": 0.220159649848938, "learning_rate": 1.1018632397591122e-05, "loss": 0.5997, "step": 12317 }, { "epoch": 14.02962962962963, "grad_norm": 0.18217402696609497, "learning_rate": 1.1014770198437307e-05, "loss": 0.6722, "step": 12318 }, { "epoch": 14.03076923076923, "grad_norm": 0.23696590960025787, "learning_rate": 1.1010908485028984e-05, "loss": 0.6736, "step": 12319 }, { "epoch": 14.031908831908831, "grad_norm": 0.21605245769023895, "learning_rate": 1.1007047257500278e-05, "loss": 0.6124, "step": 12320 }, { "epoch": 14.033048433048434, "grad_norm": 0.22143849730491638, "learning_rate": 1.1003186515985304e-05, "loss": 0.7573, "step": 12321 }, { "epoch": 14.034188034188034, "grad_norm": 0.1849474161863327, "learning_rate": 1.0999326260618151e-05, "loss": 0.8574, "step": 12322 }, { "epoch": 14.035327635327635, "grad_norm": 0.18730638921260834, "learning_rate": 1.0995466491532901e-05, "loss": 0.5309, "step": 12323 }, { "epoch": 14.036467236467237, "grad_norm": 0.19191278517246246, "learning_rate": 1.0991607208863621e-05, "loss": 0.7682, "step": 12324 }, { "epoch": 14.037606837606837, "grad_norm": 0.22057481110095978, "learning_rate": 1.0987748412744337e-05, "loss": 0.6396, "step": 12325 }, { "epoch": 14.038746438746438, "grad_norm": 0.2798022925853729, "learning_rate": 1.0983890103309083e-05, "loss": 0.4859, "step": 12326 }, { "epoch": 14.03988603988604, "grad_norm": 0.14839540421962738, "learning_rate": 1.0980032280691871e-05, "loss": 0.8717, "step": 12327 }, { "epoch": 14.04102564102564, "grad_norm": 0.2074902355670929, "learning_rate": 1.0976174945026701e-05, "loss": 0.8141, "step": 12328 }, { "epoch": 14.042165242165241, "grad_norm": 0.20386825501918793, "learning_rate": 1.0972318096447534e-05, "loss": 0.64, "step": 12329 }, { "epoch": 14.043304843304844, "grad_norm": 0.23736783862113953, "learning_rate": 1.0968461735088334e-05, "loss": 0.54, "step": 12330 }, { "epoch": 14.044444444444444, "grad_norm": 0.1977514922618866, "learning_rate": 1.0964605861083047e-05, "loss": 0.7307, "step": 12331 }, { "epoch": 14.045584045584045, "grad_norm": 0.16288281977176666, "learning_rate": 1.0960750474565593e-05, "loss": 0.654, "step": 12332 }, { "epoch": 14.046723646723647, "grad_norm": 0.19778847694396973, "learning_rate": 1.0956895575669884e-05, "loss": 0.6935, "step": 12333 }, { "epoch": 14.047863247863248, "grad_norm": 0.17810329794883728, "learning_rate": 1.0953041164529811e-05, "loss": 0.811, "step": 12334 }, { "epoch": 14.049002849002848, "grad_norm": 0.1680774688720703, "learning_rate": 1.0949187241279257e-05, "loss": 0.7736, "step": 12335 }, { "epoch": 14.05014245014245, "grad_norm": 0.17708034813404083, "learning_rate": 1.094533380605206e-05, "loss": 0.7097, "step": 12336 }, { "epoch": 14.051282051282051, "grad_norm": 0.2512058615684509, "learning_rate": 1.0941480858982072e-05, "loss": 0.5888, "step": 12337 }, { "epoch": 14.052421652421652, "grad_norm": 0.14486335217952728, "learning_rate": 1.0937628400203112e-05, "loss": 0.833, "step": 12338 }, { "epoch": 14.053561253561254, "grad_norm": 0.18729153275489807, "learning_rate": 1.0933776429849002e-05, "loss": 0.722, "step": 12339 }, { "epoch": 14.054700854700855, "grad_norm": 0.21226993203163147, "learning_rate": 1.0929924948053508e-05, "loss": 0.7348, "step": 12340 }, { "epoch": 14.055840455840455, "grad_norm": 0.2136608362197876, "learning_rate": 1.0926073954950413e-05, "loss": 0.7359, "step": 12341 }, { "epoch": 14.056980056980057, "grad_norm": 0.22670835256576538, "learning_rate": 1.0922223450673471e-05, "loss": 0.7195, "step": 12342 }, { "epoch": 14.058119658119658, "grad_norm": 0.17434194684028625, "learning_rate": 1.0918373435356427e-05, "loss": 0.8143, "step": 12343 }, { "epoch": 14.059259259259258, "grad_norm": 0.17531150579452515, "learning_rate": 1.0914523909132995e-05, "loss": 0.7685, "step": 12344 }, { "epoch": 14.06039886039886, "grad_norm": 0.2420409470796585, "learning_rate": 1.0910674872136886e-05, "loss": 0.4934, "step": 12345 }, { "epoch": 14.061538461538461, "grad_norm": 0.2355288714170456, "learning_rate": 1.090682632450179e-05, "loss": 0.5055, "step": 12346 }, { "epoch": 14.062678062678062, "grad_norm": 0.22175931930541992, "learning_rate": 1.0902978266361366e-05, "loss": 0.8557, "step": 12347 }, { "epoch": 14.063817663817664, "grad_norm": 0.27604639530181885, "learning_rate": 1.0899130697849272e-05, "loss": 0.4887, "step": 12348 }, { "epoch": 14.064957264957265, "grad_norm": 0.21475738286972046, "learning_rate": 1.0895283619099145e-05, "loss": 0.6099, "step": 12349 }, { "epoch": 14.066096866096865, "grad_norm": 0.211808443069458, "learning_rate": 1.0891437030244617e-05, "loss": 0.7819, "step": 12350 }, { "epoch": 14.067236467236468, "grad_norm": 0.18572334945201874, "learning_rate": 1.0887590931419272e-05, "loss": 0.7291, "step": 12351 }, { "epoch": 14.068376068376068, "grad_norm": 0.21492944657802582, "learning_rate": 1.08837453227567e-05, "loss": 0.6636, "step": 12352 }, { "epoch": 14.069515669515669, "grad_norm": 0.16860584914684296, "learning_rate": 1.0879900204390476e-05, "loss": 0.6948, "step": 12353 }, { "epoch": 14.070655270655271, "grad_norm": 0.19749948382377625, "learning_rate": 1.0876055576454147e-05, "loss": 0.7881, "step": 12354 }, { "epoch": 14.071794871794872, "grad_norm": 0.18017134070396423, "learning_rate": 1.0872211439081249e-05, "loss": 0.8742, "step": 12355 }, { "epoch": 14.072934472934472, "grad_norm": 0.2028505951166153, "learning_rate": 1.0868367792405298e-05, "loss": 0.8055, "step": 12356 }, { "epoch": 14.074074074074074, "grad_norm": 0.28165650367736816, "learning_rate": 1.0864524636559807e-05, "loss": 0.4472, "step": 12357 }, { "epoch": 14.075213675213675, "grad_norm": 0.24868755042552948, "learning_rate": 1.0860681971678235e-05, "loss": 0.6308, "step": 12358 }, { "epoch": 14.076353276353275, "grad_norm": 0.1929575353860855, "learning_rate": 1.0856839797894065e-05, "loss": 0.695, "step": 12359 }, { "epoch": 14.077492877492878, "grad_norm": 0.16340765357017517, "learning_rate": 1.0852998115340743e-05, "loss": 0.7069, "step": 12360 }, { "epoch": 14.078632478632478, "grad_norm": 0.18964308500289917, "learning_rate": 1.08491569241517e-05, "loss": 0.7345, "step": 12361 }, { "epoch": 14.079772079772079, "grad_norm": 0.2203982174396515, "learning_rate": 1.0845316224460358e-05, "loss": 0.5722, "step": 12362 }, { "epoch": 14.080911680911681, "grad_norm": 0.1602632701396942, "learning_rate": 1.0841476016400103e-05, "loss": 0.7135, "step": 12363 }, { "epoch": 14.082051282051282, "grad_norm": 0.18844416737556458, "learning_rate": 1.0837636300104311e-05, "loss": 0.7949, "step": 12364 }, { "epoch": 14.083190883190884, "grad_norm": 0.24189259111881256, "learning_rate": 1.0833797075706378e-05, "loss": 0.6145, "step": 12365 }, { "epoch": 14.084330484330485, "grad_norm": 0.2609337568283081, "learning_rate": 1.082995834333962e-05, "loss": 0.5039, "step": 12366 }, { "epoch": 14.085470085470085, "grad_norm": 0.18424765765666962, "learning_rate": 1.0826120103137377e-05, "loss": 0.6723, "step": 12367 }, { "epoch": 14.086609686609687, "grad_norm": 0.19361767172813416, "learning_rate": 1.082228235523296e-05, "loss": 0.6373, "step": 12368 }, { "epoch": 14.087749287749288, "grad_norm": 0.26636993885040283, "learning_rate": 1.0818445099759674e-05, "loss": 0.6688, "step": 12369 }, { "epoch": 14.088888888888889, "grad_norm": 0.21687738597393036, "learning_rate": 1.0814608336850781e-05, "loss": 0.6995, "step": 12370 }, { "epoch": 14.090028490028491, "grad_norm": 0.2350667119026184, "learning_rate": 1.081077206663955e-05, "loss": 0.678, "step": 12371 }, { "epoch": 14.091168091168091, "grad_norm": 0.18176552653312683, "learning_rate": 1.0806936289259226e-05, "loss": 0.6437, "step": 12372 }, { "epoch": 14.092307692307692, "grad_norm": 0.2433783859014511, "learning_rate": 1.0803101004843044e-05, "loss": 0.5479, "step": 12373 }, { "epoch": 14.093447293447294, "grad_norm": 0.24452243745326996, "learning_rate": 1.0799266213524198e-05, "loss": 0.534, "step": 12374 }, { "epoch": 14.094586894586895, "grad_norm": 0.19095490872859955, "learning_rate": 1.0795431915435877e-05, "loss": 0.5522, "step": 12375 }, { "epoch": 14.095726495726495, "grad_norm": 0.18320217728614807, "learning_rate": 1.079159811071129e-05, "loss": 0.6218, "step": 12376 }, { "epoch": 14.096866096866098, "grad_norm": 0.2664496600627899, "learning_rate": 1.0787764799483564e-05, "loss": 0.4221, "step": 12377 }, { "epoch": 14.098005698005698, "grad_norm": 0.22277627885341644, "learning_rate": 1.0783931981885847e-05, "loss": 0.7926, "step": 12378 }, { "epoch": 14.099145299145299, "grad_norm": 0.18079183995723724, "learning_rate": 1.0780099658051271e-05, "loss": 0.7459, "step": 12379 }, { "epoch": 14.100284900284901, "grad_norm": 0.18426795303821564, "learning_rate": 1.0776267828112945e-05, "loss": 0.7996, "step": 12380 }, { "epoch": 14.101424501424502, "grad_norm": 0.17334656417369843, "learning_rate": 1.0772436492203945e-05, "loss": 0.8267, "step": 12381 }, { "epoch": 14.102564102564102, "grad_norm": 0.2253703474998474, "learning_rate": 1.076860565045735e-05, "loss": 0.5896, "step": 12382 }, { "epoch": 14.103703703703705, "grad_norm": 0.1944139003753662, "learning_rate": 1.0764775303006219e-05, "loss": 0.8137, "step": 12383 }, { "epoch": 14.104843304843305, "grad_norm": 0.17105910181999207, "learning_rate": 1.0760945449983597e-05, "loss": 0.7884, "step": 12384 }, { "epoch": 14.105982905982906, "grad_norm": 0.23568369448184967, "learning_rate": 1.075711609152249e-05, "loss": 0.627, "step": 12385 }, { "epoch": 14.107122507122508, "grad_norm": 0.2039739191532135, "learning_rate": 1.0753287227755898e-05, "loss": 0.6716, "step": 12386 }, { "epoch": 14.108262108262108, "grad_norm": 0.21701957285404205, "learning_rate": 1.0749458858816835e-05, "loss": 0.6981, "step": 12387 }, { "epoch": 14.109401709401709, "grad_norm": 0.19791172444820404, "learning_rate": 1.0745630984838252e-05, "loss": 0.9376, "step": 12388 }, { "epoch": 14.110541310541311, "grad_norm": 0.15801367163658142, "learning_rate": 1.0741803605953099e-05, "loss": 0.8482, "step": 12389 }, { "epoch": 14.111680911680912, "grad_norm": 0.17341923713684082, "learning_rate": 1.0737976722294319e-05, "loss": 0.705, "step": 12390 }, { "epoch": 14.112820512820512, "grad_norm": 0.1802707016468048, "learning_rate": 1.0734150333994838e-05, "loss": 0.6471, "step": 12391 }, { "epoch": 14.113960113960115, "grad_norm": 0.24691151082515717, "learning_rate": 1.0730324441187536e-05, "loss": 0.5223, "step": 12392 }, { "epoch": 14.115099715099715, "grad_norm": 0.21251699328422546, "learning_rate": 1.0726499044005312e-05, "loss": 0.8608, "step": 12393 }, { "epoch": 14.116239316239316, "grad_norm": 0.19166982173919678, "learning_rate": 1.0722674142581024e-05, "loss": 0.6466, "step": 12394 }, { "epoch": 14.117378917378918, "grad_norm": 0.23904511332511902, "learning_rate": 1.0718849737047537e-05, "loss": 0.6871, "step": 12395 }, { "epoch": 14.118518518518519, "grad_norm": 0.2263159304857254, "learning_rate": 1.071502582753766e-05, "loss": 0.5115, "step": 12396 }, { "epoch": 14.11965811965812, "grad_norm": 0.18796271085739136, "learning_rate": 1.0711202414184216e-05, "loss": 0.5348, "step": 12397 }, { "epoch": 14.120797720797722, "grad_norm": 0.24163362383842468, "learning_rate": 1.0707379497120013e-05, "loss": 0.6685, "step": 12398 }, { "epoch": 14.121937321937322, "grad_norm": 0.18093737959861755, "learning_rate": 1.0703557076477833e-05, "loss": 0.806, "step": 12399 }, { "epoch": 14.123076923076923, "grad_norm": 0.21274122595787048, "learning_rate": 1.0699735152390422e-05, "loss": 0.7044, "step": 12400 }, { "epoch": 14.124216524216525, "grad_norm": 0.2155819535255432, "learning_rate": 1.0695913724990535e-05, "loss": 0.7369, "step": 12401 }, { "epoch": 14.125356125356126, "grad_norm": 0.1662980169057846, "learning_rate": 1.0692092794410904e-05, "loss": 0.8839, "step": 12402 }, { "epoch": 14.126495726495726, "grad_norm": 0.21541108191013336, "learning_rate": 1.0688272360784244e-05, "loss": 0.4945, "step": 12403 }, { "epoch": 14.127635327635328, "grad_norm": 0.19884797930717468, "learning_rate": 1.0684452424243232e-05, "loss": 0.4722, "step": 12404 }, { "epoch": 14.128774928774929, "grad_norm": 0.2191683053970337, "learning_rate": 1.0680632984920558e-05, "loss": 0.5608, "step": 12405 }, { "epoch": 14.12991452991453, "grad_norm": 0.17608490586280823, "learning_rate": 1.0676814042948876e-05, "loss": 0.8601, "step": 12406 }, { "epoch": 14.131054131054132, "grad_norm": 0.23183391988277435, "learning_rate": 1.0672995598460841e-05, "loss": 0.6027, "step": 12407 }, { "epoch": 14.132193732193732, "grad_norm": 0.1913665384054184, "learning_rate": 1.0669177651589051e-05, "loss": 0.6524, "step": 12408 }, { "epoch": 14.133333333333333, "grad_norm": 0.18311044573783875, "learning_rate": 1.066536020246614e-05, "loss": 0.8798, "step": 12409 }, { "epoch": 14.134472934472935, "grad_norm": 0.18477991223335266, "learning_rate": 1.0661543251224699e-05, "loss": 0.6599, "step": 12410 }, { "epoch": 14.135612535612536, "grad_norm": 0.22089451551437378, "learning_rate": 1.065772679799728e-05, "loss": 0.6662, "step": 12411 }, { "epoch": 14.136752136752136, "grad_norm": 0.2456071823835373, "learning_rate": 1.0653910842916454e-05, "loss": 0.6785, "step": 12412 }, { "epoch": 14.137891737891739, "grad_norm": 0.212762251496315, "learning_rate": 1.0650095386114756e-05, "loss": 0.7099, "step": 12413 }, { "epoch": 14.13903133903134, "grad_norm": 0.22029614448547363, "learning_rate": 1.0646280427724715e-05, "loss": 0.7065, "step": 12414 }, { "epoch": 14.14017094017094, "grad_norm": 0.20419296622276306, "learning_rate": 1.064246596787882e-05, "loss": 0.706, "step": 12415 }, { "epoch": 14.141310541310542, "grad_norm": 0.25089824199676514, "learning_rate": 1.0638652006709565e-05, "loss": 0.5925, "step": 12416 }, { "epoch": 14.142450142450143, "grad_norm": 0.2615416347980499, "learning_rate": 1.0634838544349417e-05, "loss": 0.5556, "step": 12417 }, { "epoch": 14.143589743589743, "grad_norm": 0.1994006484746933, "learning_rate": 1.0631025580930846e-05, "loss": 0.7281, "step": 12418 }, { "epoch": 14.144729344729345, "grad_norm": 0.1971782147884369, "learning_rate": 1.062721311658625e-05, "loss": 0.6861, "step": 12419 }, { "epoch": 14.145868945868946, "grad_norm": 0.203592911362648, "learning_rate": 1.0623401151448073e-05, "loss": 0.6064, "step": 12420 }, { "epoch": 14.147008547008546, "grad_norm": 0.21394099295139313, "learning_rate": 1.0619589685648723e-05, "loss": 0.5012, "step": 12421 }, { "epoch": 14.148148148148149, "grad_norm": 0.18990157544612885, "learning_rate": 1.061577871932056e-05, "loss": 0.7969, "step": 12422 }, { "epoch": 14.14928774928775, "grad_norm": 0.22332452237606049, "learning_rate": 1.0611968252595959e-05, "loss": 0.6604, "step": 12423 }, { "epoch": 14.15042735042735, "grad_norm": 0.24588967859745026, "learning_rate": 1.0608158285607266e-05, "loss": 0.6556, "step": 12424 }, { "epoch": 14.151566951566952, "grad_norm": 0.28720003366470337, "learning_rate": 1.0604348818486823e-05, "loss": 0.4166, "step": 12425 }, { "epoch": 14.152706552706553, "grad_norm": 0.17065948247909546, "learning_rate": 1.0600539851366925e-05, "loss": 0.596, "step": 12426 }, { "epoch": 14.153846153846153, "grad_norm": 0.21314504742622375, "learning_rate": 1.059673138437988e-05, "loss": 0.6275, "step": 12427 }, { "epoch": 14.154985754985756, "grad_norm": 0.19009727239608765, "learning_rate": 1.0592923417657958e-05, "loss": 0.6682, "step": 12428 }, { "epoch": 14.156125356125356, "grad_norm": 0.20869901776313782, "learning_rate": 1.0589115951333436e-05, "loss": 0.7042, "step": 12429 }, { "epoch": 14.157264957264957, "grad_norm": 0.20850737392902374, "learning_rate": 1.058530898553853e-05, "loss": 0.4879, "step": 12430 }, { "epoch": 14.158404558404559, "grad_norm": 0.18836501240730286, "learning_rate": 1.0581502520405492e-05, "loss": 0.7744, "step": 12431 }, { "epoch": 14.15954415954416, "grad_norm": 0.23801569640636444, "learning_rate": 1.0577696556066533e-05, "loss": 0.3337, "step": 12432 }, { "epoch": 14.16068376068376, "grad_norm": 0.2107766717672348, "learning_rate": 1.0573891092653823e-05, "loss": 0.5125, "step": 12433 }, { "epoch": 14.161823361823362, "grad_norm": 0.2487872987985611, "learning_rate": 1.0570086130299548e-05, "loss": 0.5924, "step": 12434 }, { "epoch": 14.162962962962963, "grad_norm": 0.24329927563667297, "learning_rate": 1.0566281669135863e-05, "loss": 0.3506, "step": 12435 }, { "epoch": 14.164102564102564, "grad_norm": 0.181972935795784, "learning_rate": 1.0562477709294919e-05, "loss": 0.7264, "step": 12436 }, { "epoch": 14.165242165242166, "grad_norm": 0.18620088696479797, "learning_rate": 1.0558674250908818e-05, "loss": 0.7772, "step": 12437 }, { "epoch": 14.166381766381766, "grad_norm": 0.19572575390338898, "learning_rate": 1.0554871294109675e-05, "loss": 0.7916, "step": 12438 }, { "epoch": 14.167521367521367, "grad_norm": 0.20265932381153107, "learning_rate": 1.0551068839029576e-05, "loss": 0.6412, "step": 12439 }, { "epoch": 14.16866096866097, "grad_norm": 0.20510506629943848, "learning_rate": 1.0547266885800588e-05, "loss": 0.4159, "step": 12440 }, { "epoch": 14.16980056980057, "grad_norm": 0.15745452046394348, "learning_rate": 1.0543465434554769e-05, "loss": 0.6387, "step": 12441 }, { "epoch": 14.17094017094017, "grad_norm": 0.18906773626804352, "learning_rate": 1.0539664485424153e-05, "loss": 0.7071, "step": 12442 }, { "epoch": 14.172079772079773, "grad_norm": 0.19148340821266174, "learning_rate": 1.0535864038540751e-05, "loss": 0.5026, "step": 12443 }, { "epoch": 14.173219373219373, "grad_norm": 0.22974300384521484, "learning_rate": 1.0532064094036582e-05, "loss": 0.6189, "step": 12444 }, { "epoch": 14.174358974358974, "grad_norm": 0.20122529566287994, "learning_rate": 1.0528264652043598e-05, "loss": 0.6045, "step": 12445 }, { "epoch": 14.175498575498576, "grad_norm": 0.19866183400154114, "learning_rate": 1.0524465712693784e-05, "loss": 0.4626, "step": 12446 }, { "epoch": 14.176638176638177, "grad_norm": 0.1823773980140686, "learning_rate": 1.0520667276119084e-05, "loss": 0.7385, "step": 12447 }, { "epoch": 14.177777777777777, "grad_norm": 0.22166435420513153, "learning_rate": 1.0516869342451436e-05, "loss": 0.6146, "step": 12448 }, { "epoch": 14.17891737891738, "grad_norm": 0.18655037879943848, "learning_rate": 1.0513071911822736e-05, "loss": 0.7272, "step": 12449 }, { "epoch": 14.18005698005698, "grad_norm": 0.2278573364019394, "learning_rate": 1.0509274984364886e-05, "loss": 0.5605, "step": 12450 }, { "epoch": 14.18119658119658, "grad_norm": 0.23333711922168732, "learning_rate": 1.050547856020977e-05, "loss": 0.6219, "step": 12451 }, { "epoch": 14.182336182336183, "grad_norm": 0.22250933945178986, "learning_rate": 1.0501682639489239e-05, "loss": 0.6751, "step": 12452 }, { "epoch": 14.183475783475783, "grad_norm": 0.18318895995616913, "learning_rate": 1.0497887222335143e-05, "loss": 0.7053, "step": 12453 }, { "epoch": 14.184615384615384, "grad_norm": 0.1692780703306198, "learning_rate": 1.0494092308879302e-05, "loss": 0.5763, "step": 12454 }, { "epoch": 14.185754985754986, "grad_norm": 0.2442038357257843, "learning_rate": 1.0490297899253537e-05, "loss": 0.5009, "step": 12455 }, { "epoch": 14.186894586894587, "grad_norm": 0.2019973248243332, "learning_rate": 1.0486503993589619e-05, "loss": 0.6492, "step": 12456 }, { "epoch": 14.188034188034187, "grad_norm": 0.16503944993019104, "learning_rate": 1.048271059201933e-05, "loss": 0.8739, "step": 12457 }, { "epoch": 14.18917378917379, "grad_norm": 0.19559434056282043, "learning_rate": 1.0478917694674426e-05, "loss": 0.8488, "step": 12458 }, { "epoch": 14.19031339031339, "grad_norm": 0.17246732115745544, "learning_rate": 1.047512530168665e-05, "loss": 0.8205, "step": 12459 }, { "epoch": 14.19145299145299, "grad_norm": 0.2481149286031723, "learning_rate": 1.0471333413187706e-05, "loss": 0.5811, "step": 12460 }, { "epoch": 14.192592592592593, "grad_norm": 0.19406673312187195, "learning_rate": 1.046754202930931e-05, "loss": 0.5999, "step": 12461 }, { "epoch": 14.193732193732194, "grad_norm": 0.19021573662757874, "learning_rate": 1.0463751150183143e-05, "loss": 0.7711, "step": 12462 }, { "epoch": 14.194871794871794, "grad_norm": 0.22574403882026672, "learning_rate": 1.045996077594088e-05, "loss": 0.5977, "step": 12463 }, { "epoch": 14.196011396011396, "grad_norm": 0.16467835009098053, "learning_rate": 1.045617090671415e-05, "loss": 0.843, "step": 12464 }, { "epoch": 14.197150997150997, "grad_norm": 0.20068757236003876, "learning_rate": 1.0452381542634607e-05, "loss": 0.822, "step": 12465 }, { "epoch": 14.198290598290598, "grad_norm": 0.18333548307418823, "learning_rate": 1.0448592683833866e-05, "loss": 0.6512, "step": 12466 }, { "epoch": 14.1994301994302, "grad_norm": 0.2103206366300583, "learning_rate": 1.044480433044351e-05, "loss": 0.5612, "step": 12467 }, { "epoch": 14.2005698005698, "grad_norm": 0.20221920311450958, "learning_rate": 1.0441016482595129e-05, "loss": 0.5504, "step": 12468 }, { "epoch": 14.201709401709401, "grad_norm": 0.19960333406925201, "learning_rate": 1.0437229140420282e-05, "loss": 0.7304, "step": 12469 }, { "epoch": 14.202849002849003, "grad_norm": 0.2146003097295761, "learning_rate": 1.0433442304050522e-05, "loss": 0.7793, "step": 12470 }, { "epoch": 14.203988603988604, "grad_norm": 0.17417925596237183, "learning_rate": 1.042965597361736e-05, "loss": 0.7855, "step": 12471 }, { "epoch": 14.205128205128204, "grad_norm": 0.17933909595012665, "learning_rate": 1.0425870149252317e-05, "loss": 0.8203, "step": 12472 }, { "epoch": 14.206267806267807, "grad_norm": 0.15111088752746582, "learning_rate": 1.042208483108688e-05, "loss": 0.7666, "step": 12473 }, { "epoch": 14.207407407407407, "grad_norm": 0.2167760282754898, "learning_rate": 1.0418300019252539e-05, "loss": 0.6168, "step": 12474 }, { "epoch": 14.208547008547008, "grad_norm": 0.19389691948890686, "learning_rate": 1.0414515713880717e-05, "loss": 0.5633, "step": 12475 }, { "epoch": 14.20968660968661, "grad_norm": 0.22056040167808533, "learning_rate": 1.0410731915102883e-05, "loss": 0.4671, "step": 12476 }, { "epoch": 14.21082621082621, "grad_norm": 0.21271564066410065, "learning_rate": 1.0406948623050453e-05, "loss": 0.5757, "step": 12477 }, { "epoch": 14.211965811965811, "grad_norm": 0.20088516175746918, "learning_rate": 1.0403165837854837e-05, "loss": 0.5874, "step": 12478 }, { "epoch": 14.213105413105414, "grad_norm": 0.22104431688785553, "learning_rate": 1.0399383559647402e-05, "loss": 0.397, "step": 12479 }, { "epoch": 14.214245014245014, "grad_norm": 0.22194375097751617, "learning_rate": 1.0395601788559527e-05, "loss": 0.4472, "step": 12480 }, { "epoch": 14.215384615384615, "grad_norm": 0.2375565469264984, "learning_rate": 1.0391820524722568e-05, "loss": 0.5376, "step": 12481 }, { "epoch": 14.216524216524217, "grad_norm": 0.2139328122138977, "learning_rate": 1.038803976826786e-05, "loss": 0.692, "step": 12482 }, { "epoch": 14.217663817663817, "grad_norm": 0.16154590249061584, "learning_rate": 1.0384259519326706e-05, "loss": 0.5521, "step": 12483 }, { "epoch": 14.218803418803418, "grad_norm": 0.16744598746299744, "learning_rate": 1.0380479778030414e-05, "loss": 0.8703, "step": 12484 }, { "epoch": 14.21994301994302, "grad_norm": 0.1954227089881897, "learning_rate": 1.0376700544510262e-05, "loss": 0.3635, "step": 12485 }, { "epoch": 14.221082621082621, "grad_norm": 0.18372157216072083, "learning_rate": 1.0372921818897512e-05, "loss": 0.6228, "step": 12486 }, { "epoch": 14.222222222222221, "grad_norm": 0.2358933389186859, "learning_rate": 1.0369143601323417e-05, "loss": 0.8105, "step": 12487 }, { "epoch": 14.223361823361824, "grad_norm": 0.2836482524871826, "learning_rate": 1.0365365891919197e-05, "loss": 0.6668, "step": 12488 }, { "epoch": 14.224501424501424, "grad_norm": 0.2542872726917267, "learning_rate": 1.0361588690816072e-05, "loss": 0.3758, "step": 12489 }, { "epoch": 14.225641025641025, "grad_norm": 0.17217014729976654, "learning_rate": 1.035781199814522e-05, "loss": 0.8552, "step": 12490 }, { "epoch": 14.226780626780627, "grad_norm": 0.19940514862537384, "learning_rate": 1.0354035814037824e-05, "loss": 0.755, "step": 12491 }, { "epoch": 14.227920227920228, "grad_norm": 0.1901174634695053, "learning_rate": 1.035026013862504e-05, "loss": 0.8924, "step": 12492 }, { "epoch": 14.229059829059828, "grad_norm": 0.18908625841140747, "learning_rate": 1.034648497203802e-05, "loss": 0.7233, "step": 12493 }, { "epoch": 14.23019943019943, "grad_norm": 0.21587686240673065, "learning_rate": 1.034271031440786e-05, "loss": 0.5469, "step": 12494 }, { "epoch": 14.231339031339031, "grad_norm": 0.21032138168811798, "learning_rate": 1.0338936165865684e-05, "loss": 0.7043, "step": 12495 }, { "epoch": 14.232478632478632, "grad_norm": 0.24204450845718384, "learning_rate": 1.0335162526542572e-05, "loss": 0.4976, "step": 12496 }, { "epoch": 14.233618233618234, "grad_norm": 0.20440852642059326, "learning_rate": 1.0331389396569591e-05, "loss": 0.7422, "step": 12497 }, { "epoch": 14.234757834757835, "grad_norm": 0.20793507993221283, "learning_rate": 1.0327616776077798e-05, "loss": 0.4419, "step": 12498 }, { "epoch": 14.235897435897435, "grad_norm": 0.2090797871351242, "learning_rate": 1.032384466519822e-05, "loss": 0.6576, "step": 12499 }, { "epoch": 14.237037037037037, "grad_norm": 0.20034758746623993, "learning_rate": 1.0320073064061889e-05, "loss": 0.6086, "step": 12500 }, { "epoch": 14.238176638176638, "grad_norm": 0.2248711735010147, "learning_rate": 1.031630197279978e-05, "loss": 0.5257, "step": 12501 }, { "epoch": 14.239316239316238, "grad_norm": 0.23908962309360504, "learning_rate": 1.0312531391542884e-05, "loss": 0.6444, "step": 12502 }, { "epoch": 14.24045584045584, "grad_norm": 0.1608385592699051, "learning_rate": 1.0308761320422164e-05, "loss": 0.8326, "step": 12503 }, { "epoch": 14.241595441595441, "grad_norm": 0.23936578631401062, "learning_rate": 1.0304991759568572e-05, "loss": 0.5167, "step": 12504 }, { "epoch": 14.242735042735042, "grad_norm": 0.21087776124477386, "learning_rate": 1.0301222709113018e-05, "loss": 0.6822, "step": 12505 }, { "epoch": 14.243874643874644, "grad_norm": 0.18673694133758545, "learning_rate": 1.0297454169186424e-05, "loss": 0.7186, "step": 12506 }, { "epoch": 14.245014245014245, "grad_norm": 0.1787559539079666, "learning_rate": 1.0293686139919676e-05, "loss": 0.6647, "step": 12507 }, { "epoch": 14.246153846153845, "grad_norm": 0.18746985495090485, "learning_rate": 1.0289918621443654e-05, "loss": 0.6668, "step": 12508 }, { "epoch": 14.247293447293448, "grad_norm": 0.23913775384426117, "learning_rate": 1.0286151613889208e-05, "loss": 0.6226, "step": 12509 }, { "epoch": 14.248433048433048, "grad_norm": 0.264639675617218, "learning_rate": 1.028238511738718e-05, "loss": 0.5012, "step": 12510 }, { "epoch": 14.249572649572649, "grad_norm": 0.21155546605587006, "learning_rate": 1.0278619132068402e-05, "loss": 0.6076, "step": 12511 }, { "epoch": 14.250712250712251, "grad_norm": 0.17297117412090302, "learning_rate": 1.0274853658063654e-05, "loss": 0.7893, "step": 12512 }, { "epoch": 14.251851851851852, "grad_norm": 0.2185525894165039, "learning_rate": 1.0271088695503733e-05, "loss": 0.6084, "step": 12513 }, { "epoch": 14.252991452991452, "grad_norm": 0.17372500896453857, "learning_rate": 1.0267324244519408e-05, "loss": 0.6831, "step": 12514 }, { "epoch": 14.254131054131054, "grad_norm": 0.17546038329601288, "learning_rate": 1.0263560305241435e-05, "loss": 0.6158, "step": 12515 }, { "epoch": 14.255270655270655, "grad_norm": 0.18271692097187042, "learning_rate": 1.0259796877800528e-05, "loss": 0.852, "step": 12516 }, { "epoch": 14.256410256410255, "grad_norm": 0.2003297060728073, "learning_rate": 1.0256033962327413e-05, "loss": 0.5634, "step": 12517 }, { "epoch": 14.257549857549858, "grad_norm": 0.5301342606544495, "learning_rate": 1.0252271558952784e-05, "loss": 0.6528, "step": 12518 }, { "epoch": 14.258689458689458, "grad_norm": 0.19460567831993103, "learning_rate": 1.0248509667807321e-05, "loss": 0.7132, "step": 12519 }, { "epoch": 14.25982905982906, "grad_norm": 0.20641255378723145, "learning_rate": 1.0244748289021682e-05, "loss": 0.6365, "step": 12520 }, { "epoch": 14.260968660968661, "grad_norm": 0.22458617389202118, "learning_rate": 1.0240987422726514e-05, "loss": 0.6479, "step": 12521 }, { "epoch": 14.262108262108262, "grad_norm": 0.19180619716644287, "learning_rate": 1.0237227069052438e-05, "loss": 0.783, "step": 12522 }, { "epoch": 14.263247863247864, "grad_norm": 0.2833923399448395, "learning_rate": 1.0233467228130072e-05, "loss": 0.5121, "step": 12523 }, { "epoch": 14.264387464387465, "grad_norm": 0.21777388453483582, "learning_rate": 1.022970790008999e-05, "loss": 0.6168, "step": 12524 }, { "epoch": 14.265527065527065, "grad_norm": 0.21849891543388367, "learning_rate": 1.0225949085062773e-05, "loss": 0.5956, "step": 12525 }, { "epoch": 14.266666666666667, "grad_norm": 0.17699994146823883, "learning_rate": 1.0222190783178973e-05, "loss": 0.744, "step": 12526 }, { "epoch": 14.267806267806268, "grad_norm": 0.1658196747303009, "learning_rate": 1.0218432994569132e-05, "loss": 0.7725, "step": 12527 }, { "epoch": 14.268945868945869, "grad_norm": 0.21563325822353363, "learning_rate": 1.0214675719363756e-05, "loss": 0.8035, "step": 12528 }, { "epoch": 14.270085470085471, "grad_norm": 0.19161447882652283, "learning_rate": 1.021091895769335e-05, "loss": 0.7857, "step": 12529 }, { "epoch": 14.271225071225071, "grad_norm": 0.1977480798959732, "learning_rate": 1.02071627096884e-05, "loss": 0.775, "step": 12530 }, { "epoch": 14.272364672364672, "grad_norm": 0.23712819814682007, "learning_rate": 1.0203406975479369e-05, "loss": 0.6548, "step": 12531 }, { "epoch": 14.273504273504274, "grad_norm": 0.20612823963165283, "learning_rate": 1.0199651755196704e-05, "loss": 0.6778, "step": 12532 }, { "epoch": 14.274643874643875, "grad_norm": 0.1673448234796524, "learning_rate": 1.0195897048970837e-05, "loss": 0.9221, "step": 12533 }, { "epoch": 14.275783475783475, "grad_norm": 0.1959383487701416, "learning_rate": 1.0192142856932186e-05, "loss": 0.7079, "step": 12534 }, { "epoch": 14.276923076923078, "grad_norm": 0.23128730058670044, "learning_rate": 1.0188389179211122e-05, "loss": 0.3848, "step": 12535 }, { "epoch": 14.278062678062678, "grad_norm": 0.20712505280971527, "learning_rate": 1.0184636015938037e-05, "loss": 0.7334, "step": 12536 }, { "epoch": 14.279202279202279, "grad_norm": 0.18095332384109497, "learning_rate": 1.0180883367243285e-05, "loss": 0.7478, "step": 12537 }, { "epoch": 14.280341880341881, "grad_norm": 0.24853791296482086, "learning_rate": 1.0177131233257216e-05, "loss": 0.8098, "step": 12538 }, { "epoch": 14.281481481481482, "grad_norm": 0.17238964140415192, "learning_rate": 1.0173379614110134e-05, "loss": 0.7795, "step": 12539 }, { "epoch": 14.282621082621082, "grad_norm": 0.17710372805595398, "learning_rate": 1.016962850993235e-05, "loss": 0.807, "step": 12540 }, { "epoch": 14.283760683760685, "grad_norm": 0.19814150035381317, "learning_rate": 1.0165877920854153e-05, "loss": 0.8465, "step": 12541 }, { "epoch": 14.284900284900285, "grad_norm": 0.2516099214553833, "learning_rate": 1.016212784700581e-05, "loss": 0.7151, "step": 12542 }, { "epoch": 14.286039886039886, "grad_norm": 0.20676739513874054, "learning_rate": 1.0158378288517568e-05, "loss": 0.5998, "step": 12543 }, { "epoch": 14.287179487179488, "grad_norm": 0.22322627902030945, "learning_rate": 1.0154629245519664e-05, "loss": 0.6966, "step": 12544 }, { "epoch": 14.288319088319088, "grad_norm": 0.2299807369709015, "learning_rate": 1.0150880718142322e-05, "loss": 0.6949, "step": 12545 }, { "epoch": 14.289458689458689, "grad_norm": 0.2129308134317398, "learning_rate": 1.0147132706515717e-05, "loss": 0.5363, "step": 12546 }, { "epoch": 14.290598290598291, "grad_norm": 0.22439347207546234, "learning_rate": 1.014338521077004e-05, "loss": 0.4882, "step": 12547 }, { "epoch": 14.291737891737892, "grad_norm": 0.19620387256145477, "learning_rate": 1.013963823103545e-05, "loss": 0.4752, "step": 12548 }, { "epoch": 14.292877492877492, "grad_norm": 0.20802806317806244, "learning_rate": 1.0135891767442099e-05, "loss": 0.7296, "step": 12549 }, { "epoch": 14.294017094017095, "grad_norm": 0.18960729241371155, "learning_rate": 1.0132145820120095e-05, "loss": 0.6289, "step": 12550 }, { "epoch": 14.295156695156695, "grad_norm": 0.24108396470546722, "learning_rate": 1.0128400389199552e-05, "loss": 0.5268, "step": 12551 }, { "epoch": 14.296296296296296, "grad_norm": 0.2624680995941162, "learning_rate": 1.0124655474810563e-05, "loss": 0.6078, "step": 12552 }, { "epoch": 14.297435897435898, "grad_norm": 0.24978230893611908, "learning_rate": 1.0120911077083195e-05, "loss": 0.5362, "step": 12553 }, { "epoch": 14.298575498575499, "grad_norm": 0.23368790745735168, "learning_rate": 1.0117167196147503e-05, "loss": 0.4848, "step": 12554 }, { "epoch": 14.2997150997151, "grad_norm": 0.2671913504600525, "learning_rate": 1.0113423832133523e-05, "loss": 0.6892, "step": 12555 }, { "epoch": 14.300854700854702, "grad_norm": 0.17675843834877014, "learning_rate": 1.0109680985171269e-05, "loss": 0.7545, "step": 12556 }, { "epoch": 14.301994301994302, "grad_norm": 0.2433677464723587, "learning_rate": 1.0105938655390753e-05, "loss": 0.6221, "step": 12557 }, { "epoch": 14.303133903133903, "grad_norm": 0.20102357864379883, "learning_rate": 1.0102196842921941e-05, "loss": 0.461, "step": 12558 }, { "epoch": 14.304273504273505, "grad_norm": 0.21794714033603668, "learning_rate": 1.0098455547894797e-05, "loss": 0.5659, "step": 12559 }, { "epoch": 14.305413105413106, "grad_norm": 0.2754895091056824, "learning_rate": 1.0094714770439273e-05, "loss": 0.3729, "step": 12560 }, { "epoch": 14.306552706552706, "grad_norm": 0.2294369786977768, "learning_rate": 1.0090974510685306e-05, "loss": 0.6324, "step": 12561 }, { "epoch": 14.307692307692308, "grad_norm": 0.20590168237686157, "learning_rate": 1.0087234768762782e-05, "loss": 0.6657, "step": 12562 }, { "epoch": 14.308831908831909, "grad_norm": 0.21124283969402313, "learning_rate": 1.0083495544801597e-05, "loss": 0.6746, "step": 12563 }, { "epoch": 14.30997150997151, "grad_norm": 0.1777198761701584, "learning_rate": 1.007975683893165e-05, "loss": 0.7074, "step": 12564 }, { "epoch": 14.311111111111112, "grad_norm": 0.20538261532783508, "learning_rate": 1.007601865128277e-05, "loss": 0.7295, "step": 12565 }, { "epoch": 14.312250712250712, "grad_norm": 0.18561547994613647, "learning_rate": 1.0072280981984802e-05, "loss": 0.7966, "step": 12566 }, { "epoch": 14.313390313390313, "grad_norm": 0.17395226657390594, "learning_rate": 1.006854383116757e-05, "loss": 0.9315, "step": 12567 }, { "epoch": 14.314529914529915, "grad_norm": 0.20632939040660858, "learning_rate": 1.006480719896088e-05, "loss": 0.6052, "step": 12568 }, { "epoch": 14.315669515669516, "grad_norm": 0.14955343306064606, "learning_rate": 1.0061071085494498e-05, "loss": 0.5612, "step": 12569 }, { "epoch": 14.316809116809116, "grad_norm": 0.2069385051727295, "learning_rate": 1.00573354908982e-05, "loss": 0.5874, "step": 12570 }, { "epoch": 14.317948717948719, "grad_norm": 0.17582623660564423, "learning_rate": 1.0053600415301734e-05, "loss": 0.7139, "step": 12571 }, { "epoch": 14.31908831908832, "grad_norm": 0.1767067313194275, "learning_rate": 1.0049865858834835e-05, "loss": 0.8682, "step": 12572 }, { "epoch": 14.32022792022792, "grad_norm": 0.1552984118461609, "learning_rate": 1.00461318216272e-05, "loss": 0.8311, "step": 12573 }, { "epoch": 14.321367521367522, "grad_norm": 0.20522622764110565, "learning_rate": 1.004239830380852e-05, "loss": 0.5753, "step": 12574 }, { "epoch": 14.322507122507123, "grad_norm": 0.20223551988601685, "learning_rate": 1.00386653055085e-05, "loss": 0.7066, "step": 12575 }, { "epoch": 14.323646723646723, "grad_norm": 0.1999504119157791, "learning_rate": 1.0034932826856768e-05, "loss": 0.7068, "step": 12576 }, { "epoch": 14.324786324786325, "grad_norm": 0.2503112554550171, "learning_rate": 1.0031200867982974e-05, "loss": 0.7009, "step": 12577 }, { "epoch": 14.325925925925926, "grad_norm": 0.18654067814350128, "learning_rate": 1.0027469429016737e-05, "loss": 0.7889, "step": 12578 }, { "epoch": 14.327065527065526, "grad_norm": 0.17514781653881073, "learning_rate": 1.0023738510087672e-05, "loss": 0.9534, "step": 12579 }, { "epoch": 14.328205128205129, "grad_norm": 0.18068872392177582, "learning_rate": 1.0020008111325343e-05, "loss": 0.5792, "step": 12580 }, { "epoch": 14.32934472934473, "grad_norm": 0.23751656711101532, "learning_rate": 1.0016278232859328e-05, "loss": 0.5473, "step": 12581 }, { "epoch": 14.33048433048433, "grad_norm": 0.22508838772773743, "learning_rate": 1.0012548874819174e-05, "loss": 0.5833, "step": 12582 }, { "epoch": 14.331623931623932, "grad_norm": 0.28413140773773193, "learning_rate": 1.0008820037334423e-05, "loss": 0.4477, "step": 12583 }, { "epoch": 14.332763532763533, "grad_norm": 0.4132240116596222, "learning_rate": 1.000509172053457e-05, "loss": 0.6586, "step": 12584 }, { "epoch": 14.333903133903133, "grad_norm": 0.1827618032693863, "learning_rate": 1.0001363924549109e-05, "loss": 0.5916, "step": 12585 }, { "epoch": 14.335042735042736, "grad_norm": 0.166161447763443, "learning_rate": 9.997636649507541e-06, "loss": 0.8675, "step": 12586 }, { "epoch": 14.336182336182336, "grad_norm": 0.23401637375354767, "learning_rate": 9.993909895539303e-06, "loss": 0.6625, "step": 12587 }, { "epoch": 14.337321937321937, "grad_norm": 0.18541036546230316, "learning_rate": 9.990183662773838e-06, "loss": 0.759, "step": 12588 }, { "epoch": 14.338461538461539, "grad_norm": 0.2001301795244217, "learning_rate": 9.986457951340571e-06, "loss": 0.6406, "step": 12589 }, { "epoch": 14.33960113960114, "grad_norm": 0.20283040404319763, "learning_rate": 9.982732761368918e-06, "loss": 0.8171, "step": 12590 }, { "epoch": 14.34074074074074, "grad_norm": 0.19369982182979584, "learning_rate": 9.979008092988245e-06, "loss": 0.5343, "step": 12591 }, { "epoch": 14.341880341880342, "grad_norm": 0.18374225497245789, "learning_rate": 9.975283946327929e-06, "loss": 0.6343, "step": 12592 }, { "epoch": 14.343019943019943, "grad_norm": 0.20221012830734253, "learning_rate": 9.97156032151732e-06, "loss": 0.5739, "step": 12593 }, { "epoch": 14.344159544159544, "grad_norm": 0.18787631392478943, "learning_rate": 9.967837218685758e-06, "loss": 0.4307, "step": 12594 }, { "epoch": 14.345299145299146, "grad_norm": 0.2108217030763626, "learning_rate": 9.964114637962537e-06, "loss": 0.8173, "step": 12595 }, { "epoch": 14.346438746438746, "grad_norm": 0.19228532910346985, "learning_rate": 9.960392579476957e-06, "loss": 0.7508, "step": 12596 }, { "epoch": 14.347578347578347, "grad_norm": 0.19072678685188293, "learning_rate": 9.95667104335831e-06, "loss": 0.7076, "step": 12597 }, { "epoch": 14.34871794871795, "grad_norm": 0.25872674584388733, "learning_rate": 9.952950029735855e-06, "loss": 0.6042, "step": 12598 }, { "epoch": 14.34985754985755, "grad_norm": 0.17833420634269714, "learning_rate": 9.949229538738817e-06, "loss": 0.7411, "step": 12599 }, { "epoch": 14.35099715099715, "grad_norm": 0.18916936218738556, "learning_rate": 9.945509570496425e-06, "loss": 0.8745, "step": 12600 }, { "epoch": 14.352136752136753, "grad_norm": 0.20757326483726501, "learning_rate": 9.941790125137888e-06, "loss": 0.8031, "step": 12601 }, { "epoch": 14.353276353276353, "grad_norm": 0.16720442473888397, "learning_rate": 9.938071202792398e-06, "loss": 0.5974, "step": 12602 }, { "epoch": 14.354415954415954, "grad_norm": 0.23214448988437653, "learning_rate": 9.934352803589108e-06, "loss": 0.4563, "step": 12603 }, { "epoch": 14.355555555555556, "grad_norm": 0.21899637579917908, "learning_rate": 9.930634927657173e-06, "loss": 0.7588, "step": 12604 }, { "epoch": 14.356695156695157, "grad_norm": 0.17524579167366028, "learning_rate": 9.92691757512573e-06, "loss": 0.705, "step": 12605 }, { "epoch": 14.357834757834757, "grad_norm": 0.34279492497444153, "learning_rate": 9.923200746123902e-06, "loss": 0.4724, "step": 12606 }, { "epoch": 14.35897435897436, "grad_norm": 0.212101548910141, "learning_rate": 9.919484440780752e-06, "loss": 0.4934, "step": 12607 }, { "epoch": 14.36011396011396, "grad_norm": 0.23800750076770782, "learning_rate": 9.915768659225389e-06, "loss": 0.5828, "step": 12608 }, { "epoch": 14.36125356125356, "grad_norm": 0.18548652529716492, "learning_rate": 9.912053401586871e-06, "loss": 0.7144, "step": 12609 }, { "epoch": 14.362393162393163, "grad_norm": 0.19872426986694336, "learning_rate": 9.908338667994221e-06, "loss": 0.8539, "step": 12610 }, { "epoch": 14.363532763532763, "grad_norm": 0.2006315290927887, "learning_rate": 9.904624458576472e-06, "loss": 0.5649, "step": 12611 }, { "epoch": 14.364672364672364, "grad_norm": 0.20271030068397522, "learning_rate": 9.900910773462626e-06, "loss": 0.7093, "step": 12612 }, { "epoch": 14.365811965811966, "grad_norm": 0.18042854964733124, "learning_rate": 9.897197612781684e-06, "loss": 0.7431, "step": 12613 }, { "epoch": 14.366951566951567, "grad_norm": 0.2093563824892044, "learning_rate": 9.89348497666259e-06, "loss": 0.6083, "step": 12614 }, { "epoch": 14.368091168091167, "grad_norm": 0.19551311433315277, "learning_rate": 9.889772865234306e-06, "loss": 0.7185, "step": 12615 }, { "epoch": 14.36923076923077, "grad_norm": 0.22333300113677979, "learning_rate": 9.886061278625766e-06, "loss": 0.6666, "step": 12616 }, { "epoch": 14.37037037037037, "grad_norm": 0.30139651894569397, "learning_rate": 9.882350216965889e-06, "loss": 0.4693, "step": 12617 }, { "epoch": 14.37150997150997, "grad_norm": 0.19599662721157074, "learning_rate": 9.878639680383544e-06, "loss": 0.8895, "step": 12618 }, { "epoch": 14.372649572649573, "grad_norm": 0.18975095450878143, "learning_rate": 9.874929669007637e-06, "loss": 0.7269, "step": 12619 }, { "epoch": 14.373789173789174, "grad_norm": 0.17602480947971344, "learning_rate": 9.871220182967026e-06, "loss": 0.7736, "step": 12620 }, { "epoch": 14.374928774928774, "grad_norm": 0.21895572543144226, "learning_rate": 9.86751122239053e-06, "loss": 0.5928, "step": 12621 }, { "epoch": 14.376068376068377, "grad_norm": 0.20463977754116058, "learning_rate": 9.863802787406987e-06, "loss": 0.6022, "step": 12622 }, { "epoch": 14.377207977207977, "grad_norm": 0.21554405987262726, "learning_rate": 9.860094878145195e-06, "loss": 0.5576, "step": 12623 }, { "epoch": 14.378347578347578, "grad_norm": 0.2489200383424759, "learning_rate": 9.856387494733957e-06, "loss": 0.7404, "step": 12624 }, { "epoch": 14.37948717948718, "grad_norm": 0.17984764277935028, "learning_rate": 9.852680637302011e-06, "loss": 0.773, "step": 12625 }, { "epoch": 14.38062678062678, "grad_norm": 0.21617358922958374, "learning_rate": 9.848974305978125e-06, "loss": 0.6936, "step": 12626 }, { "epoch": 14.381766381766381, "grad_norm": 0.2192595899105072, "learning_rate": 9.845268500891027e-06, "loss": 0.5369, "step": 12627 }, { "epoch": 14.382905982905983, "grad_norm": 0.16107606887817383, "learning_rate": 9.841563222169438e-06, "loss": 0.8325, "step": 12628 }, { "epoch": 14.384045584045584, "grad_norm": 0.28030702471733093, "learning_rate": 9.837858469942029e-06, "loss": 0.4332, "step": 12629 }, { "epoch": 14.385185185185184, "grad_norm": 0.1888609230518341, "learning_rate": 9.834154244337498e-06, "loss": 0.7117, "step": 12630 }, { "epoch": 14.386324786324787, "grad_norm": 0.18952858448028564, "learning_rate": 9.830450545484503e-06, "loss": 0.7184, "step": 12631 }, { "epoch": 14.387464387464387, "grad_norm": 0.20389395952224731, "learning_rate": 9.826747373511672e-06, "loss": 0.4769, "step": 12632 }, { "epoch": 14.388603988603988, "grad_norm": 0.202254980802536, "learning_rate": 9.823044728547628e-06, "loss": 0.6136, "step": 12633 }, { "epoch": 14.38974358974359, "grad_norm": 0.21321870386600494, "learning_rate": 9.81934261072098e-06, "loss": 0.7592, "step": 12634 }, { "epoch": 14.39088319088319, "grad_norm": 0.24171572923660278, "learning_rate": 9.81564102016031e-06, "loss": 0.5723, "step": 12635 }, { "epoch": 14.392022792022791, "grad_norm": 0.1769031584262848, "learning_rate": 9.811939956994194e-06, "loss": 0.6639, "step": 12636 }, { "epoch": 14.393162393162394, "grad_norm": 0.2373218834400177, "learning_rate": 9.80823942135116e-06, "loss": 0.603, "step": 12637 }, { "epoch": 14.394301994301994, "grad_norm": 0.21972903609275818, "learning_rate": 9.804539413359748e-06, "loss": 0.5304, "step": 12638 }, { "epoch": 14.395441595441595, "grad_norm": 0.19349251687526703, "learning_rate": 9.80083993314847e-06, "loss": 0.7405, "step": 12639 }, { "epoch": 14.396581196581197, "grad_norm": 0.22350339591503143, "learning_rate": 9.797140980845817e-06, "loss": 0.3881, "step": 12640 }, { "epoch": 14.397720797720797, "grad_norm": 0.17995622754096985, "learning_rate": 9.79344255658027e-06, "loss": 0.6166, "step": 12641 }, { "epoch": 14.398860398860398, "grad_norm": 0.21553608775138855, "learning_rate": 9.789744660480279e-06, "loss": 0.7, "step": 12642 }, { "epoch": 14.4, "grad_norm": 0.23543411493301392, "learning_rate": 9.786047292674294e-06, "loss": 0.4783, "step": 12643 }, { "epoch": 14.401139601139601, "grad_norm": 0.16944937407970428, "learning_rate": 9.782350453290715e-06, "loss": 0.7924, "step": 12644 }, { "epoch": 14.402279202279201, "grad_norm": 0.2010875940322876, "learning_rate": 9.778654142457954e-06, "loss": 0.7968, "step": 12645 }, { "epoch": 14.403418803418804, "grad_norm": 0.2124527394771576, "learning_rate": 9.774958360304396e-06, "loss": 0.6406, "step": 12646 }, { "epoch": 14.404558404558404, "grad_norm": 0.23077209293842316, "learning_rate": 9.771263106958409e-06, "loss": 0.6283, "step": 12647 }, { "epoch": 14.405698005698005, "grad_norm": 0.20528383553028107, "learning_rate": 9.767568382548326e-06, "loss": 0.7679, "step": 12648 }, { "epoch": 14.406837606837607, "grad_norm": 0.2082781195640564, "learning_rate": 9.76387418720248e-06, "loss": 0.8894, "step": 12649 }, { "epoch": 14.407977207977208, "grad_norm": 0.22017930448055267, "learning_rate": 9.760180521049186e-06, "loss": 0.6447, "step": 12650 }, { "epoch": 14.40911680911681, "grad_norm": 0.20738565921783447, "learning_rate": 9.756487384216732e-06, "loss": 0.4011, "step": 12651 }, { "epoch": 14.41025641025641, "grad_norm": 0.2789316177368164, "learning_rate": 9.752794776833387e-06, "loss": 0.6055, "step": 12652 }, { "epoch": 14.411396011396011, "grad_norm": 0.2477440983057022, "learning_rate": 9.749102699027413e-06, "loss": 0.7176, "step": 12653 }, { "epoch": 14.412535612535613, "grad_norm": 0.21437858045101166, "learning_rate": 9.74541115092705e-06, "loss": 0.7332, "step": 12654 }, { "epoch": 14.413675213675214, "grad_norm": 0.26130738854408264, "learning_rate": 9.741720132660501e-06, "loss": 0.5266, "step": 12655 }, { "epoch": 14.414814814814815, "grad_norm": 0.43563970923423767, "learning_rate": 9.738029644355969e-06, "loss": 0.7076, "step": 12656 }, { "epoch": 14.415954415954417, "grad_norm": 0.23455193638801575, "learning_rate": 9.734339686141642e-06, "loss": 0.5697, "step": 12657 }, { "epoch": 14.417094017094017, "grad_norm": 0.1981157809495926, "learning_rate": 9.730650258145684e-06, "loss": 0.6551, "step": 12658 }, { "epoch": 14.418233618233618, "grad_norm": 0.2522953748703003, "learning_rate": 9.726961360496226e-06, "loss": 0.6056, "step": 12659 }, { "epoch": 14.41937321937322, "grad_norm": 0.23770654201507568, "learning_rate": 9.7232729933214e-06, "loss": 0.7018, "step": 12660 }, { "epoch": 14.42051282051282, "grad_norm": 0.16990040242671967, "learning_rate": 9.719585156749317e-06, "loss": 0.7571, "step": 12661 }, { "epoch": 14.421652421652421, "grad_norm": 0.22051633894443512, "learning_rate": 9.71589785090806e-06, "loss": 0.5478, "step": 12662 }, { "epoch": 14.422792022792024, "grad_norm": 0.1829424798488617, "learning_rate": 9.712211075925704e-06, "loss": 0.7772, "step": 12663 }, { "epoch": 14.423931623931624, "grad_norm": 0.23964989185333252, "learning_rate": 9.708524831930297e-06, "loss": 0.6042, "step": 12664 }, { "epoch": 14.425071225071225, "grad_norm": 0.22030949592590332, "learning_rate": 9.704839119049883e-06, "loss": 0.6522, "step": 12665 }, { "epoch": 14.426210826210827, "grad_norm": 0.17135785520076752, "learning_rate": 9.70115393741246e-06, "loss": 0.5605, "step": 12666 }, { "epoch": 14.427350427350428, "grad_norm": 0.23122885823249817, "learning_rate": 9.697469287146033e-06, "loss": 0.6656, "step": 12667 }, { "epoch": 14.428490028490028, "grad_norm": 0.2249332070350647, "learning_rate": 9.69378516837858e-06, "loss": 0.4033, "step": 12668 }, { "epoch": 14.42962962962963, "grad_norm": 0.2665773332118988, "learning_rate": 9.690101581238067e-06, "loss": 0.4533, "step": 12669 }, { "epoch": 14.430769230769231, "grad_norm": 0.21018235385417938, "learning_rate": 9.686418525852423e-06, "loss": 0.6598, "step": 12670 }, { "epoch": 14.431908831908832, "grad_norm": 0.21335472166538239, "learning_rate": 9.682736002349574e-06, "loss": 0.6232, "step": 12671 }, { "epoch": 14.433048433048434, "grad_norm": 0.19291336834430695, "learning_rate": 9.679054010857427e-06, "loss": 0.6159, "step": 12672 }, { "epoch": 14.434188034188034, "grad_norm": 0.23985837399959564, "learning_rate": 9.675372551503867e-06, "loss": 0.4108, "step": 12673 }, { "epoch": 14.435327635327635, "grad_norm": 0.190646231174469, "learning_rate": 9.671691624416762e-06, "loss": 0.7489, "step": 12674 }, { "epoch": 14.436467236467237, "grad_norm": 0.1992843747138977, "learning_rate": 9.668011229723958e-06, "loss": 0.7786, "step": 12675 }, { "epoch": 14.437606837606838, "grad_norm": 0.18717506527900696, "learning_rate": 9.66433136755329e-06, "loss": 0.7573, "step": 12676 }, { "epoch": 14.438746438746438, "grad_norm": 0.20367279648780823, "learning_rate": 9.660652038032571e-06, "loss": 0.6206, "step": 12677 }, { "epoch": 14.43988603988604, "grad_norm": 0.22619666159152985, "learning_rate": 9.656973241289585e-06, "loss": 0.5807, "step": 12678 }, { "epoch": 14.441025641025641, "grad_norm": 0.21882672607898712, "learning_rate": 9.653294977452113e-06, "loss": 0.5644, "step": 12679 }, { "epoch": 14.442165242165242, "grad_norm": 0.25762587785720825, "learning_rate": 9.649617246647907e-06, "loss": 0.625, "step": 12680 }, { "epoch": 14.443304843304844, "grad_norm": 0.19522501528263092, "learning_rate": 9.645940049004718e-06, "loss": 0.8747, "step": 12681 }, { "epoch": 14.444444444444445, "grad_norm": 0.2014218121767044, "learning_rate": 9.642263384650243e-06, "loss": 0.5604, "step": 12682 }, { "epoch": 14.445584045584045, "grad_norm": 0.2044072151184082, "learning_rate": 9.6385872537122e-06, "loss": 0.7574, "step": 12683 }, { "epoch": 14.446723646723648, "grad_norm": 0.19156105816364288, "learning_rate": 9.634911656318263e-06, "loss": 0.7064, "step": 12684 }, { "epoch": 14.447863247863248, "grad_norm": 0.29438427090644836, "learning_rate": 9.631236592596097e-06, "loss": 0.5793, "step": 12685 }, { "epoch": 14.449002849002849, "grad_norm": 0.21377675235271454, "learning_rate": 9.62756206267335e-06, "loss": 0.4958, "step": 12686 }, { "epoch": 14.450142450142451, "grad_norm": 0.18951386213302612, "learning_rate": 9.623888066677648e-06, "loss": 0.6728, "step": 12687 }, { "epoch": 14.451282051282051, "grad_norm": 0.179488405585289, "learning_rate": 9.620214604736604e-06, "loss": 0.8694, "step": 12688 }, { "epoch": 14.452421652421652, "grad_norm": 0.25905686616897583, "learning_rate": 9.616541676977794e-06, "loss": 0.6106, "step": 12689 }, { "epoch": 14.453561253561254, "grad_norm": 0.18401479721069336, "learning_rate": 9.612869283528795e-06, "loss": 0.7346, "step": 12690 }, { "epoch": 14.454700854700855, "grad_norm": 0.22364789247512817, "learning_rate": 9.609197424517163e-06, "loss": 0.659, "step": 12691 }, { "epoch": 14.455840455840455, "grad_norm": 0.2187655121088028, "learning_rate": 9.605526100070437e-06, "loss": 0.6353, "step": 12692 }, { "epoch": 14.456980056980058, "grad_norm": 0.2196253538131714, "learning_rate": 9.601855310316113e-06, "loss": 0.9079, "step": 12693 }, { "epoch": 14.458119658119658, "grad_norm": 0.21198168396949768, "learning_rate": 9.598185055381704e-06, "loss": 0.7966, "step": 12694 }, { "epoch": 14.459259259259259, "grad_norm": 0.21943166851997375, "learning_rate": 9.59451533539468e-06, "loss": 0.7434, "step": 12695 }, { "epoch": 14.460398860398861, "grad_norm": 0.205881729722023, "learning_rate": 9.590846150482505e-06, "loss": 0.7832, "step": 12696 }, { "epoch": 14.461538461538462, "grad_norm": 0.18034107983112335, "learning_rate": 9.587177500772618e-06, "loss": 0.8069, "step": 12697 }, { "epoch": 14.462678062678062, "grad_norm": 0.24824191629886627, "learning_rate": 9.583509386392441e-06, "loss": 0.5957, "step": 12698 }, { "epoch": 14.463817663817665, "grad_norm": 0.26811710000038147, "learning_rate": 9.579841807469388e-06, "loss": 0.7547, "step": 12699 }, { "epoch": 14.464957264957265, "grad_norm": 0.18017862737178802, "learning_rate": 9.576174764130826e-06, "loss": 0.7503, "step": 12700 }, { "epoch": 14.466096866096866, "grad_norm": 0.2223409116268158, "learning_rate": 9.572508256504128e-06, "loss": 0.7536, "step": 12701 }, { "epoch": 14.467236467236468, "grad_norm": 0.1825609803199768, "learning_rate": 9.568842284716648e-06, "loss": 0.7122, "step": 12702 }, { "epoch": 14.468376068376068, "grad_norm": 0.22106508910655975, "learning_rate": 9.565176848895719e-06, "loss": 0.773, "step": 12703 }, { "epoch": 14.469515669515669, "grad_norm": 0.19889645278453827, "learning_rate": 9.561511949168634e-06, "loss": 0.6686, "step": 12704 }, { "epoch": 14.470655270655271, "grad_norm": 0.22566333413124084, "learning_rate": 9.557847585662694e-06, "loss": 0.4029, "step": 12705 }, { "epoch": 14.471794871794872, "grad_norm": 0.17428520321846008, "learning_rate": 9.554183758505176e-06, "loss": 0.6488, "step": 12706 }, { "epoch": 14.472934472934472, "grad_norm": 0.24169154465198517, "learning_rate": 9.55052046782333e-06, "loss": 0.6469, "step": 12707 }, { "epoch": 14.474074074074075, "grad_norm": 0.18107205629348755, "learning_rate": 9.546857713744396e-06, "loss": 0.5757, "step": 12708 }, { "epoch": 14.475213675213675, "grad_norm": 0.2407011091709137, "learning_rate": 9.543195496395588e-06, "loss": 0.494, "step": 12709 }, { "epoch": 14.476353276353276, "grad_norm": 0.19166330993175507, "learning_rate": 9.539533815904116e-06, "loss": 0.4467, "step": 12710 }, { "epoch": 14.477492877492878, "grad_norm": 0.2439434826374054, "learning_rate": 9.535872672397145e-06, "loss": 0.5605, "step": 12711 }, { "epoch": 14.478632478632479, "grad_norm": 0.2717639207839966, "learning_rate": 9.532212066001842e-06, "loss": 0.7126, "step": 12712 }, { "epoch": 14.47977207977208, "grad_norm": 0.22379228472709656, "learning_rate": 9.528551996845347e-06, "loss": 0.5616, "step": 12713 }, { "epoch": 14.480911680911682, "grad_norm": 0.22340011596679688, "learning_rate": 9.52489246505479e-06, "loss": 0.4267, "step": 12714 }, { "epoch": 14.482051282051282, "grad_norm": 0.17172454297542572, "learning_rate": 9.521233470757285e-06, "loss": 0.8249, "step": 12715 }, { "epoch": 14.483190883190883, "grad_norm": 0.19228993356227875, "learning_rate": 9.517575014079897e-06, "loss": 0.5293, "step": 12716 }, { "epoch": 14.484330484330485, "grad_norm": 0.17538754642009735, "learning_rate": 9.513917095149708e-06, "loss": 0.7077, "step": 12717 }, { "epoch": 14.485470085470086, "grad_norm": 0.16558973491191864, "learning_rate": 9.510259714093761e-06, "loss": 0.6564, "step": 12718 }, { "epoch": 14.486609686609686, "grad_norm": 0.15310008823871613, "learning_rate": 9.506602871039094e-06, "loss": 0.8839, "step": 12719 }, { "epoch": 14.487749287749288, "grad_norm": 0.20456555485725403, "learning_rate": 9.502946566112716e-06, "loss": 0.712, "step": 12720 }, { "epoch": 14.488888888888889, "grad_norm": 0.1737065464258194, "learning_rate": 9.49929079944162e-06, "loss": 0.8899, "step": 12721 }, { "epoch": 14.49002849002849, "grad_norm": 0.2662432789802551, "learning_rate": 9.495635571152792e-06, "loss": 0.536, "step": 12722 }, { "epoch": 14.491168091168092, "grad_norm": 0.22410637140274048, "learning_rate": 9.49198088137317e-06, "loss": 0.7734, "step": 12723 }, { "epoch": 14.492307692307692, "grad_norm": 0.23314222693443298, "learning_rate": 9.488326730229695e-06, "loss": 0.5753, "step": 12724 }, { "epoch": 14.493447293447293, "grad_norm": 0.19664184749126434, "learning_rate": 9.484673117849293e-06, "loss": 0.6002, "step": 12725 }, { "epoch": 14.494586894586895, "grad_norm": 0.17529739439487457, "learning_rate": 9.481020044358867e-06, "loss": 0.5262, "step": 12726 }, { "epoch": 14.495726495726496, "grad_norm": 0.18910002708435059, "learning_rate": 9.477367509885287e-06, "loss": 0.6401, "step": 12727 }, { "epoch": 14.496866096866096, "grad_norm": 0.1735510379076004, "learning_rate": 9.47371551455542e-06, "loss": 0.8743, "step": 12728 }, { "epoch": 14.498005698005699, "grad_norm": 0.1674593836069107, "learning_rate": 9.470064058496108e-06, "loss": 0.584, "step": 12729 }, { "epoch": 14.4991452991453, "grad_norm": 0.23148374259471893, "learning_rate": 9.466413141834182e-06, "loss": 0.7386, "step": 12730 }, { "epoch": 14.5002849002849, "grad_norm": 0.21370823681354523, "learning_rate": 9.462762764696442e-06, "loss": 0.7967, "step": 12731 }, { "epoch": 14.501424501424502, "grad_norm": 0.26482513546943665, "learning_rate": 9.459112927209681e-06, "loss": 0.3715, "step": 12732 }, { "epoch": 14.502564102564103, "grad_norm": 0.2443828582763672, "learning_rate": 9.455463629500675e-06, "loss": 0.5561, "step": 12733 }, { "epoch": 14.503703703703703, "grad_norm": 0.17528831958770752, "learning_rate": 9.451814871696154e-06, "loss": 0.7589, "step": 12734 }, { "epoch": 14.504843304843305, "grad_norm": 0.2478124499320984, "learning_rate": 9.448166653922863e-06, "loss": 0.6932, "step": 12735 }, { "epoch": 14.505982905982906, "grad_norm": 0.1701100468635559, "learning_rate": 9.444518976307509e-06, "loss": 0.8498, "step": 12736 }, { "epoch": 14.507122507122507, "grad_norm": 0.18891778588294983, "learning_rate": 9.440871838976797e-06, "loss": 0.4025, "step": 12737 }, { "epoch": 14.508262108262109, "grad_norm": 0.23873752355575562, "learning_rate": 9.437225242057388e-06, "loss": 0.4546, "step": 12738 }, { "epoch": 14.50940170940171, "grad_norm": 0.21752510964870453, "learning_rate": 9.433579185675942e-06, "loss": 0.7138, "step": 12739 }, { "epoch": 14.51054131054131, "grad_norm": 0.21618473529815674, "learning_rate": 9.429933669959098e-06, "loss": 0.6962, "step": 12740 }, { "epoch": 14.511680911680912, "grad_norm": 0.30999991297721863, "learning_rate": 9.426288695033478e-06, "loss": 0.2891, "step": 12741 }, { "epoch": 14.512820512820513, "grad_norm": 0.21715053915977478, "learning_rate": 9.422644261025678e-06, "loss": 0.6074, "step": 12742 }, { "epoch": 14.513960113960113, "grad_norm": 0.2191510945558548, "learning_rate": 9.419000368062281e-06, "loss": 0.7092, "step": 12743 }, { "epoch": 14.515099715099716, "grad_norm": 0.2763567268848419, "learning_rate": 9.415357016269857e-06, "loss": 0.6448, "step": 12744 }, { "epoch": 14.516239316239316, "grad_norm": 0.23002742230892181, "learning_rate": 9.411714205774935e-06, "loss": 0.6477, "step": 12745 }, { "epoch": 14.517378917378917, "grad_norm": 0.21072138845920563, "learning_rate": 9.408071936704044e-06, "loss": 0.5799, "step": 12746 }, { "epoch": 14.518518518518519, "grad_norm": 0.19315801560878754, "learning_rate": 9.404430209183695e-06, "loss": 0.8153, "step": 12747 }, { "epoch": 14.51965811965812, "grad_norm": 0.22192391753196716, "learning_rate": 9.40078902334038e-06, "loss": 0.7123, "step": 12748 }, { "epoch": 14.52079772079772, "grad_norm": 0.17423202097415924, "learning_rate": 9.397148379300552e-06, "loss": 0.725, "step": 12749 }, { "epoch": 14.521937321937322, "grad_norm": 0.20946653187274933, "learning_rate": 9.393508277190669e-06, "loss": 0.6635, "step": 12750 }, { "epoch": 14.523076923076923, "grad_norm": 0.2139614373445511, "learning_rate": 9.389868717137154e-06, "loss": 0.573, "step": 12751 }, { "epoch": 14.524216524216524, "grad_norm": 0.18973661959171295, "learning_rate": 9.386229699266441e-06, "loss": 0.6132, "step": 12752 }, { "epoch": 14.525356125356126, "grad_norm": 0.2118040919303894, "learning_rate": 9.3825912237049e-06, "loss": 0.5701, "step": 12753 }, { "epoch": 14.526495726495726, "grad_norm": 0.24053429067134857, "learning_rate": 9.378953290578917e-06, "loss": 0.9265, "step": 12754 }, { "epoch": 14.527635327635327, "grad_norm": 0.20716017484664917, "learning_rate": 9.375315900014842e-06, "loss": 0.7185, "step": 12755 }, { "epoch": 14.52877492877493, "grad_norm": 0.20687194168567657, "learning_rate": 9.371679052139023e-06, "loss": 0.7313, "step": 12756 }, { "epoch": 14.52991452991453, "grad_norm": 0.22021609544754028, "learning_rate": 9.36804274707776e-06, "loss": 0.5323, "step": 12757 }, { "epoch": 14.53105413105413, "grad_norm": 0.18618431687355042, "learning_rate": 9.364406984957361e-06, "loss": 0.7662, "step": 12758 }, { "epoch": 14.532193732193733, "grad_norm": 0.2050783634185791, "learning_rate": 9.360771765904105e-06, "loss": 0.5422, "step": 12759 }, { "epoch": 14.533333333333333, "grad_norm": 0.27364587783813477, "learning_rate": 9.357137090044263e-06, "loss": 0.689, "step": 12760 }, { "epoch": 14.534472934472934, "grad_norm": 0.19753402471542358, "learning_rate": 9.353502957504057e-06, "loss": 0.5316, "step": 12761 }, { "epoch": 14.535612535612536, "grad_norm": 0.18314433097839355, "learning_rate": 9.349869368409713e-06, "loss": 0.7352, "step": 12762 }, { "epoch": 14.536752136752137, "grad_norm": 0.22073610126972198, "learning_rate": 9.346236322887464e-06, "loss": 0.7738, "step": 12763 }, { "epoch": 14.537891737891737, "grad_norm": 0.24817657470703125, "learning_rate": 9.342603821063463e-06, "loss": 0.5084, "step": 12764 }, { "epoch": 14.53903133903134, "grad_norm": 0.19437846541404724, "learning_rate": 9.338971863063892e-06, "loss": 0.6462, "step": 12765 }, { "epoch": 14.54017094017094, "grad_norm": 0.1540386974811554, "learning_rate": 9.335340449014896e-06, "loss": 0.7037, "step": 12766 }, { "epoch": 14.54131054131054, "grad_norm": 0.21783535182476044, "learning_rate": 9.331709579042613e-06, "loss": 0.5213, "step": 12767 }, { "epoch": 14.542450142450143, "grad_norm": 0.1780288964509964, "learning_rate": 9.328079253273133e-06, "loss": 0.7168, "step": 12768 }, { "epoch": 14.543589743589743, "grad_norm": 0.1799340695142746, "learning_rate": 9.324449471832564e-06, "loss": 0.591, "step": 12769 }, { "epoch": 14.544729344729344, "grad_norm": 0.21395541727542877, "learning_rate": 9.320820234846966e-06, "loss": 0.7872, "step": 12770 }, { "epoch": 14.545868945868946, "grad_norm": 0.25342968106269836, "learning_rate": 9.317191542442413e-06, "loss": 0.6905, "step": 12771 }, { "epoch": 14.547008547008547, "grad_norm": 0.1848641335964203, "learning_rate": 9.313563394744915e-06, "loss": 0.6439, "step": 12772 }, { "epoch": 14.548148148148147, "grad_norm": 0.24846234917640686, "learning_rate": 9.309935791880489e-06, "loss": 0.765, "step": 12773 }, { "epoch": 14.54928774928775, "grad_norm": 0.23958313465118408, "learning_rate": 9.306308733975158e-06, "loss": 0.5846, "step": 12774 }, { "epoch": 14.55042735042735, "grad_norm": 0.23163294792175293, "learning_rate": 9.302682221154877e-06, "loss": 0.5904, "step": 12775 }, { "epoch": 14.55156695156695, "grad_norm": 0.2353852093219757, "learning_rate": 9.299056253545608e-06, "loss": 0.6717, "step": 12776 }, { "epoch": 14.552706552706553, "grad_norm": 0.1862393021583557, "learning_rate": 9.29543083127329e-06, "loss": 0.6082, "step": 12777 }, { "epoch": 14.553846153846154, "grad_norm": 0.19592125713825226, "learning_rate": 9.291805954463859e-06, "loss": 0.8035, "step": 12778 }, { "epoch": 14.554985754985754, "grad_norm": 0.23634620010852814, "learning_rate": 9.288181623243197e-06, "loss": 0.7024, "step": 12779 }, { "epoch": 14.556125356125357, "grad_norm": 0.19568602740764618, "learning_rate": 9.284557837737193e-06, "loss": 0.5394, "step": 12780 }, { "epoch": 14.557264957264957, "grad_norm": 0.22622573375701904, "learning_rate": 9.280934598071713e-06, "loss": 0.6091, "step": 12781 }, { "epoch": 14.558404558404558, "grad_norm": 0.1921817809343338, "learning_rate": 9.277311904372612e-06, "loss": 0.6438, "step": 12782 }, { "epoch": 14.55954415954416, "grad_norm": 0.20838889479637146, "learning_rate": 9.273689756765697e-06, "loss": 0.6498, "step": 12783 }, { "epoch": 14.56068376068376, "grad_norm": 0.22933794558048248, "learning_rate": 9.270068155376773e-06, "loss": 0.5626, "step": 12784 }, { "epoch": 14.561823361823361, "grad_norm": 0.22988249361515045, "learning_rate": 9.26644710033166e-06, "loss": 0.4697, "step": 12785 }, { "epoch": 14.562962962962963, "grad_norm": 0.19771137833595276, "learning_rate": 9.262826591756096e-06, "loss": 0.4922, "step": 12786 }, { "epoch": 14.564102564102564, "grad_norm": 0.21506692469120026, "learning_rate": 9.259206629775843e-06, "loss": 0.7956, "step": 12787 }, { "epoch": 14.565242165242164, "grad_norm": 0.26526808738708496, "learning_rate": 9.255587214516631e-06, "loss": 0.4643, "step": 12788 }, { "epoch": 14.566381766381767, "grad_norm": 0.22599969804286957, "learning_rate": 9.25196834610418e-06, "loss": 0.6884, "step": 12789 }, { "epoch": 14.567521367521367, "grad_norm": 0.23053516447544098, "learning_rate": 9.24835002466417e-06, "loss": 0.7369, "step": 12790 }, { "epoch": 14.568660968660968, "grad_norm": 0.23256726562976837, "learning_rate": 9.24473225032228e-06, "loss": 0.3916, "step": 12791 }, { "epoch": 14.56980056980057, "grad_norm": 0.19544029235839844, "learning_rate": 9.241115023204164e-06, "loss": 0.6617, "step": 12792 }, { "epoch": 14.57094017094017, "grad_norm": 0.19380274415016174, "learning_rate": 9.23749834343546e-06, "loss": 0.6968, "step": 12793 }, { "epoch": 14.572079772079771, "grad_norm": 0.255537748336792, "learning_rate": 9.233882211141797e-06, "loss": 0.6371, "step": 12794 }, { "epoch": 14.573219373219374, "grad_norm": 0.20726673305034637, "learning_rate": 9.230266626448741e-06, "loss": 0.7882, "step": 12795 }, { "epoch": 14.574358974358974, "grad_norm": 0.2502824068069458, "learning_rate": 9.226651589481906e-06, "loss": 0.409, "step": 12796 }, { "epoch": 14.575498575498575, "grad_norm": 0.19566868245601654, "learning_rate": 9.22303710036684e-06, "loss": 0.6197, "step": 12797 }, { "epoch": 14.576638176638177, "grad_norm": 0.22269435226917267, "learning_rate": 9.219423159229078e-06, "loss": 0.557, "step": 12798 }, { "epoch": 14.577777777777778, "grad_norm": 0.2081078439950943, "learning_rate": 9.215809766194144e-06, "loss": 0.6, "step": 12799 }, { "epoch": 14.578917378917378, "grad_norm": 0.25893324613571167, "learning_rate": 9.212196921387548e-06, "loss": 0.4348, "step": 12800 }, { "epoch": 14.58005698005698, "grad_norm": 0.20001818239688873, "learning_rate": 9.208584624934774e-06, "loss": 0.7304, "step": 12801 }, { "epoch": 14.581196581196581, "grad_norm": 0.1583530753850937, "learning_rate": 9.204972876961277e-06, "loss": 0.9084, "step": 12802 }, { "epoch": 14.582336182336181, "grad_norm": 0.17533500492572784, "learning_rate": 9.201361677592505e-06, "loss": 0.8482, "step": 12803 }, { "epoch": 14.583475783475784, "grad_norm": 0.19490677118301392, "learning_rate": 9.19775102695389e-06, "loss": 0.7168, "step": 12804 }, { "epoch": 14.584615384615384, "grad_norm": 0.23321951925754547, "learning_rate": 9.194140925170847e-06, "loss": 0.4843, "step": 12805 }, { "epoch": 14.585754985754985, "grad_norm": 0.18963153660297394, "learning_rate": 9.190531372368737e-06, "loss": 0.4558, "step": 12806 }, { "epoch": 14.586894586894587, "grad_norm": 0.1767222136259079, "learning_rate": 9.186922368672959e-06, "loss": 0.5863, "step": 12807 }, { "epoch": 14.588034188034188, "grad_norm": 0.29822349548339844, "learning_rate": 9.183313914208863e-06, "loss": 0.5253, "step": 12808 }, { "epoch": 14.589173789173788, "grad_norm": 0.18491874635219574, "learning_rate": 9.17970600910176e-06, "loss": 0.6101, "step": 12809 }, { "epoch": 14.59031339031339, "grad_norm": 0.21525131165981293, "learning_rate": 9.176098653476975e-06, "loss": 0.7278, "step": 12810 }, { "epoch": 14.591452991452991, "grad_norm": 0.21279139816761017, "learning_rate": 9.172491847459797e-06, "loss": 0.8417, "step": 12811 }, { "epoch": 14.592592592592592, "grad_norm": 0.20600202679634094, "learning_rate": 9.168885591175514e-06, "loss": 0.7268, "step": 12812 }, { "epoch": 14.593732193732194, "grad_norm": 0.1988891363143921, "learning_rate": 9.165279884749362e-06, "loss": 0.7926, "step": 12813 }, { "epoch": 14.594871794871795, "grad_norm": 0.17967242002487183, "learning_rate": 9.161674728306582e-06, "loss": 0.4623, "step": 12814 }, { "epoch": 14.596011396011395, "grad_norm": 0.17472924292087555, "learning_rate": 9.158070121972398e-06, "loss": 0.7645, "step": 12815 }, { "epoch": 14.597150997150997, "grad_norm": 0.22233867645263672, "learning_rate": 9.154466065872011e-06, "loss": 0.5888, "step": 12816 }, { "epoch": 14.598290598290598, "grad_norm": 0.18700459599494934, "learning_rate": 9.150862560130574e-06, "loss": 0.637, "step": 12817 }, { "epoch": 14.5994301994302, "grad_norm": 0.2434198260307312, "learning_rate": 9.147259604873276e-06, "loss": 0.7316, "step": 12818 }, { "epoch": 14.6005698005698, "grad_norm": 0.2834024429321289, "learning_rate": 9.143657200225256e-06, "loss": 0.6003, "step": 12819 }, { "epoch": 14.601709401709401, "grad_norm": 0.2045540064573288, "learning_rate": 9.140055346311619e-06, "loss": 0.7706, "step": 12820 }, { "epoch": 14.602849002849004, "grad_norm": 0.21345989406108856, "learning_rate": 9.136454043257472e-06, "loss": 0.43, "step": 12821 }, { "epoch": 14.603988603988604, "grad_norm": 0.2493118792772293, "learning_rate": 9.132853291187904e-06, "loss": 0.6509, "step": 12822 }, { "epoch": 14.605128205128205, "grad_norm": 0.19616162776947021, "learning_rate": 9.129253090227981e-06, "loss": 0.7401, "step": 12823 }, { "epoch": 14.606267806267807, "grad_norm": 0.22674551606178284, "learning_rate": 9.125653440502738e-06, "loss": 0.7348, "step": 12824 }, { "epoch": 14.607407407407408, "grad_norm": 0.19461093842983246, "learning_rate": 9.122054342137203e-06, "loss": 0.6387, "step": 12825 }, { "epoch": 14.608547008547008, "grad_norm": 0.18842506408691406, "learning_rate": 9.118455795256386e-06, "loss": 0.7617, "step": 12826 }, { "epoch": 14.60968660968661, "grad_norm": 0.18397743999958038, "learning_rate": 9.114857799985282e-06, "loss": 0.7431, "step": 12827 }, { "epoch": 14.610826210826211, "grad_norm": 0.22190988063812256, "learning_rate": 9.111260356448836e-06, "loss": 0.774, "step": 12828 }, { "epoch": 14.611965811965812, "grad_norm": 0.26174870133399963, "learning_rate": 9.107663464772018e-06, "loss": 0.4364, "step": 12829 }, { "epoch": 14.613105413105414, "grad_norm": 0.2167321890592575, "learning_rate": 9.104067125079754e-06, "loss": 0.6935, "step": 12830 }, { "epoch": 14.614245014245014, "grad_norm": 0.15865680575370789, "learning_rate": 9.10047133749696e-06, "loss": 0.7962, "step": 12831 }, { "epoch": 14.615384615384615, "grad_norm": 0.19315698742866516, "learning_rate": 9.096876102148511e-06, "loss": 0.7019, "step": 12832 }, { "epoch": 14.616524216524217, "grad_norm": 0.2170671969652176, "learning_rate": 9.093281419159292e-06, "loss": 0.8031, "step": 12833 }, { "epoch": 14.617663817663818, "grad_norm": 0.20496302843093872, "learning_rate": 9.089687288654147e-06, "loss": 0.6789, "step": 12834 }, { "epoch": 14.618803418803418, "grad_norm": 0.22105635702610016, "learning_rate": 9.086093710757928e-06, "loss": 0.4857, "step": 12835 }, { "epoch": 14.61994301994302, "grad_norm": 0.17714130878448486, "learning_rate": 9.08250068559543e-06, "loss": 0.7495, "step": 12836 }, { "epoch": 14.621082621082621, "grad_norm": 0.23276539146900177, "learning_rate": 9.078908213291457e-06, "loss": 0.6358, "step": 12837 }, { "epoch": 14.622222222222222, "grad_norm": 0.23377883434295654, "learning_rate": 9.075316293970783e-06, "loss": 0.7701, "step": 12838 }, { "epoch": 14.623361823361824, "grad_norm": 0.27861806750297546, "learning_rate": 9.07172492775817e-06, "loss": 0.6524, "step": 12839 }, { "epoch": 14.624501424501425, "grad_norm": 0.2254713773727417, "learning_rate": 9.068134114778352e-06, "loss": 0.7804, "step": 12840 }, { "epoch": 14.625641025641025, "grad_norm": 0.20277513563632965, "learning_rate": 9.064543855156046e-06, "loss": 0.7296, "step": 12841 }, { "epoch": 14.626780626780628, "grad_norm": 0.25616806745529175, "learning_rate": 9.060954149015966e-06, "loss": 0.4441, "step": 12842 }, { "epoch": 14.627920227920228, "grad_norm": 0.23563611507415771, "learning_rate": 9.05736499648277e-06, "loss": 0.6675, "step": 12843 }, { "epoch": 14.629059829059829, "grad_norm": 0.22810997068881989, "learning_rate": 9.053776397681133e-06, "loss": 0.3929, "step": 12844 }, { "epoch": 14.630199430199431, "grad_norm": 0.20193806290626526, "learning_rate": 9.050188352735692e-06, "loss": 0.5998, "step": 12845 }, { "epoch": 14.631339031339031, "grad_norm": 0.23286187648773193, "learning_rate": 9.046600861771082e-06, "loss": 0.6299, "step": 12846 }, { "epoch": 14.632478632478632, "grad_norm": 0.19484420120716095, "learning_rate": 9.043013924911886e-06, "loss": 0.7678, "step": 12847 }, { "epoch": 14.633618233618234, "grad_norm": 0.23107795417308807, "learning_rate": 9.0394275422827e-06, "loss": 0.5326, "step": 12848 }, { "epoch": 14.634757834757835, "grad_norm": 0.22537994384765625, "learning_rate": 9.035841714008084e-06, "loss": 0.6991, "step": 12849 }, { "epoch": 14.635897435897435, "grad_norm": 0.18735532462596893, "learning_rate": 9.032256440212589e-06, "loss": 0.7035, "step": 12850 }, { "epoch": 14.637037037037038, "grad_norm": 0.21662990748882294, "learning_rate": 9.02867172102074e-06, "loss": 0.7339, "step": 12851 }, { "epoch": 14.638176638176638, "grad_norm": 0.21480605006217957, "learning_rate": 9.02508755655704e-06, "loss": 0.7108, "step": 12852 }, { "epoch": 14.639316239316239, "grad_norm": 0.2904488444328308, "learning_rate": 9.021503946945994e-06, "loss": 0.4803, "step": 12853 }, { "epoch": 14.640455840455841, "grad_norm": 0.1873403638601303, "learning_rate": 9.017920892312046e-06, "loss": 0.6105, "step": 12854 }, { "epoch": 14.641595441595442, "grad_norm": 0.2191169559955597, "learning_rate": 9.014338392779655e-06, "loss": 0.4706, "step": 12855 }, { "epoch": 14.642735042735042, "grad_norm": 0.24094586074352264, "learning_rate": 9.010756448473254e-06, "loss": 0.6251, "step": 12856 }, { "epoch": 14.643874643874645, "grad_norm": 0.2248314917087555, "learning_rate": 9.007175059517262e-06, "loss": 0.6546, "step": 12857 }, { "epoch": 14.645014245014245, "grad_norm": 0.1848820000886917, "learning_rate": 9.00359422603605e-06, "loss": 0.8862, "step": 12858 }, { "epoch": 14.646153846153846, "grad_norm": 0.1982840746641159, "learning_rate": 9.000013948153998e-06, "loss": 0.7922, "step": 12859 }, { "epoch": 14.647293447293448, "grad_norm": 0.2248876541852951, "learning_rate": 8.996434225995467e-06, "loss": 0.6102, "step": 12860 }, { "epoch": 14.648433048433048, "grad_norm": 0.16222314536571503, "learning_rate": 8.992855059684783e-06, "loss": 0.8696, "step": 12861 }, { "epoch": 14.649572649572649, "grad_norm": 0.2841748893260956, "learning_rate": 8.989276449346262e-06, "loss": 0.7772, "step": 12862 }, { "epoch": 14.650712250712251, "grad_norm": 0.18035295605659485, "learning_rate": 8.9856983951042e-06, "loss": 0.6827, "step": 12863 }, { "epoch": 14.651851851851852, "grad_norm": 0.22114045917987823, "learning_rate": 8.982120897082882e-06, "loss": 0.485, "step": 12864 }, { "epoch": 14.652991452991452, "grad_norm": 0.22197026014328003, "learning_rate": 8.978543955406545e-06, "loss": 0.4146, "step": 12865 }, { "epoch": 14.654131054131055, "grad_norm": 0.21953082084655762, "learning_rate": 8.974967570199435e-06, "loss": 0.7247, "step": 12866 }, { "epoch": 14.655270655270655, "grad_norm": 0.2214314192533493, "learning_rate": 8.971391741585772e-06, "loss": 0.5547, "step": 12867 }, { "epoch": 14.656410256410256, "grad_norm": 0.2568795680999756, "learning_rate": 8.96781646968976e-06, "loss": 0.5693, "step": 12868 }, { "epoch": 14.657549857549858, "grad_norm": 0.20167192816734314, "learning_rate": 8.964241754635563e-06, "loss": 0.6874, "step": 12869 }, { "epoch": 14.658689458689459, "grad_norm": 0.20127788186073303, "learning_rate": 8.960667596547348e-06, "loss": 0.6502, "step": 12870 }, { "epoch": 14.65982905982906, "grad_norm": 0.19613531231880188, "learning_rate": 8.957093995549253e-06, "loss": 0.7393, "step": 12871 }, { "epoch": 14.660968660968662, "grad_norm": 0.2337152659893036, "learning_rate": 8.953520951765406e-06, "loss": 0.5703, "step": 12872 }, { "epoch": 14.662108262108262, "grad_norm": 0.18300464749336243, "learning_rate": 8.949948465319901e-06, "loss": 0.4934, "step": 12873 }, { "epoch": 14.663247863247863, "grad_norm": 0.23022036254405975, "learning_rate": 8.946376536336826e-06, "loss": 0.8773, "step": 12874 }, { "epoch": 14.664387464387465, "grad_norm": 0.1827642321586609, "learning_rate": 8.942805164940241e-06, "loss": 0.592, "step": 12875 }, { "epoch": 14.665527065527066, "grad_norm": 0.23409932851791382, "learning_rate": 8.939234351254198e-06, "loss": 0.6742, "step": 12876 }, { "epoch": 14.666666666666666, "grad_norm": 0.20597197115421295, "learning_rate": 8.935664095402705e-06, "loss": 0.7454, "step": 12877 }, { "epoch": 14.667806267806268, "grad_norm": 0.23554804921150208, "learning_rate": 8.932094397509774e-06, "loss": 0.4539, "step": 12878 }, { "epoch": 14.668945868945869, "grad_norm": 0.24082693457603455, "learning_rate": 8.928525257699394e-06, "loss": 0.5551, "step": 12879 }, { "epoch": 14.67008547008547, "grad_norm": 0.18054941296577454, "learning_rate": 8.924956676095537e-06, "loss": 0.967, "step": 12880 }, { "epoch": 14.671225071225072, "grad_norm": 0.18954972922801971, "learning_rate": 8.921388652822129e-06, "loss": 0.5843, "step": 12881 }, { "epoch": 14.672364672364672, "grad_norm": 0.20438851416110992, "learning_rate": 8.917821188003112e-06, "loss": 0.7102, "step": 12882 }, { "epoch": 14.673504273504273, "grad_norm": 0.19364258646965027, "learning_rate": 8.91425428176239e-06, "loss": 0.6255, "step": 12883 }, { "epoch": 14.674643874643875, "grad_norm": 0.15404297411441803, "learning_rate": 8.910687934223854e-06, "loss": 0.8145, "step": 12884 }, { "epoch": 14.675783475783476, "grad_norm": 0.20459187030792236, "learning_rate": 8.90712214551137e-06, "loss": 0.5912, "step": 12885 }, { "epoch": 14.676923076923076, "grad_norm": 0.24483807384967804, "learning_rate": 8.903556915748792e-06, "loss": 0.7708, "step": 12886 }, { "epoch": 14.678062678062679, "grad_norm": 0.18246497213840485, "learning_rate": 8.899992245059956e-06, "loss": 0.6854, "step": 12887 }, { "epoch": 14.67920227920228, "grad_norm": 0.18728506565093994, "learning_rate": 8.896428133568654e-06, "loss": 0.7306, "step": 12888 }, { "epoch": 14.68034188034188, "grad_norm": 0.1966158151626587, "learning_rate": 8.892864581398691e-06, "loss": 0.7761, "step": 12889 }, { "epoch": 14.681481481481482, "grad_norm": 0.166145920753479, "learning_rate": 8.889301588673835e-06, "loss": 0.7806, "step": 12890 }, { "epoch": 14.682621082621083, "grad_norm": 0.18172600865364075, "learning_rate": 8.885739155517845e-06, "loss": 0.6886, "step": 12891 }, { "epoch": 14.683760683760683, "grad_norm": 0.26016733050346375, "learning_rate": 8.882177282054443e-06, "loss": 0.4099, "step": 12892 }, { "epoch": 14.684900284900285, "grad_norm": 0.2189798653125763, "learning_rate": 8.878615968407347e-06, "loss": 0.6603, "step": 12893 }, { "epoch": 14.686039886039886, "grad_norm": 0.20690695941448212, "learning_rate": 8.875055214700251e-06, "loss": 0.6559, "step": 12894 }, { "epoch": 14.687179487179487, "grad_norm": 0.17047905921936035, "learning_rate": 8.871495021056834e-06, "loss": 0.9431, "step": 12895 }, { "epoch": 14.688319088319089, "grad_norm": 0.2584533095359802, "learning_rate": 8.867935387600745e-06, "loss": 0.6573, "step": 12896 }, { "epoch": 14.68945868945869, "grad_norm": 0.18696272373199463, "learning_rate": 8.864376314455627e-06, "loss": 0.6192, "step": 12897 }, { "epoch": 14.69059829059829, "grad_norm": 0.20733846724033356, "learning_rate": 8.8608178017451e-06, "loss": 0.8758, "step": 12898 }, { "epoch": 14.691737891737892, "grad_norm": 0.19508017599582672, "learning_rate": 8.857259849592744e-06, "loss": 0.7261, "step": 12899 }, { "epoch": 14.692877492877493, "grad_norm": 0.20132368803024292, "learning_rate": 8.853702458122149e-06, "loss": 0.6758, "step": 12900 }, { "epoch": 14.694017094017093, "grad_norm": 0.2252517193555832, "learning_rate": 8.850145627456869e-06, "loss": 0.502, "step": 12901 }, { "epoch": 14.695156695156696, "grad_norm": 0.24951429665088654, "learning_rate": 8.84658935772045e-06, "loss": 0.5665, "step": 12902 }, { "epoch": 14.696296296296296, "grad_norm": 0.24416764080524445, "learning_rate": 8.843033649036397e-06, "loss": 0.7362, "step": 12903 }, { "epoch": 14.697435897435897, "grad_norm": 0.21139328181743622, "learning_rate": 8.839478501528217e-06, "loss": 0.7178, "step": 12904 }, { "epoch": 14.698575498575499, "grad_norm": 0.2219403237104416, "learning_rate": 8.835923915319382e-06, "loss": 0.611, "step": 12905 }, { "epoch": 14.6997150997151, "grad_norm": 0.20910504460334778, "learning_rate": 8.832369890533379e-06, "loss": 0.5973, "step": 12906 }, { "epoch": 14.7008547008547, "grad_norm": 0.20052868127822876, "learning_rate": 8.82881642729362e-06, "loss": 0.6465, "step": 12907 }, { "epoch": 14.701994301994302, "grad_norm": 0.22043506801128387, "learning_rate": 8.82526352572354e-06, "loss": 0.5573, "step": 12908 }, { "epoch": 14.703133903133903, "grad_norm": 0.20473963022232056, "learning_rate": 8.821711185946538e-06, "loss": 0.4251, "step": 12909 }, { "epoch": 14.704273504273504, "grad_norm": 0.21355542540550232, "learning_rate": 8.818159408086004e-06, "loss": 0.4016, "step": 12910 }, { "epoch": 14.705413105413106, "grad_norm": 0.18627426028251648, "learning_rate": 8.814608192265287e-06, "loss": 0.6788, "step": 12911 }, { "epoch": 14.706552706552706, "grad_norm": 0.24863211810588837, "learning_rate": 8.811057538607736e-06, "loss": 0.4992, "step": 12912 }, { "epoch": 14.707692307692307, "grad_norm": 0.18860134482383728, "learning_rate": 8.80750744723668e-06, "loss": 0.6766, "step": 12913 }, { "epoch": 14.70883190883191, "grad_norm": 0.2076950967311859, "learning_rate": 8.803957918275425e-06, "loss": 0.4725, "step": 12914 }, { "epoch": 14.70997150997151, "grad_norm": 0.17382395267486572, "learning_rate": 8.800408951847242e-06, "loss": 0.7972, "step": 12915 }, { "epoch": 14.71111111111111, "grad_norm": 0.2199559062719345, "learning_rate": 8.796860548075397e-06, "loss": 0.6238, "step": 12916 }, { "epoch": 14.712250712250713, "grad_norm": 0.27296844124794006, "learning_rate": 8.793312707083162e-06, "loss": 0.512, "step": 12917 }, { "epoch": 14.713390313390313, "grad_norm": 0.19933409988880157, "learning_rate": 8.789765428993737e-06, "loss": 0.5875, "step": 12918 }, { "epoch": 14.714529914529914, "grad_norm": 0.22687682509422302, "learning_rate": 8.786218713930336e-06, "loss": 0.7143, "step": 12919 }, { "epoch": 14.715669515669516, "grad_norm": 0.23462781310081482, "learning_rate": 8.782672562016147e-06, "loss": 0.4709, "step": 12920 }, { "epoch": 14.716809116809117, "grad_norm": 0.20330078899860382, "learning_rate": 8.779126973374347e-06, "loss": 0.8525, "step": 12921 }, { "epoch": 14.717948717948717, "grad_norm": 0.21839894354343414, "learning_rate": 8.775581948128067e-06, "loss": 0.7413, "step": 12922 }, { "epoch": 14.71908831908832, "grad_norm": 0.18619459867477417, "learning_rate": 8.772037486400441e-06, "loss": 0.775, "step": 12923 }, { "epoch": 14.72022792022792, "grad_norm": 0.27662035822868347, "learning_rate": 8.76849358831458e-06, "loss": 0.4355, "step": 12924 }, { "epoch": 14.72136752136752, "grad_norm": 0.21078123152256012, "learning_rate": 8.764950253993585e-06, "loss": 0.5978, "step": 12925 }, { "epoch": 14.722507122507123, "grad_norm": 0.2041301131248474, "learning_rate": 8.761407483560504e-06, "loss": 0.6782, "step": 12926 }, { "epoch": 14.723646723646723, "grad_norm": 0.203069269657135, "learning_rate": 8.757865277138388e-06, "loss": 0.5123, "step": 12927 }, { "epoch": 14.724786324786324, "grad_norm": 0.20172251760959625, "learning_rate": 8.754323634850294e-06, "loss": 0.588, "step": 12928 }, { "epoch": 14.725925925925926, "grad_norm": 0.22396618127822876, "learning_rate": 8.750782556819209e-06, "loss": 0.6283, "step": 12929 }, { "epoch": 14.727065527065527, "grad_norm": 0.208025723695755, "learning_rate": 8.747242043168133e-06, "loss": 0.8028, "step": 12930 }, { "epoch": 14.728205128205127, "grad_norm": 0.21255724132061005, "learning_rate": 8.743702094020035e-06, "loss": 0.7828, "step": 12931 }, { "epoch": 14.72934472934473, "grad_norm": 0.21702732145786285, "learning_rate": 8.740162709497874e-06, "loss": 0.4972, "step": 12932 }, { "epoch": 14.73048433048433, "grad_norm": 0.1985195428133011, "learning_rate": 8.736623889724572e-06, "loss": 0.5624, "step": 12933 }, { "epoch": 14.73162393162393, "grad_norm": 0.19873084127902985, "learning_rate": 8.733085634823044e-06, "loss": 0.6781, "step": 12934 }, { "epoch": 14.732763532763533, "grad_norm": 0.256135493516922, "learning_rate": 8.729547944916187e-06, "loss": 0.5671, "step": 12935 }, { "epoch": 14.733903133903134, "grad_norm": 0.2034204751253128, "learning_rate": 8.726010820126881e-06, "loss": 0.6488, "step": 12936 }, { "epoch": 14.735042735042736, "grad_norm": 0.17327383160591125, "learning_rate": 8.722474260577965e-06, "loss": 0.807, "step": 12937 }, { "epoch": 14.736182336182337, "grad_norm": 0.2585669159889221, "learning_rate": 8.718938266392281e-06, "loss": 0.4724, "step": 12938 }, { "epoch": 14.737321937321937, "grad_norm": 0.23301592469215393, "learning_rate": 8.715402837692646e-06, "loss": 0.7433, "step": 12939 }, { "epoch": 14.73846153846154, "grad_norm": 0.2916012406349182, "learning_rate": 8.71186797460185e-06, "loss": 0.5879, "step": 12940 }, { "epoch": 14.73960113960114, "grad_norm": 0.19250716269016266, "learning_rate": 8.708333677242675e-06, "loss": 0.6382, "step": 12941 }, { "epoch": 14.74074074074074, "grad_norm": 0.2200244516134262, "learning_rate": 8.704799945737873e-06, "loss": 0.599, "step": 12942 }, { "epoch": 14.741880341880343, "grad_norm": 0.18007096648216248, "learning_rate": 8.701266780210187e-06, "loss": 0.5975, "step": 12943 }, { "epoch": 14.743019943019943, "grad_norm": 0.2152889370918274, "learning_rate": 8.697734180782321e-06, "loss": 0.8369, "step": 12944 }, { "epoch": 14.744159544159544, "grad_norm": 0.22600147128105164, "learning_rate": 8.69420214757698e-06, "loss": 0.6026, "step": 12945 }, { "epoch": 14.745299145299146, "grad_norm": 0.208087757229805, "learning_rate": 8.690670680716836e-06, "loss": 0.7278, "step": 12946 }, { "epoch": 14.746438746438747, "grad_norm": 0.2290181815624237, "learning_rate": 8.687139780324563e-06, "loss": 0.6925, "step": 12947 }, { "epoch": 14.747578347578347, "grad_norm": 0.22713790833950043, "learning_rate": 8.683609446522772e-06, "loss": 0.7047, "step": 12948 }, { "epoch": 14.74871794871795, "grad_norm": 0.244438037276268, "learning_rate": 8.6800796794341e-06, "loss": 0.6606, "step": 12949 }, { "epoch": 14.74985754985755, "grad_norm": 0.2058820128440857, "learning_rate": 8.67655047918113e-06, "loss": 0.6525, "step": 12950 }, { "epoch": 14.75099715099715, "grad_norm": 0.22019192576408386, "learning_rate": 8.673021845886467e-06, "loss": 0.6703, "step": 12951 }, { "epoch": 14.752136752136753, "grad_norm": 0.19387786090373993, "learning_rate": 8.66949377967265e-06, "loss": 0.5103, "step": 12952 }, { "epoch": 14.753276353276354, "grad_norm": 0.2757326662540436, "learning_rate": 8.665966280662219e-06, "loss": 0.7082, "step": 12953 }, { "epoch": 14.754415954415954, "grad_norm": 0.17531627416610718, "learning_rate": 8.6624393489777e-06, "loss": 0.7196, "step": 12954 }, { "epoch": 14.755555555555556, "grad_norm": 0.2704463303089142, "learning_rate": 8.6589129847416e-06, "loss": 0.5077, "step": 12955 }, { "epoch": 14.756695156695157, "grad_norm": 0.2233131229877472, "learning_rate": 8.655387188076381e-06, "loss": 0.7694, "step": 12956 }, { "epoch": 14.757834757834758, "grad_norm": 0.19787319004535675, "learning_rate": 8.651861959104513e-06, "loss": 0.7725, "step": 12957 }, { "epoch": 14.75897435897436, "grad_norm": 0.19262279570102692, "learning_rate": 8.648337297948436e-06, "loss": 0.8371, "step": 12958 }, { "epoch": 14.76011396011396, "grad_norm": 0.17423337697982788, "learning_rate": 8.644813204730578e-06, "loss": 0.7347, "step": 12959 }, { "epoch": 14.761253561253561, "grad_norm": 0.24191370606422424, "learning_rate": 8.64128967957333e-06, "loss": 0.6996, "step": 12960 }, { "epoch": 14.762393162393163, "grad_norm": 0.19753214716911316, "learning_rate": 8.63776672259907e-06, "loss": 0.6602, "step": 12961 }, { "epoch": 14.763532763532764, "grad_norm": 0.19329145550727844, "learning_rate": 8.634244333930181e-06, "loss": 0.7179, "step": 12962 }, { "epoch": 14.764672364672364, "grad_norm": 0.2344941645860672, "learning_rate": 8.630722513688986e-06, "loss": 0.6992, "step": 12963 }, { "epoch": 14.765811965811967, "grad_norm": 0.1706835776567459, "learning_rate": 8.627201261997815e-06, "loss": 0.7616, "step": 12964 }, { "epoch": 14.766951566951567, "grad_norm": 0.19436347484588623, "learning_rate": 8.623680578978968e-06, "loss": 0.6385, "step": 12965 }, { "epoch": 14.768091168091168, "grad_norm": 0.1849130392074585, "learning_rate": 8.620160464754737e-06, "loss": 0.8438, "step": 12966 }, { "epoch": 14.76923076923077, "grad_norm": 0.20800837874412537, "learning_rate": 8.616640919447372e-06, "loss": 0.6825, "step": 12967 }, { "epoch": 14.77037037037037, "grad_norm": 0.17224492132663727, "learning_rate": 8.61312194317912e-06, "loss": 0.7496, "step": 12968 }, { "epoch": 14.771509971509971, "grad_norm": 0.20169848203659058, "learning_rate": 8.609603536072209e-06, "loss": 0.6928, "step": 12969 }, { "epoch": 14.772649572649573, "grad_norm": 0.18767385184764862, "learning_rate": 8.606085698248848e-06, "loss": 0.6698, "step": 12970 }, { "epoch": 14.773789173789174, "grad_norm": 0.18218882381916046, "learning_rate": 8.602568429831203e-06, "loss": 0.8292, "step": 12971 }, { "epoch": 14.774928774928775, "grad_norm": 0.21513763070106506, "learning_rate": 8.599051730941445e-06, "loss": 0.4836, "step": 12972 }, { "epoch": 14.776068376068377, "grad_norm": 0.29810693860054016, "learning_rate": 8.59553560170174e-06, "loss": 0.2674, "step": 12973 }, { "epoch": 14.777207977207977, "grad_norm": 0.20459535717964172, "learning_rate": 8.592020042234186e-06, "loss": 0.702, "step": 12974 }, { "epoch": 14.778347578347578, "grad_norm": 0.2290467917919159, "learning_rate": 8.5885050526609e-06, "loss": 0.5831, "step": 12975 }, { "epoch": 14.77948717948718, "grad_norm": 0.19999980926513672, "learning_rate": 8.584990633103962e-06, "loss": 0.6615, "step": 12976 }, { "epoch": 14.78062678062678, "grad_norm": 0.2327142059803009, "learning_rate": 8.581476783685452e-06, "loss": 0.7061, "step": 12977 }, { "epoch": 14.781766381766381, "grad_norm": 0.19669906795024872, "learning_rate": 8.577963504527395e-06, "loss": 0.842, "step": 12978 }, { "epoch": 14.782905982905984, "grad_norm": 0.2074698507785797, "learning_rate": 8.574450795751827e-06, "loss": 0.7923, "step": 12979 }, { "epoch": 14.784045584045584, "grad_norm": 0.21502606570720673, "learning_rate": 8.570938657480753e-06, "loss": 0.6886, "step": 12980 }, { "epoch": 14.785185185185185, "grad_norm": 0.18325702846050262, "learning_rate": 8.567427089836166e-06, "loss": 0.552, "step": 12981 }, { "epoch": 14.786324786324787, "grad_norm": 0.19786466658115387, "learning_rate": 8.563916092940023e-06, "loss": 0.7887, "step": 12982 }, { "epoch": 14.787464387464388, "grad_norm": 0.20122556388378143, "learning_rate": 8.56040566691426e-06, "loss": 0.8217, "step": 12983 }, { "epoch": 14.788603988603988, "grad_norm": 0.23256726562976837, "learning_rate": 8.556895811880827e-06, "loss": 0.7266, "step": 12984 }, { "epoch": 14.78974358974359, "grad_norm": 0.22507259249687195, "learning_rate": 8.55338652796163e-06, "loss": 0.6712, "step": 12985 }, { "epoch": 14.790883190883191, "grad_norm": 0.2573149502277374, "learning_rate": 8.549877815278537e-06, "loss": 0.6409, "step": 12986 }, { "epoch": 14.792022792022792, "grad_norm": 0.2129795402288437, "learning_rate": 8.546369673953428e-06, "loss": 0.6501, "step": 12987 }, { "epoch": 14.793162393162394, "grad_norm": 0.22979819774627686, "learning_rate": 8.542862104108146e-06, "loss": 0.525, "step": 12988 }, { "epoch": 14.794301994301994, "grad_norm": 0.23122137784957886, "learning_rate": 8.53935510586453e-06, "loss": 0.527, "step": 12989 }, { "epoch": 14.795441595441595, "grad_norm": 0.2197915017604828, "learning_rate": 8.53584867934437e-06, "loss": 0.507, "step": 12990 }, { "epoch": 14.796581196581197, "grad_norm": 0.22086749970912933, "learning_rate": 8.53234282466946e-06, "loss": 0.4494, "step": 12991 }, { "epoch": 14.797720797720798, "grad_norm": 0.24085772037506104, "learning_rate": 8.528837541961571e-06, "loss": 0.6806, "step": 12992 }, { "epoch": 14.798860398860398, "grad_norm": 0.2377721071243286, "learning_rate": 8.525332831342459e-06, "loss": 0.4843, "step": 12993 }, { "epoch": 14.8, "grad_norm": 0.19742220640182495, "learning_rate": 8.521828692933826e-06, "loss": 0.659, "step": 12994 }, { "epoch": 14.801139601139601, "grad_norm": 0.19149692356586456, "learning_rate": 8.518325126857405e-06, "loss": 0.4287, "step": 12995 }, { "epoch": 14.802279202279202, "grad_norm": 0.21423348784446716, "learning_rate": 8.514822133234886e-06, "loss": 0.7736, "step": 12996 }, { "epoch": 14.803418803418804, "grad_norm": 0.25497955083847046, "learning_rate": 8.511319712187924e-06, "loss": 0.6745, "step": 12997 }, { "epoch": 14.804558404558405, "grad_norm": 0.2030903548002243, "learning_rate": 8.507817863838166e-06, "loss": 0.7302, "step": 12998 }, { "epoch": 14.805698005698005, "grad_norm": 0.22430749237537384, "learning_rate": 8.504316588307252e-06, "loss": 0.6879, "step": 12999 }, { "epoch": 14.806837606837608, "grad_norm": 0.22423458099365234, "learning_rate": 8.500815885716792e-06, "loss": 0.5246, "step": 13000 }, { "epoch": 14.807977207977208, "grad_norm": 0.2193293273448944, "learning_rate": 8.497315756188366e-06, "loss": 0.5874, "step": 13001 }, { "epoch": 14.809116809116809, "grad_norm": 0.23553064465522766, "learning_rate": 8.493816199843541e-06, "loss": 0.639, "step": 13002 }, { "epoch": 14.810256410256411, "grad_norm": 0.19212353229522705, "learning_rate": 8.490317216803872e-06, "loss": 0.5617, "step": 13003 }, { "epoch": 14.811396011396011, "grad_norm": 0.2506997883319855, "learning_rate": 8.486818807190897e-06, "loss": 0.4465, "step": 13004 }, { "epoch": 14.812535612535612, "grad_norm": 0.2890404462814331, "learning_rate": 8.483320971126098e-06, "loss": 0.2299, "step": 13005 }, { "epoch": 14.813675213675214, "grad_norm": 0.2695049047470093, "learning_rate": 8.479823708730994e-06, "loss": 0.4743, "step": 13006 }, { "epoch": 14.814814814814815, "grad_norm": 0.24549205601215363, "learning_rate": 8.47632702012705e-06, "loss": 0.6416, "step": 13007 }, { "epoch": 14.815954415954415, "grad_norm": 0.19436125457286835, "learning_rate": 8.472830905435703e-06, "loss": 0.6691, "step": 13008 }, { "epoch": 14.817094017094018, "grad_norm": 0.27004918456077576, "learning_rate": 8.469335364778385e-06, "loss": 0.619, "step": 13009 }, { "epoch": 14.818233618233618, "grad_norm": 0.22013430297374725, "learning_rate": 8.465840398276512e-06, "loss": 0.6623, "step": 13010 }, { "epoch": 14.819373219373219, "grad_norm": 0.21185335516929626, "learning_rate": 8.462346006051477e-06, "loss": 0.72, "step": 13011 }, { "epoch": 14.820512820512821, "grad_norm": 0.2211286723613739, "learning_rate": 8.458852188224636e-06, "loss": 0.7469, "step": 13012 }, { "epoch": 14.821652421652422, "grad_norm": 0.3561602234840393, "learning_rate": 8.455358944917349e-06, "loss": 0.563, "step": 13013 }, { "epoch": 14.822792022792022, "grad_norm": 0.28517386317253113, "learning_rate": 8.45186627625094e-06, "loss": 0.7062, "step": 13014 }, { "epoch": 14.823931623931625, "grad_norm": 0.20802541077136993, "learning_rate": 8.448374182346735e-06, "loss": 0.5973, "step": 13015 }, { "epoch": 14.825071225071225, "grad_norm": 0.23817268013954163, "learning_rate": 8.444882663325996e-06, "loss": 0.5042, "step": 13016 }, { "epoch": 14.826210826210826, "grad_norm": 0.20003987848758698, "learning_rate": 8.441391719310014e-06, "loss": 0.8336, "step": 13017 }, { "epoch": 14.827350427350428, "grad_norm": 0.21644364297389984, "learning_rate": 8.437901350420045e-06, "loss": 0.5737, "step": 13018 }, { "epoch": 14.828490028490029, "grad_norm": 0.20241820812225342, "learning_rate": 8.434411556777294e-06, "loss": 0.828, "step": 13019 }, { "epoch": 14.829629629629629, "grad_norm": 0.21604406833648682, "learning_rate": 8.430922338502991e-06, "loss": 0.5363, "step": 13020 }, { "epoch": 14.830769230769231, "grad_norm": 0.19081199169158936, "learning_rate": 8.427433695718318e-06, "loss": 0.5969, "step": 13021 }, { "epoch": 14.831908831908832, "grad_norm": 0.22546574473381042, "learning_rate": 8.423945628544455e-06, "loss": 0.427, "step": 13022 }, { "epoch": 14.833048433048432, "grad_norm": 0.25194093585014343, "learning_rate": 8.420458137102539e-06, "loss": 0.3672, "step": 13023 }, { "epoch": 14.834188034188035, "grad_norm": 0.19357764720916748, "learning_rate": 8.416971221513703e-06, "loss": 0.6002, "step": 13024 }, { "epoch": 14.835327635327635, "grad_norm": 0.20514234900474548, "learning_rate": 8.41348488189906e-06, "loss": 0.6676, "step": 13025 }, { "epoch": 14.836467236467236, "grad_norm": 0.24507999420166016, "learning_rate": 8.40999911837971e-06, "loss": 0.6282, "step": 13026 }, { "epoch": 14.837606837606838, "grad_norm": 0.2528490126132965, "learning_rate": 8.406513931076699e-06, "loss": 0.6174, "step": 13027 }, { "epoch": 14.838746438746439, "grad_norm": 0.19692501425743103, "learning_rate": 8.403029320111096e-06, "loss": 0.7863, "step": 13028 }, { "epoch": 14.83988603988604, "grad_norm": 0.21329069137573242, "learning_rate": 8.399545285603927e-06, "loss": 0.7401, "step": 13029 }, { "epoch": 14.841025641025642, "grad_norm": 0.2121441662311554, "learning_rate": 8.39606182767621e-06, "loss": 0.7027, "step": 13030 }, { "epoch": 14.842165242165242, "grad_norm": 0.19697655737400055, "learning_rate": 8.39257894644892e-06, "loss": 0.6129, "step": 13031 }, { "epoch": 14.843304843304843, "grad_norm": 0.22422295808792114, "learning_rate": 8.389096642043034e-06, "loss": 0.6316, "step": 13032 }, { "epoch": 14.844444444444445, "grad_norm": 0.19399236142635345, "learning_rate": 8.385614914579501e-06, "loss": 0.715, "step": 13033 }, { "epoch": 14.845584045584046, "grad_norm": 0.22976943850517273, "learning_rate": 8.382133764179258e-06, "loss": 0.6337, "step": 13034 }, { "epoch": 14.846723646723646, "grad_norm": 0.22685664892196655, "learning_rate": 8.378653190963203e-06, "loss": 0.8493, "step": 13035 }, { "epoch": 14.847863247863248, "grad_norm": 0.21823418140411377, "learning_rate": 8.37517319505223e-06, "loss": 0.598, "step": 13036 }, { "epoch": 14.849002849002849, "grad_norm": 0.18279263377189636, "learning_rate": 8.37169377656721e-06, "loss": 0.7885, "step": 13037 }, { "epoch": 14.85014245014245, "grad_norm": 0.17645741999149323, "learning_rate": 8.368214935628996e-06, "loss": 0.6113, "step": 13038 }, { "epoch": 14.851282051282052, "grad_norm": 0.20534776151180267, "learning_rate": 8.36473667235841e-06, "loss": 0.5682, "step": 13039 }, { "epoch": 14.852421652421652, "grad_norm": 0.21249881386756897, "learning_rate": 8.361258986876272e-06, "loss": 0.6571, "step": 13040 }, { "epoch": 14.853561253561253, "grad_norm": 0.2308804839849472, "learning_rate": 8.357781879303372e-06, "loss": 0.6684, "step": 13041 }, { "epoch": 14.854700854700855, "grad_norm": 0.20016048848628998, "learning_rate": 8.354305349760466e-06, "loss": 0.7256, "step": 13042 }, { "epoch": 14.855840455840456, "grad_norm": 0.19493573904037476, "learning_rate": 8.35082939836831e-06, "loss": 0.8461, "step": 13043 }, { "epoch": 14.856980056980056, "grad_norm": 0.19311264157295227, "learning_rate": 8.347354025247633e-06, "loss": 0.6911, "step": 13044 }, { "epoch": 14.858119658119659, "grad_norm": 0.2238873988389969, "learning_rate": 8.343879230519158e-06, "loss": 0.4791, "step": 13045 }, { "epoch": 14.85925925925926, "grad_norm": 0.1901557743549347, "learning_rate": 8.340405014303552e-06, "loss": 0.5558, "step": 13046 }, { "epoch": 14.86039886039886, "grad_norm": 0.20459187030792236, "learning_rate": 8.336931376721491e-06, "loss": 0.6601, "step": 13047 }, { "epoch": 14.861538461538462, "grad_norm": 0.21996235847473145, "learning_rate": 8.333458317893631e-06, "loss": 0.5077, "step": 13048 }, { "epoch": 14.862678062678063, "grad_norm": 0.18952824175357819, "learning_rate": 8.329985837940593e-06, "loss": 0.5322, "step": 13049 }, { "epoch": 14.863817663817663, "grad_norm": 0.1865224987268448, "learning_rate": 8.32651393698299e-06, "loss": 0.6469, "step": 13050 }, { "epoch": 14.864957264957265, "grad_norm": 0.18879809975624084, "learning_rate": 8.323042615141413e-06, "loss": 0.731, "step": 13051 }, { "epoch": 14.866096866096866, "grad_norm": 0.18550975620746613, "learning_rate": 8.319571872536437e-06, "loss": 0.5353, "step": 13052 }, { "epoch": 14.867236467236467, "grad_norm": 0.17426526546478271, "learning_rate": 8.316101709288588e-06, "loss": 0.8389, "step": 13053 }, { "epoch": 14.868376068376069, "grad_norm": 0.18053342401981354, "learning_rate": 8.31263212551841e-06, "loss": 0.8319, "step": 13054 }, { "epoch": 14.86951566951567, "grad_norm": 0.23310673236846924, "learning_rate": 8.309163121346408e-06, "loss": 0.5109, "step": 13055 }, { "epoch": 14.87065527065527, "grad_norm": 0.16054700314998627, "learning_rate": 8.305694696893082e-06, "loss": 0.7403, "step": 13056 }, { "epoch": 14.871794871794872, "grad_norm": 0.20461927354335785, "learning_rate": 8.302226852278877e-06, "loss": 0.7519, "step": 13057 }, { "epoch": 14.872934472934473, "grad_norm": 0.2121797502040863, "learning_rate": 8.298759587624256e-06, "loss": 0.7163, "step": 13058 }, { "epoch": 14.874074074074073, "grad_norm": 0.2598489224910736, "learning_rate": 8.29529290304964e-06, "loss": 0.6562, "step": 13059 }, { "epoch": 14.875213675213676, "grad_norm": 0.22083567082881927, "learning_rate": 8.29182679867544e-06, "loss": 0.6853, "step": 13060 }, { "epoch": 14.876353276353276, "grad_norm": 0.19808152318000793, "learning_rate": 8.288361274622042e-06, "loss": 0.6148, "step": 13061 }, { "epoch": 14.877492877492877, "grad_norm": 0.19256912171840668, "learning_rate": 8.284896331009817e-06, "loss": 0.5713, "step": 13062 }, { "epoch": 14.878632478632479, "grad_norm": 0.18643289804458618, "learning_rate": 8.281431967959108e-06, "loss": 0.8897, "step": 13063 }, { "epoch": 14.87977207977208, "grad_norm": 0.2128099799156189, "learning_rate": 8.277968185590252e-06, "loss": 0.7361, "step": 13064 }, { "epoch": 14.88091168091168, "grad_norm": 0.18543875217437744, "learning_rate": 8.274504984023534e-06, "loss": 0.7431, "step": 13065 }, { "epoch": 14.882051282051282, "grad_norm": 0.2094191014766693, "learning_rate": 8.271042363379258e-06, "loss": 0.6611, "step": 13066 }, { "epoch": 14.883190883190883, "grad_norm": 0.22779624164104462, "learning_rate": 8.267580323777686e-06, "loss": 0.6232, "step": 13067 }, { "epoch": 14.884330484330484, "grad_norm": 0.1786550134420395, "learning_rate": 8.264118865339068e-06, "loss": 0.7023, "step": 13068 }, { "epoch": 14.885470085470086, "grad_norm": 0.20286069810390472, "learning_rate": 8.26065798818362e-06, "loss": 0.7334, "step": 13069 }, { "epoch": 14.886609686609686, "grad_norm": 0.23107458651065826, "learning_rate": 8.257197692431553e-06, "loss": 0.5261, "step": 13070 }, { "epoch": 14.887749287749287, "grad_norm": 0.2501099705696106, "learning_rate": 8.253737978203051e-06, "loss": 0.6518, "step": 13071 }, { "epoch": 14.88888888888889, "grad_norm": 0.26554644107818604, "learning_rate": 8.250278845618283e-06, "loss": 0.5686, "step": 13072 }, { "epoch": 14.89002849002849, "grad_norm": 0.20167513191699982, "learning_rate": 8.246820294797395e-06, "loss": 0.7667, "step": 13073 }, { "epoch": 14.89116809116809, "grad_norm": 0.2177019864320755, "learning_rate": 8.243362325860508e-06, "loss": 0.7147, "step": 13074 }, { "epoch": 14.892307692307693, "grad_norm": 0.19998271763324738, "learning_rate": 8.239904938927736e-06, "loss": 0.8471, "step": 13075 }, { "epoch": 14.893447293447293, "grad_norm": 0.23724326491355896, "learning_rate": 8.236448134119149e-06, "loss": 0.6115, "step": 13076 }, { "epoch": 14.894586894586894, "grad_norm": 0.19616632163524628, "learning_rate": 8.232991911554816e-06, "loss": 0.6234, "step": 13077 }, { "epoch": 14.895726495726496, "grad_norm": 0.2279236912727356, "learning_rate": 8.229536271354785e-06, "loss": 0.6154, "step": 13078 }, { "epoch": 14.896866096866097, "grad_norm": 0.2267291247844696, "learning_rate": 8.226081213639084e-06, "loss": 0.8262, "step": 13079 }, { "epoch": 14.898005698005697, "grad_norm": 0.1714223027229309, "learning_rate": 8.222626738527706e-06, "loss": 0.5915, "step": 13080 }, { "epoch": 14.8991452991453, "grad_norm": 0.1930173635482788, "learning_rate": 8.219172846140638e-06, "loss": 0.5572, "step": 13081 }, { "epoch": 14.9002849002849, "grad_norm": 0.25991231203079224, "learning_rate": 8.215719536597844e-06, "loss": 0.6455, "step": 13082 }, { "epoch": 14.9014245014245, "grad_norm": 0.18016129732131958, "learning_rate": 8.212266810019267e-06, "loss": 0.6232, "step": 13083 }, { "epoch": 14.902564102564103, "grad_norm": 0.22624294459819794, "learning_rate": 8.20881466652483e-06, "loss": 0.6423, "step": 13084 }, { "epoch": 14.903703703703703, "grad_norm": 0.19937890768051147, "learning_rate": 8.205363106234437e-06, "loss": 0.7158, "step": 13085 }, { "epoch": 14.904843304843304, "grad_norm": 0.26309749484062195, "learning_rate": 8.201912129267978e-06, "loss": 0.5991, "step": 13086 }, { "epoch": 14.905982905982906, "grad_norm": 0.23269060254096985, "learning_rate": 8.198461735745294e-06, "loss": 0.7014, "step": 13087 }, { "epoch": 14.907122507122507, "grad_norm": 0.19453437626361847, "learning_rate": 8.195011925786243e-06, "loss": 0.8058, "step": 13088 }, { "epoch": 14.908262108262107, "grad_norm": 0.2564391493797302, "learning_rate": 8.191562699510639e-06, "loss": 0.537, "step": 13089 }, { "epoch": 14.90940170940171, "grad_norm": 0.19253656268119812, "learning_rate": 8.188114057038293e-06, "loss": 0.5394, "step": 13090 }, { "epoch": 14.91054131054131, "grad_norm": 0.17925649881362915, "learning_rate": 8.184665998488971e-06, "loss": 0.817, "step": 13091 }, { "epoch": 14.91168091168091, "grad_norm": 0.23220553994178772, "learning_rate": 8.181218523982443e-06, "loss": 0.6122, "step": 13092 }, { "epoch": 14.912820512820513, "grad_norm": 0.2346821129322052, "learning_rate": 8.177771633638445e-06, "loss": 0.6432, "step": 13093 }, { "epoch": 14.913960113960114, "grad_norm": 0.2243751585483551, "learning_rate": 8.174325327576701e-06, "loss": 0.6897, "step": 13094 }, { "epoch": 14.915099715099714, "grad_norm": 0.1665022224187851, "learning_rate": 8.170879605916909e-06, "loss": 0.9448, "step": 13095 }, { "epoch": 14.916239316239317, "grad_norm": 0.21701228618621826, "learning_rate": 8.167434468778749e-06, "loss": 0.59, "step": 13096 }, { "epoch": 14.917378917378917, "grad_norm": 0.20046526193618774, "learning_rate": 8.163989916281885e-06, "loss": 0.5072, "step": 13097 }, { "epoch": 14.918518518518518, "grad_norm": 0.2463109791278839, "learning_rate": 8.160545948545945e-06, "loss": 0.4702, "step": 13098 }, { "epoch": 14.91965811965812, "grad_norm": 0.19561436772346497, "learning_rate": 8.157102565690553e-06, "loss": 0.5023, "step": 13099 }, { "epoch": 14.92079772079772, "grad_norm": 0.21838906407356262, "learning_rate": 8.153659767835306e-06, "loss": 0.6128, "step": 13100 }, { "epoch": 14.921937321937321, "grad_norm": 0.2385895699262619, "learning_rate": 8.15021755509979e-06, "loss": 0.6217, "step": 13101 }, { "epoch": 14.923076923076923, "grad_norm": 0.1635356992483139, "learning_rate": 8.146775927603551e-06, "loss": 0.7619, "step": 13102 }, { "epoch": 14.924216524216524, "grad_norm": 0.20667387545108795, "learning_rate": 8.143334885466127e-06, "loss": 0.7719, "step": 13103 }, { "epoch": 14.925356125356124, "grad_norm": 0.19133946299552917, "learning_rate": 8.139894428807033e-06, "loss": 0.7622, "step": 13104 }, { "epoch": 14.926495726495727, "grad_norm": 0.5747365355491638, "learning_rate": 8.136454557745785e-06, "loss": 0.8878, "step": 13105 }, { "epoch": 14.927635327635327, "grad_norm": 0.24037352204322815, "learning_rate": 8.133015272401836e-06, "loss": 0.5662, "step": 13106 }, { "epoch": 14.928774928774928, "grad_norm": 0.2153923362493515, "learning_rate": 8.129576572894654e-06, "loss": 0.644, "step": 13107 }, { "epoch": 14.92991452991453, "grad_norm": 0.23597195744514465, "learning_rate": 8.126138459343669e-06, "loss": 0.6726, "step": 13108 }, { "epoch": 14.93105413105413, "grad_norm": 0.23389115929603577, "learning_rate": 8.122700931868307e-06, "loss": 0.7252, "step": 13109 }, { "epoch": 14.932193732193731, "grad_norm": 0.18410557508468628, "learning_rate": 8.11926399058795e-06, "loss": 0.8647, "step": 13110 }, { "epoch": 14.933333333333334, "grad_norm": 0.16643090546131134, "learning_rate": 8.115827635621972e-06, "loss": 0.6601, "step": 13111 }, { "epoch": 14.934472934472934, "grad_norm": 0.25036999583244324, "learning_rate": 8.112391867089733e-06, "loss": 0.7001, "step": 13112 }, { "epoch": 14.935612535612536, "grad_norm": 0.24862360954284668, "learning_rate": 8.108956685110574e-06, "loss": 0.5775, "step": 13113 }, { "epoch": 14.936752136752137, "grad_norm": 0.22571787238121033, "learning_rate": 8.105522089803794e-06, "loss": 0.8268, "step": 13114 }, { "epoch": 14.937891737891738, "grad_norm": 0.1795872151851654, "learning_rate": 8.10208808128868e-06, "loss": 0.884, "step": 13115 }, { "epoch": 14.93903133903134, "grad_norm": 0.24070943892002106, "learning_rate": 8.098654659684538e-06, "loss": 0.7474, "step": 13116 }, { "epoch": 14.94017094017094, "grad_norm": 0.3789416551589966, "learning_rate": 8.095221825110585e-06, "loss": 0.7326, "step": 13117 }, { "epoch": 14.941310541310541, "grad_norm": 0.3040759563446045, "learning_rate": 8.091789577686068e-06, "loss": 0.36, "step": 13118 }, { "epoch": 14.942450142450143, "grad_norm": 0.20012761652469635, "learning_rate": 8.088357917530198e-06, "loss": 0.5821, "step": 13119 }, { "epoch": 14.943589743589744, "grad_norm": 0.2004968374967575, "learning_rate": 8.084926844762173e-06, "loss": 0.7867, "step": 13120 }, { "epoch": 14.944729344729344, "grad_norm": 0.24345803260803223, "learning_rate": 8.081496359501145e-06, "loss": 0.6358, "step": 13121 }, { "epoch": 14.945868945868947, "grad_norm": 0.22192974388599396, "learning_rate": 8.078066461866277e-06, "loss": 0.6239, "step": 13122 }, { "epoch": 14.947008547008547, "grad_norm": 0.19137127697467804, "learning_rate": 8.074637151976695e-06, "loss": 0.7225, "step": 13123 }, { "epoch": 14.948148148148148, "grad_norm": 0.20317646861076355, "learning_rate": 8.071208429951515e-06, "loss": 0.7814, "step": 13124 }, { "epoch": 14.94928774928775, "grad_norm": 0.21901699900627136, "learning_rate": 8.067780295909816e-06, "loss": 0.5427, "step": 13125 }, { "epoch": 14.95042735042735, "grad_norm": 0.21417075395584106, "learning_rate": 8.064352749970663e-06, "loss": 0.7064, "step": 13126 }, { "epoch": 14.951566951566951, "grad_norm": 0.1974361389875412, "learning_rate": 8.060925792253127e-06, "loss": 0.9247, "step": 13127 }, { "epoch": 14.952706552706553, "grad_norm": 0.20048248767852783, "learning_rate": 8.057499422876214e-06, "loss": 0.6776, "step": 13128 }, { "epoch": 14.953846153846154, "grad_norm": 0.2453715205192566, "learning_rate": 8.054073641958936e-06, "loss": 0.5454, "step": 13129 }, { "epoch": 14.954985754985755, "grad_norm": 0.21314768493175507, "learning_rate": 8.050648449620284e-06, "loss": 0.6296, "step": 13130 }, { "epoch": 14.956125356125357, "grad_norm": 0.21752789616584778, "learning_rate": 8.04722384597923e-06, "loss": 0.6996, "step": 13131 }, { "epoch": 14.957264957264957, "grad_norm": 0.17562344670295715, "learning_rate": 8.043799831154703e-06, "loss": 0.7061, "step": 13132 }, { "epoch": 14.958404558404558, "grad_norm": 0.21547521650791168, "learning_rate": 8.040376405265637e-06, "loss": 0.5766, "step": 13133 }, { "epoch": 14.95954415954416, "grad_norm": 0.18879757821559906, "learning_rate": 8.03695356843094e-06, "loss": 0.7277, "step": 13134 }, { "epoch": 14.96068376068376, "grad_norm": 0.16375558078289032, "learning_rate": 8.033531320769502e-06, "loss": 0.5537, "step": 13135 }, { "epoch": 14.961823361823361, "grad_norm": 0.2281581610441208, "learning_rate": 8.03010966240017e-06, "loss": 0.3687, "step": 13136 }, { "epoch": 14.962962962962964, "grad_norm": 0.19088397920131683, "learning_rate": 8.026688593441789e-06, "loss": 0.9433, "step": 13137 }, { "epoch": 14.964102564102564, "grad_norm": 0.2279069423675537, "learning_rate": 8.023268114013205e-06, "loss": 0.6148, "step": 13138 }, { "epoch": 14.965242165242165, "grad_norm": 0.2353031039237976, "learning_rate": 8.019848224233198e-06, "loss": 0.4261, "step": 13139 }, { "epoch": 14.966381766381767, "grad_norm": 0.21128053963184357, "learning_rate": 8.01642892422056e-06, "loss": 0.6382, "step": 13140 }, { "epoch": 14.967521367521368, "grad_norm": 0.1791439950466156, "learning_rate": 8.013010214094047e-06, "loss": 0.716, "step": 13141 }, { "epoch": 14.968660968660968, "grad_norm": 0.2184140533208847, "learning_rate": 8.009592093972407e-06, "loss": 0.5625, "step": 13142 }, { "epoch": 14.96980056980057, "grad_norm": 0.2042216658592224, "learning_rate": 8.006174563974364e-06, "loss": 0.5908, "step": 13143 }, { "epoch": 14.970940170940171, "grad_norm": 0.18341132998466492, "learning_rate": 8.002757624218601e-06, "loss": 0.7578, "step": 13144 }, { "epoch": 14.972079772079772, "grad_norm": 0.1882096379995346, "learning_rate": 7.999341274823813e-06, "loss": 0.7151, "step": 13145 }, { "epoch": 14.973219373219374, "grad_norm": 0.22262947261333466, "learning_rate": 7.99592551590865e-06, "loss": 0.7037, "step": 13146 }, { "epoch": 14.974358974358974, "grad_norm": 0.16954749822616577, "learning_rate": 7.992510347591767e-06, "loss": 0.8455, "step": 13147 }, { "epoch": 14.975498575498575, "grad_norm": 0.19645527005195618, "learning_rate": 7.989095769991752e-06, "loss": 0.5582, "step": 13148 }, { "epoch": 14.976638176638177, "grad_norm": 0.21293814480304718, "learning_rate": 7.985681783227231e-06, "loss": 0.816, "step": 13149 }, { "epoch": 14.977777777777778, "grad_norm": 0.20369470119476318, "learning_rate": 7.982268387416777e-06, "loss": 0.6565, "step": 13150 }, { "epoch": 14.978917378917378, "grad_norm": 0.21613477170467377, "learning_rate": 7.978855582678934e-06, "loss": 0.8284, "step": 13151 }, { "epoch": 14.98005698005698, "grad_norm": 0.1987038254737854, "learning_rate": 7.975443369132246e-06, "loss": 0.6989, "step": 13152 }, { "epoch": 14.981196581196581, "grad_norm": 0.2501388490200043, "learning_rate": 7.972031746895226e-06, "loss": 0.3936, "step": 13153 }, { "epoch": 14.982336182336182, "grad_norm": 0.2502672076225281, "learning_rate": 7.968620716086378e-06, "loss": 0.64, "step": 13154 }, { "epoch": 14.983475783475784, "grad_norm": 0.21978996694087982, "learning_rate": 7.965210276824162e-06, "loss": 0.6249, "step": 13155 }, { "epoch": 14.984615384615385, "grad_norm": 0.22645944356918335, "learning_rate": 7.96180042922704e-06, "loss": 0.7684, "step": 13156 }, { "epoch": 14.985754985754985, "grad_norm": 0.170969158411026, "learning_rate": 7.958391173413444e-06, "loss": 0.9267, "step": 13157 }, { "epoch": 14.986894586894588, "grad_norm": 0.24624089896678925, "learning_rate": 7.954982509501793e-06, "loss": 0.5439, "step": 13158 }, { "epoch": 14.988034188034188, "grad_norm": 0.16725294291973114, "learning_rate": 7.951574437610459e-06, "loss": 0.6938, "step": 13159 }, { "epoch": 14.989173789173789, "grad_norm": 0.20970317721366882, "learning_rate": 7.948166957857837e-06, "loss": 0.6518, "step": 13160 }, { "epoch": 14.990313390313391, "grad_norm": 0.1861695647239685, "learning_rate": 7.944760070362276e-06, "loss": 0.8329, "step": 13161 }, { "epoch": 14.991452991452991, "grad_norm": 0.22031596302986145, "learning_rate": 7.941353775242092e-06, "loss": 0.8112, "step": 13162 }, { "epoch": 14.992592592592592, "grad_norm": 0.2391941398382187, "learning_rate": 7.937948072615603e-06, "loss": 0.3122, "step": 13163 }, { "epoch": 14.993732193732194, "grad_norm": 0.2180904746055603, "learning_rate": 7.934542962601096e-06, "loss": 0.6465, "step": 13164 }, { "epoch": 14.994871794871795, "grad_norm": 0.19434095919132233, "learning_rate": 7.931138445316855e-06, "loss": 0.5411, "step": 13165 }, { "epoch": 14.996011396011395, "grad_norm": 0.1636805236339569, "learning_rate": 7.927734520881103e-06, "loss": 0.7147, "step": 13166 }, { "epoch": 14.997150997150998, "grad_norm": 0.20240598917007446, "learning_rate": 7.924331189412082e-06, "loss": 0.5211, "step": 13167 }, { "epoch": 14.998290598290598, "grad_norm": 0.22989417612552643, "learning_rate": 7.920928451027996e-06, "loss": 0.4583, "step": 13168 }, { "epoch": 14.999430199430199, "grad_norm": 0.21044647693634033, "learning_rate": 7.917526305847042e-06, "loss": 0.6731, "step": 13169 }, { "epoch": 15.0, "grad_norm": 0.4262370467185974, "learning_rate": 7.914124753987367e-06, "loss": 0.5881, "step": 13170 }, { "epoch": 15.0011396011396, "grad_norm": 0.17920657992362976, "learning_rate": 7.910723795567119e-06, "loss": 0.8095, "step": 13171 }, { "epoch": 15.002279202279203, "grad_norm": 0.17844052612781525, "learning_rate": 7.907323430704444e-06, "loss": 0.5271, "step": 13172 }, { "epoch": 15.003418803418803, "grad_norm": 0.21409167349338531, "learning_rate": 7.903923659517424e-06, "loss": 0.6292, "step": 13173 }, { "epoch": 15.004558404558404, "grad_norm": 0.2429785132408142, "learning_rate": 7.90052448212415e-06, "loss": 0.5617, "step": 13174 }, { "epoch": 15.005698005698006, "grad_norm": 0.2211749106645584, "learning_rate": 7.897125898642683e-06, "loss": 0.4085, "step": 13175 }, { "epoch": 15.006837606837607, "grad_norm": 0.17734220623970032, "learning_rate": 7.893727909191075e-06, "loss": 0.6562, "step": 13176 }, { "epoch": 15.007977207977207, "grad_norm": 0.2056671530008316, "learning_rate": 7.890330513887332e-06, "loss": 0.6359, "step": 13177 }, { "epoch": 15.00911680911681, "grad_norm": 0.174454003572464, "learning_rate": 7.886933712849462e-06, "loss": 0.8016, "step": 13178 }, { "epoch": 15.01025641025641, "grad_norm": 0.2390289008617401, "learning_rate": 7.883537506195446e-06, "loss": 0.495, "step": 13179 }, { "epoch": 15.01139601139601, "grad_norm": 0.23715642094612122, "learning_rate": 7.88014189404325e-06, "loss": 0.555, "step": 13180 }, { "epoch": 15.012535612535613, "grad_norm": 0.1833748072385788, "learning_rate": 7.876746876510796e-06, "loss": 0.7993, "step": 13181 }, { "epoch": 15.013675213675214, "grad_norm": 0.17663298547267914, "learning_rate": 7.873352453716007e-06, "loss": 0.9667, "step": 13182 }, { "epoch": 15.014814814814814, "grad_norm": 0.19699348509311676, "learning_rate": 7.86995862577679e-06, "loss": 0.7184, "step": 13183 }, { "epoch": 15.015954415954416, "grad_norm": 0.20095711946487427, "learning_rate": 7.866565392811029e-06, "loss": 0.7508, "step": 13184 }, { "epoch": 15.017094017094017, "grad_norm": 0.17907115817070007, "learning_rate": 7.863172754936562e-06, "loss": 0.6666, "step": 13185 }, { "epoch": 15.018233618233618, "grad_norm": 0.19085030257701874, "learning_rate": 7.859780712271228e-06, "loss": 0.6368, "step": 13186 }, { "epoch": 15.01937321937322, "grad_norm": 0.20784366130828857, "learning_rate": 7.856389264932847e-06, "loss": 0.5504, "step": 13187 }, { "epoch": 15.02051282051282, "grad_norm": 0.1735832691192627, "learning_rate": 7.852998413039222e-06, "loss": 0.6713, "step": 13188 }, { "epoch": 15.021652421652421, "grad_norm": 0.1971295177936554, "learning_rate": 7.849608156708107e-06, "loss": 0.5978, "step": 13189 }, { "epoch": 15.022792022792023, "grad_norm": 0.17829209566116333, "learning_rate": 7.846218496057265e-06, "loss": 0.6386, "step": 13190 }, { "epoch": 15.023931623931624, "grad_norm": 0.19698618352413177, "learning_rate": 7.842829431204426e-06, "loss": 0.6467, "step": 13191 }, { "epoch": 15.025071225071224, "grad_norm": 0.2385060340166092, "learning_rate": 7.839440962267313e-06, "loss": 0.4415, "step": 13192 }, { "epoch": 15.026210826210827, "grad_norm": 0.19427725672721863, "learning_rate": 7.83605308936359e-06, "loss": 0.6364, "step": 13193 }, { "epoch": 15.027350427350427, "grad_norm": 0.18198898434638977, "learning_rate": 7.832665812610954e-06, "loss": 0.5802, "step": 13194 }, { "epoch": 15.028490028490028, "grad_norm": 0.21146386861801147, "learning_rate": 7.829279132127052e-06, "loss": 0.6762, "step": 13195 }, { "epoch": 15.02962962962963, "grad_norm": 0.22080320119857788, "learning_rate": 7.825893048029497e-06, "loss": 0.7275, "step": 13196 }, { "epoch": 15.03076923076923, "grad_norm": 0.1955811232328415, "learning_rate": 7.822507560435905e-06, "loss": 0.6227, "step": 13197 }, { "epoch": 15.031908831908831, "grad_norm": 0.14774277806282043, "learning_rate": 7.819122669463865e-06, "loss": 0.6414, "step": 13198 }, { "epoch": 15.033048433048434, "grad_norm": 0.17973187565803528, "learning_rate": 7.81573837523095e-06, "loss": 0.5757, "step": 13199 }, { "epoch": 15.034188034188034, "grad_norm": 0.23686793446540833, "learning_rate": 7.812354677854688e-06, "loss": 0.4224, "step": 13200 }, { "epoch": 15.035327635327635, "grad_norm": 0.20608681440353394, "learning_rate": 7.808971577452618e-06, "loss": 0.7248, "step": 13201 }, { "epoch": 15.036467236467237, "grad_norm": 0.35443320870399475, "learning_rate": 7.805589074142236e-06, "loss": 0.7227, "step": 13202 }, { "epoch": 15.037606837606837, "grad_norm": 0.2172495424747467, "learning_rate": 7.802207168041042e-06, "loss": 0.8817, "step": 13203 }, { "epoch": 15.038746438746438, "grad_norm": 0.1786259412765503, "learning_rate": 7.79882585926647e-06, "loss": 0.7133, "step": 13204 }, { "epoch": 15.03988603988604, "grad_norm": 0.19469647109508514, "learning_rate": 7.795445147935986e-06, "loss": 0.5264, "step": 13205 }, { "epoch": 15.04102564102564, "grad_norm": 0.22203053534030914, "learning_rate": 7.792065034167014e-06, "loss": 0.6028, "step": 13206 }, { "epoch": 15.042165242165241, "grad_norm": 0.17859232425689697, "learning_rate": 7.788685518076938e-06, "loss": 0.8592, "step": 13207 }, { "epoch": 15.043304843304844, "grad_norm": 0.16376741230487823, "learning_rate": 7.785306599783144e-06, "loss": 0.7242, "step": 13208 }, { "epoch": 15.044444444444444, "grad_norm": 0.21637587249279022, "learning_rate": 7.781928279402992e-06, "loss": 0.6116, "step": 13209 }, { "epoch": 15.045584045584045, "grad_norm": 0.21050210297107697, "learning_rate": 7.778550557053829e-06, "loss": 0.8039, "step": 13210 }, { "epoch": 15.046723646723647, "grad_norm": 0.20630396902561188, "learning_rate": 7.775173432852958e-06, "loss": 0.8097, "step": 13211 }, { "epoch": 15.047863247863248, "grad_norm": 0.28566041588783264, "learning_rate": 7.77179690691768e-06, "loss": 0.3769, "step": 13212 }, { "epoch": 15.049002849002848, "grad_norm": 0.17649933695793152, "learning_rate": 7.768420979365277e-06, "loss": 0.8097, "step": 13213 }, { "epoch": 15.05014245014245, "grad_norm": 0.1777004599571228, "learning_rate": 7.765045650313005e-06, "loss": 0.7521, "step": 13214 }, { "epoch": 15.051282051282051, "grad_norm": 0.20190858840942383, "learning_rate": 7.761670919878077e-06, "loss": 0.7145, "step": 13215 }, { "epoch": 15.052421652421652, "grad_norm": 0.28934812545776367, "learning_rate": 7.758296788177733e-06, "loss": 0.3552, "step": 13216 }, { "epoch": 15.053561253561254, "grad_norm": 0.1863059401512146, "learning_rate": 7.754923255329167e-06, "loss": 0.7675, "step": 13217 }, { "epoch": 15.054700854700855, "grad_norm": 0.2047613114118576, "learning_rate": 7.751550321449527e-06, "loss": 0.7593, "step": 13218 }, { "epoch": 15.055840455840455, "grad_norm": 0.1818694770336151, "learning_rate": 7.748177986655983e-06, "loss": 0.734, "step": 13219 }, { "epoch": 15.056980056980057, "grad_norm": 0.191678985953331, "learning_rate": 7.744806251065657e-06, "loss": 0.6582, "step": 13220 }, { "epoch": 15.058119658119658, "grad_norm": 0.17697349190711975, "learning_rate": 7.741435114795662e-06, "loss": 0.5432, "step": 13221 }, { "epoch": 15.059259259259258, "grad_norm": 0.18738119304180145, "learning_rate": 7.738064577963097e-06, "loss": 0.8128, "step": 13222 }, { "epoch": 15.06039886039886, "grad_norm": 0.17838743329048157, "learning_rate": 7.734694640685009e-06, "loss": 0.8135, "step": 13223 }, { "epoch": 15.061538461538461, "grad_norm": 0.18243756890296936, "learning_rate": 7.731325303078456e-06, "loss": 0.8034, "step": 13224 }, { "epoch": 15.062678062678062, "grad_norm": 0.2645007371902466, "learning_rate": 7.727956565260467e-06, "loss": 0.6103, "step": 13225 }, { "epoch": 15.063817663817664, "grad_norm": 0.168042853474617, "learning_rate": 7.724588427348042e-06, "loss": 0.6509, "step": 13226 }, { "epoch": 15.064957264957265, "grad_norm": 0.21104352176189423, "learning_rate": 7.72122088945817e-06, "loss": 0.6717, "step": 13227 }, { "epoch": 15.066096866096865, "grad_norm": 0.20439642667770386, "learning_rate": 7.717853951707813e-06, "loss": 0.7022, "step": 13228 }, { "epoch": 15.067236467236468, "grad_norm": 0.20428550243377686, "learning_rate": 7.714487614213922e-06, "loss": 0.7572, "step": 13229 }, { "epoch": 15.068376068376068, "grad_norm": 0.21032805740833282, "learning_rate": 7.711121877093405e-06, "loss": 0.6847, "step": 13230 }, { "epoch": 15.069515669515669, "grad_norm": 0.2351704090833664, "learning_rate": 7.707756740463167e-06, "loss": 0.708, "step": 13231 }, { "epoch": 15.070655270655271, "grad_norm": 0.19543321430683136, "learning_rate": 7.704392204440094e-06, "loss": 0.6966, "step": 13232 }, { "epoch": 15.071794871794872, "grad_norm": 0.22169937193393707, "learning_rate": 7.701028269141051e-06, "loss": 0.6904, "step": 13233 }, { "epoch": 15.072934472934472, "grad_norm": 0.20034444332122803, "learning_rate": 7.697664934682859e-06, "loss": 0.5926, "step": 13234 }, { "epoch": 15.074074074074074, "grad_norm": 0.21546687185764313, "learning_rate": 7.694302201182343e-06, "loss": 0.5675, "step": 13235 }, { "epoch": 15.075213675213675, "grad_norm": 0.20143888890743256, "learning_rate": 7.690940068756308e-06, "loss": 0.714, "step": 13236 }, { "epoch": 15.076353276353275, "grad_norm": 0.19900089502334595, "learning_rate": 7.68757853752152e-06, "loss": 0.7343, "step": 13237 }, { "epoch": 15.077492877492878, "grad_norm": 0.23945918679237366, "learning_rate": 7.684217607594741e-06, "loss": 0.5449, "step": 13238 }, { "epoch": 15.078632478632478, "grad_norm": 0.2190757691860199, "learning_rate": 7.680857279092701e-06, "loss": 0.674, "step": 13239 }, { "epoch": 15.079772079772079, "grad_norm": 0.21157409250736237, "learning_rate": 7.677497552132125e-06, "loss": 0.8033, "step": 13240 }, { "epoch": 15.080911680911681, "grad_norm": 0.1564355343580246, "learning_rate": 7.674138426829688e-06, "loss": 0.7209, "step": 13241 }, { "epoch": 15.082051282051282, "grad_norm": 0.24544627964496613, "learning_rate": 7.670779903302071e-06, "loss": 0.6977, "step": 13242 }, { "epoch": 15.083190883190884, "grad_norm": 0.18262211978435516, "learning_rate": 7.667421981665923e-06, "loss": 0.924, "step": 13243 }, { "epoch": 15.084330484330485, "grad_norm": 0.23551349341869354, "learning_rate": 7.664064662037882e-06, "loss": 0.4392, "step": 13244 }, { "epoch": 15.085470085470085, "grad_norm": 0.21138112246990204, "learning_rate": 7.660707944534539e-06, "loss": 0.4933, "step": 13245 }, { "epoch": 15.086609686609687, "grad_norm": 0.18942241370677948, "learning_rate": 7.657351829272496e-06, "loss": 0.6874, "step": 13246 }, { "epoch": 15.087749287749288, "grad_norm": 0.16392655670642853, "learning_rate": 7.653996316368314e-06, "loss": 0.865, "step": 13247 }, { "epoch": 15.088888888888889, "grad_norm": 0.16634123027324677, "learning_rate": 7.65064140593854e-06, "loss": 0.7992, "step": 13248 }, { "epoch": 15.090028490028491, "grad_norm": 0.19082193076610565, "learning_rate": 7.647287098099707e-06, "loss": 0.407, "step": 13249 }, { "epoch": 15.091168091168091, "grad_norm": 0.19843685626983643, "learning_rate": 7.643933392968308e-06, "loss": 0.6434, "step": 13250 }, { "epoch": 15.092307692307692, "grad_norm": 0.21986281871795654, "learning_rate": 7.64058029066084e-06, "loss": 0.6239, "step": 13251 }, { "epoch": 15.093447293447294, "grad_norm": 0.24562717974185944, "learning_rate": 7.637227791293752e-06, "loss": 0.5732, "step": 13252 }, { "epoch": 15.094586894586895, "grad_norm": 0.2094041258096695, "learning_rate": 7.633875894983489e-06, "loss": 0.6639, "step": 13253 }, { "epoch": 15.095726495726495, "grad_norm": 0.24959199130535126, "learning_rate": 7.630524601846472e-06, "loss": 0.3085, "step": 13254 }, { "epoch": 15.096866096866098, "grad_norm": 0.18908515572547913, "learning_rate": 7.627173911999111e-06, "loss": 0.7597, "step": 13255 }, { "epoch": 15.098005698005698, "grad_norm": 0.21889039874076843, "learning_rate": 7.623823825557766e-06, "loss": 0.3345, "step": 13256 }, { "epoch": 15.099145299145299, "grad_norm": 0.21106140315532684, "learning_rate": 7.620474342638806e-06, "loss": 0.6758, "step": 13257 }, { "epoch": 15.100284900284901, "grad_norm": 0.23992294073104858, "learning_rate": 7.617125463358565e-06, "loss": 0.7788, "step": 13258 }, { "epoch": 15.101424501424502, "grad_norm": 0.2021246999502182, "learning_rate": 7.613777187833363e-06, "loss": 0.733, "step": 13259 }, { "epoch": 15.102564102564102, "grad_norm": 0.24441534280776978, "learning_rate": 7.610429516179488e-06, "loss": 0.641, "step": 13260 }, { "epoch": 15.103703703703705, "grad_norm": 0.194869726896286, "learning_rate": 7.607082448513217e-06, "loss": 0.7236, "step": 13261 }, { "epoch": 15.104843304843305, "grad_norm": 0.23420466482639313, "learning_rate": 7.603735984950805e-06, "loss": 0.7169, "step": 13262 }, { "epoch": 15.105982905982906, "grad_norm": 0.1979341059923172, "learning_rate": 7.60039012560849e-06, "loss": 0.7052, "step": 13263 }, { "epoch": 15.107122507122508, "grad_norm": 0.2143956869840622, "learning_rate": 7.5970448706024655e-06, "loss": 0.579, "step": 13264 }, { "epoch": 15.108262108262108, "grad_norm": 0.16880324482917786, "learning_rate": 7.5937002200489324e-06, "loss": 0.6357, "step": 13265 }, { "epoch": 15.109401709401709, "grad_norm": 0.1926845759153366, "learning_rate": 7.590356174064059e-06, "loss": 0.7423, "step": 13266 }, { "epoch": 15.110541310541311, "grad_norm": 0.21125121414661407, "learning_rate": 7.587012732763999e-06, "loss": 0.5724, "step": 13267 }, { "epoch": 15.111680911680912, "grad_norm": 0.23667533695697784, "learning_rate": 7.583669896264864e-06, "loss": 0.8051, "step": 13268 }, { "epoch": 15.112820512820512, "grad_norm": 0.17306900024414062, "learning_rate": 7.580327664682768e-06, "loss": 0.9573, "step": 13269 }, { "epoch": 15.113960113960115, "grad_norm": 0.20057110488414764, "learning_rate": 7.576986038133799e-06, "loss": 0.4748, "step": 13270 }, { "epoch": 15.115099715099715, "grad_norm": 0.17408953607082367, "learning_rate": 7.573645016734018e-06, "loss": 0.5414, "step": 13271 }, { "epoch": 15.116239316239316, "grad_norm": 0.2110489159822464, "learning_rate": 7.570304600599468e-06, "loss": 0.6668, "step": 13272 }, { "epoch": 15.117378917378918, "grad_norm": 0.1887875348329544, "learning_rate": 7.5669647898461705e-06, "loss": 0.4897, "step": 13273 }, { "epoch": 15.118518518518519, "grad_norm": 0.2168729603290558, "learning_rate": 7.563625584590134e-06, "loss": 0.618, "step": 13274 }, { "epoch": 15.11965811965812, "grad_norm": 0.23232001066207886, "learning_rate": 7.560286984947326e-06, "loss": 0.6055, "step": 13275 }, { "epoch": 15.120797720797722, "grad_norm": 0.1825498789548874, "learning_rate": 7.556948991033708e-06, "loss": 0.5826, "step": 13276 }, { "epoch": 15.121937321937322, "grad_norm": 0.18917182087898254, "learning_rate": 7.5536116029652215e-06, "loss": 0.7733, "step": 13277 }, { "epoch": 15.123076923076923, "grad_norm": 0.20644721388816833, "learning_rate": 7.550274820857789e-06, "loss": 0.7803, "step": 13278 }, { "epoch": 15.124216524216525, "grad_norm": 0.20011848211288452, "learning_rate": 7.546938644827289e-06, "loss": 0.6496, "step": 13279 }, { "epoch": 15.125356125356126, "grad_norm": 0.2278977930545807, "learning_rate": 7.543603074989608e-06, "loss": 0.5138, "step": 13280 }, { "epoch": 15.126495726495726, "grad_norm": 0.18745478987693787, "learning_rate": 7.540268111460599e-06, "loss": 0.5401, "step": 13281 }, { "epoch": 15.127635327635328, "grad_norm": 0.18297061324119568, "learning_rate": 7.53693375435609e-06, "loss": 0.6971, "step": 13282 }, { "epoch": 15.128774928774929, "grad_norm": 0.1973070651292801, "learning_rate": 7.5336000037918965e-06, "loss": 0.5205, "step": 13283 }, { "epoch": 15.12991452991453, "grad_norm": 0.22368676960468292, "learning_rate": 7.5302668598838085e-06, "loss": 0.7489, "step": 13284 }, { "epoch": 15.131054131054132, "grad_norm": 0.2430863231420517, "learning_rate": 7.526934322747603e-06, "loss": 0.7293, "step": 13285 }, { "epoch": 15.132193732193732, "grad_norm": 0.17869368195533752, "learning_rate": 7.5236023924990105e-06, "loss": 0.7587, "step": 13286 }, { "epoch": 15.133333333333333, "grad_norm": 0.21526575088500977, "learning_rate": 7.520271069253765e-06, "loss": 0.6364, "step": 13287 }, { "epoch": 15.134472934472935, "grad_norm": 0.19106706976890564, "learning_rate": 7.516940353127577e-06, "loss": 0.7858, "step": 13288 }, { "epoch": 15.135612535612536, "grad_norm": 0.17700304090976715, "learning_rate": 7.513610244236138e-06, "loss": 0.7265, "step": 13289 }, { "epoch": 15.136752136752136, "grad_norm": 0.18937309086322784, "learning_rate": 7.510280742695092e-06, "loss": 0.6815, "step": 13290 }, { "epoch": 15.137891737891739, "grad_norm": 0.18945249915122986, "learning_rate": 7.506951848620094e-06, "loss": 0.6224, "step": 13291 }, { "epoch": 15.13903133903134, "grad_norm": 0.22184334695339203, "learning_rate": 7.503623562126766e-06, "loss": 0.6988, "step": 13292 }, { "epoch": 15.14017094017094, "grad_norm": 0.2096634954214096, "learning_rate": 7.5002958833307075e-06, "loss": 0.8174, "step": 13293 }, { "epoch": 15.141310541310542, "grad_norm": 0.19907282292842865, "learning_rate": 7.496968812347494e-06, "loss": 0.5364, "step": 13294 }, { "epoch": 15.142450142450143, "grad_norm": 0.24593563377857208, "learning_rate": 7.49364234929269e-06, "loss": 0.649, "step": 13295 }, { "epoch": 15.143589743589743, "grad_norm": 0.22036831080913544, "learning_rate": 7.490316494281835e-06, "loss": 0.6822, "step": 13296 }, { "epoch": 15.144729344729345, "grad_norm": 0.224491149187088, "learning_rate": 7.486991247430433e-06, "loss": 0.6195, "step": 13297 }, { "epoch": 15.145868945868946, "grad_norm": 0.18605081737041473, "learning_rate": 7.483666608853987e-06, "loss": 0.4838, "step": 13298 }, { "epoch": 15.147008547008546, "grad_norm": 0.19752785563468933, "learning_rate": 7.4803425786679705e-06, "loss": 0.6153, "step": 13299 }, { "epoch": 15.148148148148149, "grad_norm": 0.17299829423427582, "learning_rate": 7.477019156987835e-06, "loss": 0.4304, "step": 13300 }, { "epoch": 15.14928774928775, "grad_norm": 0.2041027992963791, "learning_rate": 7.473696343929018e-06, "loss": 0.7166, "step": 13301 }, { "epoch": 15.15042735042735, "grad_norm": 0.22014085948467255, "learning_rate": 7.47037413960692e-06, "loss": 0.4789, "step": 13302 }, { "epoch": 15.151566951566952, "grad_norm": 0.2193816602230072, "learning_rate": 7.4670525441369236e-06, "loss": 0.7827, "step": 13303 }, { "epoch": 15.152706552706553, "grad_norm": 0.21887926757335663, "learning_rate": 7.463731557634426e-06, "loss": 0.5464, "step": 13304 }, { "epoch": 15.153846153846153, "grad_norm": 0.158628910779953, "learning_rate": 7.460411180214749e-06, "loss": 0.836, "step": 13305 }, { "epoch": 15.154985754985756, "grad_norm": 0.24886372685432434, "learning_rate": 7.457091411993225e-06, "loss": 0.6018, "step": 13306 }, { "epoch": 15.156125356125356, "grad_norm": 0.20588864386081696, "learning_rate": 7.453772253085159e-06, "loss": 0.6703, "step": 13307 }, { "epoch": 15.157264957264957, "grad_norm": 0.17586199939250946, "learning_rate": 7.450453703605845e-06, "loss": 0.821, "step": 13308 }, { "epoch": 15.158404558404559, "grad_norm": 0.22378386557102203, "learning_rate": 7.447135763670524e-06, "loss": 0.6723, "step": 13309 }, { "epoch": 15.15954415954416, "grad_norm": 0.2398693561553955, "learning_rate": 7.443818433394451e-06, "loss": 0.6561, "step": 13310 }, { "epoch": 15.16068376068376, "grad_norm": 0.280377060174942, "learning_rate": 7.440501712892842e-06, "loss": 0.6986, "step": 13311 }, { "epoch": 15.161823361823362, "grad_norm": 0.19695614278316498, "learning_rate": 7.437185602280905e-06, "loss": 0.6489, "step": 13312 }, { "epoch": 15.162962962962963, "grad_norm": 0.22838595509529114, "learning_rate": 7.433870101673804e-06, "loss": 0.6227, "step": 13313 }, { "epoch": 15.164102564102564, "grad_norm": 0.21671253442764282, "learning_rate": 7.43055521118669e-06, "loss": 0.551, "step": 13314 }, { "epoch": 15.165242165242166, "grad_norm": 0.16176068782806396, "learning_rate": 7.427240930934729e-06, "loss": 0.8505, "step": 13315 }, { "epoch": 15.166381766381766, "grad_norm": 0.21151845157146454, "learning_rate": 7.423927261033006e-06, "loss": 0.6639, "step": 13316 }, { "epoch": 15.167521367521367, "grad_norm": 0.20331384241580963, "learning_rate": 7.420614201596626e-06, "loss": 0.5958, "step": 13317 }, { "epoch": 15.16866096866097, "grad_norm": 0.2182028889656067, "learning_rate": 7.417301752740655e-06, "loss": 0.777, "step": 13318 }, { "epoch": 15.16980056980057, "grad_norm": 0.23227620124816895, "learning_rate": 7.4139899145801576e-06, "loss": 0.4787, "step": 13319 }, { "epoch": 15.17094017094017, "grad_norm": 0.2299741953611374, "learning_rate": 7.4106786872301434e-06, "loss": 0.7904, "step": 13320 }, { "epoch": 15.172079772079773, "grad_norm": 0.217166006565094, "learning_rate": 7.407368070805628e-06, "loss": 0.6805, "step": 13321 }, { "epoch": 15.173219373219373, "grad_norm": 0.1870729923248291, "learning_rate": 7.404058065421599e-06, "loss": 0.6697, "step": 13322 }, { "epoch": 15.174358974358974, "grad_norm": 0.18819458782672882, "learning_rate": 7.400748671193034e-06, "loss": 0.6807, "step": 13323 }, { "epoch": 15.175498575498576, "grad_norm": 0.2323538362979889, "learning_rate": 7.397439888234853e-06, "loss": 0.5102, "step": 13324 }, { "epoch": 15.176638176638177, "grad_norm": 0.25649964809417725, "learning_rate": 7.394131716661987e-06, "loss": 0.5656, "step": 13325 }, { "epoch": 15.177777777777777, "grad_norm": 0.19201435148715973, "learning_rate": 7.3908241565893575e-06, "loss": 0.6959, "step": 13326 }, { "epoch": 15.17891737891738, "grad_norm": 0.19382207095623016, "learning_rate": 7.387517208131823e-06, "loss": 0.5518, "step": 13327 }, { "epoch": 15.18005698005698, "grad_norm": 0.2733919322490692, "learning_rate": 7.384210871404251e-06, "loss": 0.5729, "step": 13328 }, { "epoch": 15.18119658119658, "grad_norm": 0.24027517437934875, "learning_rate": 7.380905146521477e-06, "loss": 0.4685, "step": 13329 }, { "epoch": 15.182336182336183, "grad_norm": 0.22657380998134613, "learning_rate": 7.377600033598331e-06, "loss": 0.6801, "step": 13330 }, { "epoch": 15.183475783475783, "grad_norm": 0.17790141701698303, "learning_rate": 7.374295532749586e-06, "loss": 0.7821, "step": 13331 }, { "epoch": 15.184615384615384, "grad_norm": 0.22210240364074707, "learning_rate": 7.37099164409003e-06, "loss": 0.7127, "step": 13332 }, { "epoch": 15.185754985754986, "grad_norm": 0.19975833594799042, "learning_rate": 7.367688367734415e-06, "loss": 0.7258, "step": 13333 }, { "epoch": 15.186894586894587, "grad_norm": 0.2221197634935379, "learning_rate": 7.36438570379748e-06, "loss": 0.4842, "step": 13334 }, { "epoch": 15.188034188034187, "grad_norm": 0.15724675357341766, "learning_rate": 7.361083652393919e-06, "loss": 0.7987, "step": 13335 }, { "epoch": 15.18917378917379, "grad_norm": 0.18950201570987701, "learning_rate": 7.3577822136384204e-06, "loss": 0.7515, "step": 13336 }, { "epoch": 15.19031339031339, "grad_norm": 0.19062010943889618, "learning_rate": 7.354481387645673e-06, "loss": 0.5944, "step": 13337 }, { "epoch": 15.19145299145299, "grad_norm": 0.22277235984802246, "learning_rate": 7.351181174530317e-06, "loss": 0.6526, "step": 13338 }, { "epoch": 15.192592592592593, "grad_norm": 0.16412495076656342, "learning_rate": 7.347881574406967e-06, "loss": 0.8125, "step": 13339 }, { "epoch": 15.193732193732194, "grad_norm": 0.19410619139671326, "learning_rate": 7.344582587390234e-06, "loss": 0.7798, "step": 13340 }, { "epoch": 15.194871794871794, "grad_norm": 0.17703565955162048, "learning_rate": 7.341284213594698e-06, "loss": 0.6936, "step": 13341 }, { "epoch": 15.196011396011396, "grad_norm": 0.18267908692359924, "learning_rate": 7.337986453134935e-06, "loss": 0.6342, "step": 13342 }, { "epoch": 15.197150997150997, "grad_norm": 0.20167747139930725, "learning_rate": 7.334689306125461e-06, "loss": 0.7808, "step": 13343 }, { "epoch": 15.198290598290598, "grad_norm": 0.1822354644536972, "learning_rate": 7.331392772680809e-06, "loss": 0.5235, "step": 13344 }, { "epoch": 15.1994301994302, "grad_norm": 0.18686936795711517, "learning_rate": 7.328096852915475e-06, "loss": 0.6758, "step": 13345 }, { "epoch": 15.2005698005698, "grad_norm": 0.19524578750133514, "learning_rate": 7.324801546943944e-06, "loss": 0.4665, "step": 13346 }, { "epoch": 15.201709401709401, "grad_norm": 0.19947238266468048, "learning_rate": 7.321506854880644e-06, "loss": 0.4769, "step": 13347 }, { "epoch": 15.202849002849003, "grad_norm": 0.24945421516895294, "learning_rate": 7.318212776840036e-06, "loss": 0.5923, "step": 13348 }, { "epoch": 15.203988603988604, "grad_norm": 0.17447318136692047, "learning_rate": 7.314919312936533e-06, "loss": 0.5977, "step": 13349 }, { "epoch": 15.205128205128204, "grad_norm": 0.21766497194766998, "learning_rate": 7.311626463284504e-06, "loss": 0.5435, "step": 13350 }, { "epoch": 15.206267806267807, "grad_norm": 0.23319518566131592, "learning_rate": 7.3083342279983365e-06, "loss": 0.5095, "step": 13351 }, { "epoch": 15.207407407407407, "grad_norm": 0.1703135073184967, "learning_rate": 7.305042607192369e-06, "loss": 0.7992, "step": 13352 }, { "epoch": 15.208547008547008, "grad_norm": 0.1810133010149002, "learning_rate": 7.301751600980944e-06, "loss": 0.8067, "step": 13353 }, { "epoch": 15.20968660968661, "grad_norm": 0.22245138883590698, "learning_rate": 7.298461209478349e-06, "loss": 0.608, "step": 13354 }, { "epoch": 15.21082621082621, "grad_norm": 0.1670108139514923, "learning_rate": 7.295171432798872e-06, "loss": 0.8941, "step": 13355 }, { "epoch": 15.211965811965811, "grad_norm": 0.2086310088634491, "learning_rate": 7.291882271056783e-06, "loss": 0.5176, "step": 13356 }, { "epoch": 15.213105413105414, "grad_norm": 0.18529261648654938, "learning_rate": 7.288593724366327e-06, "loss": 0.8927, "step": 13357 }, { "epoch": 15.214245014245014, "grad_norm": 0.1824301779270172, "learning_rate": 7.285305792841701e-06, "loss": 0.7053, "step": 13358 }, { "epoch": 15.215384615384615, "grad_norm": 0.1776217371225357, "learning_rate": 7.28201847659713e-06, "loss": 0.57, "step": 13359 }, { "epoch": 15.216524216524217, "grad_norm": 0.19689984619617462, "learning_rate": 7.278731775746789e-06, "loss": 0.6868, "step": 13360 }, { "epoch": 15.217663817663817, "grad_norm": 0.21592627465724945, "learning_rate": 7.275445690404817e-06, "loss": 0.5144, "step": 13361 }, { "epoch": 15.218803418803418, "grad_norm": 0.2437269538640976, "learning_rate": 7.272160220685362e-06, "loss": 0.5688, "step": 13362 }, { "epoch": 15.21994301994302, "grad_norm": 0.19515958428382874, "learning_rate": 7.268875366702532e-06, "loss": 0.7549, "step": 13363 }, { "epoch": 15.221082621082621, "grad_norm": 0.18477806448936462, "learning_rate": 7.265591128570431e-06, "loss": 0.4052, "step": 13364 }, { "epoch": 15.222222222222221, "grad_norm": 0.17877839505672455, "learning_rate": 7.262307506403113e-06, "loss": 0.8298, "step": 13365 }, { "epoch": 15.223361823361824, "grad_norm": 0.17731106281280518, "learning_rate": 7.259024500314632e-06, "loss": 0.6849, "step": 13366 }, { "epoch": 15.224501424501424, "grad_norm": 0.17548824846744537, "learning_rate": 7.255742110419017e-06, "loss": 0.6393, "step": 13367 }, { "epoch": 15.225641025641025, "grad_norm": 0.21052774786949158, "learning_rate": 7.252460336830286e-06, "loss": 0.7747, "step": 13368 }, { "epoch": 15.226780626780627, "grad_norm": 0.2353142499923706, "learning_rate": 7.249179179662397e-06, "loss": 0.5368, "step": 13369 }, { "epoch": 15.227920227920228, "grad_norm": 0.2377912402153015, "learning_rate": 7.245898639029336e-06, "loss": 0.4894, "step": 13370 }, { "epoch": 15.229059829059828, "grad_norm": 0.20241715013980865, "learning_rate": 7.24261871504505e-06, "loss": 0.6448, "step": 13371 }, { "epoch": 15.23019943019943, "grad_norm": 0.22061561048030853, "learning_rate": 7.239339407823437e-06, "loss": 0.4698, "step": 13372 }, { "epoch": 15.231339031339031, "grad_norm": 0.19990026950836182, "learning_rate": 7.2360607174784124e-06, "loss": 0.7864, "step": 13373 }, { "epoch": 15.232478632478632, "grad_norm": 0.16054971516132355, "learning_rate": 7.232782644123848e-06, "loss": 0.7828, "step": 13374 }, { "epoch": 15.233618233618234, "grad_norm": 0.20575302839279175, "learning_rate": 7.229505187873611e-06, "loss": 0.7156, "step": 13375 }, { "epoch": 15.234757834757835, "grad_norm": 0.27407199144363403, "learning_rate": 7.226228348841518e-06, "loss": 0.5875, "step": 13376 }, { "epoch": 15.235897435897435, "grad_norm": 0.1809110790491104, "learning_rate": 7.222952127141397e-06, "loss": 0.7659, "step": 13377 }, { "epoch": 15.237037037037037, "grad_norm": 0.21950261294841766, "learning_rate": 7.219676522887031e-06, "loss": 0.6556, "step": 13378 }, { "epoch": 15.238176638176638, "grad_norm": 0.19694559276103973, "learning_rate": 7.216401536192199e-06, "loss": 0.7612, "step": 13379 }, { "epoch": 15.239316239316238, "grad_norm": 0.19548237323760986, "learning_rate": 7.213127167170644e-06, "loss": 0.7912, "step": 13380 }, { "epoch": 15.24045584045584, "grad_norm": 0.22181104123592377, "learning_rate": 7.2098534159361e-06, "loss": 0.6947, "step": 13381 }, { "epoch": 15.241595441595441, "grad_norm": 0.19053779542446136, "learning_rate": 7.206580282602266e-06, "loss": 0.7365, "step": 13382 }, { "epoch": 15.242735042735042, "grad_norm": 0.20558376610279083, "learning_rate": 7.2033077672828404e-06, "loss": 0.6269, "step": 13383 }, { "epoch": 15.243874643874644, "grad_norm": 0.18020299077033997, "learning_rate": 7.200035870091471e-06, "loss": 0.656, "step": 13384 }, { "epoch": 15.245014245014245, "grad_norm": 0.21702425181865692, "learning_rate": 7.196764591141802e-06, "loss": 0.5239, "step": 13385 }, { "epoch": 15.246153846153845, "grad_norm": 0.22578953206539154, "learning_rate": 7.193493930547462e-06, "loss": 0.3689, "step": 13386 }, { "epoch": 15.247293447293448, "grad_norm": 0.2162429541349411, "learning_rate": 7.1902238884220516e-06, "loss": 0.7994, "step": 13387 }, { "epoch": 15.248433048433048, "grad_norm": 0.2074805051088333, "learning_rate": 7.186954464879136e-06, "loss": 0.7104, "step": 13388 }, { "epoch": 15.249572649572649, "grad_norm": 0.17333164811134338, "learning_rate": 7.1836856600322775e-06, "loss": 0.698, "step": 13389 }, { "epoch": 15.250712250712251, "grad_norm": 0.193593367934227, "learning_rate": 7.1804174739950105e-06, "loss": 0.4865, "step": 13390 }, { "epoch": 15.251851851851852, "grad_norm": 0.1828693002462387, "learning_rate": 7.177149906880848e-06, "loss": 0.7006, "step": 13391 }, { "epoch": 15.252991452991452, "grad_norm": 0.19954030215740204, "learning_rate": 7.173882958803282e-06, "loss": 0.5624, "step": 13392 }, { "epoch": 15.254131054131054, "grad_norm": 0.19160442054271698, "learning_rate": 7.170616629875784e-06, "loss": 0.7892, "step": 13393 }, { "epoch": 15.255270655270655, "grad_norm": 0.224081888794899, "learning_rate": 7.16735092021181e-06, "loss": 0.6121, "step": 13394 }, { "epoch": 15.256410256410255, "grad_norm": 0.1943773776292801, "learning_rate": 7.1640858299247684e-06, "loss": 0.6284, "step": 13395 }, { "epoch": 15.257549857549858, "grad_norm": 0.1929141879081726, "learning_rate": 7.160821359128078e-06, "loss": 0.8806, "step": 13396 }, { "epoch": 15.258689458689458, "grad_norm": 0.27313852310180664, "learning_rate": 7.157557507935117e-06, "loss": 0.4549, "step": 13397 }, { "epoch": 15.25982905982906, "grad_norm": 0.4554316997528076, "learning_rate": 7.154294276459258e-06, "loss": 0.6778, "step": 13398 }, { "epoch": 15.260968660968661, "grad_norm": 0.19258996844291687, "learning_rate": 7.151031664813829e-06, "loss": 0.7, "step": 13399 }, { "epoch": 15.262108262108262, "grad_norm": 0.24250654876232147, "learning_rate": 7.1477696731121565e-06, "loss": 0.6825, "step": 13400 }, { "epoch": 15.263247863247864, "grad_norm": 0.2219674438238144, "learning_rate": 7.144508301467534e-06, "loss": 0.8407, "step": 13401 }, { "epoch": 15.264387464387465, "grad_norm": 0.2358654886484146, "learning_rate": 7.141247549993241e-06, "loss": 0.5774, "step": 13402 }, { "epoch": 15.265527065527065, "grad_norm": 0.20805026590824127, "learning_rate": 7.137987418802533e-06, "loss": 0.6997, "step": 13403 }, { "epoch": 15.266666666666667, "grad_norm": 0.19631749391555786, "learning_rate": 7.134727908008645e-06, "loss": 0.6452, "step": 13404 }, { "epoch": 15.267806267806268, "grad_norm": 0.19085797667503357, "learning_rate": 7.1314690177247925e-06, "loss": 0.87, "step": 13405 }, { "epoch": 15.268945868945869, "grad_norm": 0.16966448724269867, "learning_rate": 7.128210748064151e-06, "loss": 0.6457, "step": 13406 }, { "epoch": 15.270085470085471, "grad_norm": 0.22518151998519897, "learning_rate": 7.1249530991398974e-06, "loss": 0.6048, "step": 13407 }, { "epoch": 15.271225071225071, "grad_norm": 0.1851891130208969, "learning_rate": 7.1216960710651815e-06, "loss": 0.564, "step": 13408 }, { "epoch": 15.272364672364672, "grad_norm": 0.20710742473602295, "learning_rate": 7.118439663953133e-06, "loss": 0.5152, "step": 13409 }, { "epoch": 15.273504273504274, "grad_norm": 0.1835888773202896, "learning_rate": 7.115183877916842e-06, "loss": 0.5819, "step": 13410 }, { "epoch": 15.274643874643875, "grad_norm": 0.1785074919462204, "learning_rate": 7.111928713069399e-06, "loss": 0.8095, "step": 13411 }, { "epoch": 15.275783475783475, "grad_norm": 0.19669075310230255, "learning_rate": 7.108674169523863e-06, "loss": 0.6795, "step": 13412 }, { "epoch": 15.276923076923078, "grad_norm": 0.2427806556224823, "learning_rate": 7.1054202473932825e-06, "loss": 0.6465, "step": 13413 }, { "epoch": 15.278062678062678, "grad_norm": 0.1938008964061737, "learning_rate": 7.102166946790651e-06, "loss": 0.6594, "step": 13414 }, { "epoch": 15.279202279202279, "grad_norm": 0.21511799097061157, "learning_rate": 7.09891426782899e-06, "loss": 0.7886, "step": 13415 }, { "epoch": 15.280341880341881, "grad_norm": 0.19147200882434845, "learning_rate": 7.095662210621262e-06, "loss": 0.6507, "step": 13416 }, { "epoch": 15.281481481481482, "grad_norm": 0.21382924914360046, "learning_rate": 7.092410775280434e-06, "loss": 0.6097, "step": 13417 }, { "epoch": 15.282621082621082, "grad_norm": 0.20031499862670898, "learning_rate": 7.089159961919414e-06, "loss": 0.5536, "step": 13418 }, { "epoch": 15.283760683760685, "grad_norm": 0.2048831731081009, "learning_rate": 7.085909770651128e-06, "loss": 0.8472, "step": 13419 }, { "epoch": 15.284900284900285, "grad_norm": 0.19666039943695068, "learning_rate": 7.0826602015884566e-06, "loss": 0.7362, "step": 13420 }, { "epoch": 15.286039886039886, "grad_norm": 0.17590700089931488, "learning_rate": 7.079411254844276e-06, "loss": 0.6823, "step": 13421 }, { "epoch": 15.287179487179488, "grad_norm": 0.17183063924312592, "learning_rate": 7.076162930531418e-06, "loss": 0.7005, "step": 13422 }, { "epoch": 15.288319088319088, "grad_norm": 0.2370138019323349, "learning_rate": 7.072915228762714e-06, "loss": 0.5092, "step": 13423 }, { "epoch": 15.289458689458689, "grad_norm": 0.16556316614151, "learning_rate": 7.069668149650962e-06, "loss": 0.6152, "step": 13424 }, { "epoch": 15.290598290598291, "grad_norm": 0.1977030485868454, "learning_rate": 7.066421693308945e-06, "loss": 0.6643, "step": 13425 }, { "epoch": 15.291737891737892, "grad_norm": 0.19474047422409058, "learning_rate": 7.063175859849419e-06, "loss": 0.5826, "step": 13426 }, { "epoch": 15.292877492877492, "grad_norm": 0.1902286559343338, "learning_rate": 7.059930649385122e-06, "loss": 0.622, "step": 13427 }, { "epoch": 15.294017094017095, "grad_norm": 0.252772718667984, "learning_rate": 7.056686062028775e-06, "loss": 0.548, "step": 13428 }, { "epoch": 15.295156695156695, "grad_norm": 0.21062877774238586, "learning_rate": 7.05344209789306e-06, "loss": 0.4934, "step": 13429 }, { "epoch": 15.296296296296296, "grad_norm": 0.2218659371137619, "learning_rate": 7.050198757090653e-06, "loss": 0.6007, "step": 13430 }, { "epoch": 15.297435897435898, "grad_norm": 0.3193570375442505, "learning_rate": 7.046956039734206e-06, "loss": 0.2876, "step": 13431 }, { "epoch": 15.298575498575499, "grad_norm": 0.19239835441112518, "learning_rate": 7.0437139459363535e-06, "loss": 0.8156, "step": 13432 }, { "epoch": 15.2997150997151, "grad_norm": 0.21255037188529968, "learning_rate": 7.04047247580969e-06, "loss": 0.6548, "step": 13433 }, { "epoch": 15.300854700854702, "grad_norm": 0.22222843766212463, "learning_rate": 7.037231629466804e-06, "loss": 0.6232, "step": 13434 }, { "epoch": 15.301994301994302, "grad_norm": 0.21897831559181213, "learning_rate": 7.0339914070202625e-06, "loss": 0.7452, "step": 13435 }, { "epoch": 15.303133903133903, "grad_norm": 0.19468940794467926, "learning_rate": 7.0307518085826055e-06, "loss": 0.6738, "step": 13436 }, { "epoch": 15.304273504273505, "grad_norm": 0.2297879457473755, "learning_rate": 7.0275128342663536e-06, "loss": 0.4746, "step": 13437 }, { "epoch": 15.305413105413106, "grad_norm": 0.20002175867557526, "learning_rate": 7.024274484184007e-06, "loss": 0.43, "step": 13438 }, { "epoch": 15.306552706552706, "grad_norm": 0.21483980119228363, "learning_rate": 7.0210367584480465e-06, "loss": 0.6486, "step": 13439 }, { "epoch": 15.307692307692308, "grad_norm": 0.17824262380599976, "learning_rate": 7.017799657170918e-06, "loss": 0.9038, "step": 13440 }, { "epoch": 15.308831908831909, "grad_norm": 0.20306964218616486, "learning_rate": 7.014563180465055e-06, "loss": 0.6428, "step": 13441 }, { "epoch": 15.30997150997151, "grad_norm": 0.15465456247329712, "learning_rate": 7.011327328442874e-06, "loss": 0.4822, "step": 13442 }, { "epoch": 15.311111111111112, "grad_norm": 0.19104813039302826, "learning_rate": 7.008092101216771e-06, "loss": 0.7172, "step": 13443 }, { "epoch": 15.312250712250712, "grad_norm": 0.18439635634422302, "learning_rate": 7.0048574988991e-06, "loss": 0.6679, "step": 13444 }, { "epoch": 15.313390313390313, "grad_norm": 0.19624824821949005, "learning_rate": 7.001623521602216e-06, "loss": 0.676, "step": 13445 }, { "epoch": 15.314529914529915, "grad_norm": 0.20672352612018585, "learning_rate": 6.99839016943844e-06, "loss": 0.7165, "step": 13446 }, { "epoch": 15.315669515669516, "grad_norm": 0.21458734571933746, "learning_rate": 6.995157442520081e-06, "loss": 0.7422, "step": 13447 }, { "epoch": 15.316809116809116, "grad_norm": 0.18510808050632477, "learning_rate": 6.9919253409594155e-06, "loss": 0.671, "step": 13448 }, { "epoch": 15.317948717948719, "grad_norm": 0.20022939145565033, "learning_rate": 6.988693864868706e-06, "loss": 0.7407, "step": 13449 }, { "epoch": 15.31908831908832, "grad_norm": 0.22099626064300537, "learning_rate": 6.985463014360197e-06, "loss": 0.5132, "step": 13450 }, { "epoch": 15.32022792022792, "grad_norm": 0.1770186424255371, "learning_rate": 6.982232789546092e-06, "loss": 0.59, "step": 13451 }, { "epoch": 15.321367521367522, "grad_norm": 0.17203353345394135, "learning_rate": 6.979003190538588e-06, "loss": 0.8341, "step": 13452 }, { "epoch": 15.322507122507123, "grad_norm": 0.1846415251493454, "learning_rate": 6.975774217449865e-06, "loss": 0.6996, "step": 13453 }, { "epoch": 15.323646723646723, "grad_norm": 0.21757927536964417, "learning_rate": 6.972545870392075e-06, "loss": 0.5245, "step": 13454 }, { "epoch": 15.324786324786325, "grad_norm": 0.1696988046169281, "learning_rate": 6.969318149477336e-06, "loss": 0.5843, "step": 13455 }, { "epoch": 15.325925925925926, "grad_norm": 0.1950981318950653, "learning_rate": 6.966091054817764e-06, "loss": 0.8791, "step": 13456 }, { "epoch": 15.327065527065526, "grad_norm": 0.1883671134710312, "learning_rate": 6.962864586525439e-06, "loss": 0.7896, "step": 13457 }, { "epoch": 15.328205128205129, "grad_norm": 0.23657511174678802, "learning_rate": 6.959638744712432e-06, "loss": 0.5707, "step": 13458 }, { "epoch": 15.32934472934473, "grad_norm": 0.17644041776657104, "learning_rate": 6.956413529490782e-06, "loss": 0.6035, "step": 13459 }, { "epoch": 15.33048433048433, "grad_norm": 0.16345062851905823, "learning_rate": 6.953188940972511e-06, "loss": 0.7362, "step": 13460 }, { "epoch": 15.331623931623932, "grad_norm": 0.20905448496341705, "learning_rate": 6.949964979269616e-06, "loss": 0.7829, "step": 13461 }, { "epoch": 15.332763532763533, "grad_norm": 0.1936965137720108, "learning_rate": 6.946741644494084e-06, "loss": 0.6073, "step": 13462 }, { "epoch": 15.333903133903133, "grad_norm": 0.19706177711486816, "learning_rate": 6.94351893675785e-06, "loss": 0.8126, "step": 13463 }, { "epoch": 15.335042735042736, "grad_norm": 0.22282631695270538, "learning_rate": 6.940296856172862e-06, "loss": 0.4836, "step": 13464 }, { "epoch": 15.336182336182336, "grad_norm": 0.19140516221523285, "learning_rate": 6.937075402851029e-06, "loss": 0.7697, "step": 13465 }, { "epoch": 15.337321937321937, "grad_norm": 0.18049626052379608, "learning_rate": 6.933854576904248e-06, "loss": 0.8117, "step": 13466 }, { "epoch": 15.338461538461539, "grad_norm": 0.2209792137145996, "learning_rate": 6.93063437844437e-06, "loss": 0.718, "step": 13467 }, { "epoch": 15.33960113960114, "grad_norm": 0.16232642531394958, "learning_rate": 6.927414807583252e-06, "loss": 0.7753, "step": 13468 }, { "epoch": 15.34074074074074, "grad_norm": 0.17342950403690338, "learning_rate": 6.924195864432717e-06, "loss": 0.8164, "step": 13469 }, { "epoch": 15.341880341880342, "grad_norm": 0.1576143354177475, "learning_rate": 6.9209775491045705e-06, "loss": 0.638, "step": 13470 }, { "epoch": 15.343019943019943, "grad_norm": 0.21170037984848022, "learning_rate": 6.91775986171059e-06, "loss": 0.5737, "step": 13471 }, { "epoch": 15.344159544159544, "grad_norm": 0.21984083950519562, "learning_rate": 6.9145428023625365e-06, "loss": 0.6241, "step": 13472 }, { "epoch": 15.345299145299146, "grad_norm": 0.19216473400592804, "learning_rate": 6.911326371172153e-06, "loss": 0.5762, "step": 13473 }, { "epoch": 15.346438746438746, "grad_norm": 0.17950469255447388, "learning_rate": 6.908110568251141e-06, "loss": 0.7806, "step": 13474 }, { "epoch": 15.347578347578347, "grad_norm": 0.28059157729148865, "learning_rate": 6.904895393711203e-06, "loss": 0.5223, "step": 13475 }, { "epoch": 15.34871794871795, "grad_norm": 0.25041908025741577, "learning_rate": 6.901680847664008e-06, "loss": 0.5838, "step": 13476 }, { "epoch": 15.34985754985755, "grad_norm": 0.2188163548707962, "learning_rate": 6.898466930221217e-06, "loss": 0.5893, "step": 13477 }, { "epoch": 15.35099715099715, "grad_norm": 0.22686757147312164, "learning_rate": 6.89525364149444e-06, "loss": 0.8098, "step": 13478 }, { "epoch": 15.352136752136753, "grad_norm": 0.2084110975265503, "learning_rate": 6.892040981595294e-06, "loss": 0.5571, "step": 13479 }, { "epoch": 15.353276353276353, "grad_norm": 0.16481290757656097, "learning_rate": 6.88882895063536e-06, "loss": 0.7637, "step": 13480 }, { "epoch": 15.354415954415954, "grad_norm": 0.2415093630552292, "learning_rate": 6.885617548726203e-06, "loss": 0.4873, "step": 13481 }, { "epoch": 15.355555555555556, "grad_norm": 0.2453286200761795, "learning_rate": 6.882406775979363e-06, "loss": 0.4265, "step": 13482 }, { "epoch": 15.356695156695157, "grad_norm": 0.2449209839105606, "learning_rate": 6.87919663250636e-06, "loss": 0.5711, "step": 13483 }, { "epoch": 15.357834757834757, "grad_norm": 0.16910892724990845, "learning_rate": 6.875987118418698e-06, "loss": 0.8142, "step": 13484 }, { "epoch": 15.35897435897436, "grad_norm": 0.2112656682729721, "learning_rate": 6.8727782338278364e-06, "loss": 0.6616, "step": 13485 }, { "epoch": 15.36011396011396, "grad_norm": 0.23164135217666626, "learning_rate": 6.869569978845236e-06, "loss": 0.7478, "step": 13486 }, { "epoch": 15.36125356125356, "grad_norm": 0.23191292583942413, "learning_rate": 6.866362353582331e-06, "loss": 0.4085, "step": 13487 }, { "epoch": 15.362393162393163, "grad_norm": 0.1655510514974594, "learning_rate": 6.863155358150536e-06, "loss": 0.7969, "step": 13488 }, { "epoch": 15.363532763532763, "grad_norm": 0.26377928256988525, "learning_rate": 6.859948992661224e-06, "loss": 0.6692, "step": 13489 }, { "epoch": 15.364672364672364, "grad_norm": 0.20556597411632538, "learning_rate": 6.856743257225767e-06, "loss": 0.7595, "step": 13490 }, { "epoch": 15.365811965811966, "grad_norm": 0.2174561470746994, "learning_rate": 6.853538151955508e-06, "loss": 0.3178, "step": 13491 }, { "epoch": 15.366951566951567, "grad_norm": 0.16128693521022797, "learning_rate": 6.850333676961784e-06, "loss": 0.9294, "step": 13492 }, { "epoch": 15.368091168091167, "grad_norm": 0.2202058583498001, "learning_rate": 6.847129832355876e-06, "loss": 0.799, "step": 13493 }, { "epoch": 15.36923076923077, "grad_norm": 0.1866167187690735, "learning_rate": 6.843926618249069e-06, "loss": 0.7674, "step": 13494 }, { "epoch": 15.37037037037037, "grad_norm": 0.1961738020181656, "learning_rate": 6.840724034752621e-06, "loss": 0.6597, "step": 13495 }, { "epoch": 15.37150997150997, "grad_norm": 0.22003182768821716, "learning_rate": 6.837522081977771e-06, "loss": 0.5568, "step": 13496 }, { "epoch": 15.372649572649573, "grad_norm": 0.19237026572227478, "learning_rate": 6.83432076003572e-06, "loss": 0.7058, "step": 13497 }, { "epoch": 15.373789173789174, "grad_norm": 0.2223634421825409, "learning_rate": 6.831120069037666e-06, "loss": 0.579, "step": 13498 }, { "epoch": 15.374928774928774, "grad_norm": 0.19303710758686066, "learning_rate": 6.827920009094774e-06, "loss": 0.6463, "step": 13499 }, { "epoch": 15.376068376068377, "grad_norm": 0.23518739640712738, "learning_rate": 6.824720580318203e-06, "loss": 0.4935, "step": 13500 }, { "epoch": 15.377207977207977, "grad_norm": 0.2545756995677948, "learning_rate": 6.82152178281906e-06, "loss": 0.5802, "step": 13501 }, { "epoch": 15.378347578347578, "grad_norm": 0.18533293902873993, "learning_rate": 6.81832361670845e-06, "loss": 0.8319, "step": 13502 }, { "epoch": 15.37948717948718, "grad_norm": 0.17357389628887177, "learning_rate": 6.8151260820974754e-06, "loss": 0.6984, "step": 13503 }, { "epoch": 15.38062678062678, "grad_norm": 0.21384303271770477, "learning_rate": 6.811929179097173e-06, "loss": 0.6616, "step": 13504 }, { "epoch": 15.381766381766381, "grad_norm": 0.16954922676086426, "learning_rate": 6.808732907818585e-06, "loss": 0.6801, "step": 13505 }, { "epoch": 15.382905982905983, "grad_norm": 0.19935904443264008, "learning_rate": 6.805537268372733e-06, "loss": 0.5464, "step": 13506 }, { "epoch": 15.384045584045584, "grad_norm": 0.22920085489749908, "learning_rate": 6.8023422608706114e-06, "loss": 0.6909, "step": 13507 }, { "epoch": 15.385185185185184, "grad_norm": 0.17728930711746216, "learning_rate": 6.7991478854231825e-06, "loss": 0.7678, "step": 13508 }, { "epoch": 15.386324786324787, "grad_norm": 0.2285919189453125, "learning_rate": 6.7959541421413974e-06, "loss": 0.6241, "step": 13509 }, { "epoch": 15.387464387464387, "grad_norm": 0.2507666349411011, "learning_rate": 6.792761031136189e-06, "loss": 0.4517, "step": 13510 }, { "epoch": 15.388603988603988, "grad_norm": 0.2384263277053833, "learning_rate": 6.789568552518463e-06, "loss": 0.5156, "step": 13511 }, { "epoch": 15.38974358974359, "grad_norm": 0.1914670169353485, "learning_rate": 6.786376706399097e-06, "loss": 0.6855, "step": 13512 }, { "epoch": 15.39088319088319, "grad_norm": 0.18924571573734283, "learning_rate": 6.783185492888947e-06, "loss": 0.5981, "step": 13513 }, { "epoch": 15.392022792022791, "grad_norm": 0.19691336154937744, "learning_rate": 6.779994912098875e-06, "loss": 0.657, "step": 13514 }, { "epoch": 15.393162393162394, "grad_norm": 0.17275047302246094, "learning_rate": 6.776804964139677e-06, "loss": 0.8592, "step": 13515 }, { "epoch": 15.394301994301994, "grad_norm": 0.22531549632549286, "learning_rate": 6.773615649122158e-06, "loss": 0.6171, "step": 13516 }, { "epoch": 15.395441595441595, "grad_norm": 0.18528665602207184, "learning_rate": 6.7704269671570896e-06, "loss": 0.6811, "step": 13517 }, { "epoch": 15.396581196581197, "grad_norm": 0.16656826436519623, "learning_rate": 6.767238918355231e-06, "loss": 0.6328, "step": 13518 }, { "epoch": 15.397720797720797, "grad_norm": 0.20518410205841064, "learning_rate": 6.7640515028272986e-06, "loss": 0.4002, "step": 13519 }, { "epoch": 15.398860398860398, "grad_norm": 0.19122925400733948, "learning_rate": 6.760864720684007e-06, "loss": 0.7034, "step": 13520 }, { "epoch": 15.4, "grad_norm": 0.24097371101379395, "learning_rate": 6.7576785720360416e-06, "loss": 0.6354, "step": 13521 }, { "epoch": 15.401139601139601, "grad_norm": 0.23148728907108307, "learning_rate": 6.754493056994072e-06, "loss": 0.7375, "step": 13522 }, { "epoch": 15.402279202279201, "grad_norm": 0.16283705830574036, "learning_rate": 6.751308175668727e-06, "loss": 0.7969, "step": 13523 }, { "epoch": 15.403418803418804, "grad_norm": 0.24585258960723877, "learning_rate": 6.748123928170627e-06, "loss": 0.5324, "step": 13524 }, { "epoch": 15.404558404558404, "grad_norm": 0.2425752878189087, "learning_rate": 6.744940314610387e-06, "loss": 0.5958, "step": 13525 }, { "epoch": 15.405698005698005, "grad_norm": 0.17162194848060608, "learning_rate": 6.741757335098567e-06, "loss": 0.6547, "step": 13526 }, { "epoch": 15.406837606837607, "grad_norm": 0.18037395179271698, "learning_rate": 6.738574989745724e-06, "loss": 0.8093, "step": 13527 }, { "epoch": 15.407977207977208, "grad_norm": 0.22981791198253632, "learning_rate": 6.7353932786623945e-06, "loss": 0.7284, "step": 13528 }, { "epoch": 15.40911680911681, "grad_norm": 0.23959402740001678, "learning_rate": 6.732212201959087e-06, "loss": 0.3635, "step": 13529 }, { "epoch": 15.41025641025641, "grad_norm": 0.2161649465560913, "learning_rate": 6.729031759746282e-06, "loss": 0.5876, "step": 13530 }, { "epoch": 15.411396011396011, "grad_norm": 0.21092326939105988, "learning_rate": 6.725851952134446e-06, "loss": 0.7228, "step": 13531 }, { "epoch": 15.412535612535613, "grad_norm": 0.21590463817119598, "learning_rate": 6.72267277923403e-06, "loss": 0.6601, "step": 13532 }, { "epoch": 15.413675213675214, "grad_norm": 0.22229517996311188, "learning_rate": 6.7194942411554555e-06, "loss": 0.547, "step": 13533 }, { "epoch": 15.414814814814815, "grad_norm": 0.21258416771888733, "learning_rate": 6.71631633800911e-06, "loss": 0.7539, "step": 13534 }, { "epoch": 15.415954415954417, "grad_norm": 0.15924860537052155, "learning_rate": 6.713139069905372e-06, "loss": 0.7066, "step": 13535 }, { "epoch": 15.417094017094017, "grad_norm": 0.16245602071285248, "learning_rate": 6.709962436954612e-06, "loss": 0.6911, "step": 13536 }, { "epoch": 15.418233618233618, "grad_norm": 0.18839913606643677, "learning_rate": 6.70678643926716e-06, "loss": 0.6721, "step": 13537 }, { "epoch": 15.41937321937322, "grad_norm": 0.19004669785499573, "learning_rate": 6.703611076953315e-06, "loss": 0.7099, "step": 13538 }, { "epoch": 15.42051282051282, "grad_norm": 0.23223291337490082, "learning_rate": 6.7004363501233705e-06, "loss": 0.5233, "step": 13539 }, { "epoch": 15.421652421652421, "grad_norm": 0.18488751351833344, "learning_rate": 6.697262258887596e-06, "loss": 0.7661, "step": 13540 }, { "epoch": 15.422792022792024, "grad_norm": 0.1790686845779419, "learning_rate": 6.694088803356247e-06, "loss": 0.8949, "step": 13541 }, { "epoch": 15.423931623931624, "grad_norm": 0.20640535652637482, "learning_rate": 6.690915983639523e-06, "loss": 0.7, "step": 13542 }, { "epoch": 15.425071225071225, "grad_norm": 0.2872496545314789, "learning_rate": 6.68774379984764e-06, "loss": 0.4593, "step": 13543 }, { "epoch": 15.426210826210827, "grad_norm": 0.20648087561130524, "learning_rate": 6.684572252090773e-06, "loss": 0.5741, "step": 13544 }, { "epoch": 15.427350427350428, "grad_norm": 0.16248594224452972, "learning_rate": 6.681401340479087e-06, "loss": 0.9091, "step": 13545 }, { "epoch": 15.428490028490028, "grad_norm": 0.23793335258960724, "learning_rate": 6.678231065122695e-06, "loss": 0.501, "step": 13546 }, { "epoch": 15.42962962962963, "grad_norm": 0.1887454092502594, "learning_rate": 6.675061426131729e-06, "loss": 0.4776, "step": 13547 }, { "epoch": 15.430769230769231, "grad_norm": 0.20787455141544342, "learning_rate": 6.67189242361628e-06, "loss": 0.7284, "step": 13548 }, { "epoch": 15.431908831908832, "grad_norm": 0.16973797976970673, "learning_rate": 6.668724057686404e-06, "loss": 0.6371, "step": 13549 }, { "epoch": 15.433048433048434, "grad_norm": 0.209912970662117, "learning_rate": 6.665556328452152e-06, "loss": 0.7062, "step": 13550 }, { "epoch": 15.434188034188034, "grad_norm": 0.21412673592567444, "learning_rate": 6.66238923602355e-06, "loss": 0.6927, "step": 13551 }, { "epoch": 15.435327635327635, "grad_norm": 0.18775635957717896, "learning_rate": 6.659222780510605e-06, "loss": 0.4888, "step": 13552 }, { "epoch": 15.436467236467237, "grad_norm": 0.19604071974754333, "learning_rate": 6.65605696202328e-06, "loss": 0.7704, "step": 13553 }, { "epoch": 15.437606837606838, "grad_norm": 0.17671994864940643, "learning_rate": 6.652891780671547e-06, "loss": 0.6545, "step": 13554 }, { "epoch": 15.438746438746438, "grad_norm": 0.2810894548892975, "learning_rate": 6.649727236565334e-06, "loss": 0.6377, "step": 13555 }, { "epoch": 15.43988603988604, "grad_norm": 0.19777388870716095, "learning_rate": 6.646563329814567e-06, "loss": 0.7028, "step": 13556 }, { "epoch": 15.441025641025641, "grad_norm": 0.1773541122674942, "learning_rate": 6.643400060529112e-06, "loss": 0.7958, "step": 13557 }, { "epoch": 15.442165242165242, "grad_norm": 0.2325608879327774, "learning_rate": 6.64023742881886e-06, "loss": 0.8598, "step": 13558 }, { "epoch": 15.443304843304844, "grad_norm": 0.21778924763202667, "learning_rate": 6.6370754347936605e-06, "loss": 0.6885, "step": 13559 }, { "epoch": 15.444444444444445, "grad_norm": 0.20094701647758484, "learning_rate": 6.6339140785633215e-06, "loss": 0.6292, "step": 13560 }, { "epoch": 15.445584045584045, "grad_norm": 0.1571194976568222, "learning_rate": 6.630753360237654e-06, "loss": 0.7107, "step": 13561 }, { "epoch": 15.446723646723648, "grad_norm": 0.226139634847641, "learning_rate": 6.627593279926439e-06, "loss": 0.4293, "step": 13562 }, { "epoch": 15.447863247863248, "grad_norm": 0.2584592401981354, "learning_rate": 6.624433837739438e-06, "loss": 0.5004, "step": 13563 }, { "epoch": 15.449002849002849, "grad_norm": 0.22435049712657928, "learning_rate": 6.621275033786379e-06, "loss": 0.6862, "step": 13564 }, { "epoch": 15.450142450142451, "grad_norm": 0.17067396640777588, "learning_rate": 6.6181168681769765e-06, "loss": 0.7152, "step": 13565 }, { "epoch": 15.451282051282051, "grad_norm": 0.15705978870391846, "learning_rate": 6.614959341020929e-06, "loss": 0.9746, "step": 13566 }, { "epoch": 15.452421652421652, "grad_norm": 0.24600954353809357, "learning_rate": 6.611802452427907e-06, "loss": 0.7185, "step": 13567 }, { "epoch": 15.453561253561254, "grad_norm": 0.18077035248279572, "learning_rate": 6.60864620250754e-06, "loss": 0.5957, "step": 13568 }, { "epoch": 15.454700854700855, "grad_norm": 0.23606523871421814, "learning_rate": 6.605490591369473e-06, "loss": 0.6958, "step": 13569 }, { "epoch": 15.455840455840455, "grad_norm": 0.25060296058654785, "learning_rate": 6.602335619123312e-06, "loss": 0.6264, "step": 13570 }, { "epoch": 15.456980056980058, "grad_norm": 0.19400660693645477, "learning_rate": 6.599181285878622e-06, "loss": 0.7568, "step": 13571 }, { "epoch": 15.458119658119658, "grad_norm": 0.2130562663078308, "learning_rate": 6.596027591744966e-06, "loss": 0.5687, "step": 13572 }, { "epoch": 15.459259259259259, "grad_norm": 0.29239821434020996, "learning_rate": 6.592874536831884e-06, "loss": 0.6646, "step": 13573 }, { "epoch": 15.460398860398861, "grad_norm": 0.23149274289608002, "learning_rate": 6.5897221212488895e-06, "loss": 0.7832, "step": 13574 }, { "epoch": 15.461538461538462, "grad_norm": 0.16438227891921997, "learning_rate": 6.58657034510548e-06, "loss": 0.7924, "step": 13575 }, { "epoch": 15.462678062678062, "grad_norm": 0.2602296471595764, "learning_rate": 6.583419208511113e-06, "loss": 0.3983, "step": 13576 }, { "epoch": 15.463817663817665, "grad_norm": 0.18291400372982025, "learning_rate": 6.580268711575244e-06, "loss": 0.9553, "step": 13577 }, { "epoch": 15.464957264957265, "grad_norm": 0.18735671043395996, "learning_rate": 6.577118854407297e-06, "loss": 0.7603, "step": 13578 }, { "epoch": 15.466096866096866, "grad_norm": 0.2598130404949188, "learning_rate": 6.573969637116673e-06, "loss": 0.7535, "step": 13579 }, { "epoch": 15.467236467236468, "grad_norm": 0.20607733726501465, "learning_rate": 6.570821059812757e-06, "loss": 0.7439, "step": 13580 }, { "epoch": 15.468376068376068, "grad_norm": 0.2689054310321808, "learning_rate": 6.5676731226049056e-06, "loss": 0.5412, "step": 13581 }, { "epoch": 15.469515669515669, "grad_norm": 0.18888936936855316, "learning_rate": 6.564525825602464e-06, "loss": 0.7777, "step": 13582 }, { "epoch": 15.470655270655271, "grad_norm": 0.25411272048950195, "learning_rate": 6.561379168914731e-06, "loss": 0.4327, "step": 13583 }, { "epoch": 15.471794871794872, "grad_norm": 0.15883229672908783, "learning_rate": 6.5582331526510026e-06, "loss": 0.5527, "step": 13584 }, { "epoch": 15.472934472934472, "grad_norm": 0.2070358544588089, "learning_rate": 6.555087776920554e-06, "loss": 0.7144, "step": 13585 }, { "epoch": 15.474074074074075, "grad_norm": 0.20985698699951172, "learning_rate": 6.55194304183264e-06, "loss": 0.7495, "step": 13586 }, { "epoch": 15.475213675213675, "grad_norm": 0.23107481002807617, "learning_rate": 6.548798947496465e-06, "loss": 0.6298, "step": 13587 }, { "epoch": 15.476353276353276, "grad_norm": 0.18231339752674103, "learning_rate": 6.5456554940212465e-06, "loss": 0.7531, "step": 13588 }, { "epoch": 15.477492877492878, "grad_norm": 0.17274364829063416, "learning_rate": 6.542512681516161e-06, "loss": 0.7785, "step": 13589 }, { "epoch": 15.478632478632479, "grad_norm": 0.21576517820358276, "learning_rate": 6.53937051009037e-06, "loss": 0.9112, "step": 13590 }, { "epoch": 15.47977207977208, "grad_norm": 0.20775271952152252, "learning_rate": 6.536228979853007e-06, "loss": 0.8234, "step": 13591 }, { "epoch": 15.480911680911682, "grad_norm": 0.2606852650642395, "learning_rate": 6.533088090913186e-06, "loss": 0.4991, "step": 13592 }, { "epoch": 15.482051282051282, "grad_norm": 0.17941322922706604, "learning_rate": 6.52994784338001e-06, "loss": 0.6829, "step": 13593 }, { "epoch": 15.483190883190883, "grad_norm": 0.1975211352109909, "learning_rate": 6.526808237362528e-06, "loss": 0.4983, "step": 13594 }, { "epoch": 15.484330484330485, "grad_norm": 0.2171153724193573, "learning_rate": 6.523669272969799e-06, "loss": 0.6357, "step": 13595 }, { "epoch": 15.485470085470086, "grad_norm": 0.2066015899181366, "learning_rate": 6.520530950310846e-06, "loss": 0.6761, "step": 13596 }, { "epoch": 15.486609686609686, "grad_norm": 0.24421249330043793, "learning_rate": 6.517393269494679e-06, "loss": 0.67, "step": 13597 }, { "epoch": 15.487749287749288, "grad_norm": 0.18982775509357452, "learning_rate": 6.514256230630264e-06, "loss": 0.6891, "step": 13598 }, { "epoch": 15.488888888888889, "grad_norm": 0.2217584252357483, "learning_rate": 6.511119833826567e-06, "loss": 0.5141, "step": 13599 }, { "epoch": 15.49002849002849, "grad_norm": 0.19728122651576996, "learning_rate": 6.507984079192523e-06, "loss": 0.7819, "step": 13600 }, { "epoch": 15.491168091168092, "grad_norm": 0.20434993505477905, "learning_rate": 6.504848966837043e-06, "loss": 0.7104, "step": 13601 }, { "epoch": 15.492307692307692, "grad_norm": 0.2107301652431488, "learning_rate": 6.501714496869021e-06, "loss": 0.8561, "step": 13602 }, { "epoch": 15.493447293447293, "grad_norm": 0.17963461577892303, "learning_rate": 6.498580669397325e-06, "loss": 0.8131, "step": 13603 }, { "epoch": 15.494586894586895, "grad_norm": 0.19163499772548676, "learning_rate": 6.495447484530809e-06, "loss": 0.5698, "step": 13604 }, { "epoch": 15.495726495726496, "grad_norm": 0.1939983367919922, "learning_rate": 6.492314942378283e-06, "loss": 0.8806, "step": 13605 }, { "epoch": 15.496866096866096, "grad_norm": 0.21192686259746552, "learning_rate": 6.489183043048552e-06, "loss": 0.6066, "step": 13606 }, { "epoch": 15.498005698005699, "grad_norm": 0.15550144016742706, "learning_rate": 6.486051786650402e-06, "loss": 0.6037, "step": 13607 }, { "epoch": 15.4991452991453, "grad_norm": 0.19832514226436615, "learning_rate": 6.482921173292595e-06, "loss": 0.6994, "step": 13608 }, { "epoch": 15.5002849002849, "grad_norm": 0.21604904532432556, "learning_rate": 6.479791203083848e-06, "loss": 0.7685, "step": 13609 }, { "epoch": 15.501424501424502, "grad_norm": 0.21722885966300964, "learning_rate": 6.476661876132886e-06, "loss": 0.7895, "step": 13610 }, { "epoch": 15.502564102564103, "grad_norm": 0.2538605034351349, "learning_rate": 6.473533192548395e-06, "loss": 0.4011, "step": 13611 }, { "epoch": 15.503703703703703, "grad_norm": 0.22261404991149902, "learning_rate": 6.470405152439043e-06, "loss": 0.6813, "step": 13612 }, { "epoch": 15.504843304843305, "grad_norm": 0.18874602019786835, "learning_rate": 6.467277755913478e-06, "loss": 0.5311, "step": 13613 }, { "epoch": 15.505982905982906, "grad_norm": 0.24819247424602509, "learning_rate": 6.464151003080321e-06, "loss": 0.5075, "step": 13614 }, { "epoch": 15.507122507122507, "grad_norm": 0.1872682273387909, "learning_rate": 6.461024894048176e-06, "loss": 0.656, "step": 13615 }, { "epoch": 15.508262108262109, "grad_norm": 0.19362613558769226, "learning_rate": 6.457899428925624e-06, "loss": 0.7423, "step": 13616 }, { "epoch": 15.50940170940171, "grad_norm": 0.20935077965259552, "learning_rate": 6.454774607821206e-06, "loss": 0.67, "step": 13617 }, { "epoch": 15.51054131054131, "grad_norm": 0.15607993304729462, "learning_rate": 6.45165043084347e-06, "loss": 0.7793, "step": 13618 }, { "epoch": 15.511680911680912, "grad_norm": 0.23192988336086273, "learning_rate": 6.448526898100921e-06, "loss": 0.8127, "step": 13619 }, { "epoch": 15.512820512820513, "grad_norm": 0.215484619140625, "learning_rate": 6.445404009702055e-06, "loss": 0.7282, "step": 13620 }, { "epoch": 15.513960113960113, "grad_norm": 0.18776561319828033, "learning_rate": 6.442281765755329e-06, "loss": 0.6589, "step": 13621 }, { "epoch": 15.515099715099716, "grad_norm": 0.24401643872261047, "learning_rate": 6.43916016636919e-06, "loss": 0.3575, "step": 13622 }, { "epoch": 15.516239316239316, "grad_norm": 0.18532097339630127, "learning_rate": 6.4360392116520594e-06, "loss": 0.8261, "step": 13623 }, { "epoch": 15.517378917378917, "grad_norm": 0.24294257164001465, "learning_rate": 6.43291890171234e-06, "loss": 0.7725, "step": 13624 }, { "epoch": 15.518518518518519, "grad_norm": 0.16897542774677277, "learning_rate": 6.429799236658404e-06, "loss": 0.8488, "step": 13625 }, { "epoch": 15.51965811965812, "grad_norm": 0.22571931779384613, "learning_rate": 6.426680216598613e-06, "loss": 0.5263, "step": 13626 }, { "epoch": 15.52079772079772, "grad_norm": 0.18616579473018646, "learning_rate": 6.4235618416412986e-06, "loss": 0.5997, "step": 13627 }, { "epoch": 15.521937321937322, "grad_norm": 0.1776077151298523, "learning_rate": 6.420444111894761e-06, "loss": 0.5849, "step": 13628 }, { "epoch": 15.523076923076923, "grad_norm": 0.17871063947677612, "learning_rate": 6.417327027467293e-06, "loss": 0.6076, "step": 13629 }, { "epoch": 15.524216524216524, "grad_norm": 0.203303724527359, "learning_rate": 6.414210588467162e-06, "loss": 0.6325, "step": 13630 }, { "epoch": 15.525356125356126, "grad_norm": 0.17372427880764008, "learning_rate": 6.411094795002612e-06, "loss": 0.8, "step": 13631 }, { "epoch": 15.526495726495726, "grad_norm": 0.22373448312282562, "learning_rate": 6.407979647181855e-06, "loss": 0.735, "step": 13632 }, { "epoch": 15.527635327635327, "grad_norm": 0.2009502649307251, "learning_rate": 6.404865145113093e-06, "loss": 0.5968, "step": 13633 }, { "epoch": 15.52877492877493, "grad_norm": 0.24295362830162048, "learning_rate": 6.401751288904498e-06, "loss": 0.616, "step": 13634 }, { "epoch": 15.52991452991453, "grad_norm": 0.2393190711736679, "learning_rate": 6.398638078664229e-06, "loss": 0.7765, "step": 13635 }, { "epoch": 15.53105413105413, "grad_norm": 0.1962868869304657, "learning_rate": 6.3955255145004135e-06, "loss": 0.6915, "step": 13636 }, { "epoch": 15.532193732193733, "grad_norm": 0.34747734665870667, "learning_rate": 6.3924135965211605e-06, "loss": 0.8085, "step": 13637 }, { "epoch": 15.533333333333333, "grad_norm": 0.20524170994758606, "learning_rate": 6.389302324834559e-06, "loss": 0.742, "step": 13638 }, { "epoch": 15.534472934472934, "grad_norm": 0.2063647210597992, "learning_rate": 6.38619169954866e-06, "loss": 0.5466, "step": 13639 }, { "epoch": 15.535612535612536, "grad_norm": 0.2075195014476776, "learning_rate": 6.383081720771514e-06, "loss": 0.7451, "step": 13640 }, { "epoch": 15.536752136752137, "grad_norm": 0.17599990963935852, "learning_rate": 6.379972388611136e-06, "loss": 0.698, "step": 13641 }, { "epoch": 15.537891737891737, "grad_norm": 0.19163137674331665, "learning_rate": 6.376863703175528e-06, "loss": 0.8553, "step": 13642 }, { "epoch": 15.53903133903134, "grad_norm": 0.18536020815372467, "learning_rate": 6.37375566457265e-06, "loss": 0.635, "step": 13643 }, { "epoch": 15.54017094017094, "grad_norm": 0.22451657056808472, "learning_rate": 6.370648272910462e-06, "loss": 0.5601, "step": 13644 }, { "epoch": 15.54131054131054, "grad_norm": 0.2571554481983185, "learning_rate": 6.367541528296889e-06, "loss": 0.6696, "step": 13645 }, { "epoch": 15.542450142450143, "grad_norm": 1.3970999717712402, "learning_rate": 6.364435430839838e-06, "loss": 0.6399, "step": 13646 }, { "epoch": 15.543589743589743, "grad_norm": 0.21825458109378815, "learning_rate": 6.361329980647194e-06, "loss": 0.704, "step": 13647 }, { "epoch": 15.544729344729344, "grad_norm": 0.16594573855400085, "learning_rate": 6.358225177826812e-06, "loss": 0.6154, "step": 13648 }, { "epoch": 15.545868945868946, "grad_norm": 0.1690720170736313, "learning_rate": 6.355121022486546e-06, "loss": 0.9228, "step": 13649 }, { "epoch": 15.547008547008547, "grad_norm": 0.23778723180294037, "learning_rate": 6.352017514734193e-06, "loss": 0.4943, "step": 13650 }, { "epoch": 15.548148148148147, "grad_norm": 0.23354555666446686, "learning_rate": 6.348914654677554e-06, "loss": 0.3579, "step": 13651 }, { "epoch": 15.54928774928775, "grad_norm": 0.22613407671451569, "learning_rate": 6.345812442424398e-06, "loss": 0.552, "step": 13652 }, { "epoch": 15.55042735042735, "grad_norm": 0.19844311475753784, "learning_rate": 6.342710878082478e-06, "loss": 0.7388, "step": 13653 }, { "epoch": 15.55156695156695, "grad_norm": 0.17051514983177185, "learning_rate": 6.339609961759521e-06, "loss": 0.7468, "step": 13654 }, { "epoch": 15.552706552706553, "grad_norm": 0.2350083738565445, "learning_rate": 6.33650969356322e-06, "loss": 0.6674, "step": 13655 }, { "epoch": 15.553846153846154, "grad_norm": 0.21361273527145386, "learning_rate": 6.3334100736012644e-06, "loss": 0.5688, "step": 13656 }, { "epoch": 15.554985754985754, "grad_norm": 0.18314236402511597, "learning_rate": 6.330311101981307e-06, "loss": 0.6635, "step": 13657 }, { "epoch": 15.556125356125357, "grad_norm": 0.2125498503446579, "learning_rate": 6.32721277881099e-06, "loss": 0.5927, "step": 13658 }, { "epoch": 15.557264957264957, "grad_norm": 0.21594832837581635, "learning_rate": 6.324115104197919e-06, "loss": 0.6365, "step": 13659 }, { "epoch": 15.558404558404558, "grad_norm": 0.2782728374004364, "learning_rate": 6.321018078249693e-06, "loss": 0.6943, "step": 13660 }, { "epoch": 15.55954415954416, "grad_norm": 0.18271590769290924, "learning_rate": 6.317921701073884e-06, "loss": 0.7358, "step": 13661 }, { "epoch": 15.56068376068376, "grad_norm": 0.19562652707099915, "learning_rate": 6.314825972778022e-06, "loss": 0.6251, "step": 13662 }, { "epoch": 15.561823361823361, "grad_norm": 0.2306443750858307, "learning_rate": 6.311730893469636e-06, "loss": 0.5493, "step": 13663 }, { "epoch": 15.562962962962963, "grad_norm": 0.2782440781593323, "learning_rate": 6.3086364632562316e-06, "loss": 0.6875, "step": 13664 }, { "epoch": 15.564102564102564, "grad_norm": 0.16360704600811005, "learning_rate": 6.305542682245291e-06, "loss": 0.6351, "step": 13665 }, { "epoch": 15.565242165242164, "grad_norm": 0.2210601568222046, "learning_rate": 6.302449550544256e-06, "loss": 0.5766, "step": 13666 }, { "epoch": 15.566381766381767, "grad_norm": 0.22705049812793732, "learning_rate": 6.299357068260567e-06, "loss": 0.3935, "step": 13667 }, { "epoch": 15.567521367521367, "grad_norm": 0.2909753620624542, "learning_rate": 6.296265235501634e-06, "loss": 0.4501, "step": 13668 }, { "epoch": 15.568660968660968, "grad_norm": 0.23857071995735168, "learning_rate": 6.293174052374845e-06, "loss": 0.586, "step": 13669 }, { "epoch": 15.56980056980057, "grad_norm": 0.19779235124588013, "learning_rate": 6.290083518987563e-06, "loss": 0.4547, "step": 13670 }, { "epoch": 15.57094017094017, "grad_norm": 0.2003849744796753, "learning_rate": 6.286993635447136e-06, "loss": 0.7345, "step": 13671 }, { "epoch": 15.572079772079771, "grad_norm": 0.188863605260849, "learning_rate": 6.283904401860885e-06, "loss": 0.6969, "step": 13672 }, { "epoch": 15.573219373219374, "grad_norm": 0.23119278252124786, "learning_rate": 6.2808158183361e-06, "loss": 0.5791, "step": 13673 }, { "epoch": 15.574358974358974, "grad_norm": 0.19105198979377747, "learning_rate": 6.277727884980056e-06, "loss": 0.5908, "step": 13674 }, { "epoch": 15.575498575498575, "grad_norm": 0.20295053720474243, "learning_rate": 6.274640601900012e-06, "loss": 0.5843, "step": 13675 }, { "epoch": 15.576638176638177, "grad_norm": 0.18536078929901123, "learning_rate": 6.2715539692032e-06, "loss": 0.723, "step": 13676 }, { "epoch": 15.577777777777778, "grad_norm": 0.18521544337272644, "learning_rate": 6.2684679869968145e-06, "loss": 0.8696, "step": 13677 }, { "epoch": 15.578917378917378, "grad_norm": 0.19663411378860474, "learning_rate": 6.26538265538805e-06, "loss": 0.9285, "step": 13678 }, { "epoch": 15.58005698005698, "grad_norm": 0.22954058647155762, "learning_rate": 6.262297974484063e-06, "loss": 0.4636, "step": 13679 }, { "epoch": 15.581196581196581, "grad_norm": 0.19672894477844238, "learning_rate": 6.259213944392001e-06, "loss": 0.6554, "step": 13680 }, { "epoch": 15.582336182336181, "grad_norm": 0.23419034481048584, "learning_rate": 6.256130565218971e-06, "loss": 0.4539, "step": 13681 }, { "epoch": 15.583475783475784, "grad_norm": 0.16873398423194885, "learning_rate": 6.2530478370720765e-06, "loss": 0.4983, "step": 13682 }, { "epoch": 15.584615384615384, "grad_norm": 0.22072651982307434, "learning_rate": 6.249965760058391e-06, "loss": 0.5829, "step": 13683 }, { "epoch": 15.585754985754985, "grad_norm": 0.18701456487178802, "learning_rate": 6.246884334284947e-06, "loss": 0.8002, "step": 13684 }, { "epoch": 15.586894586894587, "grad_norm": 0.1927860826253891, "learning_rate": 6.243803559858785e-06, "loss": 0.6156, "step": 13685 }, { "epoch": 15.588034188034188, "grad_norm": 0.22001326084136963, "learning_rate": 6.240723436886903e-06, "loss": 0.6379, "step": 13686 }, { "epoch": 15.589173789173788, "grad_norm": 0.18028047680854797, "learning_rate": 6.237643965476292e-06, "loss": 0.8061, "step": 13687 }, { "epoch": 15.59031339031339, "grad_norm": 0.20484380424022675, "learning_rate": 6.234565145733898e-06, "loss": 0.6586, "step": 13688 }, { "epoch": 15.591452991452991, "grad_norm": 0.2841224670410156, "learning_rate": 6.231486977766657e-06, "loss": 0.4662, "step": 13689 }, { "epoch": 15.592592592592592, "grad_norm": 0.27535203099250793, "learning_rate": 6.228409461681484e-06, "loss": 0.7547, "step": 13690 }, { "epoch": 15.593732193732194, "grad_norm": 0.23370136320590973, "learning_rate": 6.2253325975852844e-06, "loss": 0.6202, "step": 13691 }, { "epoch": 15.594871794871795, "grad_norm": 0.20593878626823425, "learning_rate": 6.222256385584907e-06, "loss": 0.4472, "step": 13692 }, { "epoch": 15.596011396011395, "grad_norm": 0.17125186324119568, "learning_rate": 6.219180825787204e-06, "loss": 0.7245, "step": 13693 }, { "epoch": 15.597150997150997, "grad_norm": 0.19655224680900574, "learning_rate": 6.216105918299e-06, "loss": 0.5892, "step": 13694 }, { "epoch": 15.598290598290598, "grad_norm": 0.17949378490447998, "learning_rate": 6.2130316632271e-06, "loss": 0.4656, "step": 13695 }, { "epoch": 15.5994301994302, "grad_norm": 0.17048440873622894, "learning_rate": 6.20995806067827e-06, "loss": 0.6455, "step": 13696 }, { "epoch": 15.6005698005698, "grad_norm": 0.16665752232074738, "learning_rate": 6.2068851107592645e-06, "loss": 0.7673, "step": 13697 }, { "epoch": 15.601709401709401, "grad_norm": 0.22793520987033844, "learning_rate": 6.203812813576823e-06, "loss": 0.7257, "step": 13698 }, { "epoch": 15.602849002849004, "grad_norm": 0.20825371146202087, "learning_rate": 6.200741169237659e-06, "loss": 0.519, "step": 13699 }, { "epoch": 15.603988603988604, "grad_norm": 0.22275830805301666, "learning_rate": 6.197670177848447e-06, "loss": 0.5398, "step": 13700 }, { "epoch": 15.605128205128205, "grad_norm": 0.16599951684474945, "learning_rate": 6.194599839515846e-06, "loss": 0.8594, "step": 13701 }, { "epoch": 15.606267806267807, "grad_norm": 0.20247229933738708, "learning_rate": 6.191530154346523e-06, "loss": 0.9489, "step": 13702 }, { "epoch": 15.607407407407408, "grad_norm": 0.19792650640010834, "learning_rate": 6.188461122447075e-06, "loss": 0.5835, "step": 13703 }, { "epoch": 15.608547008547008, "grad_norm": 0.1858179271221161, "learning_rate": 6.185392743924104e-06, "loss": 0.651, "step": 13704 }, { "epoch": 15.60968660968661, "grad_norm": 0.1663610339164734, "learning_rate": 6.182325018884183e-06, "loss": 0.5449, "step": 13705 }, { "epoch": 15.610826210826211, "grad_norm": 0.19580359756946564, "learning_rate": 6.17925794743387e-06, "loss": 0.6482, "step": 13706 }, { "epoch": 15.611965811965812, "grad_norm": 0.21123597025871277, "learning_rate": 6.176191529679678e-06, "loss": 0.6836, "step": 13707 }, { "epoch": 15.613105413105414, "grad_norm": 0.18846312165260315, "learning_rate": 6.173125765728122e-06, "loss": 0.5522, "step": 13708 }, { "epoch": 15.614245014245014, "grad_norm": 0.1801503747701645, "learning_rate": 6.1700606556856805e-06, "loss": 0.7241, "step": 13709 }, { "epoch": 15.615384615384615, "grad_norm": 0.2200639843940735, "learning_rate": 6.1669961996588195e-06, "loss": 0.7103, "step": 13710 }, { "epoch": 15.616524216524217, "grad_norm": 0.19399894773960114, "learning_rate": 6.163932397753969e-06, "loss": 0.6425, "step": 13711 }, { "epoch": 15.617663817663818, "grad_norm": 0.2099304497241974, "learning_rate": 6.160869250077536e-06, "loss": 0.74, "step": 13712 }, { "epoch": 15.618803418803418, "grad_norm": 0.2581200897693634, "learning_rate": 6.157806756735937e-06, "loss": 0.5516, "step": 13713 }, { "epoch": 15.61994301994302, "grad_norm": 0.21245378255844116, "learning_rate": 6.1547449178355174e-06, "loss": 0.4188, "step": 13714 }, { "epoch": 15.621082621082621, "grad_norm": 0.225898876786232, "learning_rate": 6.151683733482633e-06, "loss": 0.7311, "step": 13715 }, { "epoch": 15.622222222222222, "grad_norm": 0.1864606887102127, "learning_rate": 6.148623203783605e-06, "loss": 0.6569, "step": 13716 }, { "epoch": 15.623361823361824, "grad_norm": 0.2043372094631195, "learning_rate": 6.145563328844742e-06, "loss": 0.5602, "step": 13717 }, { "epoch": 15.624501424501425, "grad_norm": 0.22436149418354034, "learning_rate": 6.142504108772307e-06, "loss": 0.7941, "step": 13718 }, { "epoch": 15.625641025641025, "grad_norm": 0.20962457358837128, "learning_rate": 6.139445543672562e-06, "loss": 0.6914, "step": 13719 }, { "epoch": 15.626780626780628, "grad_norm": 0.3241886496543884, "learning_rate": 6.136387633651741e-06, "loss": 0.4077, "step": 13720 }, { "epoch": 15.627920227920228, "grad_norm": 0.20413905382156372, "learning_rate": 6.133330378816057e-06, "loss": 0.5582, "step": 13721 }, { "epoch": 15.629059829059829, "grad_norm": 0.2374972552061081, "learning_rate": 6.1302737792716895e-06, "loss": 0.6637, "step": 13722 }, { "epoch": 15.630199430199431, "grad_norm": 0.16341468691825867, "learning_rate": 6.127217835124796e-06, "loss": 0.7038, "step": 13723 }, { "epoch": 15.631339031339031, "grad_norm": 0.2513233423233032, "learning_rate": 6.124162546481543e-06, "loss": 0.5117, "step": 13724 }, { "epoch": 15.632478632478632, "grad_norm": 0.19655568897724152, "learning_rate": 6.121107913448024e-06, "loss": 0.7148, "step": 13725 }, { "epoch": 15.633618233618234, "grad_norm": 0.1788332313299179, "learning_rate": 6.118053936130347e-06, "loss": 0.6445, "step": 13726 }, { "epoch": 15.634757834757835, "grad_norm": 0.21004877984523773, "learning_rate": 6.115000614634581e-06, "loss": 0.7228, "step": 13727 }, { "epoch": 15.635897435897435, "grad_norm": 0.23008368909358978, "learning_rate": 6.111947949066784e-06, "loss": 0.5885, "step": 13728 }, { "epoch": 15.637037037037038, "grad_norm": 0.20609422028064728, "learning_rate": 6.108895939532972e-06, "loss": 0.6035, "step": 13729 }, { "epoch": 15.638176638176638, "grad_norm": 0.20903274416923523, "learning_rate": 6.105844586139154e-06, "loss": 0.7254, "step": 13730 }, { "epoch": 15.639316239316239, "grad_norm": 0.1961694359779358, "learning_rate": 6.102793888991312e-06, "loss": 0.3647, "step": 13731 }, { "epoch": 15.640455840455841, "grad_norm": 0.23456496000289917, "learning_rate": 6.099743848195408e-06, "loss": 0.6749, "step": 13732 }, { "epoch": 15.641595441595442, "grad_norm": 0.19131501019001007, "learning_rate": 6.096694463857381e-06, "loss": 0.6785, "step": 13733 }, { "epoch": 15.642735042735042, "grad_norm": 0.17733091115951538, "learning_rate": 6.0936457360831256e-06, "loss": 0.6629, "step": 13734 }, { "epoch": 15.643874643874645, "grad_norm": 0.2198195606470108, "learning_rate": 6.090597664978553e-06, "loss": 0.589, "step": 13735 }, { "epoch": 15.645014245014245, "grad_norm": 0.179597869515419, "learning_rate": 6.087550250649532e-06, "loss": 0.6942, "step": 13736 }, { "epoch": 15.646153846153846, "grad_norm": 0.20365090668201447, "learning_rate": 6.084503493201893e-06, "loss": 0.6757, "step": 13737 }, { "epoch": 15.647293447293448, "grad_norm": 0.2194930911064148, "learning_rate": 6.081457392741466e-06, "loss": 0.7845, "step": 13738 }, { "epoch": 15.648433048433048, "grad_norm": 0.19855773448944092, "learning_rate": 6.078411949374049e-06, "loss": 0.5702, "step": 13739 }, { "epoch": 15.649572649572649, "grad_norm": 0.1931416541337967, "learning_rate": 6.075367163205428e-06, "loss": 0.6954, "step": 13740 }, { "epoch": 15.650712250712251, "grad_norm": 0.19040903449058533, "learning_rate": 6.072323034341343e-06, "loss": 0.6878, "step": 13741 }, { "epoch": 15.651851851851852, "grad_norm": 0.20580269396305084, "learning_rate": 6.0692795628875286e-06, "loss": 0.6766, "step": 13742 }, { "epoch": 15.652991452991452, "grad_norm": 0.22299405932426453, "learning_rate": 6.066236748949691e-06, "loss": 0.5875, "step": 13743 }, { "epoch": 15.654131054131055, "grad_norm": 0.2034991979598999, "learning_rate": 6.063194592633531e-06, "loss": 0.6345, "step": 13744 }, { "epoch": 15.655270655270655, "grad_norm": 0.1856851428747177, "learning_rate": 6.060153094044682e-06, "loss": 0.7327, "step": 13745 }, { "epoch": 15.656410256410256, "grad_norm": 0.18239708244800568, "learning_rate": 6.0571122532888106e-06, "loss": 0.6735, "step": 13746 }, { "epoch": 15.657549857549858, "grad_norm": 0.21312111616134644, "learning_rate": 6.054072070471531e-06, "loss": 0.5746, "step": 13747 }, { "epoch": 15.658689458689459, "grad_norm": 0.17315645515918732, "learning_rate": 6.051032545698421e-06, "loss": 0.893, "step": 13748 }, { "epoch": 15.65982905982906, "grad_norm": 0.21159929037094116, "learning_rate": 6.047993679075059e-06, "loss": 0.5457, "step": 13749 }, { "epoch": 15.660968660968662, "grad_norm": 0.2133578658103943, "learning_rate": 6.044955470706997e-06, "loss": 0.8786, "step": 13750 }, { "epoch": 15.662108262108262, "grad_norm": 0.20090807974338531, "learning_rate": 6.0419179206997664e-06, "loss": 0.7035, "step": 13751 }, { "epoch": 15.663247863247863, "grad_norm": 0.17659156024456024, "learning_rate": 6.038881029158852e-06, "loss": 0.7067, "step": 13752 }, { "epoch": 15.664387464387465, "grad_norm": 0.235539972782135, "learning_rate": 6.035844796189743e-06, "loss": 0.5339, "step": 13753 }, { "epoch": 15.665527065527066, "grad_norm": 0.18721574544906616, "learning_rate": 6.032809221897898e-06, "loss": 0.8193, "step": 13754 }, { "epoch": 15.666666666666666, "grad_norm": 0.21573007106781006, "learning_rate": 6.029774306388753e-06, "loss": 0.6192, "step": 13755 }, { "epoch": 15.667806267806268, "grad_norm": 0.2722488343715668, "learning_rate": 6.026740049767701e-06, "loss": 0.7024, "step": 13756 }, { "epoch": 15.668945868945869, "grad_norm": 0.21313630044460297, "learning_rate": 6.0237064521401515e-06, "loss": 0.7763, "step": 13757 }, { "epoch": 15.67008547008547, "grad_norm": 0.16580374538898468, "learning_rate": 6.020673513611469e-06, "loss": 0.8544, "step": 13758 }, { "epoch": 15.671225071225072, "grad_norm": 0.24010197818279266, "learning_rate": 6.017641234286983e-06, "loss": 0.3291, "step": 13759 }, { "epoch": 15.672364672364672, "grad_norm": 0.2201775759458542, "learning_rate": 6.014609614272018e-06, "loss": 0.7122, "step": 13760 }, { "epoch": 15.673504273504273, "grad_norm": 0.19828544557094574, "learning_rate": 6.0115786536718705e-06, "loss": 0.4014, "step": 13761 }, { "epoch": 15.674643874643875, "grad_norm": 0.20189552009105682, "learning_rate": 6.00854835259182e-06, "loss": 0.5587, "step": 13762 }, { "epoch": 15.675783475783476, "grad_norm": 0.17917750775814056, "learning_rate": 6.005518711137106e-06, "loss": 0.8163, "step": 13763 }, { "epoch": 15.676923076923076, "grad_norm": 0.17694270610809326, "learning_rate": 6.002489729412963e-06, "loss": 0.7236, "step": 13764 }, { "epoch": 15.678062678062679, "grad_norm": 0.204745352268219, "learning_rate": 5.999461407524596e-06, "loss": 0.5852, "step": 13765 }, { "epoch": 15.67920227920228, "grad_norm": 0.23212666809558868, "learning_rate": 5.99643374557719e-06, "loss": 0.6488, "step": 13766 }, { "epoch": 15.68034188034188, "grad_norm": 0.20231904089450836, "learning_rate": 5.993406743675889e-06, "loss": 0.6721, "step": 13767 }, { "epoch": 15.681481481481482, "grad_norm": 0.2235812097787857, "learning_rate": 5.990380401925847e-06, "loss": 0.5575, "step": 13768 }, { "epoch": 15.682621082621083, "grad_norm": 0.20220504701137543, "learning_rate": 5.987354720432167e-06, "loss": 0.6535, "step": 13769 }, { "epoch": 15.683760683760683, "grad_norm": 0.17467883229255676, "learning_rate": 5.984329699299954e-06, "loss": 0.738, "step": 13770 }, { "epoch": 15.684900284900285, "grad_norm": 0.18006768822669983, "learning_rate": 5.981305338634255e-06, "loss": 0.6707, "step": 13771 }, { "epoch": 15.686039886039886, "grad_norm": 0.21668808162212372, "learning_rate": 5.97828163854012e-06, "loss": 0.5729, "step": 13772 }, { "epoch": 15.687179487179487, "grad_norm": 0.1905527114868164, "learning_rate": 5.975258599122577e-06, "loss": 0.7817, "step": 13773 }, { "epoch": 15.688319088319089, "grad_norm": 0.20768201351165771, "learning_rate": 5.97223622048663e-06, "loss": 0.7568, "step": 13774 }, { "epoch": 15.68945868945869, "grad_norm": 0.28116485476493835, "learning_rate": 5.969214502737233e-06, "loss": 0.4476, "step": 13775 }, { "epoch": 15.69059829059829, "grad_norm": 0.21743696928024292, "learning_rate": 5.966193445979357e-06, "loss": 0.7654, "step": 13776 }, { "epoch": 15.691737891737892, "grad_norm": 0.17984333634376526, "learning_rate": 5.963173050317921e-06, "loss": 0.7383, "step": 13777 }, { "epoch": 15.692877492877493, "grad_norm": 0.2137390673160553, "learning_rate": 5.960153315857839e-06, "loss": 0.6606, "step": 13778 }, { "epoch": 15.694017094017093, "grad_norm": 0.1965833157300949, "learning_rate": 5.957134242703991e-06, "loss": 0.8008, "step": 13779 }, { "epoch": 15.695156695156696, "grad_norm": 0.22292731702327728, "learning_rate": 5.9541158309612404e-06, "loss": 0.7107, "step": 13780 }, { "epoch": 15.696296296296296, "grad_norm": 0.24760042130947113, "learning_rate": 5.9510980807344295e-06, "loss": 0.5346, "step": 13781 }, { "epoch": 15.697435897435897, "grad_norm": 0.2328803688287735, "learning_rate": 5.948080992128361e-06, "loss": 0.4708, "step": 13782 }, { "epoch": 15.698575498575499, "grad_norm": 0.21508914232254028, "learning_rate": 5.945064565247829e-06, "loss": 0.6997, "step": 13783 }, { "epoch": 15.6997150997151, "grad_norm": 0.1934168040752411, "learning_rate": 5.942048800197611e-06, "loss": 0.593, "step": 13784 }, { "epoch": 15.7008547008547, "grad_norm": 0.1619928926229477, "learning_rate": 5.939033697082453e-06, "loss": 0.5503, "step": 13785 }, { "epoch": 15.701994301994302, "grad_norm": 0.18001818656921387, "learning_rate": 5.936019256007063e-06, "loss": 0.3774, "step": 13786 }, { "epoch": 15.703133903133903, "grad_norm": 0.18391923606395721, "learning_rate": 5.933005477076151e-06, "loss": 0.7215, "step": 13787 }, { "epoch": 15.704273504273504, "grad_norm": 0.16934546828269958, "learning_rate": 5.929992360394396e-06, "loss": 0.6671, "step": 13788 }, { "epoch": 15.705413105413106, "grad_norm": 0.2290726751089096, "learning_rate": 5.926979906066446e-06, "loss": 0.7625, "step": 13789 }, { "epoch": 15.706552706552706, "grad_norm": 0.18426409363746643, "learning_rate": 5.923968114196937e-06, "loss": 0.6394, "step": 13790 }, { "epoch": 15.707692307692307, "grad_norm": 0.20047087967395782, "learning_rate": 5.920956984890474e-06, "loss": 0.5195, "step": 13791 }, { "epoch": 15.70883190883191, "grad_norm": 0.1657576560974121, "learning_rate": 5.917946518251649e-06, "loss": 0.6565, "step": 13792 }, { "epoch": 15.70997150997151, "grad_norm": 0.21902921795845032, "learning_rate": 5.914936714385011e-06, "loss": 0.7981, "step": 13793 }, { "epoch": 15.71111111111111, "grad_norm": 0.19958922266960144, "learning_rate": 5.911927573395104e-06, "loss": 0.6652, "step": 13794 }, { "epoch": 15.712250712250713, "grad_norm": 0.23519207537174225, "learning_rate": 5.908919095386445e-06, "loss": 0.7342, "step": 13795 }, { "epoch": 15.713390313390313, "grad_norm": 0.16713076829910278, "learning_rate": 5.905911280463536e-06, "loss": 0.7188, "step": 13796 }, { "epoch": 15.714529914529914, "grad_norm": 0.18714037537574768, "learning_rate": 5.902904128730826e-06, "loss": 0.7061, "step": 13797 }, { "epoch": 15.715669515669516, "grad_norm": 0.18389609456062317, "learning_rate": 5.899897640292776e-06, "loss": 0.6432, "step": 13798 }, { "epoch": 15.716809116809117, "grad_norm": 0.18872186541557312, "learning_rate": 5.896891815253805e-06, "loss": 0.6875, "step": 13799 }, { "epoch": 15.717948717948717, "grad_norm": 0.2201945185661316, "learning_rate": 5.893886653718317e-06, "loss": 0.7302, "step": 13800 }, { "epoch": 15.71908831908832, "grad_norm": 0.22730977833271027, "learning_rate": 5.890882155790686e-06, "loss": 0.8484, "step": 13801 }, { "epoch": 15.72022792022792, "grad_norm": 0.18116992712020874, "learning_rate": 5.887878321575266e-06, "loss": 0.7743, "step": 13802 }, { "epoch": 15.72136752136752, "grad_norm": 0.2321796417236328, "learning_rate": 5.8848751511764e-06, "loss": 0.4451, "step": 13803 }, { "epoch": 15.722507122507123, "grad_norm": 0.18436527252197266, "learning_rate": 5.88187264469838e-06, "loss": 0.9361, "step": 13804 }, { "epoch": 15.723646723646723, "grad_norm": 0.2078021615743637, "learning_rate": 5.8788708022454984e-06, "loss": 0.6999, "step": 13805 }, { "epoch": 15.724786324786324, "grad_norm": 0.18605077266693115, "learning_rate": 5.875869623922014e-06, "loss": 0.8222, "step": 13806 }, { "epoch": 15.725925925925926, "grad_norm": 0.1825038194656372, "learning_rate": 5.87286910983218e-06, "loss": 0.8306, "step": 13807 }, { "epoch": 15.727065527065527, "grad_norm": 0.24502849578857422, "learning_rate": 5.8698692600801905e-06, "loss": 0.7141, "step": 13808 }, { "epoch": 15.728205128205127, "grad_norm": 0.26166555285453796, "learning_rate": 5.866870074770253e-06, "loss": 0.5176, "step": 13809 }, { "epoch": 15.72934472934473, "grad_norm": 0.17108017206192017, "learning_rate": 5.863871554006534e-06, "loss": 0.6432, "step": 13810 }, { "epoch": 15.73048433048433, "grad_norm": 0.1834830939769745, "learning_rate": 5.860873697893179e-06, "loss": 0.6208, "step": 13811 }, { "epoch": 15.73162393162393, "grad_norm": 0.19852334260940552, "learning_rate": 5.857876506534313e-06, "loss": 0.5493, "step": 13812 }, { "epoch": 15.732763532763533, "grad_norm": 0.1938435286283493, "learning_rate": 5.854879980034039e-06, "loss": 0.7416, "step": 13813 }, { "epoch": 15.733903133903134, "grad_norm": 0.19227145612239838, "learning_rate": 5.851884118496432e-06, "loss": 0.6168, "step": 13814 }, { "epoch": 15.735042735042736, "grad_norm": 0.26434120535850525, "learning_rate": 5.848888922025553e-06, "loss": 0.4461, "step": 13815 }, { "epoch": 15.736182336182337, "grad_norm": 0.20051394402980804, "learning_rate": 5.845894390725421e-06, "loss": 0.5806, "step": 13816 }, { "epoch": 15.737321937321937, "grad_norm": 0.23545600473880768, "learning_rate": 5.842900524700051e-06, "loss": 0.6093, "step": 13817 }, { "epoch": 15.73846153846154, "grad_norm": 0.2062317430973053, "learning_rate": 5.839907324053425e-06, "loss": 0.7049, "step": 13818 }, { "epoch": 15.73960113960114, "grad_norm": 0.2594757676124573, "learning_rate": 5.836914788889519e-06, "loss": 0.6828, "step": 13819 }, { "epoch": 15.74074074074074, "grad_norm": 0.2455168217420578, "learning_rate": 5.8339229193122544e-06, "loss": 0.8516, "step": 13820 }, { "epoch": 15.741880341880343, "grad_norm": 0.2117108553647995, "learning_rate": 5.830931715425553e-06, "loss": 0.675, "step": 13821 }, { "epoch": 15.743019943019943, "grad_norm": 0.22628933191299438, "learning_rate": 5.827941177333307e-06, "loss": 0.6739, "step": 13822 }, { "epoch": 15.744159544159544, "grad_norm": 0.17668086290359497, "learning_rate": 5.824951305139387e-06, "loss": 0.7275, "step": 13823 }, { "epoch": 15.745299145299146, "grad_norm": 0.1669437438249588, "learning_rate": 5.821962098947642e-06, "loss": 0.957, "step": 13824 }, { "epoch": 15.746438746438747, "grad_norm": 0.22993691265583038, "learning_rate": 5.81897355886189e-06, "loss": 0.6483, "step": 13825 }, { "epoch": 15.747578347578347, "grad_norm": 0.23172104358673096, "learning_rate": 5.815985684985945e-06, "loss": 0.7056, "step": 13826 }, { "epoch": 15.74871794871795, "grad_norm": 0.1977315992116928, "learning_rate": 5.812998477423562e-06, "loss": 0.5988, "step": 13827 }, { "epoch": 15.74985754985755, "grad_norm": 0.18591248989105225, "learning_rate": 5.810011936278509e-06, "loss": 0.7036, "step": 13828 }, { "epoch": 15.75099715099715, "grad_norm": 0.20047298073768616, "learning_rate": 5.807026061654513e-06, "loss": 0.7494, "step": 13829 }, { "epoch": 15.752136752136753, "grad_norm": 0.22419284284114838, "learning_rate": 5.804040853655293e-06, "loss": 0.6049, "step": 13830 }, { "epoch": 15.753276353276354, "grad_norm": 0.19881680607795715, "learning_rate": 5.801056312384512e-06, "loss": 0.6976, "step": 13831 }, { "epoch": 15.754415954415954, "grad_norm": 0.22455157339572906, "learning_rate": 5.798072437945845e-06, "loss": 0.3943, "step": 13832 }, { "epoch": 15.755555555555556, "grad_norm": 0.17858223617076874, "learning_rate": 5.795089230442927e-06, "loss": 0.7836, "step": 13833 }, { "epoch": 15.756695156695157, "grad_norm": 0.1665424108505249, "learning_rate": 5.792106689979373e-06, "loss": 0.7137, "step": 13834 }, { "epoch": 15.757834757834758, "grad_norm": 0.2321743220090866, "learning_rate": 5.789124816658778e-06, "loss": 0.2724, "step": 13835 }, { "epoch": 15.75897435897436, "grad_norm": 0.15501980483531952, "learning_rate": 5.786143610584707e-06, "loss": 0.784, "step": 13836 }, { "epoch": 15.76011396011396, "grad_norm": 0.24102312326431274, "learning_rate": 5.783163071860715e-06, "loss": 0.7432, "step": 13837 }, { "epoch": 15.761253561253561, "grad_norm": 0.21585829555988312, "learning_rate": 5.780183200590306e-06, "loss": 0.6081, "step": 13838 }, { "epoch": 15.762393162393163, "grad_norm": 0.23042422533035278, "learning_rate": 5.77720399687699e-06, "loss": 0.5227, "step": 13839 }, { "epoch": 15.763532763532764, "grad_norm": 0.22137843072414398, "learning_rate": 5.774225460824243e-06, "loss": 0.7376, "step": 13840 }, { "epoch": 15.764672364672364, "grad_norm": 0.1850956231355667, "learning_rate": 5.771247592535523e-06, "loss": 0.6656, "step": 13841 }, { "epoch": 15.765811965811967, "grad_norm": 0.18516525626182556, "learning_rate": 5.7682703921142474e-06, "loss": 0.6501, "step": 13842 }, { "epoch": 15.766951566951567, "grad_norm": 0.22501075267791748, "learning_rate": 5.7652938596638286e-06, "loss": 0.7376, "step": 13843 }, { "epoch": 15.768091168091168, "grad_norm": 0.19035011529922485, "learning_rate": 5.762317995287641e-06, "loss": 0.615, "step": 13844 }, { "epoch": 15.76923076923077, "grad_norm": 0.18901343643665314, "learning_rate": 5.759342799089068e-06, "loss": 0.6742, "step": 13845 }, { "epoch": 15.77037037037037, "grad_norm": 0.24338462948799133, "learning_rate": 5.756368271171425e-06, "loss": 0.4775, "step": 13846 }, { "epoch": 15.771509971509971, "grad_norm": 0.21235759556293488, "learning_rate": 5.753394411638033e-06, "loss": 0.6555, "step": 13847 }, { "epoch": 15.772649572649573, "grad_norm": 0.202910378575325, "learning_rate": 5.7504212205921806e-06, "loss": 0.5172, "step": 13848 }, { "epoch": 15.773789173789174, "grad_norm": 0.16425685584545135, "learning_rate": 5.747448698137142e-06, "loss": 0.7835, "step": 13849 }, { "epoch": 15.774928774928775, "grad_norm": 0.21649502217769623, "learning_rate": 5.744476844376148e-06, "loss": 0.6296, "step": 13850 }, { "epoch": 15.776068376068377, "grad_norm": 0.18373684585094452, "learning_rate": 5.7415056594124274e-06, "loss": 0.7905, "step": 13851 }, { "epoch": 15.777207977207977, "grad_norm": 0.21412786841392517, "learning_rate": 5.738535143349178e-06, "loss": 0.7043, "step": 13852 }, { "epoch": 15.778347578347578, "grad_norm": 0.23520193994045258, "learning_rate": 5.735565296289574e-06, "loss": 0.472, "step": 13853 }, { "epoch": 15.77948717948718, "grad_norm": 0.15721645951271057, "learning_rate": 5.732596118336761e-06, "loss": 0.8033, "step": 13854 }, { "epoch": 15.78062678062678, "grad_norm": 0.22200199961662292, "learning_rate": 5.729627609593863e-06, "loss": 0.5786, "step": 13855 }, { "epoch": 15.781766381766381, "grad_norm": 0.19021391868591309, "learning_rate": 5.726659770164006e-06, "loss": 0.6212, "step": 13856 }, { "epoch": 15.782905982905984, "grad_norm": 0.21510300040245056, "learning_rate": 5.723692600150249e-06, "loss": 0.5331, "step": 13857 }, { "epoch": 15.784045584045584, "grad_norm": 0.1836748570203781, "learning_rate": 5.72072609965566e-06, "loss": 0.6963, "step": 13858 }, { "epoch": 15.785185185185185, "grad_norm": 0.17830480635166168, "learning_rate": 5.717760268783271e-06, "loss": 0.8189, "step": 13859 }, { "epoch": 15.786324786324787, "grad_norm": 0.2077512890100479, "learning_rate": 5.714795107636101e-06, "loss": 0.7427, "step": 13860 }, { "epoch": 15.787464387464388, "grad_norm": 0.19024313986301422, "learning_rate": 5.711830616317123e-06, "loss": 0.5893, "step": 13861 }, { "epoch": 15.788603988603988, "grad_norm": 0.20871800184249878, "learning_rate": 5.708866794929313e-06, "loss": 0.7435, "step": 13862 }, { "epoch": 15.78974358974359, "grad_norm": 0.19228936731815338, "learning_rate": 5.705903643575608e-06, "loss": 0.7723, "step": 13863 }, { "epoch": 15.790883190883191, "grad_norm": 0.18718524277210236, "learning_rate": 5.702941162358935e-06, "loss": 0.7106, "step": 13864 }, { "epoch": 15.792022792022792, "grad_norm": 0.19854268431663513, "learning_rate": 5.6999793513821785e-06, "loss": 0.6359, "step": 13865 }, { "epoch": 15.793162393162394, "grad_norm": 0.2403852492570877, "learning_rate": 5.697018210748206e-06, "loss": 0.4741, "step": 13866 }, { "epoch": 15.794301994301994, "grad_norm": 0.20986339449882507, "learning_rate": 5.694057740559889e-06, "loss": 0.7468, "step": 13867 }, { "epoch": 15.795441595441595, "grad_norm": 0.2499758005142212, "learning_rate": 5.691097940920029e-06, "loss": 0.4523, "step": 13868 }, { "epoch": 15.796581196581197, "grad_norm": 0.23681160807609558, "learning_rate": 5.688138811931437e-06, "loss": 0.5846, "step": 13869 }, { "epoch": 15.797720797720798, "grad_norm": 0.1891166716814041, "learning_rate": 5.685180353696895e-06, "loss": 0.6728, "step": 13870 }, { "epoch": 15.798860398860398, "grad_norm": 0.20599184930324554, "learning_rate": 5.682222566319159e-06, "loss": 0.8015, "step": 13871 }, { "epoch": 15.8, "grad_norm": 0.17965242266654968, "learning_rate": 5.679265449900953e-06, "loss": 0.88, "step": 13872 }, { "epoch": 15.801139601139601, "grad_norm": 0.18087612092494965, "learning_rate": 5.676309004544989e-06, "loss": 0.6112, "step": 13873 }, { "epoch": 15.802279202279202, "grad_norm": 0.2494555115699768, "learning_rate": 5.673353230353954e-06, "loss": 0.5585, "step": 13874 }, { "epoch": 15.803418803418804, "grad_norm": 0.21561256051063538, "learning_rate": 5.670398127430515e-06, "loss": 0.5682, "step": 13875 }, { "epoch": 15.804558404558405, "grad_norm": 0.1912786066532135, "learning_rate": 5.6674436958773e-06, "loss": 0.6802, "step": 13876 }, { "epoch": 15.805698005698005, "grad_norm": 0.24314413964748383, "learning_rate": 5.6644899357969235e-06, "loss": 0.5237, "step": 13877 }, { "epoch": 15.806837606837608, "grad_norm": 0.18922634422779083, "learning_rate": 5.661536847291998e-06, "loss": 0.7806, "step": 13878 }, { "epoch": 15.807977207977208, "grad_norm": 0.2381921410560608, "learning_rate": 5.658584430465072e-06, "loss": 0.5844, "step": 13879 }, { "epoch": 15.809116809116809, "grad_norm": 0.25195929408073425, "learning_rate": 5.655632685418699e-06, "loss": 0.4855, "step": 13880 }, { "epoch": 15.810256410256411, "grad_norm": 0.19405119121074677, "learning_rate": 5.652681612255398e-06, "loss": 0.6456, "step": 13881 }, { "epoch": 15.811396011396011, "grad_norm": 0.19623373448848724, "learning_rate": 5.64973121107768e-06, "loss": 0.5443, "step": 13882 }, { "epoch": 15.812535612535612, "grad_norm": 0.20332282781600952, "learning_rate": 5.646781481988e-06, "loss": 0.7967, "step": 13883 }, { "epoch": 15.813675213675214, "grad_norm": 0.26592984795570374, "learning_rate": 5.643832425088821e-06, "loss": 0.5693, "step": 13884 }, { "epoch": 15.814814814814815, "grad_norm": 0.19840383529663086, "learning_rate": 5.640884040482574e-06, "loss": 0.8236, "step": 13885 }, { "epoch": 15.815954415954415, "grad_norm": 0.22513839602470398, "learning_rate": 5.6379363282716675e-06, "loss": 0.6565, "step": 13886 }, { "epoch": 15.817094017094018, "grad_norm": 0.19240371882915497, "learning_rate": 5.63498928855847e-06, "loss": 0.6027, "step": 13887 }, { "epoch": 15.818233618233618, "grad_norm": 0.2948347330093384, "learning_rate": 5.632042921445352e-06, "loss": 0.7077, "step": 13888 }, { "epoch": 15.819373219373219, "grad_norm": 0.23926644027233124, "learning_rate": 5.629097227034635e-06, "loss": 0.4717, "step": 13889 }, { "epoch": 15.820512820512821, "grad_norm": 0.2090224325656891, "learning_rate": 5.626152205428656e-06, "loss": 0.5267, "step": 13890 }, { "epoch": 15.821652421652422, "grad_norm": 0.18229421973228455, "learning_rate": 5.6232078567296845e-06, "loss": 0.7855, "step": 13891 }, { "epoch": 15.822792022792022, "grad_norm": 0.18569529056549072, "learning_rate": 5.620264181039989e-06, "loss": 0.6802, "step": 13892 }, { "epoch": 15.823931623931625, "grad_norm": 0.20608794689178467, "learning_rate": 5.6173211784618125e-06, "loss": 0.6522, "step": 13893 }, { "epoch": 15.825071225071225, "grad_norm": 0.2130715250968933, "learning_rate": 5.614378849097382e-06, "loss": 0.5559, "step": 13894 }, { "epoch": 15.826210826210826, "grad_norm": 0.21179170906543732, "learning_rate": 5.611437193048877e-06, "loss": 0.808, "step": 13895 }, { "epoch": 15.827350427350428, "grad_norm": 0.21439428627490997, "learning_rate": 5.608496210418476e-06, "loss": 0.5765, "step": 13896 }, { "epoch": 15.828490028490029, "grad_norm": 0.16110475361347198, "learning_rate": 5.6055559013083295e-06, "loss": 0.6759, "step": 13897 }, { "epoch": 15.829629629629629, "grad_norm": 0.19680850207805634, "learning_rate": 5.602616265820568e-06, "loss": 0.5224, "step": 13898 }, { "epoch": 15.830769230769231, "grad_norm": 0.20465679466724396, "learning_rate": 5.5996773040572795e-06, "loss": 0.7224, "step": 13899 }, { "epoch": 15.831908831908832, "grad_norm": 0.21229791641235352, "learning_rate": 5.596739016120545e-06, "loss": 0.5391, "step": 13900 }, { "epoch": 15.833048433048432, "grad_norm": 0.20955872535705566, "learning_rate": 5.593801402112436e-06, "loss": 0.7048, "step": 13901 }, { "epoch": 15.834188034188035, "grad_norm": 0.24753613770008087, "learning_rate": 5.590864462134965e-06, "loss": 0.4146, "step": 13902 }, { "epoch": 15.835327635327635, "grad_norm": 0.17635095119476318, "learning_rate": 5.587928196290143e-06, "loss": 0.759, "step": 13903 }, { "epoch": 15.836467236467236, "grad_norm": 0.18288402259349823, "learning_rate": 5.584992604679961e-06, "loss": 0.7086, "step": 13904 }, { "epoch": 15.837606837606838, "grad_norm": 0.18178793787956238, "learning_rate": 5.582057687406386e-06, "loss": 0.6704, "step": 13905 }, { "epoch": 15.838746438746439, "grad_norm": 0.19446249306201935, "learning_rate": 5.579123444571338e-06, "loss": 0.6034, "step": 13906 }, { "epoch": 15.83988603988604, "grad_norm": 0.17122094333171844, "learning_rate": 5.576189876276741e-06, "loss": 0.6343, "step": 13907 }, { "epoch": 15.841025641025642, "grad_norm": 0.2367812544107437, "learning_rate": 5.573256982624483e-06, "loss": 0.4544, "step": 13908 }, { "epoch": 15.842165242165242, "grad_norm": 0.15342922508716583, "learning_rate": 5.570324763716445e-06, "loss": 0.5693, "step": 13909 }, { "epoch": 15.843304843304843, "grad_norm": 0.19328589737415314, "learning_rate": 5.5673932196544485e-06, "loss": 0.6786, "step": 13910 }, { "epoch": 15.844444444444445, "grad_norm": 0.22357133030891418, "learning_rate": 5.564462350540323e-06, "loss": 0.7917, "step": 13911 }, { "epoch": 15.845584045584046, "grad_norm": 0.22361674904823303, "learning_rate": 5.561532156475879e-06, "loss": 0.5247, "step": 13912 }, { "epoch": 15.846723646723646, "grad_norm": 0.20205283164978027, "learning_rate": 5.558602637562871e-06, "loss": 0.7483, "step": 13913 }, { "epoch": 15.847863247863248, "grad_norm": 0.205192431807518, "learning_rate": 5.55567379390306e-06, "loss": 0.6263, "step": 13914 }, { "epoch": 15.849002849002849, "grad_norm": 0.19363215565681458, "learning_rate": 5.552745625598169e-06, "loss": 0.864, "step": 13915 }, { "epoch": 15.85014245014245, "grad_norm": 0.24662263691425323, "learning_rate": 5.5498181327499095e-06, "loss": 0.7262, "step": 13916 }, { "epoch": 15.851282051282052, "grad_norm": 0.20658141374588013, "learning_rate": 5.546891315459948e-06, "loss": 0.5356, "step": 13917 }, { "epoch": 15.852421652421652, "grad_norm": 0.2509874999523163, "learning_rate": 5.543965173829949e-06, "loss": 0.6354, "step": 13918 }, { "epoch": 15.853561253561253, "grad_norm": 0.22638174891471863, "learning_rate": 5.54103970796154e-06, "loss": 0.3051, "step": 13919 }, { "epoch": 15.854700854700855, "grad_norm": 0.1767934411764145, "learning_rate": 5.5381149179563444e-06, "loss": 0.839, "step": 13920 }, { "epoch": 15.855840455840456, "grad_norm": 0.16481101512908936, "learning_rate": 5.5351908039159295e-06, "loss": 0.7852, "step": 13921 }, { "epoch": 15.856980056980056, "grad_norm": 0.22488468885421753, "learning_rate": 5.53226736594186e-06, "loss": 0.5728, "step": 13922 }, { "epoch": 15.858119658119659, "grad_norm": 0.2066558301448822, "learning_rate": 5.529344604135689e-06, "loss": 0.764, "step": 13923 }, { "epoch": 15.85925925925926, "grad_norm": 0.18836940824985504, "learning_rate": 5.526422518598928e-06, "loss": 0.6421, "step": 13924 }, { "epoch": 15.86039886039886, "grad_norm": 0.1548687368631363, "learning_rate": 5.523501109433063e-06, "loss": 0.5461, "step": 13925 }, { "epoch": 15.861538461538462, "grad_norm": 0.18227815628051758, "learning_rate": 5.520580376739562e-06, "loss": 0.5217, "step": 13926 }, { "epoch": 15.862678062678063, "grad_norm": 0.18795578181743622, "learning_rate": 5.5176603206198746e-06, "loss": 0.7182, "step": 13927 }, { "epoch": 15.863817663817663, "grad_norm": 0.1918874830007553, "learning_rate": 5.514740941175428e-06, "loss": 0.7007, "step": 13928 }, { "epoch": 15.864957264957265, "grad_norm": 0.18768425285816193, "learning_rate": 5.5118222385076056e-06, "loss": 0.7876, "step": 13929 }, { "epoch": 15.866096866096866, "grad_norm": 0.19266986846923828, "learning_rate": 5.508904212717789e-06, "loss": 0.5271, "step": 13930 }, { "epoch": 15.867236467236467, "grad_norm": 0.2516765594482422, "learning_rate": 5.5059868639073305e-06, "loss": 0.5491, "step": 13931 }, { "epoch": 15.868376068376069, "grad_norm": 0.260698139667511, "learning_rate": 5.5030701921775645e-06, "loss": 0.7285, "step": 13932 }, { "epoch": 15.86951566951567, "grad_norm": 0.17928585410118103, "learning_rate": 5.5001541976297724e-06, "loss": 0.605, "step": 13933 }, { "epoch": 15.87065527065527, "grad_norm": 0.20798265933990479, "learning_rate": 5.497238880365258e-06, "loss": 0.6186, "step": 13934 }, { "epoch": 15.871794871794872, "grad_norm": 0.19151423871517181, "learning_rate": 5.494324240485277e-06, "loss": 0.6788, "step": 13935 }, { "epoch": 15.872934472934473, "grad_norm": 0.216191828250885, "learning_rate": 5.4914102780910474e-06, "loss": 0.7635, "step": 13936 }, { "epoch": 15.874074074074073, "grad_norm": 0.2170773595571518, "learning_rate": 5.4884969932837895e-06, "loss": 0.7002, "step": 13937 }, { "epoch": 15.875213675213676, "grad_norm": 0.201252281665802, "learning_rate": 5.485584386164688e-06, "loss": 0.7075, "step": 13938 }, { "epoch": 15.876353276353276, "grad_norm": 0.2165941447019577, "learning_rate": 5.482672456834911e-06, "loss": 0.6854, "step": 13939 }, { "epoch": 15.877492877492877, "grad_norm": 0.22835934162139893, "learning_rate": 5.479761205395587e-06, "loss": 0.4414, "step": 13940 }, { "epoch": 15.878632478632479, "grad_norm": 0.23190470039844513, "learning_rate": 5.476850631947836e-06, "loss": 0.5443, "step": 13941 }, { "epoch": 15.87977207977208, "grad_norm": 0.22439834475517273, "learning_rate": 5.47394073659275e-06, "loss": 0.3856, "step": 13942 }, { "epoch": 15.88091168091168, "grad_norm": 0.2105487734079361, "learning_rate": 5.471031519431408e-06, "loss": 0.7456, "step": 13943 }, { "epoch": 15.882051282051282, "grad_norm": 0.18805000185966492, "learning_rate": 5.468122980564833e-06, "loss": 0.7202, "step": 13944 }, { "epoch": 15.883190883190883, "grad_norm": 0.2164195030927658, "learning_rate": 5.465215120094067e-06, "loss": 0.5933, "step": 13945 }, { "epoch": 15.884330484330484, "grad_norm": 0.1648697853088379, "learning_rate": 5.462307938120103e-06, "loss": 0.7291, "step": 13946 }, { "epoch": 15.885470085470086, "grad_norm": 0.2595181465148926, "learning_rate": 5.459401434743911e-06, "loss": 0.4453, "step": 13947 }, { "epoch": 15.886609686609686, "grad_norm": 0.18670688569545746, "learning_rate": 5.456495610066442e-06, "loss": 0.6382, "step": 13948 }, { "epoch": 15.887749287749287, "grad_norm": 0.20756377279758453, "learning_rate": 5.4535904641886265e-06, "loss": 0.808, "step": 13949 }, { "epoch": 15.88888888888889, "grad_norm": 0.2046612799167633, "learning_rate": 5.450685997211375e-06, "loss": 0.6016, "step": 13950 }, { "epoch": 15.89002849002849, "grad_norm": 0.24006542563438416, "learning_rate": 5.44778220923555e-06, "loss": 0.5609, "step": 13951 }, { "epoch": 15.89116809116809, "grad_norm": 0.1908271312713623, "learning_rate": 5.444879100362019e-06, "loss": 0.9716, "step": 13952 }, { "epoch": 15.892307692307693, "grad_norm": 0.19450271129608154, "learning_rate": 5.441976670691615e-06, "loss": 0.6022, "step": 13953 }, { "epoch": 15.893447293447293, "grad_norm": 0.23146270215511322, "learning_rate": 5.439074920325149e-06, "loss": 0.6238, "step": 13954 }, { "epoch": 15.894586894586894, "grad_norm": 0.2440861463546753, "learning_rate": 5.436173849363393e-06, "loss": 0.5213, "step": 13955 }, { "epoch": 15.895726495726496, "grad_norm": 0.1997850388288498, "learning_rate": 5.433273457907126e-06, "loss": 0.7222, "step": 13956 }, { "epoch": 15.896866096866097, "grad_norm": 0.21447142958641052, "learning_rate": 5.430373746057088e-06, "loss": 0.664, "step": 13957 }, { "epoch": 15.898005698005697, "grad_norm": 0.22905848920345306, "learning_rate": 5.42747471391398e-06, "loss": 0.8078, "step": 13958 }, { "epoch": 15.8991452991453, "grad_norm": 0.22671881318092346, "learning_rate": 5.424576361578499e-06, "loss": 0.6527, "step": 13959 }, { "epoch": 15.9002849002849, "grad_norm": 0.18757790327072144, "learning_rate": 5.421678689151313e-06, "loss": 0.6938, "step": 13960 }, { "epoch": 15.9014245014245, "grad_norm": 0.21530726552009583, "learning_rate": 5.418781696733074e-06, "loss": 0.6772, "step": 13961 }, { "epoch": 15.902564102564103, "grad_norm": 0.21243935823440552, "learning_rate": 5.415885384424388e-06, "loss": 0.5114, "step": 13962 }, { "epoch": 15.903703703703703, "grad_norm": 0.2314883917570114, "learning_rate": 5.412989752325862e-06, "loss": 0.55, "step": 13963 }, { "epoch": 15.904843304843304, "grad_norm": 0.16625399887561798, "learning_rate": 5.410094800538062e-06, "loss": 0.7069, "step": 13964 }, { "epoch": 15.905982905982906, "grad_norm": 0.24676908552646637, "learning_rate": 5.407200529161552e-06, "loss": 0.5306, "step": 13965 }, { "epoch": 15.907122507122507, "grad_norm": 0.1859494149684906, "learning_rate": 5.404306938296832e-06, "loss": 0.6454, "step": 13966 }, { "epoch": 15.908262108262107, "grad_norm": 0.18464453518390656, "learning_rate": 5.4014140280444296e-06, "loss": 0.9086, "step": 13967 }, { "epoch": 15.90940170940171, "grad_norm": 0.2452298402786255, "learning_rate": 5.398521798504813e-06, "loss": 0.6533, "step": 13968 }, { "epoch": 15.91054131054131, "grad_norm": 0.19522514939308167, "learning_rate": 5.3956302497784466e-06, "loss": 0.6949, "step": 13969 }, { "epoch": 15.91168091168091, "grad_norm": 0.23665203154087067, "learning_rate": 5.392739381965744e-06, "loss": 0.5759, "step": 13970 }, { "epoch": 15.912820512820513, "grad_norm": 0.1601344347000122, "learning_rate": 5.389849195167127e-06, "loss": 0.6828, "step": 13971 }, { "epoch": 15.913960113960114, "grad_norm": 0.23100726306438446, "learning_rate": 5.386959689482973e-06, "loss": 0.4421, "step": 13972 }, { "epoch": 15.915099715099714, "grad_norm": 0.19434283673763275, "learning_rate": 5.384070865013652e-06, "loss": 0.7509, "step": 13973 }, { "epoch": 15.916239316239317, "grad_norm": 0.25062304735183716, "learning_rate": 5.3811827218594874e-06, "loss": 0.3491, "step": 13974 }, { "epoch": 15.917378917378917, "grad_norm": 0.18781554698944092, "learning_rate": 5.3782952601208e-06, "loss": 0.7871, "step": 13975 }, { "epoch": 15.918518518518518, "grad_norm": 0.20198141038417816, "learning_rate": 5.3754084798978754e-06, "loss": 0.5347, "step": 13976 }, { "epoch": 15.91965811965812, "grad_norm": 0.2023342251777649, "learning_rate": 5.372522381290984e-06, "loss": 0.777, "step": 13977 }, { "epoch": 15.92079772079772, "grad_norm": 0.17312119901180267, "learning_rate": 5.3696369644003654e-06, "loss": 0.5332, "step": 13978 }, { "epoch": 15.921937321937321, "grad_norm": 0.2182493656873703, "learning_rate": 5.366752229326241e-06, "loss": 0.6611, "step": 13979 }, { "epoch": 15.923076923076923, "grad_norm": 0.21295364201068878, "learning_rate": 5.363868176168807e-06, "loss": 0.6564, "step": 13980 }, { "epoch": 15.924216524216524, "grad_norm": 0.24037398397922516, "learning_rate": 5.360984805028227e-06, "loss": 0.5228, "step": 13981 }, { "epoch": 15.925356125356124, "grad_norm": 0.19931496679782867, "learning_rate": 5.3581021160046486e-06, "loss": 0.5939, "step": 13982 }, { "epoch": 15.926495726495727, "grad_norm": 0.19876810908317566, "learning_rate": 5.355220109198203e-06, "loss": 0.5636, "step": 13983 }, { "epoch": 15.927635327635327, "grad_norm": 0.19912245869636536, "learning_rate": 5.352338784708991e-06, "loss": 0.404, "step": 13984 }, { "epoch": 15.928774928774928, "grad_norm": 0.2745014429092407, "learning_rate": 5.349458142637076e-06, "loss": 0.4361, "step": 13985 }, { "epoch": 15.92991452991453, "grad_norm": 0.19565151631832123, "learning_rate": 5.34657818308252e-06, "loss": 0.7867, "step": 13986 }, { "epoch": 15.93105413105413, "grad_norm": 0.1679268777370453, "learning_rate": 5.343698906145353e-06, "loss": 0.6697, "step": 13987 }, { "epoch": 15.932193732193731, "grad_norm": 0.17869971692562103, "learning_rate": 5.340820311925576e-06, "loss": 0.7855, "step": 13988 }, { "epoch": 15.933333333333334, "grad_norm": 0.19192154705524445, "learning_rate": 5.337942400523174e-06, "loss": 0.7875, "step": 13989 }, { "epoch": 15.934472934472934, "grad_norm": 0.27600088715553284, "learning_rate": 5.335065172038101e-06, "loss": 0.4982, "step": 13990 }, { "epoch": 15.935612535612536, "grad_norm": 0.21197514235973358, "learning_rate": 5.3321886265703035e-06, "loss": 0.6903, "step": 13991 }, { "epoch": 15.936752136752137, "grad_norm": 0.20699726045131683, "learning_rate": 5.329312764219671e-06, "loss": 0.642, "step": 13992 }, { "epoch": 15.937891737891738, "grad_norm": 0.1790648102760315, "learning_rate": 5.326437585086102e-06, "loss": 0.8265, "step": 13993 }, { "epoch": 15.93903133903134, "grad_norm": 0.2005932480096817, "learning_rate": 5.323563089269459e-06, "loss": 0.6435, "step": 13994 }, { "epoch": 15.94017094017094, "grad_norm": 0.22714604437351227, "learning_rate": 5.320689276869586e-06, "loss": 0.3956, "step": 13995 }, { "epoch": 15.941310541310541, "grad_norm": 0.23463019728660583, "learning_rate": 5.317816147986287e-06, "loss": 0.7648, "step": 13996 }, { "epoch": 15.942450142450143, "grad_norm": 0.20150230824947357, "learning_rate": 5.314943702719361e-06, "loss": 0.7364, "step": 13997 }, { "epoch": 15.943589743589744, "grad_norm": 0.19925189018249512, "learning_rate": 5.312071941168572e-06, "loss": 0.6879, "step": 13998 }, { "epoch": 15.944729344729344, "grad_norm": 0.22107404470443726, "learning_rate": 5.309200863433667e-06, "loss": 0.5548, "step": 13999 }, { "epoch": 15.945868945868947, "grad_norm": 0.15994355082511902, "learning_rate": 5.3063304696143655e-06, "loss": 0.5801, "step": 14000 }, { "epoch": 15.947008547008547, "grad_norm": 0.17757205665111542, "learning_rate": 5.303460759810366e-06, "loss": 0.7727, "step": 14001 }, { "epoch": 15.948148148148148, "grad_norm": 0.2089131623506546, "learning_rate": 5.300591734121338e-06, "loss": 0.5178, "step": 14002 }, { "epoch": 15.94928774928775, "grad_norm": 0.1872301995754242, "learning_rate": 5.297723392646942e-06, "loss": 0.7741, "step": 14003 }, { "epoch": 15.95042735042735, "grad_norm": 0.18554730713367462, "learning_rate": 5.294855735486784e-06, "loss": 0.7561, "step": 14004 }, { "epoch": 15.951566951566951, "grad_norm": 0.16955998539924622, "learning_rate": 5.291988762740477e-06, "loss": 0.7384, "step": 14005 }, { "epoch": 15.952706552706553, "grad_norm": 0.18044869601726532, "learning_rate": 5.289122474507599e-06, "loss": 0.7132, "step": 14006 }, { "epoch": 15.953846153846154, "grad_norm": 0.26764407753944397, "learning_rate": 5.286256870887707e-06, "loss": 0.4791, "step": 14007 }, { "epoch": 15.954985754985755, "grad_norm": 0.19423027336597443, "learning_rate": 5.283391951980324e-06, "loss": 0.5809, "step": 14008 }, { "epoch": 15.956125356125357, "grad_norm": 0.2263379991054535, "learning_rate": 5.280527717884956e-06, "loss": 0.5008, "step": 14009 }, { "epoch": 15.957264957264957, "grad_norm": 0.25239741802215576, "learning_rate": 5.277664168701088e-06, "loss": 0.6367, "step": 14010 }, { "epoch": 15.958404558404558, "grad_norm": 0.16616038978099823, "learning_rate": 5.274801304528182e-06, "loss": 0.7751, "step": 14011 }, { "epoch": 15.95954415954416, "grad_norm": 0.23653560876846313, "learning_rate": 5.271939125465672e-06, "loss": 0.6919, "step": 14012 }, { "epoch": 15.96068376068376, "grad_norm": 0.23039647936820984, "learning_rate": 5.269077631612967e-06, "loss": 0.6409, "step": 14013 }, { "epoch": 15.961823361823361, "grad_norm": 0.1734488606452942, "learning_rate": 5.2662168230694645e-06, "loss": 0.8209, "step": 14014 }, { "epoch": 15.962962962962964, "grad_norm": 0.22865897417068481, "learning_rate": 5.263356699934513e-06, "loss": 0.3679, "step": 14015 }, { "epoch": 15.964102564102564, "grad_norm": 0.20063550770282745, "learning_rate": 5.260497262307456e-06, "loss": 0.7736, "step": 14016 }, { "epoch": 15.965242165242165, "grad_norm": 0.17283949255943298, "learning_rate": 5.2576385102876155e-06, "loss": 0.7293, "step": 14017 }, { "epoch": 15.966381766381767, "grad_norm": 0.30130085349082947, "learning_rate": 5.254780443974289e-06, "loss": 0.53, "step": 14018 }, { "epoch": 15.967521367521368, "grad_norm": 0.22147002816200256, "learning_rate": 5.2519230634667295e-06, "loss": 0.4588, "step": 14019 }, { "epoch": 15.968660968660968, "grad_norm": 0.21945025026798248, "learning_rate": 5.249066368864189e-06, "loss": 0.4553, "step": 14020 }, { "epoch": 15.96980056980057, "grad_norm": 0.22448524832725525, "learning_rate": 5.246210360265888e-06, "loss": 0.4898, "step": 14021 }, { "epoch": 15.970940170940171, "grad_norm": 0.21025843918323517, "learning_rate": 5.243355037771028e-06, "loss": 0.554, "step": 14022 }, { "epoch": 15.972079772079772, "grad_norm": 0.315996915102005, "learning_rate": 5.240500401478774e-06, "loss": 0.5518, "step": 14023 }, { "epoch": 15.973219373219374, "grad_norm": 0.22453878819942474, "learning_rate": 5.237646451488282e-06, "loss": 0.6244, "step": 14024 }, { "epoch": 15.974358974358974, "grad_norm": 0.22095011174678802, "learning_rate": 5.234793187898682e-06, "loss": 0.7162, "step": 14025 }, { "epoch": 15.975498575498575, "grad_norm": 0.20376168191432953, "learning_rate": 5.231940610809063e-06, "loss": 0.563, "step": 14026 }, { "epoch": 15.976638176638177, "grad_norm": 0.21002911031246185, "learning_rate": 5.229088720318507e-06, "loss": 0.5615, "step": 14027 }, { "epoch": 15.977777777777778, "grad_norm": 0.22033274173736572, "learning_rate": 5.226237516526072e-06, "loss": 0.6323, "step": 14028 }, { "epoch": 15.978917378917378, "grad_norm": 0.24790626764297485, "learning_rate": 5.223386999530791e-06, "loss": 0.518, "step": 14029 }, { "epoch": 15.98005698005698, "grad_norm": 0.17082878947257996, "learning_rate": 5.2205371694316606e-06, "loss": 0.7464, "step": 14030 }, { "epoch": 15.981196581196581, "grad_norm": 0.2568652331829071, "learning_rate": 5.217688026327666e-06, "loss": 0.6016, "step": 14031 }, { "epoch": 15.982336182336182, "grad_norm": 0.17630797624588013, "learning_rate": 5.21483957031777e-06, "loss": 0.5679, "step": 14032 }, { "epoch": 15.983475783475784, "grad_norm": 0.2258668839931488, "learning_rate": 5.2119918015009036e-06, "loss": 0.5935, "step": 14033 }, { "epoch": 15.984615384615385, "grad_norm": 0.18470749258995056, "learning_rate": 5.209144719975981e-06, "loss": 0.8318, "step": 14034 }, { "epoch": 15.985754985754985, "grad_norm": 0.19650182127952576, "learning_rate": 5.206298325841885e-06, "loss": 0.7677, "step": 14035 }, { "epoch": 15.986894586894588, "grad_norm": 0.18926526606082916, "learning_rate": 5.203452619197488e-06, "loss": 0.7555, "step": 14036 }, { "epoch": 15.988034188034188, "grad_norm": 0.24393440783023834, "learning_rate": 5.200607600141619e-06, "loss": 0.5547, "step": 14037 }, { "epoch": 15.989173789173789, "grad_norm": 0.2429366260766983, "learning_rate": 5.197763268773093e-06, "loss": 0.4874, "step": 14038 }, { "epoch": 15.990313390313391, "grad_norm": 0.19000403583049774, "learning_rate": 5.194919625190706e-06, "loss": 0.6184, "step": 14039 }, { "epoch": 15.991452991452991, "grad_norm": 0.21088244020938873, "learning_rate": 5.192076669493231e-06, "loss": 0.4815, "step": 14040 }, { "epoch": 15.992592592592592, "grad_norm": 0.17279838025569916, "learning_rate": 5.1892344017794e-06, "loss": 0.5564, "step": 14041 }, { "epoch": 15.993732193732194, "grad_norm": 0.24155011773109436, "learning_rate": 5.186392822147934e-06, "loss": 0.6469, "step": 14042 }, { "epoch": 15.994871794871795, "grad_norm": 0.23470929265022278, "learning_rate": 5.1835519306975305e-06, "loss": 0.5868, "step": 14043 }, { "epoch": 15.996011396011395, "grad_norm": 0.17550434172153473, "learning_rate": 5.180711727526877e-06, "loss": 0.6681, "step": 14044 }, { "epoch": 15.997150997150998, "grad_norm": 0.188474640250206, "learning_rate": 5.1778722127346e-06, "loss": 0.7394, "step": 14045 }, { "epoch": 15.998290598290598, "grad_norm": 0.27023807168006897, "learning_rate": 5.1750333864193315e-06, "loss": 0.4689, "step": 14046 }, { "epoch": 15.999430199430199, "grad_norm": 0.2202572524547577, "learning_rate": 5.1721952486796736e-06, "loss": 0.7047, "step": 14047 }, { "epoch": 16.0, "grad_norm": 0.3407000005245209, "learning_rate": 5.169357799614208e-06, "loss": 1.1228, "step": 14048 }, { "epoch": 16.001139601139602, "grad_norm": 0.17774610221385956, "learning_rate": 5.166521039321473e-06, "loss": 0.6193, "step": 14049 }, { "epoch": 16.0022792022792, "grad_norm": 0.1734650731086731, "learning_rate": 5.163684967900007e-06, "loss": 0.8355, "step": 14050 }, { "epoch": 16.003418803418803, "grad_norm": 0.18855872750282288, "learning_rate": 5.16084958544831e-06, "loss": 0.595, "step": 14051 }, { "epoch": 16.004558404558406, "grad_norm": 0.17589649558067322, "learning_rate": 5.1580148920648715e-06, "loss": 0.932, "step": 14052 }, { "epoch": 16.005698005698004, "grad_norm": 0.1905054897069931, "learning_rate": 5.155180887848135e-06, "loss": 0.6708, "step": 14053 }, { "epoch": 16.006837606837607, "grad_norm": 0.203451007604599, "learning_rate": 5.152347572896535e-06, "loss": 0.6731, "step": 14054 }, { "epoch": 16.00797720797721, "grad_norm": 0.1736338585615158, "learning_rate": 5.149514947308495e-06, "loss": 0.7534, "step": 14055 }, { "epoch": 16.009116809116808, "grad_norm": 0.18473488092422485, "learning_rate": 5.146683011182388e-06, "loss": 0.5515, "step": 14056 }, { "epoch": 16.01025641025641, "grad_norm": 0.19602778553962708, "learning_rate": 5.143851764616572e-06, "loss": 0.4613, "step": 14057 }, { "epoch": 16.011396011396013, "grad_norm": 0.1879139393568039, "learning_rate": 5.14102120770939e-06, "loss": 0.5354, "step": 14058 }, { "epoch": 16.01253561253561, "grad_norm": 0.21041598916053772, "learning_rate": 5.138191340559162e-06, "loss": 0.4618, "step": 14059 }, { "epoch": 16.013675213675214, "grad_norm": 0.19878196716308594, "learning_rate": 5.1353621632641625e-06, "loss": 0.581, "step": 14060 }, { "epoch": 16.014814814814816, "grad_norm": 0.21880239248275757, "learning_rate": 5.13253367592266e-06, "loss": 0.6558, "step": 14061 }, { "epoch": 16.015954415954415, "grad_norm": 0.21958042681217194, "learning_rate": 5.129705878632901e-06, "loss": 0.6628, "step": 14062 }, { "epoch": 16.017094017094017, "grad_norm": 0.1946631819009781, "learning_rate": 5.126878771493107e-06, "loss": 0.7045, "step": 14063 }, { "epoch": 16.01823361823362, "grad_norm": 0.19736795127391815, "learning_rate": 5.124052354601458e-06, "loss": 0.9372, "step": 14064 }, { "epoch": 16.019373219373218, "grad_norm": 0.17857308685779572, "learning_rate": 5.1212266280561225e-06, "loss": 0.5775, "step": 14065 }, { "epoch": 16.02051282051282, "grad_norm": 0.1766318827867508, "learning_rate": 5.118401591955269e-06, "loss": 0.77, "step": 14066 }, { "epoch": 16.021652421652423, "grad_norm": 0.16231514513492584, "learning_rate": 5.115577246396991e-06, "loss": 0.6573, "step": 14067 }, { "epoch": 16.02279202279202, "grad_norm": 0.21469400823116302, "learning_rate": 5.112753591479402e-06, "loss": 0.5812, "step": 14068 }, { "epoch": 16.023931623931624, "grad_norm": 0.1972481906414032, "learning_rate": 5.109930627300569e-06, "loss": 0.7897, "step": 14069 }, { "epoch": 16.025071225071226, "grad_norm": 0.20298215746879578, "learning_rate": 5.107108353958551e-06, "loss": 0.4743, "step": 14070 }, { "epoch": 16.026210826210825, "grad_norm": 0.20357541739940643, "learning_rate": 5.104286771551356e-06, "loss": 0.5591, "step": 14071 }, { "epoch": 16.027350427350427, "grad_norm": 0.24694040417671204, "learning_rate": 5.101465880176998e-06, "loss": 0.6901, "step": 14072 }, { "epoch": 16.02849002849003, "grad_norm": 0.19539234042167664, "learning_rate": 5.098645679933451e-06, "loss": 0.7029, "step": 14073 }, { "epoch": 16.02962962962963, "grad_norm": 0.19871358573436737, "learning_rate": 5.095826170918674e-06, "loss": 0.7805, "step": 14074 }, { "epoch": 16.03076923076923, "grad_norm": 0.17123480141162872, "learning_rate": 5.093007353230584e-06, "loss": 0.5841, "step": 14075 }, { "epoch": 16.031908831908833, "grad_norm": 0.18302345275878906, "learning_rate": 5.090189226967085e-06, "loss": 0.5956, "step": 14076 }, { "epoch": 16.03304843304843, "grad_norm": 0.1640847623348236, "learning_rate": 5.087371792226084e-06, "loss": 0.8549, "step": 14077 }, { "epoch": 16.034188034188034, "grad_norm": 0.22754384577274323, "learning_rate": 5.08455504910541e-06, "loss": 0.6512, "step": 14078 }, { "epoch": 16.035327635327636, "grad_norm": 0.2154824435710907, "learning_rate": 5.081738997702909e-06, "loss": 0.6456, "step": 14079 }, { "epoch": 16.036467236467235, "grad_norm": 0.19186750054359436, "learning_rate": 5.078923638116387e-06, "loss": 0.7609, "step": 14080 }, { "epoch": 16.037606837606837, "grad_norm": 0.21435517072677612, "learning_rate": 5.07610897044363e-06, "loss": 0.6266, "step": 14081 }, { "epoch": 16.03874643874644, "grad_norm": 0.25843796133995056, "learning_rate": 5.073294994782407e-06, "loss": 0.3241, "step": 14082 }, { "epoch": 16.03988603988604, "grad_norm": 0.2305968701839447, "learning_rate": 5.070481711230441e-06, "loss": 0.646, "step": 14083 }, { "epoch": 16.04102564102564, "grad_norm": 0.1847716122865677, "learning_rate": 5.0676691198854485e-06, "loss": 0.7921, "step": 14084 }, { "epoch": 16.042165242165243, "grad_norm": 0.19992592930793762, "learning_rate": 5.0648572208451235e-06, "loss": 0.4748, "step": 14085 }, { "epoch": 16.043304843304842, "grad_norm": 0.22528088092803955, "learning_rate": 5.062046014207136e-06, "loss": 0.4691, "step": 14086 }, { "epoch": 16.044444444444444, "grad_norm": 0.23095327615737915, "learning_rate": 5.059235500069106e-06, "loss": 0.6832, "step": 14087 }, { "epoch": 16.045584045584047, "grad_norm": 0.2332068383693695, "learning_rate": 5.056425678528673e-06, "loss": 0.3386, "step": 14088 }, { "epoch": 16.046723646723645, "grad_norm": 0.16111916303634644, "learning_rate": 5.053616549683427e-06, "loss": 0.7517, "step": 14089 }, { "epoch": 16.047863247863248, "grad_norm": 0.2254699319601059, "learning_rate": 5.050808113630925e-06, "loss": 0.6548, "step": 14090 }, { "epoch": 16.04900284900285, "grad_norm": 0.21958674490451813, "learning_rate": 5.048000370468717e-06, "loss": 0.5918, "step": 14091 }, { "epoch": 16.05014245014245, "grad_norm": 0.1744617074728012, "learning_rate": 5.045193320294323e-06, "loss": 0.7799, "step": 14092 }, { "epoch": 16.05128205128205, "grad_norm": 0.22298863530158997, "learning_rate": 5.0423869632052475e-06, "loss": 0.4798, "step": 14093 }, { "epoch": 16.052421652421653, "grad_norm": 0.22837935388088226, "learning_rate": 5.0395812992989535e-06, "loss": 0.5823, "step": 14094 }, { "epoch": 16.053561253561252, "grad_norm": 0.21403367817401886, "learning_rate": 5.0367763286728875e-06, "loss": 0.6041, "step": 14095 }, { "epoch": 16.054700854700855, "grad_norm": 0.21587662398815155, "learning_rate": 5.033972051424482e-06, "loss": 0.3409, "step": 14096 }, { "epoch": 16.055840455840457, "grad_norm": 0.18038514256477356, "learning_rate": 5.03116846765114e-06, "loss": 0.7024, "step": 14097 }, { "epoch": 16.056980056980056, "grad_norm": 0.2202543467283249, "learning_rate": 5.028365577450217e-06, "loss": 0.507, "step": 14098 }, { "epoch": 16.058119658119658, "grad_norm": 0.22714479267597198, "learning_rate": 5.025563380919088e-06, "loss": 0.2714, "step": 14099 }, { "epoch": 16.05925925925926, "grad_norm": 0.18912501633167267, "learning_rate": 5.02276187815508e-06, "loss": 0.7436, "step": 14100 }, { "epoch": 16.06039886039886, "grad_norm": 0.16916537284851074, "learning_rate": 5.01996106925548e-06, "loss": 0.6765, "step": 14101 }, { "epoch": 16.06153846153846, "grad_norm": 0.26116690039634705, "learning_rate": 5.017160954317579e-06, "loss": 0.5046, "step": 14102 }, { "epoch": 16.062678062678064, "grad_norm": 0.17816010117530823, "learning_rate": 5.01436153343863e-06, "loss": 0.7736, "step": 14103 }, { "epoch": 16.063817663817662, "grad_norm": 0.2398657202720642, "learning_rate": 5.0115628067158745e-06, "loss": 0.6005, "step": 14104 }, { "epoch": 16.064957264957265, "grad_norm": 0.19028586149215698, "learning_rate": 5.008764774246502e-06, "loss": 0.891, "step": 14105 }, { "epoch": 16.066096866096867, "grad_norm": 0.18784038722515106, "learning_rate": 5.005967436127706e-06, "loss": 0.7326, "step": 14106 }, { "epoch": 16.067236467236466, "grad_norm": 0.2758425176143646, "learning_rate": 5.003170792456646e-06, "loss": 0.5643, "step": 14107 }, { "epoch": 16.068376068376068, "grad_norm": 0.17467260360717773, "learning_rate": 5.000374843330463e-06, "loss": 0.772, "step": 14108 }, { "epoch": 16.06951566951567, "grad_norm": 0.19102101027965546, "learning_rate": 4.997579588846246e-06, "loss": 0.8077, "step": 14109 }, { "epoch": 16.07065527065527, "grad_norm": 0.19210946559906006, "learning_rate": 4.994785029101104e-06, "loss": 0.7675, "step": 14110 }, { "epoch": 16.07179487179487, "grad_norm": 0.23584634065628052, "learning_rate": 4.991991164192097e-06, "loss": 0.5919, "step": 14111 }, { "epoch": 16.072934472934474, "grad_norm": 0.2016778588294983, "learning_rate": 4.989197994216255e-06, "loss": 0.573, "step": 14112 }, { "epoch": 16.074074074074073, "grad_norm": 0.1590469479560852, "learning_rate": 4.986405519270595e-06, "loss": 0.6816, "step": 14113 }, { "epoch": 16.075213675213675, "grad_norm": 0.2068215012550354, "learning_rate": 4.983613739452109e-06, "loss": 0.585, "step": 14114 }, { "epoch": 16.076353276353277, "grad_norm": 0.19409841299057007, "learning_rate": 4.9808226548577725e-06, "loss": 0.642, "step": 14115 }, { "epoch": 16.077492877492876, "grad_norm": 0.20613178610801697, "learning_rate": 4.978032265584509e-06, "loss": 0.608, "step": 14116 }, { "epoch": 16.07863247863248, "grad_norm": 0.24388591945171356, "learning_rate": 4.975242571729244e-06, "loss": 0.4963, "step": 14117 }, { "epoch": 16.07977207977208, "grad_norm": 0.18532788753509521, "learning_rate": 4.972453573388875e-06, "loss": 0.5111, "step": 14118 }, { "epoch": 16.08091168091168, "grad_norm": 0.2667822539806366, "learning_rate": 4.969665270660276e-06, "loss": 0.5238, "step": 14119 }, { "epoch": 16.08205128205128, "grad_norm": 0.15260297060012817, "learning_rate": 4.966877663640279e-06, "loss": 0.6686, "step": 14120 }, { "epoch": 16.083190883190884, "grad_norm": 0.16568130254745483, "learning_rate": 4.964090752425704e-06, "loss": 0.7475, "step": 14121 }, { "epoch": 16.084330484330483, "grad_norm": 0.2504945397377014, "learning_rate": 4.9613045371133644e-06, "loss": 0.4494, "step": 14122 }, { "epoch": 16.085470085470085, "grad_norm": 0.20629118382930756, "learning_rate": 4.958519017800031e-06, "loss": 0.7373, "step": 14123 }, { "epoch": 16.086609686609687, "grad_norm": 0.21487271785736084, "learning_rate": 4.95573419458244e-06, "loss": 0.3647, "step": 14124 }, { "epoch": 16.087749287749286, "grad_norm": 0.2165013551712036, "learning_rate": 4.952950067557324e-06, "loss": 0.4537, "step": 14125 }, { "epoch": 16.08888888888889, "grad_norm": 0.1743670254945755, "learning_rate": 4.95016663682138e-06, "loss": 0.6269, "step": 14126 }, { "epoch": 16.09002849002849, "grad_norm": 0.2542421519756317, "learning_rate": 4.947383902471292e-06, "loss": 0.5258, "step": 14127 }, { "epoch": 16.09116809116809, "grad_norm": 0.16560454666614532, "learning_rate": 4.944601864603698e-06, "loss": 0.8407, "step": 14128 }, { "epoch": 16.092307692307692, "grad_norm": 0.18629920482635498, "learning_rate": 4.941820523315238e-06, "loss": 0.8053, "step": 14129 }, { "epoch": 16.093447293447294, "grad_norm": 0.20721445977687836, "learning_rate": 4.9390398787025085e-06, "loss": 0.6357, "step": 14130 }, { "epoch": 16.094586894586893, "grad_norm": 0.20368462800979614, "learning_rate": 4.9362599308621e-06, "loss": 0.714, "step": 14131 }, { "epoch": 16.095726495726495, "grad_norm": 0.15554797649383545, "learning_rate": 4.933480679890545e-06, "loss": 0.6415, "step": 14132 }, { "epoch": 16.096866096866098, "grad_norm": 0.17425628006458282, "learning_rate": 4.930702125884396e-06, "loss": 0.7755, "step": 14133 }, { "epoch": 16.098005698005696, "grad_norm": 0.19579556584358215, "learning_rate": 4.927924268940159e-06, "loss": 0.6669, "step": 14134 }, { "epoch": 16.0991452991453, "grad_norm": 0.19466902315616608, "learning_rate": 4.925147109154304e-06, "loss": 0.5933, "step": 14135 }, { "epoch": 16.1002849002849, "grad_norm": 0.18656110763549805, "learning_rate": 4.922370646623298e-06, "loss": 0.6976, "step": 14136 }, { "epoch": 16.1014245014245, "grad_norm": 0.17684151232242584, "learning_rate": 4.919594881443573e-06, "loss": 0.6015, "step": 14137 }, { "epoch": 16.102564102564102, "grad_norm": 0.17531482875347137, "learning_rate": 4.916819813711543e-06, "loss": 0.5635, "step": 14138 }, { "epoch": 16.103703703703705, "grad_norm": 0.20434801280498505, "learning_rate": 4.914045443523585e-06, "loss": 0.7176, "step": 14139 }, { "epoch": 16.104843304843303, "grad_norm": 0.1615433245897293, "learning_rate": 4.911271770976064e-06, "loss": 0.708, "step": 14140 }, { "epoch": 16.105982905982906, "grad_norm": 0.1820320039987564, "learning_rate": 4.9084987961653205e-06, "loss": 0.8438, "step": 14141 }, { "epoch": 16.107122507122508, "grad_norm": 0.1728564351797104, "learning_rate": 4.9057265191876725e-06, "loss": 0.7711, "step": 14142 }, { "epoch": 16.108262108262107, "grad_norm": 0.1594773530960083, "learning_rate": 4.902954940139387e-06, "loss": 0.8792, "step": 14143 }, { "epoch": 16.10940170940171, "grad_norm": 0.19131292402744293, "learning_rate": 4.9001840591167525e-06, "loss": 0.8845, "step": 14144 }, { "epoch": 16.11054131054131, "grad_norm": 0.24351909756660461, "learning_rate": 4.897413876216003e-06, "loss": 0.4772, "step": 14145 }, { "epoch": 16.11168091168091, "grad_norm": 0.19399766623973846, "learning_rate": 4.894644391533348e-06, "loss": 0.7453, "step": 14146 }, { "epoch": 16.112820512820512, "grad_norm": 0.18186573684215546, "learning_rate": 4.891875605164983e-06, "loss": 0.5295, "step": 14147 }, { "epoch": 16.113960113960115, "grad_norm": 0.21143805980682373, "learning_rate": 4.889107517207075e-06, "loss": 0.6165, "step": 14148 }, { "epoch": 16.115099715099714, "grad_norm": 0.1572553515434265, "learning_rate": 4.886340127755776e-06, "loss": 0.8428, "step": 14149 }, { "epoch": 16.116239316239316, "grad_norm": 0.20264385640621185, "learning_rate": 4.883573436907191e-06, "loss": 0.6445, "step": 14150 }, { "epoch": 16.117378917378918, "grad_norm": 0.20846238732337952, "learning_rate": 4.880807444757418e-06, "loss": 0.6308, "step": 14151 }, { "epoch": 16.118518518518517, "grad_norm": 0.19726471602916718, "learning_rate": 4.8780421514025345e-06, "loss": 0.7571, "step": 14152 }, { "epoch": 16.11965811965812, "grad_norm": 0.18709038197994232, "learning_rate": 4.8752775569385845e-06, "loss": 0.7035, "step": 14153 }, { "epoch": 16.12079772079772, "grad_norm": 0.18194390833377838, "learning_rate": 4.872513661461578e-06, "loss": 0.6971, "step": 14154 }, { "epoch": 16.12193732193732, "grad_norm": 0.21358007192611694, "learning_rate": 4.869750465067529e-06, "loss": 0.6177, "step": 14155 }, { "epoch": 16.123076923076923, "grad_norm": 0.2152358442544937, "learning_rate": 4.866987967852413e-06, "loss": 0.6067, "step": 14156 }, { "epoch": 16.124216524216525, "grad_norm": 0.17053575813770294, "learning_rate": 4.864226169912162e-06, "loss": 0.7404, "step": 14157 }, { "epoch": 16.125356125356124, "grad_norm": 0.2537604868412018, "learning_rate": 4.861465071342708e-06, "loss": 0.6115, "step": 14158 }, { "epoch": 16.126495726495726, "grad_norm": 0.18276666104793549, "learning_rate": 4.858704672239956e-06, "loss": 0.8084, "step": 14159 }, { "epoch": 16.12763532763533, "grad_norm": 0.23169055581092834, "learning_rate": 4.85594497269978e-06, "loss": 0.652, "step": 14160 }, { "epoch": 16.128774928774927, "grad_norm": 0.215578094124794, "learning_rate": 4.853185972818036e-06, "loss": 0.6886, "step": 14161 }, { "epoch": 16.12991452991453, "grad_norm": 0.2252432256937027, "learning_rate": 4.850427672690541e-06, "loss": 0.778, "step": 14162 }, { "epoch": 16.13105413105413, "grad_norm": 0.2282172590494156, "learning_rate": 4.847670072413105e-06, "loss": 0.5893, "step": 14163 }, { "epoch": 16.13219373219373, "grad_norm": 0.1930750161409378, "learning_rate": 4.844913172081508e-06, "loss": 0.7367, "step": 14164 }, { "epoch": 16.133333333333333, "grad_norm": 0.2539467215538025, "learning_rate": 4.8421569717915005e-06, "loss": 0.5583, "step": 14165 }, { "epoch": 16.134472934472935, "grad_norm": 0.21186986565589905, "learning_rate": 4.839401471638818e-06, "loss": 0.4972, "step": 14166 }, { "epoch": 16.135612535612534, "grad_norm": 0.21824829280376434, "learning_rate": 4.836646671719164e-06, "loss": 0.6324, "step": 14167 }, { "epoch": 16.136752136752136, "grad_norm": 0.2590080201625824, "learning_rate": 4.8338925721282265e-06, "loss": 0.3894, "step": 14168 }, { "epoch": 16.13789173789174, "grad_norm": 0.27673637866973877, "learning_rate": 4.831139172961649e-06, "loss": 0.3115, "step": 14169 }, { "epoch": 16.139031339031337, "grad_norm": 0.1810840368270874, "learning_rate": 4.828386474315075e-06, "loss": 0.6401, "step": 14170 }, { "epoch": 16.14017094017094, "grad_norm": 0.19593803584575653, "learning_rate": 4.825634476284108e-06, "loss": 0.564, "step": 14171 }, { "epoch": 16.141310541310542, "grad_norm": 0.20629572868347168, "learning_rate": 4.822883178964343e-06, "loss": 0.7883, "step": 14172 }, { "epoch": 16.14245014245014, "grad_norm": 0.28529953956604004, "learning_rate": 4.8201325824513284e-06, "loss": 0.7357, "step": 14173 }, { "epoch": 16.143589743589743, "grad_norm": 0.19003815948963165, "learning_rate": 4.817382686840602e-06, "loss": 0.6053, "step": 14174 }, { "epoch": 16.144729344729345, "grad_norm": 0.20167994499206543, "learning_rate": 4.814633492227677e-06, "loss": 0.6366, "step": 14175 }, { "epoch": 16.145868945868944, "grad_norm": 0.17483095824718475, "learning_rate": 4.811884998708041e-06, "loss": 0.7448, "step": 14176 }, { "epoch": 16.147008547008546, "grad_norm": 0.2396191954612732, "learning_rate": 4.8091372063771566e-06, "loss": 0.7191, "step": 14177 }, { "epoch": 16.14814814814815, "grad_norm": 0.21231231093406677, "learning_rate": 4.8063901153304615e-06, "loss": 0.6498, "step": 14178 }, { "epoch": 16.149287749287748, "grad_norm": 0.1673159897327423, "learning_rate": 4.803643725663379e-06, "loss": 0.6035, "step": 14179 }, { "epoch": 16.15042735042735, "grad_norm": 0.1700536459684372, "learning_rate": 4.800898037471283e-06, "loss": 0.9098, "step": 14180 }, { "epoch": 16.151566951566952, "grad_norm": 0.17028027772903442, "learning_rate": 4.798153050849543e-06, "loss": 0.7013, "step": 14181 }, { "epoch": 16.15270655270655, "grad_norm": 0.1970115751028061, "learning_rate": 4.795408765893506e-06, "loss": 0.8321, "step": 14182 }, { "epoch": 16.153846153846153, "grad_norm": 0.16475103795528412, "learning_rate": 4.792665182698491e-06, "loss": 0.9184, "step": 14183 }, { "epoch": 16.154985754985756, "grad_norm": 0.17631348967552185, "learning_rate": 4.789922301359778e-06, "loss": 0.7771, "step": 14184 }, { "epoch": 16.156125356125354, "grad_norm": 0.21707049012184143, "learning_rate": 4.78718012197264e-06, "loss": 0.7017, "step": 14185 }, { "epoch": 16.157264957264957, "grad_norm": 0.19491758942604065, "learning_rate": 4.784438644632325e-06, "loss": 0.7447, "step": 14186 }, { "epoch": 16.15840455840456, "grad_norm": 0.19003994762897491, "learning_rate": 4.781697869434046e-06, "loss": 0.7024, "step": 14187 }, { "epoch": 16.159544159544158, "grad_norm": 0.20359846949577332, "learning_rate": 4.778957796473002e-06, "loss": 0.8946, "step": 14188 }, { "epoch": 16.16068376068376, "grad_norm": 0.22367510199546814, "learning_rate": 4.776218425844362e-06, "loss": 0.6882, "step": 14189 }, { "epoch": 16.161823361823362, "grad_norm": 0.2077135294675827, "learning_rate": 4.773479757643276e-06, "loss": 0.487, "step": 14190 }, { "epoch": 16.162962962962965, "grad_norm": 0.1768800914287567, "learning_rate": 4.770741791964856e-06, "loss": 0.6527, "step": 14191 }, { "epoch": 16.164102564102564, "grad_norm": 0.19622980058193207, "learning_rate": 4.7680045289042045e-06, "loss": 0.4951, "step": 14192 }, { "epoch": 16.165242165242166, "grad_norm": 0.1771240234375, "learning_rate": 4.7652679685563945e-06, "loss": 0.6839, "step": 14193 }, { "epoch": 16.166381766381768, "grad_norm": 0.21365617215633392, "learning_rate": 4.76253211101648e-06, "loss": 0.6114, "step": 14194 }, { "epoch": 16.167521367521367, "grad_norm": 0.23467305302619934, "learning_rate": 4.7597969563794726e-06, "loss": 0.5676, "step": 14195 }, { "epoch": 16.16866096866097, "grad_norm": 0.18005667626857758, "learning_rate": 4.7570625047403755e-06, "loss": 0.6889, "step": 14196 }, { "epoch": 16.16980056980057, "grad_norm": 0.6053818464279175, "learning_rate": 4.754328756194168e-06, "loss": 0.7246, "step": 14197 }, { "epoch": 16.17094017094017, "grad_norm": 0.2183670699596405, "learning_rate": 4.7515957108357965e-06, "loss": 0.7077, "step": 14198 }, { "epoch": 16.172079772079773, "grad_norm": 0.21603445708751678, "learning_rate": 4.74886336876019e-06, "loss": 0.7866, "step": 14199 }, { "epoch": 16.173219373219375, "grad_norm": 0.21477492153644562, "learning_rate": 4.74613173006225e-06, "loss": 0.4504, "step": 14200 }, { "epoch": 16.174358974358974, "grad_norm": 0.19185055792331696, "learning_rate": 4.743400794836855e-06, "loss": 0.6005, "step": 14201 }, { "epoch": 16.175498575498576, "grad_norm": 0.18981438875198364, "learning_rate": 4.740670563178861e-06, "loss": 0.6728, "step": 14202 }, { "epoch": 16.17663817663818, "grad_norm": 0.18861323595046997, "learning_rate": 4.737941035183085e-06, "loss": 0.6924, "step": 14203 }, { "epoch": 16.177777777777777, "grad_norm": 0.1881737858057022, "learning_rate": 4.735212210944337e-06, "loss": 0.7978, "step": 14204 }, { "epoch": 16.17891737891738, "grad_norm": 0.17678165435791016, "learning_rate": 4.732484090557399e-06, "loss": 0.7861, "step": 14205 }, { "epoch": 16.180056980056982, "grad_norm": 0.19209317862987518, "learning_rate": 4.7297566741170305e-06, "loss": 0.8984, "step": 14206 }, { "epoch": 16.18119658119658, "grad_norm": 0.2325279712677002, "learning_rate": 4.727029961717949e-06, "loss": 0.4909, "step": 14207 }, { "epoch": 16.182336182336183, "grad_norm": 0.24878153204917908, "learning_rate": 4.72430395345487e-06, "loss": 0.5382, "step": 14208 }, { "epoch": 16.183475783475785, "grad_norm": 0.2192421704530716, "learning_rate": 4.72157864942247e-06, "loss": 0.6141, "step": 14209 }, { "epoch": 16.184615384615384, "grad_norm": 0.18220512568950653, "learning_rate": 4.71885404971541e-06, "loss": 0.6841, "step": 14210 }, { "epoch": 16.185754985754986, "grad_norm": 0.16583271324634552, "learning_rate": 4.716130154428325e-06, "loss": 0.7289, "step": 14211 }, { "epoch": 16.18689458689459, "grad_norm": 0.1805356740951538, "learning_rate": 4.713406963655817e-06, "loss": 0.5646, "step": 14212 }, { "epoch": 16.188034188034187, "grad_norm": 0.18584349751472473, "learning_rate": 4.710684477492483e-06, "loss": 0.4618, "step": 14213 }, { "epoch": 16.18917378917379, "grad_norm": 0.20051173865795135, "learning_rate": 4.707962696032864e-06, "loss": 0.5886, "step": 14214 }, { "epoch": 16.190313390313392, "grad_norm": 0.17054873704910278, "learning_rate": 4.705241619371506e-06, "loss": 0.7782, "step": 14215 }, { "epoch": 16.19145299145299, "grad_norm": 0.20547950267791748, "learning_rate": 4.702521247602915e-06, "loss": 0.602, "step": 14216 }, { "epoch": 16.192592592592593, "grad_norm": 0.21678493916988373, "learning_rate": 4.699801580821586e-06, "loss": 0.6294, "step": 14217 }, { "epoch": 16.193732193732195, "grad_norm": 0.16647404432296753, "learning_rate": 4.69708261912197e-06, "loss": 0.5903, "step": 14218 }, { "epoch": 16.194871794871794, "grad_norm": 0.1705893576145172, "learning_rate": 4.6943643625985054e-06, "loss": 0.5777, "step": 14219 }, { "epoch": 16.196011396011396, "grad_norm": 0.22176790237426758, "learning_rate": 4.6916468113456095e-06, "loss": 0.8056, "step": 14220 }, { "epoch": 16.197150997151, "grad_norm": 0.18766705691814423, "learning_rate": 4.688929965457667e-06, "loss": 0.8236, "step": 14221 }, { "epoch": 16.198290598290598, "grad_norm": 0.21484479308128357, "learning_rate": 4.68621382502904e-06, "loss": 0.741, "step": 14222 }, { "epoch": 16.1994301994302, "grad_norm": 0.21632397174835205, "learning_rate": 4.683498390154073e-06, "loss": 0.6077, "step": 14223 }, { "epoch": 16.200569800569802, "grad_norm": 0.2317371666431427, "learning_rate": 4.680783660927082e-06, "loss": 0.5916, "step": 14224 }, { "epoch": 16.2017094017094, "grad_norm": 0.2338539958000183, "learning_rate": 4.678069637442348e-06, "loss": 0.6772, "step": 14225 }, { "epoch": 16.202849002849003, "grad_norm": 0.19969230890274048, "learning_rate": 4.675356319794139e-06, "loss": 0.6911, "step": 14226 }, { "epoch": 16.203988603988606, "grad_norm": 0.21318411827087402, "learning_rate": 4.6726437080766985e-06, "loss": 0.7214, "step": 14227 }, { "epoch": 16.205128205128204, "grad_norm": 0.22114770114421844, "learning_rate": 4.6699318023842505e-06, "loss": 0.6869, "step": 14228 }, { "epoch": 16.206267806267807, "grad_norm": 0.20420998334884644, "learning_rate": 4.667220602810973e-06, "loss": 0.5772, "step": 14229 }, { "epoch": 16.20740740740741, "grad_norm": 0.1809093952178955, "learning_rate": 4.664510109451037e-06, "loss": 0.7658, "step": 14230 }, { "epoch": 16.208547008547008, "grad_norm": 0.24055469036102295, "learning_rate": 4.6618003223985875e-06, "loss": 0.6819, "step": 14231 }, { "epoch": 16.20968660968661, "grad_norm": 0.2183607965707779, "learning_rate": 4.659091241747745e-06, "loss": 0.7314, "step": 14232 }, { "epoch": 16.210826210826212, "grad_norm": 0.1742558628320694, "learning_rate": 4.656382867592601e-06, "loss": 0.6664, "step": 14233 }, { "epoch": 16.21196581196581, "grad_norm": 0.2021118849515915, "learning_rate": 4.653675200027224e-06, "loss": 0.6182, "step": 14234 }, { "epoch": 16.213105413105414, "grad_norm": 0.19149664044380188, "learning_rate": 4.650968239145667e-06, "loss": 0.5857, "step": 14235 }, { "epoch": 16.214245014245016, "grad_norm": 0.1900215893983841, "learning_rate": 4.648261985041938e-06, "loss": 0.7543, "step": 14236 }, { "epoch": 16.215384615384615, "grad_norm": 0.17404602468013763, "learning_rate": 4.6455564378100365e-06, "loss": 0.589, "step": 14237 }, { "epoch": 16.216524216524217, "grad_norm": 0.18508280813694, "learning_rate": 4.642851597543935e-06, "loss": 0.7098, "step": 14238 }, { "epoch": 16.21766381766382, "grad_norm": 0.2232862412929535, "learning_rate": 4.640147464337577e-06, "loss": 0.4957, "step": 14239 }, { "epoch": 16.218803418803418, "grad_norm": 0.17864273488521576, "learning_rate": 4.637444038284897e-06, "loss": 0.676, "step": 14240 }, { "epoch": 16.21994301994302, "grad_norm": 0.15862596035003662, "learning_rate": 4.634741319479777e-06, "loss": 0.7898, "step": 14241 }, { "epoch": 16.221082621082623, "grad_norm": 0.2464563250541687, "learning_rate": 4.632039308016087e-06, "loss": 0.5794, "step": 14242 }, { "epoch": 16.22222222222222, "grad_norm": 0.19402630627155304, "learning_rate": 4.629338003987699e-06, "loss": 0.5994, "step": 14243 }, { "epoch": 16.223361823361824, "grad_norm": 0.18478505313396454, "learning_rate": 4.6266374074884125e-06, "loss": 0.6814, "step": 14244 }, { "epoch": 16.224501424501426, "grad_norm": 0.16128045320510864, "learning_rate": 4.623937518612037e-06, "loss": 0.8334, "step": 14245 }, { "epoch": 16.225641025641025, "grad_norm": 0.21322540938854218, "learning_rate": 4.6212383374523465e-06, "loss": 0.5717, "step": 14246 }, { "epoch": 16.226780626780627, "grad_norm": 0.18833217024803162, "learning_rate": 4.618539864103097e-06, "loss": 0.7203, "step": 14247 }, { "epoch": 16.22792022792023, "grad_norm": 0.17805443704128265, "learning_rate": 4.615842098658002e-06, "loss": 0.5734, "step": 14248 }, { "epoch": 16.22905982905983, "grad_norm": 0.2207133173942566, "learning_rate": 4.613145041210765e-06, "loss": 0.6573, "step": 14249 }, { "epoch": 16.23019943019943, "grad_norm": 0.2588041126728058, "learning_rate": 4.610448691855066e-06, "loss": 0.3913, "step": 14250 }, { "epoch": 16.231339031339033, "grad_norm": 0.20682033896446228, "learning_rate": 4.60775305068456e-06, "loss": 0.7064, "step": 14251 }, { "epoch": 16.23247863247863, "grad_norm": 0.18519830703735352, "learning_rate": 4.605058117792865e-06, "loss": 0.4983, "step": 14252 }, { "epoch": 16.233618233618234, "grad_norm": 0.20930525660514832, "learning_rate": 4.602363893273581e-06, "loss": 0.6482, "step": 14253 }, { "epoch": 16.234757834757836, "grad_norm": 0.2454683631658554, "learning_rate": 4.5996703772203054e-06, "loss": 0.5938, "step": 14254 }, { "epoch": 16.235897435897435, "grad_norm": 0.2545921504497528, "learning_rate": 4.5969775697265735e-06, "loss": 0.4597, "step": 14255 }, { "epoch": 16.237037037037037, "grad_norm": 0.20620526373386383, "learning_rate": 4.594285470885917e-06, "loss": 0.783, "step": 14256 }, { "epoch": 16.23817663817664, "grad_norm": 0.16898657381534576, "learning_rate": 4.5915940807918444e-06, "loss": 0.6661, "step": 14257 }, { "epoch": 16.23931623931624, "grad_norm": 0.20109368860721588, "learning_rate": 4.588903399537836e-06, "loss": 0.4781, "step": 14258 }, { "epoch": 16.24045584045584, "grad_norm": 0.24306416511535645, "learning_rate": 4.586213427217339e-06, "loss": 0.7478, "step": 14259 }, { "epoch": 16.241595441595443, "grad_norm": 0.20856760442256927, "learning_rate": 4.5835241639237884e-06, "loss": 0.7023, "step": 14260 }, { "epoch": 16.242735042735042, "grad_norm": 0.16698814928531647, "learning_rate": 4.580835609750589e-06, "loss": 0.6244, "step": 14261 }, { "epoch": 16.243874643874644, "grad_norm": 0.19303050637245178, "learning_rate": 4.5781477647911265e-06, "loss": 0.6157, "step": 14262 }, { "epoch": 16.245014245014247, "grad_norm": 0.22583156824111938, "learning_rate": 4.575460629138747e-06, "loss": 0.5981, "step": 14263 }, { "epoch": 16.246153846153845, "grad_norm": 0.26615431904792786, "learning_rate": 4.5727742028867815e-06, "loss": 0.5091, "step": 14264 }, { "epoch": 16.247293447293448, "grad_norm": 0.21061669290065765, "learning_rate": 4.570088486128554e-06, "loss": 0.5863, "step": 14265 }, { "epoch": 16.24843304843305, "grad_norm": 0.167541041970253, "learning_rate": 4.5674034789573325e-06, "loss": 0.6977, "step": 14266 }, { "epoch": 16.24957264957265, "grad_norm": 0.26731422543525696, "learning_rate": 4.564719181466376e-06, "loss": 0.4772, "step": 14267 }, { "epoch": 16.25071225071225, "grad_norm": 0.19785575568675995, "learning_rate": 4.56203559374892e-06, "loss": 0.7455, "step": 14268 }, { "epoch": 16.251851851851853, "grad_norm": 1.1487804651260376, "learning_rate": 4.55935271589818e-06, "loss": 0.6764, "step": 14269 }, { "epoch": 16.252991452991452, "grad_norm": 0.1889180988073349, "learning_rate": 4.556670548007322e-06, "loss": 0.7167, "step": 14270 }, { "epoch": 16.254131054131054, "grad_norm": 0.23285332322120667, "learning_rate": 4.553989090169519e-06, "loss": 0.5157, "step": 14271 }, { "epoch": 16.255270655270657, "grad_norm": 0.1659945249557495, "learning_rate": 4.5513083424779e-06, "loss": 0.6041, "step": 14272 }, { "epoch": 16.256410256410255, "grad_norm": 0.2029780149459839, "learning_rate": 4.548628305025584e-06, "loss": 0.6255, "step": 14273 }, { "epoch": 16.257549857549858, "grad_norm": 0.19079919159412384, "learning_rate": 4.54594897790564e-06, "loss": 0.6769, "step": 14274 }, { "epoch": 16.25868945868946, "grad_norm": 0.182784304022789, "learning_rate": 4.543270361211133e-06, "loss": 0.874, "step": 14275 }, { "epoch": 16.25982905982906, "grad_norm": 0.18271218240261078, "learning_rate": 4.540592455035108e-06, "loss": 0.6597, "step": 14276 }, { "epoch": 16.26096866096866, "grad_norm": 0.20095323026180267, "learning_rate": 4.5379152594705765e-06, "loss": 0.6511, "step": 14277 }, { "epoch": 16.262108262108264, "grad_norm": 0.27629172801971436, "learning_rate": 4.535238774610512e-06, "loss": 0.5178, "step": 14278 }, { "epoch": 16.263247863247862, "grad_norm": 0.21059632301330566, "learning_rate": 4.532563000547885e-06, "loss": 0.549, "step": 14279 }, { "epoch": 16.264387464387465, "grad_norm": 0.19386284053325653, "learning_rate": 4.529887937375629e-06, "loss": 0.7525, "step": 14280 }, { "epoch": 16.265527065527067, "grad_norm": 0.19157487154006958, "learning_rate": 4.527213585186665e-06, "loss": 0.6305, "step": 14281 }, { "epoch": 16.266666666666666, "grad_norm": 0.21057751774787903, "learning_rate": 4.5245399440738686e-06, "loss": 0.8012, "step": 14282 }, { "epoch": 16.267806267806268, "grad_norm": 0.1656307429075241, "learning_rate": 4.521867014130104e-06, "loss": 0.7583, "step": 14283 }, { "epoch": 16.26894586894587, "grad_norm": 0.2325628250837326, "learning_rate": 4.519194795448217e-06, "loss": 0.6343, "step": 14284 }, { "epoch": 16.27008547008547, "grad_norm": 0.23686346411705017, "learning_rate": 4.516523288121022e-06, "loss": 0.4647, "step": 14285 }, { "epoch": 16.27122507122507, "grad_norm": 0.207304909825325, "learning_rate": 4.513852492241291e-06, "loss": 0.6472, "step": 14286 }, { "epoch": 16.272364672364674, "grad_norm": 0.2733539938926697, "learning_rate": 4.511182407901804e-06, "loss": 0.5109, "step": 14287 }, { "epoch": 16.273504273504273, "grad_norm": 0.2384081482887268, "learning_rate": 4.5085130351953055e-06, "loss": 0.6514, "step": 14288 }, { "epoch": 16.274643874643875, "grad_norm": 0.18418949842453003, "learning_rate": 4.505844374214494e-06, "loss": 0.6763, "step": 14289 }, { "epoch": 16.275783475783477, "grad_norm": 0.20137840509414673, "learning_rate": 4.503176425052066e-06, "loss": 0.6515, "step": 14290 }, { "epoch": 16.276923076923076, "grad_norm": 0.19259755313396454, "learning_rate": 4.500509187800686e-06, "loss": 0.7522, "step": 14291 }, { "epoch": 16.27806267806268, "grad_norm": 0.343244343996048, "learning_rate": 4.497842662553004e-06, "loss": 0.5417, "step": 14292 }, { "epoch": 16.27920227920228, "grad_norm": 0.2501336932182312, "learning_rate": 4.49517684940162e-06, "loss": 0.391, "step": 14293 }, { "epoch": 16.28034188034188, "grad_norm": 0.17898280918598175, "learning_rate": 4.4925117484391345e-06, "loss": 0.5586, "step": 14294 }, { "epoch": 16.28148148148148, "grad_norm": 0.18999363481998444, "learning_rate": 4.489847359758109e-06, "loss": 0.8255, "step": 14295 }, { "epoch": 16.282621082621084, "grad_norm": 0.20510265231132507, "learning_rate": 4.487183683451096e-06, "loss": 0.6636, "step": 14296 }, { "epoch": 16.283760683760683, "grad_norm": 0.17222599685192108, "learning_rate": 4.484520719610591e-06, "loss": 0.6504, "step": 14297 }, { "epoch": 16.284900284900285, "grad_norm": 0.212445929646492, "learning_rate": 4.481858468329106e-06, "loss": 0.6673, "step": 14298 }, { "epoch": 16.286039886039887, "grad_norm": 0.200743168592453, "learning_rate": 4.479196929699108e-06, "loss": 0.5258, "step": 14299 }, { "epoch": 16.287179487179486, "grad_norm": 0.19284288585186005, "learning_rate": 4.476536103813026e-06, "loss": 0.6693, "step": 14300 }, { "epoch": 16.28831908831909, "grad_norm": 0.2084466516971588, "learning_rate": 4.473875990763285e-06, "loss": 0.8346, "step": 14301 }, { "epoch": 16.28945868945869, "grad_norm": 0.233940988779068, "learning_rate": 4.471216590642277e-06, "loss": 0.5976, "step": 14302 }, { "epoch": 16.29059829059829, "grad_norm": 0.17546918988227844, "learning_rate": 4.468557903542378e-06, "loss": 0.6661, "step": 14303 }, { "epoch": 16.291737891737892, "grad_norm": 0.17741990089416504, "learning_rate": 4.4658999295559195e-06, "loss": 0.608, "step": 14304 }, { "epoch": 16.292877492877494, "grad_norm": 0.23576615750789642, "learning_rate": 4.463242668775225e-06, "loss": 0.6547, "step": 14305 }, { "epoch": 16.294017094017093, "grad_norm": 0.1998688131570816, "learning_rate": 4.46058612129259e-06, "loss": 0.7048, "step": 14306 }, { "epoch": 16.295156695156695, "grad_norm": 0.1796422302722931, "learning_rate": 4.457930287200285e-06, "loss": 0.6466, "step": 14307 }, { "epoch": 16.296296296296298, "grad_norm": 0.22589747607707977, "learning_rate": 4.4552751665905415e-06, "loss": 0.5144, "step": 14308 }, { "epoch": 16.297435897435896, "grad_norm": 0.19338934123516083, "learning_rate": 4.452620759555598e-06, "loss": 0.3244, "step": 14309 }, { "epoch": 16.2985754985755, "grad_norm": 0.17849305272102356, "learning_rate": 4.449967066187646e-06, "loss": 0.7224, "step": 14310 }, { "epoch": 16.2997150997151, "grad_norm": 0.18624725937843323, "learning_rate": 4.447314086578844e-06, "loss": 0.5929, "step": 14311 }, { "epoch": 16.3008547008547, "grad_norm": 0.21634441614151, "learning_rate": 4.444661820821344e-06, "loss": 0.6098, "step": 14312 }, { "epoch": 16.301994301994302, "grad_norm": 0.1766114979982376, "learning_rate": 4.442010269007266e-06, "loss": 0.6877, "step": 14313 }, { "epoch": 16.303133903133904, "grad_norm": 0.2100430727005005, "learning_rate": 4.4393594312287126e-06, "loss": 0.7294, "step": 14314 }, { "epoch": 16.304273504273503, "grad_norm": 0.2034444361925125, "learning_rate": 4.436709307577741e-06, "loss": 0.5865, "step": 14315 }, { "epoch": 16.305413105413106, "grad_norm": 0.18770147860050201, "learning_rate": 4.434059898146406e-06, "loss": 0.6676, "step": 14316 }, { "epoch": 16.306552706552708, "grad_norm": 0.19414395093917847, "learning_rate": 4.431411203026728e-06, "loss": 0.7387, "step": 14317 }, { "epoch": 16.307692307692307, "grad_norm": 0.1796029955148697, "learning_rate": 4.4287632223107e-06, "loss": 0.5206, "step": 14318 }, { "epoch": 16.30883190883191, "grad_norm": 0.17089225351810455, "learning_rate": 4.4261159560902985e-06, "loss": 0.6783, "step": 14319 }, { "epoch": 16.30997150997151, "grad_norm": 0.18960067629814148, "learning_rate": 4.423469404457467e-06, "loss": 0.5921, "step": 14320 }, { "epoch": 16.31111111111111, "grad_norm": 0.17689311504364014, "learning_rate": 4.420823567504129e-06, "loss": 0.8011, "step": 14321 }, { "epoch": 16.312250712250712, "grad_norm": 0.20006713271141052, "learning_rate": 4.418178445322188e-06, "loss": 0.663, "step": 14322 }, { "epoch": 16.313390313390315, "grad_norm": 0.1775377094745636, "learning_rate": 4.4155340380035045e-06, "loss": 0.5213, "step": 14323 }, { "epoch": 16.314529914529913, "grad_norm": 0.32989034056663513, "learning_rate": 4.412890345639931e-06, "loss": 0.6832, "step": 14324 }, { "epoch": 16.315669515669516, "grad_norm": 0.21517755091190338, "learning_rate": 4.410247368323289e-06, "loss": 0.6429, "step": 14325 }, { "epoch": 16.316809116809118, "grad_norm": 0.20794017612934113, "learning_rate": 4.407605106145385e-06, "loss": 0.7617, "step": 14326 }, { "epoch": 16.317948717948717, "grad_norm": 0.21983087062835693, "learning_rate": 4.40496355919798e-06, "loss": 0.4611, "step": 14327 }, { "epoch": 16.31908831908832, "grad_norm": 0.19113603234291077, "learning_rate": 4.402322727572825e-06, "loss": 0.7669, "step": 14328 }, { "epoch": 16.32022792022792, "grad_norm": 0.21938566863536835, "learning_rate": 4.399682611361647e-06, "loss": 0.6253, "step": 14329 }, { "epoch": 16.32136752136752, "grad_norm": 0.18340864777565002, "learning_rate": 4.397043210656143e-06, "loss": 0.6056, "step": 14330 }, { "epoch": 16.322507122507123, "grad_norm": 0.2354237288236618, "learning_rate": 4.394404525547985e-06, "loss": 0.5475, "step": 14331 }, { "epoch": 16.323646723646725, "grad_norm": 0.2733052372932434, "learning_rate": 4.391766556128826e-06, "loss": 0.4954, "step": 14332 }, { "epoch": 16.324786324786324, "grad_norm": 0.19026166200637817, "learning_rate": 4.389129302490291e-06, "loss": 0.7062, "step": 14333 }, { "epoch": 16.325925925925926, "grad_norm": 0.225381001830101, "learning_rate": 4.386492764723971e-06, "loss": 0.8039, "step": 14334 }, { "epoch": 16.32706552706553, "grad_norm": 0.1898542195558548, "learning_rate": 4.3838569429214435e-06, "loss": 0.6213, "step": 14335 }, { "epoch": 16.328205128205127, "grad_norm": 0.23803776502609253, "learning_rate": 4.38122183717426e-06, "loss": 0.6764, "step": 14336 }, { "epoch": 16.32934472934473, "grad_norm": 0.1944434940814972, "learning_rate": 4.378587447573951e-06, "loss": 0.7895, "step": 14337 }, { "epoch": 16.33048433048433, "grad_norm": 0.1940242499113083, "learning_rate": 4.375953774211999e-06, "loss": 0.6543, "step": 14338 }, { "epoch": 16.33162393162393, "grad_norm": 0.202335387468338, "learning_rate": 4.3733208171798934e-06, "loss": 0.8032, "step": 14339 }, { "epoch": 16.332763532763533, "grad_norm": 0.22755244374275208, "learning_rate": 4.370688576569076e-06, "loss": 0.6037, "step": 14340 }, { "epoch": 16.333903133903135, "grad_norm": 0.18921247124671936, "learning_rate": 4.368057052470978e-06, "loss": 0.749, "step": 14341 }, { "epoch": 16.335042735042734, "grad_norm": 0.17129771411418915, "learning_rate": 4.3654262449769946e-06, "loss": 0.6225, "step": 14342 }, { "epoch": 16.336182336182336, "grad_norm": 0.1838228851556778, "learning_rate": 4.362796154178503e-06, "loss": 0.8151, "step": 14343 }, { "epoch": 16.33732193732194, "grad_norm": 0.20149587094783783, "learning_rate": 4.360166780166861e-06, "loss": 0.6324, "step": 14344 }, { "epoch": 16.338461538461537, "grad_norm": 0.25393611192703247, "learning_rate": 4.357538123033378e-06, "loss": 0.2834, "step": 14345 }, { "epoch": 16.33960113960114, "grad_norm": 0.18815156817436218, "learning_rate": 4.354910182869365e-06, "loss": 0.8636, "step": 14346 }, { "epoch": 16.340740740740742, "grad_norm": 0.17681847512722015, "learning_rate": 4.352282959766096e-06, "loss": 0.7832, "step": 14347 }, { "epoch": 16.34188034188034, "grad_norm": 0.20507217943668365, "learning_rate": 4.349656453814827e-06, "loss": 0.5741, "step": 14348 }, { "epoch": 16.343019943019943, "grad_norm": 0.2923872768878937, "learning_rate": 4.347030665106772e-06, "loss": 0.4988, "step": 14349 }, { "epoch": 16.344159544159545, "grad_norm": 0.2339613139629364, "learning_rate": 4.344405593733136e-06, "loss": 0.5456, "step": 14350 }, { "epoch": 16.345299145299144, "grad_norm": 0.1596457064151764, "learning_rate": 4.341781239785098e-06, "loss": 0.8624, "step": 14351 }, { "epoch": 16.346438746438746, "grad_norm": 0.19932234287261963, "learning_rate": 4.339157603353808e-06, "loss": 0.8029, "step": 14352 }, { "epoch": 16.34757834757835, "grad_norm": 0.20804062485694885, "learning_rate": 4.336534684530391e-06, "loss": 0.6707, "step": 14353 }, { "epoch": 16.348717948717947, "grad_norm": 0.18091614544391632, "learning_rate": 4.33391248340595e-06, "loss": 0.6526, "step": 14354 }, { "epoch": 16.34985754985755, "grad_norm": 0.23857539892196655, "learning_rate": 4.331291000071561e-06, "loss": 0.8791, "step": 14355 }, { "epoch": 16.350997150997152, "grad_norm": 0.23884528875350952, "learning_rate": 4.3286702346182815e-06, "loss": 0.6356, "step": 14356 }, { "epoch": 16.35213675213675, "grad_norm": 0.22342674434185028, "learning_rate": 4.326050187137123e-06, "loss": 0.608, "step": 14357 }, { "epoch": 16.353276353276353, "grad_norm": 0.2086082547903061, "learning_rate": 4.323430857719097e-06, "loss": 0.666, "step": 14358 }, { "epoch": 16.354415954415956, "grad_norm": 0.21723128855228424, "learning_rate": 4.320812246455175e-06, "loss": 0.6238, "step": 14359 }, { "epoch": 16.355555555555554, "grad_norm": 0.2092699557542801, "learning_rate": 4.3181943534363225e-06, "loss": 0.5761, "step": 14360 }, { "epoch": 16.356695156695157, "grad_norm": 0.22917023301124573, "learning_rate": 4.315577178753444e-06, "loss": 0.6894, "step": 14361 }, { "epoch": 16.35783475783476, "grad_norm": 0.24297906458377838, "learning_rate": 4.312960722497455e-06, "loss": 0.5083, "step": 14362 }, { "epoch": 16.358974358974358, "grad_norm": 0.21671253442764282, "learning_rate": 4.310344984759229e-06, "loss": 0.5863, "step": 14363 }, { "epoch": 16.36011396011396, "grad_norm": 0.19929228723049164, "learning_rate": 4.30772996562962e-06, "loss": 0.6084, "step": 14364 }, { "epoch": 16.361253561253562, "grad_norm": 0.17057958245277405, "learning_rate": 4.305115665199453e-06, "loss": 0.6095, "step": 14365 }, { "epoch": 16.36239316239316, "grad_norm": 0.25199905037879944, "learning_rate": 4.302502083559529e-06, "loss": 0.7987, "step": 14366 }, { "epoch": 16.363532763532763, "grad_norm": 0.2120455950498581, "learning_rate": 4.299889220800632e-06, "loss": 0.7455, "step": 14367 }, { "epoch": 16.364672364672366, "grad_norm": 0.19496478140354156, "learning_rate": 4.2972770770135035e-06, "loss": 0.6692, "step": 14368 }, { "epoch": 16.365811965811965, "grad_norm": 0.18825624883174896, "learning_rate": 4.294665652288873e-06, "loss": 0.796, "step": 14369 }, { "epoch": 16.366951566951567, "grad_norm": 0.21346953511238098, "learning_rate": 4.292054946717444e-06, "loss": 0.5156, "step": 14370 }, { "epoch": 16.36809116809117, "grad_norm": 0.19936388731002808, "learning_rate": 4.2894449603899015e-06, "loss": 0.6216, "step": 14371 }, { "epoch": 16.369230769230768, "grad_norm": 0.25102996826171875, "learning_rate": 4.286835693396885e-06, "loss": 0.5496, "step": 14372 }, { "epoch": 16.37037037037037, "grad_norm": 0.21934431791305542, "learning_rate": 4.284227145829023e-06, "loss": 0.5757, "step": 14373 }, { "epoch": 16.371509971509973, "grad_norm": 0.16905446350574493, "learning_rate": 4.281619317776922e-06, "loss": 0.9319, "step": 14374 }, { "epoch": 16.37264957264957, "grad_norm": 0.17735671997070312, "learning_rate": 4.279012209331157e-06, "loss": 0.7828, "step": 14375 }, { "epoch": 16.373789173789174, "grad_norm": 0.17744825780391693, "learning_rate": 4.276405820582282e-06, "loss": 0.5597, "step": 14376 }, { "epoch": 16.374928774928776, "grad_norm": 0.1992998868227005, "learning_rate": 4.273800151620824e-06, "loss": 0.8753, "step": 14377 }, { "epoch": 16.376068376068375, "grad_norm": 0.1772407591342926, "learning_rate": 4.27119520253729e-06, "loss": 0.6356, "step": 14378 }, { "epoch": 16.377207977207977, "grad_norm": 0.2159920334815979, "learning_rate": 4.268590973422143e-06, "loss": 0.7826, "step": 14379 }, { "epoch": 16.37834757834758, "grad_norm": 0.24267174303531647, "learning_rate": 4.265987464365842e-06, "loss": 0.5721, "step": 14380 }, { "epoch": 16.379487179487178, "grad_norm": 0.25559645891189575, "learning_rate": 4.2633846754588175e-06, "loss": 0.5517, "step": 14381 }, { "epoch": 16.38062678062678, "grad_norm": 0.22765012085437775, "learning_rate": 4.260782606791475e-06, "loss": 0.4509, "step": 14382 }, { "epoch": 16.381766381766383, "grad_norm": 0.2365788072347641, "learning_rate": 4.2581812584541805e-06, "loss": 0.7347, "step": 14383 }, { "epoch": 16.38290598290598, "grad_norm": 0.19298528134822845, "learning_rate": 4.25558063053729e-06, "loss": 0.6287, "step": 14384 }, { "epoch": 16.384045584045584, "grad_norm": 0.16975969076156616, "learning_rate": 4.252980723131131e-06, "loss": 0.9887, "step": 14385 }, { "epoch": 16.385185185185186, "grad_norm": 0.19433729350566864, "learning_rate": 4.250381536326006e-06, "loss": 0.6136, "step": 14386 }, { "epoch": 16.386324786324785, "grad_norm": 0.23822815716266632, "learning_rate": 4.247783070212194e-06, "loss": 0.5435, "step": 14387 }, { "epoch": 16.387464387464387, "grad_norm": 0.16016489267349243, "learning_rate": 4.245185324879941e-06, "loss": 0.6939, "step": 14388 }, { "epoch": 16.38860398860399, "grad_norm": 0.22926373779773712, "learning_rate": 4.2425883004194865e-06, "loss": 0.5478, "step": 14389 }, { "epoch": 16.38974358974359, "grad_norm": 0.21683046221733093, "learning_rate": 4.2399919969210164e-06, "loss": 0.7768, "step": 14390 }, { "epoch": 16.39088319088319, "grad_norm": 0.21870996057987213, "learning_rate": 4.237396414474715e-06, "loss": 0.5079, "step": 14391 }, { "epoch": 16.392022792022793, "grad_norm": 0.2075919359922409, "learning_rate": 4.234801553170734e-06, "loss": 0.6293, "step": 14392 }, { "epoch": 16.39316239316239, "grad_norm": 0.20165051519870758, "learning_rate": 4.232207413099204e-06, "loss": 0.6944, "step": 14393 }, { "epoch": 16.394301994301994, "grad_norm": 0.2192036360502243, "learning_rate": 4.2296139943502175e-06, "loss": 0.4595, "step": 14394 }, { "epoch": 16.395441595441596, "grad_norm": 0.22861096262931824, "learning_rate": 4.227021297013856e-06, "loss": 0.6534, "step": 14395 }, { "epoch": 16.396581196581195, "grad_norm": 0.18797266483306885, "learning_rate": 4.22442932118017e-06, "loss": 0.6111, "step": 14396 }, { "epoch": 16.397720797720797, "grad_norm": 0.1839674562215805, "learning_rate": 4.221838066939187e-06, "loss": 0.6883, "step": 14397 }, { "epoch": 16.3988603988604, "grad_norm": 0.22609136998653412, "learning_rate": 4.219247534380907e-06, "loss": 0.6929, "step": 14398 }, { "epoch": 16.4, "grad_norm": 0.18066316843032837, "learning_rate": 4.216657723595311e-06, "loss": 0.6833, "step": 14399 }, { "epoch": 16.4011396011396, "grad_norm": 0.21688348054885864, "learning_rate": 4.214068634672344e-06, "loss": 0.4891, "step": 14400 }, { "epoch": 16.402279202279203, "grad_norm": 0.21518702805042267, "learning_rate": 4.2114802677019395e-06, "loss": 0.6103, "step": 14401 }, { "epoch": 16.403418803418802, "grad_norm": 0.17440588772296906, "learning_rate": 4.208892622773988e-06, "loss": 0.7396, "step": 14402 }, { "epoch": 16.404558404558404, "grad_norm": 0.22613508999347687, "learning_rate": 4.206305699978374e-06, "loss": 0.6477, "step": 14403 }, { "epoch": 16.405698005698007, "grad_norm": 0.1704530566930771, "learning_rate": 4.203719499404946e-06, "loss": 0.754, "step": 14404 }, { "epoch": 16.406837606837605, "grad_norm": 0.17848657071590424, "learning_rate": 4.201134021143535e-06, "loss": 0.8487, "step": 14405 }, { "epoch": 16.407977207977208, "grad_norm": 0.19880610704421997, "learning_rate": 4.19854926528393e-06, "loss": 0.619, "step": 14406 }, { "epoch": 16.40911680911681, "grad_norm": 0.19024336338043213, "learning_rate": 4.195965231915914e-06, "loss": 0.6118, "step": 14407 }, { "epoch": 16.41025641025641, "grad_norm": 0.1932651847600937, "learning_rate": 4.1933819211292355e-06, "loss": 0.543, "step": 14408 }, { "epoch": 16.41139601139601, "grad_norm": 0.17125093936920166, "learning_rate": 4.19079933301362e-06, "loss": 0.8024, "step": 14409 }, { "epoch": 16.412535612535613, "grad_norm": 0.20307296514511108, "learning_rate": 4.188217467658773e-06, "loss": 0.848, "step": 14410 }, { "epoch": 16.413675213675212, "grad_norm": 0.20626360177993774, "learning_rate": 4.185636325154363e-06, "loss": 0.677, "step": 14411 }, { "epoch": 16.414814814814815, "grad_norm": 0.2043163925409317, "learning_rate": 4.18305590559005e-06, "loss": 0.6448, "step": 14412 }, { "epoch": 16.415954415954417, "grad_norm": 0.1820337325334549, "learning_rate": 4.180476209055448e-06, "loss": 0.8245, "step": 14413 }, { "epoch": 16.417094017094016, "grad_norm": 0.22119086980819702, "learning_rate": 4.1778972356401575e-06, "loss": 0.6379, "step": 14414 }, { "epoch": 16.418233618233618, "grad_norm": 0.2301556020975113, "learning_rate": 4.17531898543376e-06, "loss": 0.7971, "step": 14415 }, { "epoch": 16.41937321937322, "grad_norm": 0.1856745034456253, "learning_rate": 4.1727414585258085e-06, "loss": 0.6311, "step": 14416 }, { "epoch": 16.42051282051282, "grad_norm": 0.16893912851810455, "learning_rate": 4.170164655005812e-06, "loss": 0.7953, "step": 14417 }, { "epoch": 16.42165242165242, "grad_norm": 0.18980665504932404, "learning_rate": 4.167588574963282e-06, "loss": 0.5062, "step": 14418 }, { "epoch": 16.422792022792024, "grad_norm": 0.24632985889911652, "learning_rate": 4.16501321848769e-06, "loss": 0.6022, "step": 14419 }, { "epoch": 16.423931623931622, "grad_norm": 0.18609194457530975, "learning_rate": 4.162438585668485e-06, "loss": 0.6492, "step": 14420 }, { "epoch": 16.425071225071225, "grad_norm": 0.1913001388311386, "learning_rate": 4.159864676595093e-06, "loss": 0.6434, "step": 14421 }, { "epoch": 16.426210826210827, "grad_norm": 0.17134885489940643, "learning_rate": 4.15729149135691e-06, "loss": 0.8395, "step": 14422 }, { "epoch": 16.427350427350426, "grad_norm": 0.18671829998493195, "learning_rate": 4.154719030043316e-06, "loss": 0.6149, "step": 14423 }, { "epoch": 16.428490028490028, "grad_norm": 0.19499360024929047, "learning_rate": 4.15214729274365e-06, "loss": 0.6216, "step": 14424 }, { "epoch": 16.42962962962963, "grad_norm": 0.21025635302066803, "learning_rate": 4.14957627954724e-06, "loss": 0.6376, "step": 14425 }, { "epoch": 16.43076923076923, "grad_norm": 0.20964206755161285, "learning_rate": 4.1470059905433845e-06, "loss": 0.6179, "step": 14426 }, { "epoch": 16.43190883190883, "grad_norm": 0.19520244002342224, "learning_rate": 4.144436425821363e-06, "loss": 0.543, "step": 14427 }, { "epoch": 16.433048433048434, "grad_norm": 0.2406681627035141, "learning_rate": 4.141867585470413e-06, "loss": 0.6697, "step": 14428 }, { "epoch": 16.434188034188033, "grad_norm": 0.15467698872089386, "learning_rate": 4.13929946957976e-06, "loss": 0.9953, "step": 14429 }, { "epoch": 16.435327635327635, "grad_norm": 0.17723286151885986, "learning_rate": 4.1367320782385976e-06, "loss": 0.755, "step": 14430 }, { "epoch": 16.436467236467237, "grad_norm": 0.19826023280620575, "learning_rate": 4.134165411536117e-06, "loss": 0.605, "step": 14431 }, { "epoch": 16.437606837606836, "grad_norm": 0.2520255446434021, "learning_rate": 4.131599469561448e-06, "loss": 0.6394, "step": 14432 }, { "epoch": 16.43874643874644, "grad_norm": 0.23350045084953308, "learning_rate": 4.129034252403715e-06, "loss": 0.818, "step": 14433 }, { "epoch": 16.43988603988604, "grad_norm": 0.220793679356575, "learning_rate": 4.126469760152021e-06, "loss": 0.818, "step": 14434 }, { "epoch": 16.44102564102564, "grad_norm": 0.22970087826251984, "learning_rate": 4.1239059928954385e-06, "loss": 0.7733, "step": 14435 }, { "epoch": 16.442165242165242, "grad_norm": 0.18339882791042328, "learning_rate": 4.121342950723004e-06, "loss": 0.7766, "step": 14436 }, { "epoch": 16.443304843304844, "grad_norm": 0.19485601782798767, "learning_rate": 4.118780633723745e-06, "loss": 0.6469, "step": 14437 }, { "epoch": 16.444444444444443, "grad_norm": 0.2181605100631714, "learning_rate": 4.11621904198666e-06, "loss": 0.6113, "step": 14438 }, { "epoch": 16.445584045584045, "grad_norm": 0.2137926071882248, "learning_rate": 4.113658175600724e-06, "loss": 0.5545, "step": 14439 }, { "epoch": 16.446723646723648, "grad_norm": 0.19882921874523163, "learning_rate": 4.111098034654873e-06, "loss": 0.8499, "step": 14440 }, { "epoch": 16.447863247863246, "grad_norm": 0.19488947093486786, "learning_rate": 4.108538619238022e-06, "loss": 0.7716, "step": 14441 }, { "epoch": 16.44900284900285, "grad_norm": 0.2073955237865448, "learning_rate": 4.105979929439091e-06, "loss": 0.7342, "step": 14442 }, { "epoch": 16.45014245014245, "grad_norm": 0.21484479308128357, "learning_rate": 4.103421965346929e-06, "loss": 0.7877, "step": 14443 }, { "epoch": 16.45128205128205, "grad_norm": 0.2445450872182846, "learning_rate": 4.100864727050388e-06, "loss": 0.4911, "step": 14444 }, { "epoch": 16.452421652421652, "grad_norm": 0.17344039678573608, "learning_rate": 4.098308214638288e-06, "loss": 0.6512, "step": 14445 }, { "epoch": 16.453561253561254, "grad_norm": 0.16234394907951355, "learning_rate": 4.0957524281994284e-06, "loss": 0.8383, "step": 14446 }, { "epoch": 16.454700854700853, "grad_norm": 0.22016826272010803, "learning_rate": 4.093197367822571e-06, "loss": 0.7096, "step": 14447 }, { "epoch": 16.455840455840455, "grad_norm": 0.26369261741638184, "learning_rate": 4.090643033596461e-06, "loss": 0.5644, "step": 14448 }, { "epoch": 16.456980056980058, "grad_norm": 0.23340226709842682, "learning_rate": 4.088089425609817e-06, "loss": 0.6914, "step": 14449 }, { "epoch": 16.458119658119656, "grad_norm": 0.21980200707912445, "learning_rate": 4.085536543951346e-06, "loss": 0.6858, "step": 14450 }, { "epoch": 16.45925925925926, "grad_norm": 0.20083510875701904, "learning_rate": 4.082984388709696e-06, "loss": 0.6296, "step": 14451 }, { "epoch": 16.46039886039886, "grad_norm": 0.21656829118728638, "learning_rate": 4.080432959973515e-06, "loss": 0.6918, "step": 14452 }, { "epoch": 16.46153846153846, "grad_norm": 0.21565794944763184, "learning_rate": 4.077882257831439e-06, "loss": 0.5942, "step": 14453 }, { "epoch": 16.462678062678062, "grad_norm": 0.18151481449604034, "learning_rate": 4.075332282372044e-06, "loss": 0.6738, "step": 14454 }, { "epoch": 16.463817663817665, "grad_norm": 0.1792241334915161, "learning_rate": 4.0727830336838994e-06, "loss": 0.7729, "step": 14455 }, { "epoch": 16.464957264957263, "grad_norm": 0.20354300737380981, "learning_rate": 4.07023451185555e-06, "loss": 0.6296, "step": 14456 }, { "epoch": 16.466096866096866, "grad_norm": 0.2067917436361313, "learning_rate": 4.067686716975522e-06, "loss": 0.512, "step": 14457 }, { "epoch": 16.467236467236468, "grad_norm": 0.1799246221780777, "learning_rate": 4.065139649132288e-06, "loss": 0.4658, "step": 14458 }, { "epoch": 16.468376068376067, "grad_norm": 0.21179606020450592, "learning_rate": 4.0625933084143284e-06, "loss": 0.6547, "step": 14459 }, { "epoch": 16.46951566951567, "grad_norm": 0.22315360605716705, "learning_rate": 4.06004769491008e-06, "loss": 0.6729, "step": 14460 }, { "epoch": 16.47065527065527, "grad_norm": 0.179313063621521, "learning_rate": 4.057502808707967e-06, "loss": 0.7105, "step": 14461 }, { "epoch": 16.47179487179487, "grad_norm": 0.18379870057106018, "learning_rate": 4.054958649896368e-06, "loss": 0.4343, "step": 14462 }, { "epoch": 16.472934472934472, "grad_norm": 0.2692694664001465, "learning_rate": 4.052415218563646e-06, "loss": 0.5529, "step": 14463 }, { "epoch": 16.474074074074075, "grad_norm": 0.1646180897951126, "learning_rate": 4.049872514798162e-06, "loss": 0.6413, "step": 14464 }, { "epoch": 16.475213675213674, "grad_norm": 0.1998300552368164, "learning_rate": 4.047330538688212e-06, "loss": 0.4966, "step": 14465 }, { "epoch": 16.476353276353276, "grad_norm": 0.15743793547153473, "learning_rate": 4.044789290322096e-06, "loss": 0.7123, "step": 14466 }, { "epoch": 16.477492877492878, "grad_norm": 0.2263445407152176, "learning_rate": 4.042248769788074e-06, "loss": 0.6525, "step": 14467 }, { "epoch": 16.478632478632477, "grad_norm": 0.1588156670331955, "learning_rate": 4.039708977174389e-06, "loss": 0.6672, "step": 14468 }, { "epoch": 16.47977207977208, "grad_norm": 0.17254389822483063, "learning_rate": 4.0371699125692495e-06, "loss": 0.5763, "step": 14469 }, { "epoch": 16.48091168091168, "grad_norm": 0.1884051412343979, "learning_rate": 4.034631576060846e-06, "loss": 0.7396, "step": 14470 }, { "epoch": 16.48205128205128, "grad_norm": 0.18718352913856506, "learning_rate": 4.032093967737341e-06, "loss": 0.7101, "step": 14471 }, { "epoch": 16.483190883190883, "grad_norm": 0.25985613465309143, "learning_rate": 4.029557087686883e-06, "loss": 0.5803, "step": 14472 }, { "epoch": 16.484330484330485, "grad_norm": 0.19891129434108734, "learning_rate": 4.027020935997569e-06, "loss": 0.6691, "step": 14473 }, { "epoch": 16.485470085470084, "grad_norm": 0.23379211127758026, "learning_rate": 4.024485512757489e-06, "loss": 0.4533, "step": 14474 }, { "epoch": 16.486609686609686, "grad_norm": 0.19329865276813507, "learning_rate": 4.021950818054715e-06, "loss": 0.6181, "step": 14475 }, { "epoch": 16.48774928774929, "grad_norm": 0.15092094242572784, "learning_rate": 4.019416851977284e-06, "loss": 0.75, "step": 14476 }, { "epoch": 16.488888888888887, "grad_norm": 0.16361835598945618, "learning_rate": 4.016883614613198e-06, "loss": 0.5619, "step": 14477 }, { "epoch": 16.49002849002849, "grad_norm": 0.2616507112979889, "learning_rate": 4.014351106050449e-06, "loss": 0.4272, "step": 14478 }, { "epoch": 16.491168091168092, "grad_norm": 0.16996940970420837, "learning_rate": 4.011819326376995e-06, "loss": 0.7497, "step": 14479 }, { "epoch": 16.49230769230769, "grad_norm": 0.21917283535003662, "learning_rate": 4.0092882756807805e-06, "loss": 0.4194, "step": 14480 }, { "epoch": 16.493447293447293, "grad_norm": 0.23285363614559174, "learning_rate": 4.0067579540497006e-06, "loss": 0.3855, "step": 14481 }, { "epoch": 16.494586894586895, "grad_norm": 0.20673397183418274, "learning_rate": 4.004228361571652e-06, "loss": 0.5267, "step": 14482 }, { "epoch": 16.495726495726494, "grad_norm": 0.16720455884933472, "learning_rate": 4.001699498334488e-06, "loss": 0.8746, "step": 14483 }, { "epoch": 16.496866096866096, "grad_norm": 0.17041461169719696, "learning_rate": 3.999171364426055e-06, "loss": 0.7391, "step": 14484 }, { "epoch": 16.4980056980057, "grad_norm": 0.20871534943580627, "learning_rate": 3.9966439599341375e-06, "loss": 0.6219, "step": 14485 }, { "epoch": 16.499145299145297, "grad_norm": 0.26762112975120544, "learning_rate": 3.994117284946544e-06, "loss": 0.5247, "step": 14486 }, { "epoch": 16.5002849002849, "grad_norm": 0.3475840091705322, "learning_rate": 3.991591339551026e-06, "loss": 0.5787, "step": 14487 }, { "epoch": 16.501424501424502, "grad_norm": 0.19313666224479675, "learning_rate": 3.989066123835311e-06, "loss": 0.7083, "step": 14488 }, { "epoch": 16.5025641025641, "grad_norm": 0.2199021279811859, "learning_rate": 3.986541637887109e-06, "loss": 0.6433, "step": 14489 }, { "epoch": 16.503703703703703, "grad_norm": 0.18482081592082977, "learning_rate": 3.984017881794103e-06, "loss": 0.8095, "step": 14490 }, { "epoch": 16.504843304843305, "grad_norm": 0.1827738881111145, "learning_rate": 3.981494855643958e-06, "loss": 0.6815, "step": 14491 }, { "epoch": 16.505982905982904, "grad_norm": 0.19812440872192383, "learning_rate": 3.9789725595242884e-06, "loss": 0.7362, "step": 14492 }, { "epoch": 16.507122507122507, "grad_norm": 0.2004772573709488, "learning_rate": 3.976450993522712e-06, "loss": 0.7225, "step": 14493 }, { "epoch": 16.50826210826211, "grad_norm": 0.2659797966480255, "learning_rate": 3.973930157726807e-06, "loss": 0.5827, "step": 14494 }, { "epoch": 16.509401709401708, "grad_norm": 0.2283308357000351, "learning_rate": 3.971410052224136e-06, "loss": 0.7094, "step": 14495 }, { "epoch": 16.51054131054131, "grad_norm": 0.1879384070634842, "learning_rate": 3.968890677102211e-06, "loss": 0.628, "step": 14496 }, { "epoch": 16.511680911680912, "grad_norm": 0.2578437626361847, "learning_rate": 3.966372032448554e-06, "loss": 0.4458, "step": 14497 }, { "epoch": 16.51282051282051, "grad_norm": 0.2004712074995041, "learning_rate": 3.963854118350644e-06, "loss": 0.6449, "step": 14498 }, { "epoch": 16.513960113960113, "grad_norm": 0.20575734972953796, "learning_rate": 3.961336934895926e-06, "loss": 0.6711, "step": 14499 }, { "epoch": 16.515099715099716, "grad_norm": 0.2130632847547531, "learning_rate": 3.958820482171832e-06, "loss": 0.6722, "step": 14500 }, { "epoch": 16.516239316239318, "grad_norm": 0.19198299944400787, "learning_rate": 3.956304760265763e-06, "loss": 0.8823, "step": 14501 }, { "epoch": 16.517378917378917, "grad_norm": 0.19161811470985413, "learning_rate": 3.953789769265112e-06, "loss": 0.5373, "step": 14502 }, { "epoch": 16.51851851851852, "grad_norm": 0.24001465737819672, "learning_rate": 3.95127550925721e-06, "loss": 0.7958, "step": 14503 }, { "epoch": 16.51965811965812, "grad_norm": 0.1894005835056305, "learning_rate": 3.948761980329393e-06, "loss": 0.7023, "step": 14504 }, { "epoch": 16.52079772079772, "grad_norm": 0.24162663519382477, "learning_rate": 3.946249182568968e-06, "loss": 0.6452, "step": 14505 }, { "epoch": 16.521937321937322, "grad_norm": 0.19434046745300293, "learning_rate": 3.943737116063209e-06, "loss": 0.9394, "step": 14506 }, { "epoch": 16.523076923076925, "grad_norm": 0.16493722796440125, "learning_rate": 3.941225780899352e-06, "loss": 0.7118, "step": 14507 }, { "epoch": 16.524216524216524, "grad_norm": 0.18182341754436493, "learning_rate": 3.938715177164645e-06, "loss": 0.5909, "step": 14508 }, { "epoch": 16.525356125356126, "grad_norm": 0.20856042206287384, "learning_rate": 3.936205304946275e-06, "loss": 0.4961, "step": 14509 }, { "epoch": 16.526495726495728, "grad_norm": 0.23776394128799438, "learning_rate": 3.93369616433143e-06, "loss": 0.5035, "step": 14510 }, { "epoch": 16.527635327635327, "grad_norm": 0.16974763572216034, "learning_rate": 3.931187755407243e-06, "loss": 0.6542, "step": 14511 }, { "epoch": 16.52877492877493, "grad_norm": 0.18352046608924866, "learning_rate": 3.928680078260844e-06, "loss": 0.6894, "step": 14512 }, { "epoch": 16.52991452991453, "grad_norm": 0.30340099334716797, "learning_rate": 3.92617313297933e-06, "loss": 0.6206, "step": 14513 }, { "epoch": 16.53105413105413, "grad_norm": 0.17844238877296448, "learning_rate": 3.9236669196497846e-06, "loss": 0.6471, "step": 14514 }, { "epoch": 16.532193732193733, "grad_norm": 0.20907242596149445, "learning_rate": 3.921161438359242e-06, "loss": 0.6187, "step": 14515 }, { "epoch": 16.533333333333335, "grad_norm": 0.21441438794136047, "learning_rate": 3.918656689194727e-06, "loss": 0.6341, "step": 14516 }, { "epoch": 16.534472934472934, "grad_norm": 0.19238892197608948, "learning_rate": 3.916152672243243e-06, "loss": 0.5023, "step": 14517 }, { "epoch": 16.535612535612536, "grad_norm": 0.19719743728637695, "learning_rate": 3.913649387591756e-06, "loss": 0.7562, "step": 14518 }, { "epoch": 16.53675213675214, "grad_norm": 0.1943766176700592, "learning_rate": 3.911146835327212e-06, "loss": 0.6566, "step": 14519 }, { "epoch": 16.537891737891737, "grad_norm": 0.21831713616847992, "learning_rate": 3.908645015536533e-06, "loss": 0.5037, "step": 14520 }, { "epoch": 16.53903133903134, "grad_norm": 0.2081470787525177, "learning_rate": 3.9061439283066216e-06, "loss": 0.7748, "step": 14521 }, { "epoch": 16.540170940170942, "grad_norm": 0.21147605776786804, "learning_rate": 3.903643573724333e-06, "loss": 0.6704, "step": 14522 }, { "epoch": 16.54131054131054, "grad_norm": 0.17337815463542938, "learning_rate": 3.901143951876518e-06, "loss": 0.6969, "step": 14523 }, { "epoch": 16.542450142450143, "grad_norm": 0.21074214577674866, "learning_rate": 3.8986450628499955e-06, "loss": 0.505, "step": 14524 }, { "epoch": 16.543589743589745, "grad_norm": 0.1778637170791626, "learning_rate": 3.896146906731565e-06, "loss": 0.7977, "step": 14525 }, { "epoch": 16.544729344729344, "grad_norm": 0.17211110889911652, "learning_rate": 3.893649483607984e-06, "loss": 0.6107, "step": 14526 }, { "epoch": 16.545868945868946, "grad_norm": 0.15985257923603058, "learning_rate": 3.891152793565997e-06, "loss": 0.8332, "step": 14527 }, { "epoch": 16.54700854700855, "grad_norm": 0.18682222068309784, "learning_rate": 3.888656836692325e-06, "loss": 0.5215, "step": 14528 }, { "epoch": 16.548148148148147, "grad_norm": 0.18926292657852173, "learning_rate": 3.886161613073655e-06, "loss": 0.7584, "step": 14529 }, { "epoch": 16.54928774928775, "grad_norm": 0.20884409546852112, "learning_rate": 3.883667122796658e-06, "loss": 0.7324, "step": 14530 }, { "epoch": 16.550427350427352, "grad_norm": 0.22595719993114471, "learning_rate": 3.881173365947971e-06, "loss": 0.685, "step": 14531 }, { "epoch": 16.55156695156695, "grad_norm": 0.2707895338535309, "learning_rate": 3.878680342614216e-06, "loss": 0.509, "step": 14532 }, { "epoch": 16.552706552706553, "grad_norm": 0.24852532148361206, "learning_rate": 3.8761880528819685e-06, "loss": 0.5025, "step": 14533 }, { "epoch": 16.553846153846155, "grad_norm": 0.23397788405418396, "learning_rate": 3.8736964968378035e-06, "loss": 0.6484, "step": 14534 }, { "epoch": 16.554985754985754, "grad_norm": 0.2433735877275467, "learning_rate": 3.871205674568257e-06, "loss": 0.6137, "step": 14535 }, { "epoch": 16.556125356125357, "grad_norm": 0.23208960890769958, "learning_rate": 3.8687155861598465e-06, "loss": 0.6291, "step": 14536 }, { "epoch": 16.55726495726496, "grad_norm": 0.22305455803871155, "learning_rate": 3.8662262316990464e-06, "loss": 0.5538, "step": 14537 }, { "epoch": 16.558404558404558, "grad_norm": 0.1901799887418747, "learning_rate": 3.8637376112723305e-06, "loss": 0.573, "step": 14538 }, { "epoch": 16.55954415954416, "grad_norm": 0.1874188780784607, "learning_rate": 3.861249724966132e-06, "loss": 0.6841, "step": 14539 }, { "epoch": 16.560683760683762, "grad_norm": 0.26205453276634216, "learning_rate": 3.8587625728668615e-06, "loss": 0.607, "step": 14540 }, { "epoch": 16.56182336182336, "grad_norm": 0.20798297226428986, "learning_rate": 3.856276155060906e-06, "loss": 0.7327, "step": 14541 }, { "epoch": 16.562962962962963, "grad_norm": 0.23050467669963837, "learning_rate": 3.853790471634628e-06, "loss": 0.4925, "step": 14542 }, { "epoch": 16.564102564102566, "grad_norm": 0.19091200828552246, "learning_rate": 3.851305522674361e-06, "loss": 0.7183, "step": 14543 }, { "epoch": 16.565242165242164, "grad_norm": 0.23158277571201324, "learning_rate": 3.848821308266406e-06, "loss": 0.7537, "step": 14544 }, { "epoch": 16.566381766381767, "grad_norm": 0.19433674216270447, "learning_rate": 3.846337828497057e-06, "loss": 0.6727, "step": 14545 }, { "epoch": 16.56752136752137, "grad_norm": 0.2191799134016037, "learning_rate": 3.843855083452563e-06, "loss": 0.4923, "step": 14546 }, { "epoch": 16.568660968660968, "grad_norm": 0.24010121822357178, "learning_rate": 3.841373073219171e-06, "loss": 0.5411, "step": 14547 }, { "epoch": 16.56980056980057, "grad_norm": 0.20162785053253174, "learning_rate": 3.838891797883074e-06, "loss": 0.789, "step": 14548 }, { "epoch": 16.570940170940172, "grad_norm": 0.21228350698947906, "learning_rate": 3.836411257530453e-06, "loss": 0.6263, "step": 14549 }, { "epoch": 16.57207977207977, "grad_norm": 0.18376502394676208, "learning_rate": 3.833931452247474e-06, "loss": 0.7464, "step": 14550 }, { "epoch": 16.573219373219374, "grad_norm": 0.1630278378725052, "learning_rate": 3.83145238212026e-06, "loss": 0.5857, "step": 14551 }, { "epoch": 16.574358974358976, "grad_norm": 0.16570539772510529, "learning_rate": 3.828974047234921e-06, "loss": 0.7666, "step": 14552 }, { "epoch": 16.575498575498575, "grad_norm": 0.1783129870891571, "learning_rate": 3.82649644767753e-06, "loss": 0.8422, "step": 14553 }, { "epoch": 16.576638176638177, "grad_norm": 0.1791054606437683, "learning_rate": 3.824019583534147e-06, "loss": 0.7483, "step": 14554 }, { "epoch": 16.57777777777778, "grad_norm": 0.22978992760181427, "learning_rate": 3.821543454890805e-06, "loss": 0.3788, "step": 14555 }, { "epoch": 16.578917378917378, "grad_norm": 0.2110435962677002, "learning_rate": 3.819068061833492e-06, "loss": 0.613, "step": 14556 }, { "epoch": 16.58005698005698, "grad_norm": 0.17531338334083557, "learning_rate": 3.816593404448193e-06, "loss": 0.8015, "step": 14557 }, { "epoch": 16.581196581196583, "grad_norm": 0.19135034084320068, "learning_rate": 3.8141194828208602e-06, "loss": 0.576, "step": 14558 }, { "epoch": 16.58233618233618, "grad_norm": 0.17526598274707794, "learning_rate": 3.8116462970374246e-06, "loss": 0.7397, "step": 14559 }, { "epoch": 16.583475783475784, "grad_norm": 0.17120303213596344, "learning_rate": 3.8091738471837778e-06, "loss": 0.578, "step": 14560 }, { "epoch": 16.584615384615386, "grad_norm": 0.20212437212467194, "learning_rate": 3.8067021333457965e-06, "loss": 0.6067, "step": 14561 }, { "epoch": 16.585754985754985, "grad_norm": 0.16685090959072113, "learning_rate": 3.804231155609331e-06, "loss": 0.6871, "step": 14562 }, { "epoch": 16.586894586894587, "grad_norm": 0.2742098867893219, "learning_rate": 3.8017609140602067e-06, "loss": 0.4295, "step": 14563 }, { "epoch": 16.58803418803419, "grad_norm": 0.18924805521965027, "learning_rate": 3.7992914087842224e-06, "loss": 0.6182, "step": 14564 }, { "epoch": 16.58917378917379, "grad_norm": 0.17991560697555542, "learning_rate": 3.796822639867148e-06, "loss": 0.6326, "step": 14565 }, { "epoch": 16.59031339031339, "grad_norm": 0.17940419912338257, "learning_rate": 3.79435460739474e-06, "loss": 0.6089, "step": 14566 }, { "epoch": 16.591452991452993, "grad_norm": 0.1745978444814682, "learning_rate": 3.7918873114527047e-06, "loss": 0.5845, "step": 14567 }, { "epoch": 16.59259259259259, "grad_norm": 0.2253025323152542, "learning_rate": 3.789420752126746e-06, "loss": 0.7228, "step": 14568 }, { "epoch": 16.593732193732194, "grad_norm": 0.22936908900737762, "learning_rate": 3.7869549295025343e-06, "loss": 0.6392, "step": 14569 }, { "epoch": 16.594871794871796, "grad_norm": 0.18933890759944916, "learning_rate": 3.78448984366572e-06, "loss": 0.7953, "step": 14570 }, { "epoch": 16.596011396011395, "grad_norm": 0.24416644871234894, "learning_rate": 3.7820254947019073e-06, "loss": 0.6635, "step": 14571 }, { "epoch": 16.597150997150997, "grad_norm": 0.22809933125972748, "learning_rate": 3.7795618826967026e-06, "loss": 0.5825, "step": 14572 }, { "epoch": 16.5982905982906, "grad_norm": 0.17031829059123993, "learning_rate": 3.777099007735668e-06, "loss": 0.5184, "step": 14573 }, { "epoch": 16.5994301994302, "grad_norm": 0.2156430184841156, "learning_rate": 3.7746368699043496e-06, "loss": 0.5978, "step": 14574 }, { "epoch": 16.6005698005698, "grad_norm": 0.21404266357421875, "learning_rate": 3.772175469288264e-06, "loss": 0.3838, "step": 14575 }, { "epoch": 16.601709401709403, "grad_norm": 0.17592374980449677, "learning_rate": 3.7697148059728986e-06, "loss": 0.4915, "step": 14576 }, { "epoch": 16.602849002849002, "grad_norm": 0.2183118611574173, "learning_rate": 3.7672548800437274e-06, "loss": 0.4314, "step": 14577 }, { "epoch": 16.603988603988604, "grad_norm": 0.21222040057182312, "learning_rate": 3.7647956915861786e-06, "loss": 0.4624, "step": 14578 }, { "epoch": 16.605128205128207, "grad_norm": 0.1846335232257843, "learning_rate": 3.762337240685673e-06, "loss": 0.7217, "step": 14579 }, { "epoch": 16.606267806267805, "grad_norm": 0.30568239092826843, "learning_rate": 3.759879527427601e-06, "loss": 0.8213, "step": 14580 }, { "epoch": 16.607407407407408, "grad_norm": 0.202920064330101, "learning_rate": 3.757422551897327e-06, "loss": 0.2951, "step": 14581 }, { "epoch": 16.60854700854701, "grad_norm": 0.19714248180389404, "learning_rate": 3.7549663141801805e-06, "loss": 0.6109, "step": 14582 }, { "epoch": 16.60968660968661, "grad_norm": 0.17863404750823975, "learning_rate": 3.752510814361476e-06, "loss": 0.5999, "step": 14583 }, { "epoch": 16.61082621082621, "grad_norm": 0.16537688672542572, "learning_rate": 3.7500560525265044e-06, "loss": 0.7788, "step": 14584 }, { "epoch": 16.611965811965813, "grad_norm": 0.21520625054836273, "learning_rate": 3.7476020287605217e-06, "loss": 0.5608, "step": 14585 }, { "epoch": 16.613105413105412, "grad_norm": 0.19842830300331116, "learning_rate": 3.745148743148766e-06, "loss": 0.6802, "step": 14586 }, { "epoch": 16.614245014245014, "grad_norm": 0.21955198049545288, "learning_rate": 3.7426961957764434e-06, "loss": 0.4403, "step": 14587 }, { "epoch": 16.615384615384617, "grad_norm": 0.21487829089164734, "learning_rate": 3.740244386728742e-06, "loss": 0.6714, "step": 14588 }, { "epoch": 16.616524216524216, "grad_norm": 0.2130952626466751, "learning_rate": 3.737793316090821e-06, "loss": 0.5808, "step": 14589 }, { "epoch": 16.617663817663818, "grad_norm": 0.20674735307693481, "learning_rate": 3.7353429839478064e-06, "loss": 0.724, "step": 14590 }, { "epoch": 16.61880341880342, "grad_norm": 0.2151806503534317, "learning_rate": 3.732893390384806e-06, "loss": 0.5662, "step": 14591 }, { "epoch": 16.61994301994302, "grad_norm": 0.19295625388622284, "learning_rate": 3.7304445354869044e-06, "loss": 0.6498, "step": 14592 }, { "epoch": 16.62108262108262, "grad_norm": 0.1685391366481781, "learning_rate": 3.727996419339161e-06, "loss": 0.6361, "step": 14593 }, { "epoch": 16.622222222222224, "grad_norm": 0.24617771804332733, "learning_rate": 3.725549042026594e-06, "loss": 0.4309, "step": 14594 }, { "epoch": 16.623361823361822, "grad_norm": 0.22348542511463165, "learning_rate": 3.723102403634213e-06, "loss": 0.5611, "step": 14595 }, { "epoch": 16.624501424501425, "grad_norm": 0.18756158649921417, "learning_rate": 3.720656504246997e-06, "loss": 0.6909, "step": 14596 }, { "epoch": 16.625641025641027, "grad_norm": 0.2298748642206192, "learning_rate": 3.7182113439499013e-06, "loss": 0.5768, "step": 14597 }, { "epoch": 16.626780626780626, "grad_norm": 0.1706974357366562, "learning_rate": 3.7157669228278486e-06, "loss": 0.7419, "step": 14598 }, { "epoch": 16.627920227920228, "grad_norm": 0.24354368448257446, "learning_rate": 3.713323240965744e-06, "loss": 0.6543, "step": 14599 }, { "epoch": 16.62905982905983, "grad_norm": 0.2417616844177246, "learning_rate": 3.7108802984484686e-06, "loss": 0.3807, "step": 14600 }, { "epoch": 16.63019943019943, "grad_norm": 0.19453510642051697, "learning_rate": 3.7084380953608582e-06, "loss": 0.511, "step": 14601 }, { "epoch": 16.63133903133903, "grad_norm": 0.20993264019489288, "learning_rate": 3.705996631787745e-06, "loss": 0.3764, "step": 14602 }, { "epoch": 16.632478632478634, "grad_norm": 0.19857636094093323, "learning_rate": 3.703555907813927e-06, "loss": 0.7042, "step": 14603 }, { "epoch": 16.633618233618233, "grad_norm": 0.23784996569156647, "learning_rate": 3.7011159235241845e-06, "loss": 0.5527, "step": 14604 }, { "epoch": 16.634757834757835, "grad_norm": 0.1775774359703064, "learning_rate": 3.698676679003252e-06, "loss": 0.7023, "step": 14605 }, { "epoch": 16.635897435897437, "grad_norm": 0.21216173470020294, "learning_rate": 3.696238174335856e-06, "loss": 0.5788, "step": 14606 }, { "epoch": 16.637037037037036, "grad_norm": 0.22988300025463104, "learning_rate": 3.6938004096066956e-06, "loss": 0.7976, "step": 14607 }, { "epoch": 16.63817663817664, "grad_norm": 0.1869416981935501, "learning_rate": 3.6913633849004397e-06, "loss": 0.636, "step": 14608 }, { "epoch": 16.63931623931624, "grad_norm": 0.21013885736465454, "learning_rate": 3.6889271003017313e-06, "loss": 0.5862, "step": 14609 }, { "epoch": 16.64045584045584, "grad_norm": 0.19351568818092346, "learning_rate": 3.6864915558951886e-06, "loss": 0.5212, "step": 14610 }, { "epoch": 16.64159544159544, "grad_norm": 0.17369745671749115, "learning_rate": 3.684056751765416e-06, "loss": 0.9157, "step": 14611 }, { "epoch": 16.642735042735044, "grad_norm": 0.1538432538509369, "learning_rate": 3.6816226879969636e-06, "loss": 0.654, "step": 14612 }, { "epoch": 16.643874643874643, "grad_norm": 0.27092444896698, "learning_rate": 3.679189364674382e-06, "loss": 0.3433, "step": 14613 }, { "epoch": 16.645014245014245, "grad_norm": 0.22474908828735352, "learning_rate": 3.6767567818821847e-06, "loss": 0.4906, "step": 14614 }, { "epoch": 16.646153846153847, "grad_norm": 0.19490982592105865, "learning_rate": 3.674324939704871e-06, "loss": 0.8353, "step": 14615 }, { "epoch": 16.647293447293446, "grad_norm": 0.16275721788406372, "learning_rate": 3.671893838226889e-06, "loss": 0.7055, "step": 14616 }, { "epoch": 16.64843304843305, "grad_norm": 0.17398375272750854, "learning_rate": 3.669463477532689e-06, "loss": 0.9279, "step": 14617 }, { "epoch": 16.64957264957265, "grad_norm": 0.20997199416160583, "learning_rate": 3.667033857706681e-06, "loss": 0.6837, "step": 14618 }, { "epoch": 16.65071225071225, "grad_norm": 0.18733809888362885, "learning_rate": 3.664604978833255e-06, "loss": 0.5058, "step": 14619 }, { "epoch": 16.651851851851852, "grad_norm": 0.16576658189296722, "learning_rate": 3.66217684099677e-06, "loss": 0.7647, "step": 14620 }, { "epoch": 16.652991452991454, "grad_norm": 0.2201228141784668, "learning_rate": 3.6597494442815598e-06, "loss": 0.8653, "step": 14621 }, { "epoch": 16.654131054131053, "grad_norm": 0.1736903190612793, "learning_rate": 3.657322788771947e-06, "loss": 0.7397, "step": 14622 }, { "epoch": 16.655270655270655, "grad_norm": 0.2093871533870697, "learning_rate": 3.6548968745521967e-06, "loss": 0.8634, "step": 14623 }, { "epoch": 16.656410256410258, "grad_norm": 0.21694384515285492, "learning_rate": 3.652471701706581e-06, "loss": 0.6973, "step": 14624 }, { "epoch": 16.657549857549856, "grad_norm": 0.2191031575202942, "learning_rate": 3.6500472703193263e-06, "loss": 0.4609, "step": 14625 }, { "epoch": 16.65868945868946, "grad_norm": 0.1684856116771698, "learning_rate": 3.647623580474649e-06, "loss": 0.8714, "step": 14626 }, { "epoch": 16.65982905982906, "grad_norm": 0.2283681184053421, "learning_rate": 3.645200632256718e-06, "loss": 0.5105, "step": 14627 }, { "epoch": 16.66096866096866, "grad_norm": 0.1907675415277481, "learning_rate": 3.6427784257496933e-06, "loss": 0.6516, "step": 14628 }, { "epoch": 16.662108262108262, "grad_norm": 0.2024574875831604, "learning_rate": 3.640356961037705e-06, "loss": 0.7249, "step": 14629 }, { "epoch": 16.663247863247864, "grad_norm": 0.19128815829753876, "learning_rate": 3.637936238204867e-06, "loss": 0.5558, "step": 14630 }, { "epoch": 16.664387464387463, "grad_norm": 0.19036570191383362, "learning_rate": 3.635516257335245e-06, "loss": 0.4762, "step": 14631 }, { "epoch": 16.665527065527066, "grad_norm": 0.19478853046894073, "learning_rate": 3.633097018512896e-06, "loss": 0.8306, "step": 14632 }, { "epoch": 16.666666666666668, "grad_norm": 0.19821801781654358, "learning_rate": 3.6306785218218453e-06, "loss": 0.5955, "step": 14633 }, { "epoch": 16.667806267806267, "grad_norm": 0.21600525081157684, "learning_rate": 3.6282607673461004e-06, "loss": 0.7088, "step": 14634 }, { "epoch": 16.66894586894587, "grad_norm": 0.1982768028974533, "learning_rate": 3.6258437551696278e-06, "loss": 0.873, "step": 14635 }, { "epoch": 16.67008547008547, "grad_norm": 0.19198794662952423, "learning_rate": 3.623427485376382e-06, "loss": 0.8126, "step": 14636 }, { "epoch": 16.67122507122507, "grad_norm": 0.23557241261005402, "learning_rate": 3.6210119580502825e-06, "loss": 0.638, "step": 14637 }, { "epoch": 16.672364672364672, "grad_norm": 0.18622344732284546, "learning_rate": 3.618597173275237e-06, "loss": 0.7704, "step": 14638 }, { "epoch": 16.673504273504275, "grad_norm": 0.21982015669345856, "learning_rate": 3.6161831311351065e-06, "loss": 0.5458, "step": 14639 }, { "epoch": 16.674643874643873, "grad_norm": 0.22193771600723267, "learning_rate": 3.613769831713734e-06, "loss": 0.5057, "step": 14640 }, { "epoch": 16.675783475783476, "grad_norm": 0.20188890397548676, "learning_rate": 3.611357275094959e-06, "loss": 0.529, "step": 14641 }, { "epoch": 16.676923076923078, "grad_norm": 0.18457788228988647, "learning_rate": 3.608945461362559e-06, "loss": 0.7242, "step": 14642 }, { "epoch": 16.678062678062677, "grad_norm": 0.21365272998809814, "learning_rate": 3.6065343906003106e-06, "loss": 0.6462, "step": 14643 }, { "epoch": 16.67920227920228, "grad_norm": 0.17531974613666534, "learning_rate": 3.604124062891953e-06, "loss": 0.7558, "step": 14644 }, { "epoch": 16.68034188034188, "grad_norm": 0.18487349152565002, "learning_rate": 3.6017144783212135e-06, "loss": 0.6911, "step": 14645 }, { "epoch": 16.68148148148148, "grad_norm": 0.1780790388584137, "learning_rate": 3.599305636971767e-06, "loss": 0.7966, "step": 14646 }, { "epoch": 16.682621082621083, "grad_norm": 0.1781991422176361, "learning_rate": 3.5968975389272906e-06, "loss": 0.5923, "step": 14647 }, { "epoch": 16.683760683760685, "grad_norm": 0.2787780165672302, "learning_rate": 3.5944901842714207e-06, "loss": 0.6287, "step": 14648 }, { "epoch": 16.684900284900284, "grad_norm": 0.1921214461326599, "learning_rate": 3.5920835730877762e-06, "loss": 0.7003, "step": 14649 }, { "epoch": 16.686039886039886, "grad_norm": 0.19757118821144104, "learning_rate": 3.5896777054599372e-06, "loss": 0.6759, "step": 14650 }, { "epoch": 16.68717948717949, "grad_norm": 0.21820276975631714, "learning_rate": 3.5872725814714652e-06, "loss": 0.8021, "step": 14651 }, { "epoch": 16.688319088319087, "grad_norm": 0.1930316537618637, "learning_rate": 3.58486820120591e-06, "loss": 0.6904, "step": 14652 }, { "epoch": 16.68945868945869, "grad_norm": 0.18890811502933502, "learning_rate": 3.5824645647467707e-06, "loss": 0.5526, "step": 14653 }, { "epoch": 16.69059829059829, "grad_norm": 0.17934978008270264, "learning_rate": 3.5800616721775344e-06, "loss": 0.795, "step": 14654 }, { "epoch": 16.69173789173789, "grad_norm": 0.2052299827337265, "learning_rate": 3.5776595235816612e-06, "loss": 0.6729, "step": 14655 }, { "epoch": 16.692877492877493, "grad_norm": 0.1690863072872162, "learning_rate": 3.575258119042593e-06, "loss": 0.481, "step": 14656 }, { "epoch": 16.694017094017095, "grad_norm": 0.1977846622467041, "learning_rate": 3.572857458643719e-06, "loss": 0.6312, "step": 14657 }, { "epoch": 16.695156695156694, "grad_norm": 0.21069231629371643, "learning_rate": 3.5704575424684332e-06, "loss": 0.581, "step": 14658 }, { "epoch": 16.696296296296296, "grad_norm": 0.1934773176908493, "learning_rate": 3.5680583706000857e-06, "loss": 0.2961, "step": 14659 }, { "epoch": 16.6974358974359, "grad_norm": 0.2008139193058014, "learning_rate": 3.565659943122018e-06, "loss": 0.6576, "step": 14660 }, { "epoch": 16.698575498575497, "grad_norm": 0.1939091682434082, "learning_rate": 3.563262260117517e-06, "loss": 0.574, "step": 14661 }, { "epoch": 16.6997150997151, "grad_norm": 0.18059399724006653, "learning_rate": 3.560865321669865e-06, "loss": 0.7012, "step": 14662 }, { "epoch": 16.700854700854702, "grad_norm": 0.18085725605487823, "learning_rate": 3.558469127862327e-06, "loss": 0.5699, "step": 14663 }, { "epoch": 16.7019943019943, "grad_norm": 0.20898693799972534, "learning_rate": 3.5560736787781155e-06, "loss": 0.5635, "step": 14664 }, { "epoch": 16.703133903133903, "grad_norm": 0.21455982327461243, "learning_rate": 3.5536789745004378e-06, "loss": 0.6346, "step": 14665 }, { "epoch": 16.704273504273505, "grad_norm": 0.2405419945716858, "learning_rate": 3.5512850151124683e-06, "loss": 0.6799, "step": 14666 }, { "epoch": 16.705413105413104, "grad_norm": 0.1728639453649521, "learning_rate": 3.5488918006973516e-06, "loss": 0.7291, "step": 14667 }, { "epoch": 16.706552706552706, "grad_norm": 0.1853685826063156, "learning_rate": 3.546499331338218e-06, "loss": 0.9132, "step": 14668 }, { "epoch": 16.70769230769231, "grad_norm": 0.23775158822536469, "learning_rate": 3.544107607118158e-06, "loss": 0.6041, "step": 14669 }, { "epoch": 16.708831908831907, "grad_norm": 0.2054608315229416, "learning_rate": 3.5417166281202423e-06, "loss": 0.7261, "step": 14670 }, { "epoch": 16.70997150997151, "grad_norm": 0.16729244589805603, "learning_rate": 3.5393263944275195e-06, "loss": 0.8438, "step": 14671 }, { "epoch": 16.711111111111112, "grad_norm": 0.19036667048931122, "learning_rate": 3.5369369061230144e-06, "loss": 0.7448, "step": 14672 }, { "epoch": 16.71225071225071, "grad_norm": 0.23822540044784546, "learning_rate": 3.5345481632897027e-06, "loss": 0.3402, "step": 14673 }, { "epoch": 16.713390313390313, "grad_norm": 0.26414963603019714, "learning_rate": 3.5321601660105675e-06, "loss": 0.5163, "step": 14674 }, { "epoch": 16.714529914529916, "grad_norm": 0.23553740978240967, "learning_rate": 3.5297729143685536e-06, "loss": 0.5234, "step": 14675 }, { "epoch": 16.715669515669514, "grad_norm": 0.18662650883197784, "learning_rate": 3.527386408446562e-06, "loss": 0.6276, "step": 14676 }, { "epoch": 16.716809116809117, "grad_norm": 0.21046824753284454, "learning_rate": 3.525000648327492e-06, "loss": 0.5953, "step": 14677 }, { "epoch": 16.71794871794872, "grad_norm": 0.18378998339176178, "learning_rate": 3.5226156340942063e-06, "loss": 0.8075, "step": 14678 }, { "epoch": 16.719088319088318, "grad_norm": 0.1846972405910492, "learning_rate": 3.520231365829549e-06, "loss": 0.6992, "step": 14679 }, { "epoch": 16.72022792022792, "grad_norm": 0.18984457850456238, "learning_rate": 3.5178478436163177e-06, "loss": 0.6164, "step": 14680 }, { "epoch": 16.721367521367522, "grad_norm": 0.19495059549808502, "learning_rate": 3.5154650675373103e-06, "loss": 0.575, "step": 14681 }, { "epoch": 16.72250712250712, "grad_norm": 0.1732192188501358, "learning_rate": 3.5130830376752797e-06, "loss": 0.5981, "step": 14682 }, { "epoch": 16.723646723646723, "grad_norm": 0.1762353926897049, "learning_rate": 3.5107017541129746e-06, "loss": 0.8969, "step": 14683 }, { "epoch": 16.724786324786326, "grad_norm": 0.18922844529151917, "learning_rate": 3.5083212169330803e-06, "loss": 0.7806, "step": 14684 }, { "epoch": 16.725925925925925, "grad_norm": 0.18500468134880066, "learning_rate": 3.5059414262182955e-06, "loss": 0.7059, "step": 14685 }, { "epoch": 16.727065527065527, "grad_norm": 0.20659609138965607, "learning_rate": 3.5035623820512844e-06, "loss": 0.5154, "step": 14686 }, { "epoch": 16.72820512820513, "grad_norm": 0.1955866813659668, "learning_rate": 3.5011840845146584e-06, "loss": 0.7957, "step": 14687 }, { "epoch": 16.729344729344728, "grad_norm": 0.23779912292957306, "learning_rate": 3.498806533691032e-06, "loss": 0.729, "step": 14688 }, { "epoch": 16.73048433048433, "grad_norm": 0.15403661131858826, "learning_rate": 3.496429729662981e-06, "loss": 0.6981, "step": 14689 }, { "epoch": 16.731623931623933, "grad_norm": 0.1875777244567871, "learning_rate": 3.4940536725130676e-06, "loss": 0.5385, "step": 14690 }, { "epoch": 16.73276353276353, "grad_norm": 0.21117456257343292, "learning_rate": 3.491678362323808e-06, "loss": 0.6495, "step": 14691 }, { "epoch": 16.733903133903134, "grad_norm": 0.21795162558555603, "learning_rate": 3.489303799177704e-06, "loss": 0.3546, "step": 14692 }, { "epoch": 16.735042735042736, "grad_norm": 0.15931764245033264, "learning_rate": 3.486929983157236e-06, "loss": 0.4599, "step": 14693 }, { "epoch": 16.736182336182335, "grad_norm": 0.1928333342075348, "learning_rate": 3.4845569143448553e-06, "loss": 0.6611, "step": 14694 }, { "epoch": 16.737321937321937, "grad_norm": 0.21025556325912476, "learning_rate": 3.4821845928229708e-06, "loss": 0.6483, "step": 14695 }, { "epoch": 16.73846153846154, "grad_norm": 0.241347536444664, "learning_rate": 3.479813018673994e-06, "loss": 0.6776, "step": 14696 }, { "epoch": 16.739601139601138, "grad_norm": 0.212122842669487, "learning_rate": 3.4774421919802956e-06, "loss": 0.4725, "step": 14697 }, { "epoch": 16.74074074074074, "grad_norm": 0.2250332534313202, "learning_rate": 3.475072112824215e-06, "loss": 0.6554, "step": 14698 }, { "epoch": 16.741880341880343, "grad_norm": 0.21439555287361145, "learning_rate": 3.472702781288073e-06, "loss": 0.5723, "step": 14699 }, { "epoch": 16.74301994301994, "grad_norm": 0.1837013065814972, "learning_rate": 3.4703341974541616e-06, "loss": 0.4998, "step": 14700 }, { "epoch": 16.744159544159544, "grad_norm": 0.2202627956867218, "learning_rate": 3.4679663614047593e-06, "loss": 0.5517, "step": 14701 }, { "epoch": 16.745299145299146, "grad_norm": 0.2174326777458191, "learning_rate": 3.465599273222089e-06, "loss": 0.7179, "step": 14702 }, { "epoch": 16.746438746438745, "grad_norm": 0.20844949781894684, "learning_rate": 3.463232932988378e-06, "loss": 0.5682, "step": 14703 }, { "epoch": 16.747578347578347, "grad_norm": 0.2191018909215927, "learning_rate": 3.4608673407858144e-06, "loss": 0.4616, "step": 14704 }, { "epoch": 16.74871794871795, "grad_norm": 0.23420454561710358, "learning_rate": 3.458502496696564e-06, "loss": 0.4996, "step": 14705 }, { "epoch": 16.74985754985755, "grad_norm": 0.24128484725952148, "learning_rate": 3.4561384008027524e-06, "loss": 0.492, "step": 14706 }, { "epoch": 16.75099715099715, "grad_norm": 0.20390881597995758, "learning_rate": 3.453775053186503e-06, "loss": 0.5434, "step": 14707 }, { "epoch": 16.752136752136753, "grad_norm": 0.22439153492450714, "learning_rate": 3.4514124539299e-06, "loss": 0.7022, "step": 14708 }, { "epoch": 16.753276353276352, "grad_norm": 0.19062481820583344, "learning_rate": 3.4490506031150087e-06, "loss": 0.6312, "step": 14709 }, { "epoch": 16.754415954415954, "grad_norm": 0.17792372405529022, "learning_rate": 3.4466895008238463e-06, "loss": 0.726, "step": 14710 }, { "epoch": 16.755555555555556, "grad_norm": 0.22493785619735718, "learning_rate": 3.4443291471384308e-06, "loss": 0.6689, "step": 14711 }, { "epoch": 16.756695156695155, "grad_norm": 0.24571868777275085, "learning_rate": 3.441969542140744e-06, "loss": 0.6001, "step": 14712 }, { "epoch": 16.757834757834758, "grad_norm": 0.19063417613506317, "learning_rate": 3.4396106859127447e-06, "loss": 0.6952, "step": 14713 }, { "epoch": 16.75897435897436, "grad_norm": 0.2070053070783615, "learning_rate": 3.4372525785363512e-06, "loss": 0.6566, "step": 14714 }, { "epoch": 16.76011396011396, "grad_norm": 0.18680186569690704, "learning_rate": 3.434895220093473e-06, "loss": 0.6214, "step": 14715 }, { "epoch": 16.76125356125356, "grad_norm": 0.22149209678173065, "learning_rate": 3.4325386106659892e-06, "loss": 0.7235, "step": 14716 }, { "epoch": 16.762393162393163, "grad_norm": 0.2250470668077469, "learning_rate": 3.4301827503357474e-06, "loss": 0.7235, "step": 14717 }, { "epoch": 16.763532763532762, "grad_norm": 0.1864991933107376, "learning_rate": 3.427827639184578e-06, "loss": 0.5742, "step": 14718 }, { "epoch": 16.764672364672364, "grad_norm": 0.19813434779644012, "learning_rate": 3.4254732772942804e-06, "loss": 0.661, "step": 14719 }, { "epoch": 16.765811965811967, "grad_norm": 0.2334143966436386, "learning_rate": 3.4231196647466295e-06, "loss": 0.7577, "step": 14720 }, { "epoch": 16.766951566951565, "grad_norm": 0.16335418820381165, "learning_rate": 3.4207668016233626e-06, "loss": 0.6523, "step": 14721 }, { "epoch": 16.768091168091168, "grad_norm": 0.1976618766784668, "learning_rate": 3.4184146880062105e-06, "loss": 0.7471, "step": 14722 }, { "epoch": 16.76923076923077, "grad_norm": 0.16605301201343536, "learning_rate": 3.4160633239768618e-06, "loss": 0.6936, "step": 14723 }, { "epoch": 16.77037037037037, "grad_norm": 0.2468760907649994, "learning_rate": 3.4137127096170007e-06, "loss": 0.4153, "step": 14724 }, { "epoch": 16.77150997150997, "grad_norm": 0.22643277049064636, "learning_rate": 3.411362845008251e-06, "loss": 0.7011, "step": 14725 }, { "epoch": 16.772649572649573, "grad_norm": 0.20217841863632202, "learning_rate": 3.409013730232238e-06, "loss": 0.4763, "step": 14726 }, { "epoch": 16.773789173789172, "grad_norm": 0.17784053087234497, "learning_rate": 3.4066653653705566e-06, "loss": 0.845, "step": 14727 }, { "epoch": 16.774928774928775, "grad_norm": 0.238676518201828, "learning_rate": 3.404317750504765e-06, "loss": 0.541, "step": 14728 }, { "epoch": 16.776068376068377, "grad_norm": 0.23390735685825348, "learning_rate": 3.4019708857164102e-06, "loss": 0.7431, "step": 14729 }, { "epoch": 16.777207977207976, "grad_norm": 0.25913941860198975, "learning_rate": 3.399624771086998e-06, "loss": 0.6154, "step": 14730 }, { "epoch": 16.778347578347578, "grad_norm": 0.17202185094356537, "learning_rate": 3.3972794066980256e-06, "loss": 0.9398, "step": 14731 }, { "epoch": 16.77948717948718, "grad_norm": 0.16503441333770752, "learning_rate": 3.3949347926309437e-06, "loss": 0.4665, "step": 14732 }, { "epoch": 16.78062678062678, "grad_norm": 0.21525779366493225, "learning_rate": 3.3925909289671876e-06, "loss": 0.6305, "step": 14733 }, { "epoch": 16.78176638176638, "grad_norm": 0.21760420501232147, "learning_rate": 3.3902478157881723e-06, "loss": 0.6829, "step": 14734 }, { "epoch": 16.782905982905984, "grad_norm": 0.27629998326301575, "learning_rate": 3.3879054531752803e-06, "loss": 0.5113, "step": 14735 }, { "epoch": 16.784045584045582, "grad_norm": 0.32538872957229614, "learning_rate": 3.38556384120986e-06, "loss": 0.6818, "step": 14736 }, { "epoch": 16.785185185185185, "grad_norm": 0.1555013358592987, "learning_rate": 3.383222979973247e-06, "loss": 0.6033, "step": 14737 }, { "epoch": 16.786324786324787, "grad_norm": 0.20977364480495453, "learning_rate": 3.3808828695467476e-06, "loss": 0.5296, "step": 14738 }, { "epoch": 16.787464387464386, "grad_norm": 0.17263485491275787, "learning_rate": 3.378543510011639e-06, "loss": 0.6992, "step": 14739 }, { "epoch": 16.788603988603988, "grad_norm": 0.16598084568977356, "learning_rate": 3.376204901449173e-06, "loss": 0.7407, "step": 14740 }, { "epoch": 16.78974358974359, "grad_norm": 0.17881295084953308, "learning_rate": 3.3738670439405763e-06, "loss": 0.8125, "step": 14741 }, { "epoch": 16.79088319088319, "grad_norm": 0.16692639887332916, "learning_rate": 3.3715299375670554e-06, "loss": 0.8586, "step": 14742 }, { "epoch": 16.79202279202279, "grad_norm": 0.22679796814918518, "learning_rate": 3.3691935824097713e-06, "loss": 0.6736, "step": 14743 }, { "epoch": 16.793162393162394, "grad_norm": 0.2388918399810791, "learning_rate": 3.3668579785498812e-06, "loss": 0.5589, "step": 14744 }, { "epoch": 16.794301994301993, "grad_norm": 0.18057364225387573, "learning_rate": 3.3645231260685054e-06, "loss": 0.8457, "step": 14745 }, { "epoch": 16.795441595441595, "grad_norm": 0.19479382038116455, "learning_rate": 3.362189025046736e-06, "loss": 0.8523, "step": 14746 }, { "epoch": 16.796581196581197, "grad_norm": 0.18581032752990723, "learning_rate": 3.359855675565654e-06, "loss": 0.8232, "step": 14747 }, { "epoch": 16.797720797720796, "grad_norm": 0.19107264280319214, "learning_rate": 3.357523077706287e-06, "loss": 0.5805, "step": 14748 }, { "epoch": 16.7988603988604, "grad_norm": 0.21707648038864136, "learning_rate": 3.355191231549665e-06, "loss": 0.4581, "step": 14749 }, { "epoch": 16.8, "grad_norm": 0.17213472723960876, "learning_rate": 3.3528601371767722e-06, "loss": 0.6278, "step": 14750 }, { "epoch": 16.8011396011396, "grad_norm": 0.18237662315368652, "learning_rate": 3.3505297946685748e-06, "loss": 0.7756, "step": 14751 }, { "epoch": 16.802279202279202, "grad_norm": 0.2457255870103836, "learning_rate": 3.3482002041060166e-06, "loss": 0.6011, "step": 14752 }, { "epoch": 16.803418803418804, "grad_norm": 0.21424660086631775, "learning_rate": 3.345871365570008e-06, "loss": 0.6095, "step": 14753 }, { "epoch": 16.804558404558403, "grad_norm": 0.209234818816185, "learning_rate": 3.3435432791414395e-06, "loss": 0.6892, "step": 14754 }, { "epoch": 16.805698005698005, "grad_norm": 0.23546189069747925, "learning_rate": 3.341215944901163e-06, "loss": 0.6176, "step": 14755 }, { "epoch": 16.806837606837608, "grad_norm": 0.18982715904712677, "learning_rate": 3.33888936293002e-06, "loss": 0.8664, "step": 14756 }, { "epoch": 16.807977207977206, "grad_norm": 0.16256925463676453, "learning_rate": 3.3365635333088173e-06, "loss": 0.6438, "step": 14757 }, { "epoch": 16.80911680911681, "grad_norm": 0.2089722603559494, "learning_rate": 3.3342384561183436e-06, "loss": 0.6521, "step": 14758 }, { "epoch": 16.81025641025641, "grad_norm": 0.23766598105430603, "learning_rate": 3.331914131439343e-06, "loss": 0.2682, "step": 14759 }, { "epoch": 16.81139601139601, "grad_norm": 0.24712476134300232, "learning_rate": 3.329590559352555e-06, "loss": 0.6654, "step": 14760 }, { "epoch": 16.812535612535612, "grad_norm": 0.1959812343120575, "learning_rate": 3.3272677399386775e-06, "loss": 0.5572, "step": 14761 }, { "epoch": 16.813675213675214, "grad_norm": 0.19847257435321808, "learning_rate": 3.3249456732783927e-06, "loss": 0.5969, "step": 14762 }, { "epoch": 16.814814814814813, "grad_norm": 0.21061527729034424, "learning_rate": 3.3226243594523504e-06, "loss": 0.7511, "step": 14763 }, { "epoch": 16.815954415954415, "grad_norm": 0.22279202938079834, "learning_rate": 3.320303798541177e-06, "loss": 0.639, "step": 14764 }, { "epoch": 16.817094017094018, "grad_norm": 0.20850226283073425, "learning_rate": 3.3179839906254783e-06, "loss": 0.7963, "step": 14765 }, { "epoch": 16.81823361823362, "grad_norm": 0.2269270420074463, "learning_rate": 3.3156649357858173e-06, "loss": 0.7152, "step": 14766 }, { "epoch": 16.81937321937322, "grad_norm": 0.16206906735897064, "learning_rate": 3.313346634102743e-06, "loss": 0.7077, "step": 14767 }, { "epoch": 16.82051282051282, "grad_norm": 0.19101674854755402, "learning_rate": 3.3110290856567805e-06, "loss": 0.7085, "step": 14768 }, { "epoch": 16.821652421652423, "grad_norm": 0.24302618205547333, "learning_rate": 3.3087122905284263e-06, "loss": 0.7949, "step": 14769 }, { "epoch": 16.822792022792022, "grad_norm": 0.160295769572258, "learning_rate": 3.306396248798141e-06, "loss": 0.7553, "step": 14770 }, { "epoch": 16.823931623931625, "grad_norm": 0.20017482340335846, "learning_rate": 3.304080960546374e-06, "loss": 0.7562, "step": 14771 }, { "epoch": 16.825071225071227, "grad_norm": 0.21850743889808655, "learning_rate": 3.3017664258535364e-06, "loss": 0.6732, "step": 14772 }, { "epoch": 16.826210826210826, "grad_norm": 0.19815374910831451, "learning_rate": 3.299452644800022e-06, "loss": 0.692, "step": 14773 }, { "epoch": 16.827350427350428, "grad_norm": 0.1972140371799469, "learning_rate": 3.297139617466194e-06, "loss": 0.4356, "step": 14774 }, { "epoch": 16.82849002849003, "grad_norm": 0.1911310851573944, "learning_rate": 3.2948273439323884e-06, "loss": 0.6865, "step": 14775 }, { "epoch": 16.82962962962963, "grad_norm": 0.22164954245090485, "learning_rate": 3.292515824278927e-06, "loss": 0.4935, "step": 14776 }, { "epoch": 16.83076923076923, "grad_norm": 0.20343266427516937, "learning_rate": 3.2902050585860792e-06, "loss": 0.5128, "step": 14777 }, { "epoch": 16.831908831908834, "grad_norm": 0.18866266310214996, "learning_rate": 3.2878950469341112e-06, "loss": 0.9589, "step": 14778 }, { "epoch": 16.833048433048432, "grad_norm": 0.2625249922275543, "learning_rate": 3.2855857894032588e-06, "loss": 0.5282, "step": 14779 }, { "epoch": 16.834188034188035, "grad_norm": 0.16464684903621674, "learning_rate": 3.2832772860737325e-06, "loss": 0.5974, "step": 14780 }, { "epoch": 16.835327635327637, "grad_norm": 0.25545403361320496, "learning_rate": 3.2809695370256993e-06, "loss": 0.5496, "step": 14781 }, { "epoch": 16.836467236467236, "grad_norm": 0.2255530059337616, "learning_rate": 3.278662542339325e-06, "loss": 0.3252, "step": 14782 }, { "epoch": 16.837606837606838, "grad_norm": 0.18065612018108368, "learning_rate": 3.276356302094727e-06, "loss": 0.7256, "step": 14783 }, { "epoch": 16.83874643874644, "grad_norm": 0.18317294120788574, "learning_rate": 3.274050816372026e-06, "loss": 0.6904, "step": 14784 }, { "epoch": 16.83988603988604, "grad_norm": 0.1812399923801422, "learning_rate": 3.2717460852512814e-06, "loss": 0.7243, "step": 14785 }, { "epoch": 16.84102564102564, "grad_norm": 0.19768092036247253, "learning_rate": 3.2694421088125484e-06, "loss": 0.7388, "step": 14786 }, { "epoch": 16.842165242165244, "grad_norm": 0.22675330936908722, "learning_rate": 3.267138887135851e-06, "loss": 0.6823, "step": 14787 }, { "epoch": 16.843304843304843, "grad_norm": 0.2749398350715637, "learning_rate": 3.26483642030119e-06, "loss": 0.5847, "step": 14788 }, { "epoch": 16.844444444444445, "grad_norm": 0.23310603201389313, "learning_rate": 3.262534708388526e-06, "loss": 0.5695, "step": 14789 }, { "epoch": 16.845584045584047, "grad_norm": 0.16094189882278442, "learning_rate": 3.260233751477809e-06, "loss": 0.7909, "step": 14790 }, { "epoch": 16.846723646723646, "grad_norm": 0.1772032082080841, "learning_rate": 3.2579335496489617e-06, "loss": 0.9013, "step": 14791 }, { "epoch": 16.84786324786325, "grad_norm": 0.23654736578464508, "learning_rate": 3.2556341029818747e-06, "loss": 0.4448, "step": 14792 }, { "epoch": 16.84900284900285, "grad_norm": 0.21854573488235474, "learning_rate": 3.25333541155641e-06, "loss": 0.6025, "step": 14793 }, { "epoch": 16.85014245014245, "grad_norm": 0.1650972068309784, "learning_rate": 3.2510374754524037e-06, "loss": 0.8344, "step": 14794 }, { "epoch": 16.851282051282052, "grad_norm": 0.21306946873664856, "learning_rate": 3.2487402947496887e-06, "loss": 0.5082, "step": 14795 }, { "epoch": 16.852421652421654, "grad_norm": 0.187848761677742, "learning_rate": 3.246443869528032e-06, "loss": 0.712, "step": 14796 }, { "epoch": 16.853561253561253, "grad_norm": 0.22535042464733124, "learning_rate": 3.2441481998672004e-06, "loss": 0.4455, "step": 14797 }, { "epoch": 16.854700854700855, "grad_norm": 0.24472816288471222, "learning_rate": 3.2418532858469327e-06, "loss": 0.6224, "step": 14798 }, { "epoch": 16.855840455840458, "grad_norm": 0.2107369750738144, "learning_rate": 3.2395591275469427e-06, "loss": 0.5785, "step": 14799 }, { "epoch": 16.856980056980056, "grad_norm": 0.2104388326406479, "learning_rate": 3.237265725046898e-06, "loss": 0.4863, "step": 14800 }, { "epoch": 16.85811965811966, "grad_norm": 0.20485562086105347, "learning_rate": 3.234973078426462e-06, "loss": 0.799, "step": 14801 }, { "epoch": 16.85925925925926, "grad_norm": 0.2231946438550949, "learning_rate": 3.232681187765266e-06, "loss": 0.7746, "step": 14802 }, { "epoch": 16.86039886039886, "grad_norm": 0.1903351992368698, "learning_rate": 3.2303900531429176e-06, "loss": 0.7163, "step": 14803 }, { "epoch": 16.861538461538462, "grad_norm": 0.19041438400745392, "learning_rate": 3.2280996746389847e-06, "loss": 0.699, "step": 14804 }, { "epoch": 16.862678062678064, "grad_norm": 0.16172075271606445, "learning_rate": 3.225810052333017e-06, "loss": 0.7741, "step": 14805 }, { "epoch": 16.863817663817663, "grad_norm": 0.30482029914855957, "learning_rate": 3.2235211863045567e-06, "loss": 0.6755, "step": 14806 }, { "epoch": 16.864957264957265, "grad_norm": 0.2442866861820221, "learning_rate": 3.2212330766330846e-06, "loss": 0.6495, "step": 14807 }, { "epoch": 16.866096866096868, "grad_norm": 0.18580791354179382, "learning_rate": 3.2189457233980815e-06, "loss": 0.5523, "step": 14808 }, { "epoch": 16.867236467236467, "grad_norm": 0.17136384546756744, "learning_rate": 3.216659126678989e-06, "loss": 0.6446, "step": 14809 }, { "epoch": 16.86837606837607, "grad_norm": 0.1991439014673233, "learning_rate": 3.214373286555236e-06, "loss": 0.673, "step": 14810 }, { "epoch": 16.86951566951567, "grad_norm": 0.2081996649503708, "learning_rate": 3.212088203106206e-06, "loss": 0.4252, "step": 14811 }, { "epoch": 16.87065527065527, "grad_norm": 0.1936638057231903, "learning_rate": 3.209803876411266e-06, "loss": 0.5941, "step": 14812 }, { "epoch": 16.871794871794872, "grad_norm": 0.19413010776042938, "learning_rate": 3.2075203065497633e-06, "loss": 0.5806, "step": 14813 }, { "epoch": 16.872934472934475, "grad_norm": 0.19577611982822418, "learning_rate": 3.2052374936010154e-06, "loss": 0.5351, "step": 14814 }, { "epoch": 16.874074074074073, "grad_norm": 0.21065311133861542, "learning_rate": 3.202955437644298e-06, "loss": 0.4754, "step": 14815 }, { "epoch": 16.875213675213676, "grad_norm": 0.21284545958042145, "learning_rate": 3.2006741387588773e-06, "loss": 0.5826, "step": 14816 }, { "epoch": 16.876353276353278, "grad_norm": 0.223323255777359, "learning_rate": 3.198393597024002e-06, "loss": 0.5474, "step": 14817 }, { "epoch": 16.877492877492877, "grad_norm": 0.17122265696525574, "learning_rate": 3.1961138125188665e-06, "loss": 0.6242, "step": 14818 }, { "epoch": 16.87863247863248, "grad_norm": 0.15588898956775665, "learning_rate": 3.193834785322658e-06, "loss": 0.8631, "step": 14819 }, { "epoch": 16.87977207977208, "grad_norm": 0.20647187530994415, "learning_rate": 3.191556515514535e-06, "loss": 0.655, "step": 14820 }, { "epoch": 16.88091168091168, "grad_norm": 0.21994750201702118, "learning_rate": 3.189279003173634e-06, "loss": 0.8065, "step": 14821 }, { "epoch": 16.882051282051282, "grad_norm": 0.16426804661750793, "learning_rate": 3.187002248379045e-06, "loss": 0.7207, "step": 14822 }, { "epoch": 16.883190883190885, "grad_norm": 0.28191372752189636, "learning_rate": 3.1847262512098573e-06, "loss": 0.6303, "step": 14823 }, { "epoch": 16.884330484330484, "grad_norm": 0.18324729800224304, "learning_rate": 3.182451011745116e-06, "loss": 0.5286, "step": 14824 }, { "epoch": 16.885470085470086, "grad_norm": 0.18427149951457977, "learning_rate": 3.1801765300638526e-06, "loss": 0.6509, "step": 14825 }, { "epoch": 16.886609686609688, "grad_norm": 0.24386465549468994, "learning_rate": 3.1779028062450673e-06, "loss": 0.7362, "step": 14826 }, { "epoch": 16.887749287749287, "grad_norm": 0.20113185048103333, "learning_rate": 3.1756298403677164e-06, "loss": 0.5979, "step": 14827 }, { "epoch": 16.88888888888889, "grad_norm": 0.214745432138443, "learning_rate": 3.173357632510765e-06, "loss": 0.6971, "step": 14828 }, { "epoch": 16.89002849002849, "grad_norm": 0.21380400657653809, "learning_rate": 3.171086182753133e-06, "loss": 0.7386, "step": 14829 }, { "epoch": 16.89116809116809, "grad_norm": 0.1792917400598526, "learning_rate": 3.1688154911737015e-06, "loss": 0.7436, "step": 14830 }, { "epoch": 16.892307692307693, "grad_norm": 0.18266457319259644, "learning_rate": 3.1665455578513415e-06, "loss": 0.7923, "step": 14831 }, { "epoch": 16.893447293447295, "grad_norm": 0.24912922084331512, "learning_rate": 3.1642763828649003e-06, "loss": 0.5411, "step": 14832 }, { "epoch": 16.894586894586894, "grad_norm": 0.21742737293243408, "learning_rate": 3.1620079662931927e-06, "loss": 0.6616, "step": 14833 }, { "epoch": 16.895726495726496, "grad_norm": 0.1927417814731598, "learning_rate": 3.1597403082149953e-06, "loss": 0.5473, "step": 14834 }, { "epoch": 16.8968660968661, "grad_norm": 0.17637412250041962, "learning_rate": 3.15747340870908e-06, "loss": 0.6818, "step": 14835 }, { "epoch": 16.898005698005697, "grad_norm": 0.23352162539958954, "learning_rate": 3.155207267854182e-06, "loss": 0.7385, "step": 14836 }, { "epoch": 16.8991452991453, "grad_norm": 0.17706164717674255, "learning_rate": 3.1529418857290134e-06, "loss": 0.7056, "step": 14837 }, { "epoch": 16.900284900284902, "grad_norm": 0.17459584772586823, "learning_rate": 3.1506772624122468e-06, "loss": 0.7081, "step": 14838 }, { "epoch": 16.9014245014245, "grad_norm": 0.2351045459508896, "learning_rate": 3.1484133979825387e-06, "loss": 0.5974, "step": 14839 }, { "epoch": 16.902564102564103, "grad_norm": 0.29291340708732605, "learning_rate": 3.1461502925185376e-06, "loss": 0.6583, "step": 14840 }, { "epoch": 16.903703703703705, "grad_norm": 0.24841253459453583, "learning_rate": 3.1438879460988278e-06, "loss": 0.4212, "step": 14841 }, { "epoch": 16.904843304843304, "grad_norm": 0.1727171242237091, "learning_rate": 3.141626358801997e-06, "loss": 0.8017, "step": 14842 }, { "epoch": 16.905982905982906, "grad_norm": 0.18711979687213898, "learning_rate": 3.1393655307065894e-06, "loss": 0.6099, "step": 14843 }, { "epoch": 16.90712250712251, "grad_norm": 0.2179073989391327, "learning_rate": 3.1371054618911433e-06, "loss": 0.6586, "step": 14844 }, { "epoch": 16.908262108262107, "grad_norm": 0.177495077252388, "learning_rate": 3.13484615243414e-06, "loss": 0.9016, "step": 14845 }, { "epoch": 16.90940170940171, "grad_norm": 0.21736614406108856, "learning_rate": 3.132587602414061e-06, "loss": 0.6466, "step": 14846 }, { "epoch": 16.910541310541312, "grad_norm": 0.19747169315814972, "learning_rate": 3.1303298119093493e-06, "loss": 0.6743, "step": 14847 }, { "epoch": 16.91168091168091, "grad_norm": 0.19041146337985992, "learning_rate": 3.1280727809984313e-06, "loss": 0.5422, "step": 14848 }, { "epoch": 16.912820512820513, "grad_norm": 0.21531081199645996, "learning_rate": 3.1258165097596855e-06, "loss": 0.6582, "step": 14849 }, { "epoch": 16.913960113960115, "grad_norm": 0.22194461524486542, "learning_rate": 3.1235609982714823e-06, "loss": 0.6973, "step": 14850 }, { "epoch": 16.915099715099714, "grad_norm": 0.1573968082666397, "learning_rate": 3.1213062466121796e-06, "loss": 0.8643, "step": 14851 }, { "epoch": 16.916239316239317, "grad_norm": 0.1980697512626648, "learning_rate": 3.1190522548600694e-06, "loss": 0.5669, "step": 14852 }, { "epoch": 16.91737891737892, "grad_norm": 0.1801229864358902, "learning_rate": 3.1167990230934475e-06, "loss": 0.8694, "step": 14853 }, { "epoch": 16.918518518518518, "grad_norm": 0.18354345858097076, "learning_rate": 3.114546551390576e-06, "loss": 0.6403, "step": 14854 }, { "epoch": 16.91965811965812, "grad_norm": 0.15931031107902527, "learning_rate": 3.1122948398296906e-06, "loss": 0.5668, "step": 14855 }, { "epoch": 16.920797720797722, "grad_norm": 0.204697385430336, "learning_rate": 3.110043888488995e-06, "loss": 0.8192, "step": 14856 }, { "epoch": 16.92193732193732, "grad_norm": 0.2309281975030899, "learning_rate": 3.1077936974466706e-06, "loss": 0.5979, "step": 14857 }, { "epoch": 16.923076923076923, "grad_norm": 0.19758395850658417, "learning_rate": 3.105544266780874e-06, "loss": 0.8186, "step": 14858 }, { "epoch": 16.924216524216526, "grad_norm": 0.21037371456623077, "learning_rate": 3.103295596569744e-06, "loss": 0.5528, "step": 14859 }, { "epoch": 16.925356125356124, "grad_norm": 0.20761847496032715, "learning_rate": 3.1010476868913645e-06, "loss": 0.7354, "step": 14860 }, { "epoch": 16.926495726495727, "grad_norm": 0.1998409628868103, "learning_rate": 3.098800537823818e-06, "loss": 0.7765, "step": 14861 }, { "epoch": 16.92763532763533, "grad_norm": 0.18831759691238403, "learning_rate": 3.0965541494451634e-06, "loss": 0.6404, "step": 14862 }, { "epoch": 16.928774928774928, "grad_norm": 0.1787118762731552, "learning_rate": 3.094308521833422e-06, "loss": 0.7494, "step": 14863 }, { "epoch": 16.92991452991453, "grad_norm": 0.2410799264907837, "learning_rate": 3.0920636550665823e-06, "loss": 0.6113, "step": 14864 }, { "epoch": 16.931054131054132, "grad_norm": 0.18928298354148865, "learning_rate": 3.089819549222617e-06, "loss": 0.557, "step": 14865 }, { "epoch": 16.93219373219373, "grad_norm": 0.1769939512014389, "learning_rate": 3.087576204379475e-06, "loss": 0.7215, "step": 14866 }, { "epoch": 16.933333333333334, "grad_norm": 0.20230601727962494, "learning_rate": 3.0853336206150716e-06, "loss": 0.4905, "step": 14867 }, { "epoch": 16.934472934472936, "grad_norm": 2.089182138442993, "learning_rate": 3.0830917980072954e-06, "loss": 0.6583, "step": 14868 }, { "epoch": 16.935612535612535, "grad_norm": 0.16396862268447876, "learning_rate": 3.080850736634011e-06, "loss": 0.8526, "step": 14869 }, { "epoch": 16.936752136752137, "grad_norm": 0.18934102356433868, "learning_rate": 3.0786104365730595e-06, "loss": 0.7467, "step": 14870 }, { "epoch": 16.93789173789174, "grad_norm": 0.21667882800102234, "learning_rate": 3.0763708979022587e-06, "loss": 0.5795, "step": 14871 }, { "epoch": 16.939031339031338, "grad_norm": 0.1986517608165741, "learning_rate": 3.0741321206993718e-06, "loss": 0.6223, "step": 14872 }, { "epoch": 16.94017094017094, "grad_norm": 0.2782285809516907, "learning_rate": 3.071894105042181e-06, "loss": 0.6218, "step": 14873 }, { "epoch": 16.941310541310543, "grad_norm": 0.18464824557304382, "learning_rate": 3.069656851008415e-06, "loss": 0.5569, "step": 14874 }, { "epoch": 16.94245014245014, "grad_norm": 0.20595508813858032, "learning_rate": 3.067420358675771e-06, "loss": 0.5863, "step": 14875 }, { "epoch": 16.943589743589744, "grad_norm": 0.23332270979881287, "learning_rate": 3.065184628121931e-06, "loss": 0.693, "step": 14876 }, { "epoch": 16.944729344729346, "grad_norm": 0.22465844452381134, "learning_rate": 3.062949659424552e-06, "loss": 0.6365, "step": 14877 }, { "epoch": 16.945868945868945, "grad_norm": 0.209318146109581, "learning_rate": 3.060715452661261e-06, "loss": 0.537, "step": 14878 }, { "epoch": 16.947008547008547, "grad_norm": 0.21809899806976318, "learning_rate": 3.058482007909652e-06, "loss": 0.5206, "step": 14879 }, { "epoch": 16.94814814814815, "grad_norm": 0.20825175940990448, "learning_rate": 3.056249325247301e-06, "loss": 0.6252, "step": 14880 }, { "epoch": 16.94928774928775, "grad_norm": 0.23183006048202515, "learning_rate": 3.0540174047517578e-06, "loss": 0.5467, "step": 14881 }, { "epoch": 16.95042735042735, "grad_norm": 0.1657174676656723, "learning_rate": 3.0517862465005486e-06, "loss": 0.7303, "step": 14882 }, { "epoch": 16.951566951566953, "grad_norm": 0.23311227560043335, "learning_rate": 3.049555850571148e-06, "loss": 0.7206, "step": 14883 }, { "epoch": 16.95270655270655, "grad_norm": 0.2159399688243866, "learning_rate": 3.047326217041041e-06, "loss": 0.7392, "step": 14884 }, { "epoch": 16.953846153846154, "grad_norm": 0.18478034436702728, "learning_rate": 3.045097345987671e-06, "loss": 0.6682, "step": 14885 }, { "epoch": 16.954985754985756, "grad_norm": 0.22123868763446808, "learning_rate": 3.042869237488444e-06, "loss": 0.5325, "step": 14886 }, { "epoch": 16.956125356125355, "grad_norm": 0.21874210238456726, "learning_rate": 3.040641891620746e-06, "loss": 0.7025, "step": 14887 }, { "epoch": 16.957264957264957, "grad_norm": 0.19596925377845764, "learning_rate": 3.0384153084619477e-06, "loss": 0.7079, "step": 14888 }, { "epoch": 16.95840455840456, "grad_norm": 0.17991815507411957, "learning_rate": 3.0361894880893834e-06, "loss": 0.582, "step": 14889 }, { "epoch": 16.95954415954416, "grad_norm": 0.2364872395992279, "learning_rate": 3.0339644305803523e-06, "loss": 0.641, "step": 14890 }, { "epoch": 16.96068376068376, "grad_norm": 0.2232533097267151, "learning_rate": 3.0317401360121456e-06, "loss": 0.6508, "step": 14891 }, { "epoch": 16.961823361823363, "grad_norm": 0.21058550477027893, "learning_rate": 3.0295166044620154e-06, "loss": 0.6476, "step": 14892 }, { "epoch": 16.962962962962962, "grad_norm": 0.16185157001018524, "learning_rate": 3.0272938360072e-06, "loss": 0.6307, "step": 14893 }, { "epoch": 16.964102564102564, "grad_norm": 0.24551266431808472, "learning_rate": 3.025071830724882e-06, "loss": 0.4534, "step": 14894 }, { "epoch": 16.965242165242167, "grad_norm": 0.21784427762031555, "learning_rate": 3.0228505886922582e-06, "loss": 0.6538, "step": 14895 }, { "epoch": 16.966381766381765, "grad_norm": 0.6637478470802307, "learning_rate": 3.0206301099864747e-06, "loss": 0.7642, "step": 14896 }, { "epoch": 16.967521367521368, "grad_norm": 0.1842934638261795, "learning_rate": 3.0184103946846453e-06, "loss": 0.8049, "step": 14897 }, { "epoch": 16.96866096866097, "grad_norm": 0.19058462977409363, "learning_rate": 3.016191442863872e-06, "loss": 0.7098, "step": 14898 }, { "epoch": 16.96980056980057, "grad_norm": 0.18843884766101837, "learning_rate": 3.0139732546012266e-06, "loss": 0.6843, "step": 14899 }, { "epoch": 16.97094017094017, "grad_norm": 0.20854750275611877, "learning_rate": 3.0117558299737587e-06, "loss": 0.6831, "step": 14900 }, { "epoch": 16.972079772079773, "grad_norm": 0.20695297420024872, "learning_rate": 3.00953916905847e-06, "loss": 0.6184, "step": 14901 }, { "epoch": 16.973219373219372, "grad_norm": 0.1774614304304123, "learning_rate": 3.0073232719323637e-06, "loss": 0.8779, "step": 14902 }, { "epoch": 16.974358974358974, "grad_norm": 0.2092989981174469, "learning_rate": 3.0051081386723967e-06, "loss": 0.7488, "step": 14903 }, { "epoch": 16.975498575498577, "grad_norm": 0.19810225069522858, "learning_rate": 3.0028937693555195e-06, "loss": 0.6228, "step": 14904 }, { "epoch": 16.976638176638176, "grad_norm": 0.17616690695285797, "learning_rate": 3.0006801640586197e-06, "loss": 0.5429, "step": 14905 }, { "epoch": 16.977777777777778, "grad_norm": 0.17729952931404114, "learning_rate": 2.998467322858603e-06, "loss": 0.6216, "step": 14906 }, { "epoch": 16.97891737891738, "grad_norm": 0.16273464262485504, "learning_rate": 2.996255245832319e-06, "loss": 1.0133, "step": 14907 }, { "epoch": 16.98005698005698, "grad_norm": 0.20543445646762848, "learning_rate": 2.994043933056609e-06, "loss": 0.9106, "step": 14908 }, { "epoch": 16.98119658119658, "grad_norm": 0.18109145760536194, "learning_rate": 2.991833384608264e-06, "loss": 0.7002, "step": 14909 }, { "epoch": 16.982336182336184, "grad_norm": 0.23538149893283844, "learning_rate": 2.989623600564068e-06, "loss": 0.608, "step": 14910 }, { "epoch": 16.983475783475782, "grad_norm": 0.19944344460964203, "learning_rate": 2.987414581000772e-06, "loss": 0.5037, "step": 14911 }, { "epoch": 16.984615384615385, "grad_norm": 0.2201206386089325, "learning_rate": 2.9852063259951072e-06, "loss": 0.5791, "step": 14912 }, { "epoch": 16.985754985754987, "grad_norm": 0.17543230950832367, "learning_rate": 2.9829988356237624e-06, "loss": 0.7668, "step": 14913 }, { "epoch": 16.986894586894586, "grad_norm": 0.24056994915008545, "learning_rate": 2.9807921099634178e-06, "loss": 0.5446, "step": 14914 }, { "epoch": 16.988034188034188, "grad_norm": 0.1986328512430191, "learning_rate": 2.9785861490907145e-06, "loss": 0.4784, "step": 14915 }, { "epoch": 16.98917378917379, "grad_norm": 0.17287662625312805, "learning_rate": 2.976380953082272e-06, "loss": 0.7429, "step": 14916 }, { "epoch": 16.99031339031339, "grad_norm": 0.185201957821846, "learning_rate": 2.974176522014685e-06, "loss": 0.6023, "step": 14917 }, { "epoch": 16.99145299145299, "grad_norm": 0.17165346443653107, "learning_rate": 2.9719728559645194e-06, "loss": 0.5681, "step": 14918 }, { "epoch": 16.992592592592594, "grad_norm": 0.16597877442836761, "learning_rate": 2.96976995500832e-06, "loss": 0.5537, "step": 14919 }, { "epoch": 16.993732193732193, "grad_norm": 0.1753251552581787, "learning_rate": 2.967567819222586e-06, "loss": 0.7084, "step": 14920 }, { "epoch": 16.994871794871795, "grad_norm": 0.16678474843502045, "learning_rate": 2.965366448683812e-06, "loss": 0.6858, "step": 14921 }, { "epoch": 16.996011396011397, "grad_norm": 0.35645949840545654, "learning_rate": 2.9631658434684572e-06, "loss": 0.8283, "step": 14922 }, { "epoch": 16.997150997150996, "grad_norm": 0.15596824884414673, "learning_rate": 2.9609660036529596e-06, "loss": 0.8399, "step": 14923 }, { "epoch": 16.9982905982906, "grad_norm": 0.20432667434215546, "learning_rate": 2.9587669293137136e-06, "loss": 0.8565, "step": 14924 }, { "epoch": 16.9994301994302, "grad_norm": 0.179723858833313, "learning_rate": 2.956568620527106e-06, "loss": 0.6696, "step": 14925 }, { "epoch": 17.0, "grad_norm": 0.2801351845264435, "learning_rate": 2.9543710773694915e-06, "loss": 0.7652, "step": 14926 }, { "epoch": 17.001139601139602, "grad_norm": 0.19512665271759033, "learning_rate": 2.952174299917193e-06, "loss": 0.6563, "step": 14927 }, { "epoch": 17.0022792022792, "grad_norm": 0.25168266892433167, "learning_rate": 2.949978288246516e-06, "loss": 0.6191, "step": 14928 }, { "epoch": 17.003418803418803, "grad_norm": 0.25868624448776245, "learning_rate": 2.947783042433727e-06, "loss": 0.3627, "step": 14929 }, { "epoch": 17.004558404558406, "grad_norm": 0.23300258815288544, "learning_rate": 2.945588562555085e-06, "loss": 0.5863, "step": 14930 }, { "epoch": 17.005698005698004, "grad_norm": 0.19605261087417603, "learning_rate": 2.943394848686795e-06, "loss": 0.6877, "step": 14931 }, { "epoch": 17.006837606837607, "grad_norm": 0.24441637098789215, "learning_rate": 2.9412019009050606e-06, "loss": 0.4629, "step": 14932 }, { "epoch": 17.00797720797721, "grad_norm": 0.19533565640449524, "learning_rate": 2.9390097192860425e-06, "loss": 0.8337, "step": 14933 }, { "epoch": 17.009116809116808, "grad_norm": 0.16391415894031525, "learning_rate": 2.9368183039058916e-06, "loss": 1.0077, "step": 14934 }, { "epoch": 17.01025641025641, "grad_norm": 0.16765032708644867, "learning_rate": 2.9346276548407102e-06, "loss": 0.649, "step": 14935 }, { "epoch": 17.011396011396013, "grad_norm": 0.18843749165534973, "learning_rate": 2.932437772166591e-06, "loss": 0.7146, "step": 14936 }, { "epoch": 17.01253561253561, "grad_norm": 0.1859249472618103, "learning_rate": 2.9302486559595917e-06, "loss": 0.5675, "step": 14937 }, { "epoch": 17.013675213675214, "grad_norm": 0.20158928632736206, "learning_rate": 2.928060306295749e-06, "loss": 0.6998, "step": 14938 }, { "epoch": 17.014814814814816, "grad_norm": 0.1976604163646698, "learning_rate": 2.925872723251072e-06, "loss": 0.5616, "step": 14939 }, { "epoch": 17.015954415954415, "grad_norm": 0.17264212667942047, "learning_rate": 2.9236859069015382e-06, "loss": 0.4989, "step": 14940 }, { "epoch": 17.017094017094017, "grad_norm": 0.17468909919261932, "learning_rate": 2.9214998573231013e-06, "loss": 0.6128, "step": 14941 }, { "epoch": 17.01823361823362, "grad_norm": 0.1987021416425705, "learning_rate": 2.9193145745916973e-06, "loss": 0.7211, "step": 14942 }, { "epoch": 17.019373219373218, "grad_norm": 0.2015351802110672, "learning_rate": 2.9171300587832158e-06, "loss": 0.6194, "step": 14943 }, { "epoch": 17.02051282051282, "grad_norm": 0.16504210233688354, "learning_rate": 2.9149463099735348e-06, "loss": 0.636, "step": 14944 }, { "epoch": 17.021652421652423, "grad_norm": 0.20216158032417297, "learning_rate": 2.912763328238502e-06, "loss": 0.7022, "step": 14945 }, { "epoch": 17.02279202279202, "grad_norm": 0.18127597868442535, "learning_rate": 2.9105811136539456e-06, "loss": 0.6556, "step": 14946 }, { "epoch": 17.023931623931624, "grad_norm": 0.17371995747089386, "learning_rate": 2.908399666295647e-06, "loss": 0.5179, "step": 14947 }, { "epoch": 17.025071225071226, "grad_norm": 0.19458843767642975, "learning_rate": 2.906218986239381e-06, "loss": 0.8547, "step": 14948 }, { "epoch": 17.026210826210825, "grad_norm": 0.2290373146533966, "learning_rate": 2.904039073560885e-06, "loss": 0.5221, "step": 14949 }, { "epoch": 17.027350427350427, "grad_norm": 0.17556844651699066, "learning_rate": 2.9018599283358787e-06, "loss": 0.6471, "step": 14950 }, { "epoch": 17.02849002849003, "grad_norm": 0.23710325360298157, "learning_rate": 2.8996815506400458e-06, "loss": 0.5864, "step": 14951 }, { "epoch": 17.02962962962963, "grad_norm": 0.2378053218126297, "learning_rate": 2.897503940549051e-06, "loss": 0.4265, "step": 14952 }, { "epoch": 17.03076923076923, "grad_norm": 0.21921874582767487, "learning_rate": 2.8953270981385313e-06, "loss": 0.7231, "step": 14953 }, { "epoch": 17.031908831908833, "grad_norm": 0.18270917236804962, "learning_rate": 2.893151023484081e-06, "loss": 0.556, "step": 14954 }, { "epoch": 17.03304843304843, "grad_norm": 0.18606983125209808, "learning_rate": 2.8909757166612925e-06, "loss": 0.7677, "step": 14955 }, { "epoch": 17.034188034188034, "grad_norm": 0.2043803483247757, "learning_rate": 2.8888011777457173e-06, "loss": 0.6551, "step": 14956 }, { "epoch": 17.035327635327636, "grad_norm": 0.1733582764863968, "learning_rate": 2.886627406812889e-06, "loss": 0.7667, "step": 14957 }, { "epoch": 17.036467236467235, "grad_norm": 0.18059155344963074, "learning_rate": 2.8844544039382943e-06, "loss": 0.8046, "step": 14958 }, { "epoch": 17.037606837606837, "grad_norm": 0.16338680684566498, "learning_rate": 2.88228216919742e-06, "loss": 0.7791, "step": 14959 }, { "epoch": 17.03874643874644, "grad_norm": 0.17464567720890045, "learning_rate": 2.880110702665709e-06, "loss": 0.6268, "step": 14960 }, { "epoch": 17.03988603988604, "grad_norm": 0.2310246229171753, "learning_rate": 2.8779400044185807e-06, "loss": 0.5787, "step": 14961 }, { "epoch": 17.04102564102564, "grad_norm": 0.18465375900268555, "learning_rate": 2.8757700745314337e-06, "loss": 0.8619, "step": 14962 }, { "epoch": 17.042165242165243, "grad_norm": 0.17990891635417938, "learning_rate": 2.873600913079635e-06, "loss": 0.7925, "step": 14963 }, { "epoch": 17.043304843304842, "grad_norm": 0.2204209566116333, "learning_rate": 2.8714325201385284e-06, "loss": 0.6887, "step": 14964 }, { "epoch": 17.044444444444444, "grad_norm": 0.18174409866333008, "learning_rate": 2.869264895783419e-06, "loss": 0.4737, "step": 14965 }, { "epoch": 17.045584045584047, "grad_norm": 0.19974087178707123, "learning_rate": 2.8670980400895997e-06, "loss": 0.5616, "step": 14966 }, { "epoch": 17.046723646723645, "grad_norm": 0.20438915491104126, "learning_rate": 2.8649319531323324e-06, "loss": 0.4612, "step": 14967 }, { "epoch": 17.047863247863248, "grad_norm": 0.17648115754127502, "learning_rate": 2.862766634986855e-06, "loss": 0.8006, "step": 14968 }, { "epoch": 17.04900284900285, "grad_norm": 0.20189687609672546, "learning_rate": 2.8606020857283615e-06, "loss": 0.6507, "step": 14969 }, { "epoch": 17.05014245014245, "grad_norm": 0.3259961009025574, "learning_rate": 2.8584383054320456e-06, "loss": 0.487, "step": 14970 }, { "epoch": 17.05128205128205, "grad_norm": 0.24771519005298615, "learning_rate": 2.8562752941730552e-06, "loss": 0.4163, "step": 14971 }, { "epoch": 17.052421652421653, "grad_norm": 0.19273053109645844, "learning_rate": 2.854113052026519e-06, "loss": 0.6057, "step": 14972 }, { "epoch": 17.053561253561252, "grad_norm": 0.18914632499217987, "learning_rate": 2.8519515790675354e-06, "loss": 0.601, "step": 14973 }, { "epoch": 17.054700854700855, "grad_norm": 0.1870405226945877, "learning_rate": 2.849790875371186e-06, "loss": 0.6059, "step": 14974 }, { "epoch": 17.055840455840457, "grad_norm": 0.19459015130996704, "learning_rate": 2.8476309410125136e-06, "loss": 0.5807, "step": 14975 }, { "epoch": 17.056980056980056, "grad_norm": 0.217820942401886, "learning_rate": 2.8454717760665355e-06, "loss": 0.5976, "step": 14976 }, { "epoch": 17.058119658119658, "grad_norm": 0.17932353913784027, "learning_rate": 2.8433133806082452e-06, "loss": 0.6844, "step": 14977 }, { "epoch": 17.05925925925926, "grad_norm": 0.2388077676296234, "learning_rate": 2.8411557547126154e-06, "loss": 0.6194, "step": 14978 }, { "epoch": 17.06039886039886, "grad_norm": 0.2089555561542511, "learning_rate": 2.8389988984545868e-06, "loss": 0.7011, "step": 14979 }, { "epoch": 17.06153846153846, "grad_norm": 0.22288168966770172, "learning_rate": 2.836842811909063e-06, "loss": 0.7498, "step": 14980 }, { "epoch": 17.062678062678064, "grad_norm": 0.2411726415157318, "learning_rate": 2.8346874951509394e-06, "loss": 0.7155, "step": 14981 }, { "epoch": 17.063817663817662, "grad_norm": 0.18449333310127258, "learning_rate": 2.832532948255068e-06, "loss": 0.6725, "step": 14982 }, { "epoch": 17.064957264957265, "grad_norm": 0.17795316874980927, "learning_rate": 2.830379171296299e-06, "loss": 0.7807, "step": 14983 }, { "epoch": 17.066096866096867, "grad_norm": 0.16813020408153534, "learning_rate": 2.8282261643494205e-06, "loss": 0.662, "step": 14984 }, { "epoch": 17.067236467236466, "grad_norm": 0.20966660976409912, "learning_rate": 2.826073927489223e-06, "loss": 0.4603, "step": 14985 }, { "epoch": 17.068376068376068, "grad_norm": 0.16195650398731232, "learning_rate": 2.823922460790454e-06, "loss": 0.731, "step": 14986 }, { "epoch": 17.06951566951567, "grad_norm": 0.20361843705177307, "learning_rate": 2.821771764327849e-06, "loss": 0.4872, "step": 14987 }, { "epoch": 17.07065527065527, "grad_norm": 0.22102849185466766, "learning_rate": 2.819621838176095e-06, "loss": 0.6218, "step": 14988 }, { "epoch": 17.07179487179487, "grad_norm": 0.17357826232910156, "learning_rate": 2.817472682409872e-06, "loss": 0.6965, "step": 14989 }, { "epoch": 17.072934472934474, "grad_norm": 0.18162836134433746, "learning_rate": 2.8153242971038245e-06, "loss": 0.77, "step": 14990 }, { "epoch": 17.074074074074073, "grad_norm": 0.22448180615901947, "learning_rate": 2.8131766823325772e-06, "loss": 0.6818, "step": 14991 }, { "epoch": 17.075213675213675, "grad_norm": 0.19140951335430145, "learning_rate": 2.8110298381707147e-06, "loss": 0.6996, "step": 14992 }, { "epoch": 17.076353276353277, "grad_norm": 0.1942320168018341, "learning_rate": 2.808883764692799e-06, "loss": 0.5972, "step": 14993 }, { "epoch": 17.077492877492876, "grad_norm": 0.17436128854751587, "learning_rate": 2.806738461973385e-06, "loss": 0.7579, "step": 14994 }, { "epoch": 17.07863247863248, "grad_norm": 0.1683426797389984, "learning_rate": 2.8045939300869712e-06, "loss": 0.655, "step": 14995 }, { "epoch": 17.07977207977208, "grad_norm": 0.19002686440944672, "learning_rate": 2.8024501691080478e-06, "loss": 0.5977, "step": 14996 }, { "epoch": 17.08091168091168, "grad_norm": 0.195677250623703, "learning_rate": 2.8003071791110752e-06, "loss": 0.5108, "step": 14997 }, { "epoch": 17.08205128205128, "grad_norm": 0.17492468655109406, "learning_rate": 2.798164960170488e-06, "loss": 0.6474, "step": 14998 }, { "epoch": 17.083190883190884, "grad_norm": 0.24908673763275146, "learning_rate": 2.796023512360679e-06, "loss": 0.5377, "step": 14999 }, { "epoch": 17.084330484330483, "grad_norm": 0.18173104524612427, "learning_rate": 2.7938828357560397e-06, "loss": 0.5363, "step": 15000 }, { "epoch": 17.085470085470085, "grad_norm": 0.2287999540567398, "learning_rate": 2.7917429304309127e-06, "loss": 0.5335, "step": 15001 }, { "epoch": 17.086609686609687, "grad_norm": 0.18001307547092438, "learning_rate": 2.7896037964596333e-06, "loss": 0.6861, "step": 15002 }, { "epoch": 17.087749287749286, "grad_norm": 0.19185101985931396, "learning_rate": 2.787465433916486e-06, "loss": 0.661, "step": 15003 }, { "epoch": 17.08888888888889, "grad_norm": 0.273325115442276, "learning_rate": 2.7853278428757455e-06, "loss": 0.3503, "step": 15004 }, { "epoch": 17.09002849002849, "grad_norm": 0.16111336648464203, "learning_rate": 2.7831910234116716e-06, "loss": 0.6399, "step": 15005 }, { "epoch": 17.09116809116809, "grad_norm": 0.2116054743528366, "learning_rate": 2.781054975598463e-06, "loss": 0.7563, "step": 15006 }, { "epoch": 17.092307692307692, "grad_norm": 0.17487739026546478, "learning_rate": 2.7789196995103155e-06, "loss": 0.6322, "step": 15007 }, { "epoch": 17.093447293447294, "grad_norm": 0.19562922418117523, "learning_rate": 2.7767851952213988e-06, "loss": 0.4521, "step": 15008 }, { "epoch": 17.094586894586893, "grad_norm": 0.18290062248706818, "learning_rate": 2.7746514628058466e-06, "loss": 0.6395, "step": 15009 }, { "epoch": 17.095726495726495, "grad_norm": 0.18277134001255035, "learning_rate": 2.7725185023377676e-06, "loss": 0.4365, "step": 15010 }, { "epoch": 17.096866096866098, "grad_norm": 0.20527899265289307, "learning_rate": 2.770386313891246e-06, "loss": 0.4795, "step": 15011 }, { "epoch": 17.098005698005696, "grad_norm": 0.24203483760356903, "learning_rate": 2.76825489754034e-06, "loss": 0.4322, "step": 15012 }, { "epoch": 17.0991452991453, "grad_norm": 1.090076208114624, "learning_rate": 2.7661242533590842e-06, "loss": 0.744, "step": 15013 }, { "epoch": 17.1002849002849, "grad_norm": 0.2109411507844925, "learning_rate": 2.7639943814214696e-06, "loss": 0.4425, "step": 15014 }, { "epoch": 17.1014245014245, "grad_norm": 0.27713099122047424, "learning_rate": 2.761865281801476e-06, "loss": 0.5374, "step": 15015 }, { "epoch": 17.102564102564102, "grad_norm": 0.24252092838287354, "learning_rate": 2.759736954573064e-06, "loss": 0.5649, "step": 15016 }, { "epoch": 17.103703703703705, "grad_norm": 0.2273343801498413, "learning_rate": 2.757609399810146e-06, "loss": 0.7687, "step": 15017 }, { "epoch": 17.104843304843303, "grad_norm": 0.21032743155956268, "learning_rate": 2.7554826175866187e-06, "loss": 0.7519, "step": 15018 }, { "epoch": 17.105982905982906, "grad_norm": 0.22888341546058655, "learning_rate": 2.753356607976354e-06, "loss": 0.6021, "step": 15019 }, { "epoch": 17.107122507122508, "grad_norm": 0.16688553988933563, "learning_rate": 2.7512313710531924e-06, "loss": 0.8072, "step": 15020 }, { "epoch": 17.108262108262107, "grad_norm": 0.18631112575531006, "learning_rate": 2.7491069068909553e-06, "loss": 0.7823, "step": 15021 }, { "epoch": 17.10940170940171, "grad_norm": 0.17442795634269714, "learning_rate": 2.74698321556342e-06, "loss": 0.7536, "step": 15022 }, { "epoch": 17.11054131054131, "grad_norm": 0.21520088613033295, "learning_rate": 2.7448602971443527e-06, "loss": 0.4669, "step": 15023 }, { "epoch": 17.11168091168091, "grad_norm": 0.2754962742328644, "learning_rate": 2.742738151707491e-06, "loss": 0.591, "step": 15024 }, { "epoch": 17.112820512820512, "grad_norm": 0.216432586312294, "learning_rate": 2.7406167793265465e-06, "loss": 0.6418, "step": 15025 }, { "epoch": 17.113960113960115, "grad_norm": 0.18414662778377533, "learning_rate": 2.738496180075181e-06, "loss": 0.8555, "step": 15026 }, { "epoch": 17.115099715099714, "grad_norm": 0.18145786225795746, "learning_rate": 2.7363763540270705e-06, "loss": 0.5076, "step": 15027 }, { "epoch": 17.116239316239316, "grad_norm": 0.1736234575510025, "learning_rate": 2.734257301255838e-06, "loss": 0.8125, "step": 15028 }, { "epoch": 17.117378917378918, "grad_norm": 0.163956880569458, "learning_rate": 2.7321390218350754e-06, "loss": 0.7355, "step": 15029 }, { "epoch": 17.118518518518517, "grad_norm": 0.1710730493068695, "learning_rate": 2.730021515838363e-06, "loss": 0.7979, "step": 15030 }, { "epoch": 17.11965811965812, "grad_norm": 0.20885802805423737, "learning_rate": 2.7279047833392467e-06, "loss": 0.7053, "step": 15031 }, { "epoch": 17.12079772079772, "grad_norm": 0.18835724890232086, "learning_rate": 2.725788824411249e-06, "loss": 0.5905, "step": 15032 }, { "epoch": 17.12193732193732, "grad_norm": 0.19526271522045135, "learning_rate": 2.723673639127855e-06, "loss": 0.5703, "step": 15033 }, { "epoch": 17.123076923076923, "grad_norm": 0.17180439829826355, "learning_rate": 2.721559227562537e-06, "loss": 0.7765, "step": 15034 }, { "epoch": 17.124216524216525, "grad_norm": 0.24739310145378113, "learning_rate": 2.71944558978873e-06, "loss": 0.5759, "step": 15035 }, { "epoch": 17.125356125356124, "grad_norm": 0.18169115483760834, "learning_rate": 2.7173327258798566e-06, "loss": 0.7963, "step": 15036 }, { "epoch": 17.126495726495726, "grad_norm": 0.16554099321365356, "learning_rate": 2.715220635909285e-06, "loss": 0.5819, "step": 15037 }, { "epoch": 17.12763532763533, "grad_norm": 0.2766818106174469, "learning_rate": 2.7131093199503883e-06, "loss": 0.417, "step": 15038 }, { "epoch": 17.128774928774927, "grad_norm": 0.20147456228733063, "learning_rate": 2.7109987780764985e-06, "loss": 0.5501, "step": 15039 }, { "epoch": 17.12991452991453, "grad_norm": 0.16212566196918488, "learning_rate": 2.708889010360913e-06, "loss": 0.6883, "step": 15040 }, { "epoch": 17.13105413105413, "grad_norm": 0.16984368860721588, "learning_rate": 2.7067800168769116e-06, "loss": 0.6953, "step": 15041 }, { "epoch": 17.13219373219373, "grad_norm": 0.22705335915088654, "learning_rate": 2.704671797697747e-06, "loss": 0.6529, "step": 15042 }, { "epoch": 17.133333333333333, "grad_norm": 0.1955214887857437, "learning_rate": 2.70256435289665e-06, "loss": 0.536, "step": 15043 }, { "epoch": 17.134472934472935, "grad_norm": 0.21065659821033478, "learning_rate": 2.7004576825468058e-06, "loss": 0.5179, "step": 15044 }, { "epoch": 17.135612535612534, "grad_norm": 0.22755803167819977, "learning_rate": 2.698351786721387e-06, "loss": 0.4612, "step": 15045 }, { "epoch": 17.136752136752136, "grad_norm": 0.21350692212581635, "learning_rate": 2.696246665493543e-06, "loss": 0.6052, "step": 15046 }, { "epoch": 17.13789173789174, "grad_norm": 0.20846883952617645, "learning_rate": 2.6941423189363934e-06, "loss": 0.8051, "step": 15047 }, { "epoch": 17.139031339031337, "grad_norm": 0.15907186269760132, "learning_rate": 2.6920387471230128e-06, "loss": 0.6744, "step": 15048 }, { "epoch": 17.14017094017094, "grad_norm": 0.1589837670326233, "learning_rate": 2.6899359501264765e-06, "loss": 0.5029, "step": 15049 }, { "epoch": 17.141310541310542, "grad_norm": 0.20351219177246094, "learning_rate": 2.687833928019823e-06, "loss": 0.6728, "step": 15050 }, { "epoch": 17.14245014245014, "grad_norm": 0.2065182328224182, "learning_rate": 2.685732680876052e-06, "loss": 0.496, "step": 15051 }, { "epoch": 17.143589743589743, "grad_norm": 0.16153362393379211, "learning_rate": 2.68363220876815e-06, "loss": 0.952, "step": 15052 }, { "epoch": 17.144729344729345, "grad_norm": 0.2070079892873764, "learning_rate": 2.6815325117690694e-06, "loss": 0.6689, "step": 15053 }, { "epoch": 17.145868945868944, "grad_norm": 0.18489223718643188, "learning_rate": 2.6794335899517464e-06, "loss": 0.665, "step": 15054 }, { "epoch": 17.147008547008546, "grad_norm": 0.21992330253124237, "learning_rate": 2.677335443389073e-06, "loss": 0.7136, "step": 15055 }, { "epoch": 17.14814814814815, "grad_norm": 0.19489827752113342, "learning_rate": 2.6752380721539265e-06, "loss": 0.814, "step": 15056 }, { "epoch": 17.149287749287748, "grad_norm": 0.20492392778396606, "learning_rate": 2.673141476319155e-06, "loss": 0.5362, "step": 15057 }, { "epoch": 17.15042735042735, "grad_norm": 0.15961942076683044, "learning_rate": 2.6710456559575827e-06, "loss": 0.7438, "step": 15058 }, { "epoch": 17.151566951566952, "grad_norm": 0.21485158801078796, "learning_rate": 2.6689506111419905e-06, "loss": 0.6132, "step": 15059 }, { "epoch": 17.15270655270655, "grad_norm": 0.21094417572021484, "learning_rate": 2.6668563419451593e-06, "loss": 0.6229, "step": 15060 }, { "epoch": 17.153846153846153, "grad_norm": 0.18798595666885376, "learning_rate": 2.664762848439825e-06, "loss": 0.6037, "step": 15061 }, { "epoch": 17.154985754985756, "grad_norm": 0.2002788484096527, "learning_rate": 2.6626701306987024e-06, "loss": 0.672, "step": 15062 }, { "epoch": 17.156125356125354, "grad_norm": 0.18735022842884064, "learning_rate": 2.6605781887944712e-06, "loss": 0.6756, "step": 15063 }, { "epoch": 17.157264957264957, "grad_norm": 0.22012650966644287, "learning_rate": 2.658487022799791e-06, "loss": 0.6063, "step": 15064 }, { "epoch": 17.15840455840456, "grad_norm": 0.18836379051208496, "learning_rate": 2.6563966327872974e-06, "loss": 0.6306, "step": 15065 }, { "epoch": 17.159544159544158, "grad_norm": 0.18315967917442322, "learning_rate": 2.6543070188295963e-06, "loss": 0.7899, "step": 15066 }, { "epoch": 17.16068376068376, "grad_norm": 0.27469968795776367, "learning_rate": 2.652218180999261e-06, "loss": 0.5203, "step": 15067 }, { "epoch": 17.161823361823362, "grad_norm": 0.27029624581336975, "learning_rate": 2.6501301193688434e-06, "loss": 0.4631, "step": 15068 }, { "epoch": 17.162962962962965, "grad_norm": 0.21950866281986237, "learning_rate": 2.648042834010869e-06, "loss": 0.7517, "step": 15069 }, { "epoch": 17.164102564102564, "grad_norm": 0.1846713274717331, "learning_rate": 2.6459563249978418e-06, "loss": 0.6662, "step": 15070 }, { "epoch": 17.165242165242166, "grad_norm": 0.1970273107290268, "learning_rate": 2.6438705924022143e-06, "loss": 0.5449, "step": 15071 }, { "epoch": 17.166381766381768, "grad_norm": 0.2726972997188568, "learning_rate": 2.6417856362964457e-06, "loss": 0.5926, "step": 15072 }, { "epoch": 17.167521367521367, "grad_norm": 0.24256399273872375, "learning_rate": 2.6397014567529523e-06, "loss": 0.5511, "step": 15073 }, { "epoch": 17.16866096866097, "grad_norm": 0.1722864955663681, "learning_rate": 2.6376180538441155e-06, "loss": 0.7555, "step": 15074 }, { "epoch": 17.16980056980057, "grad_norm": 0.19845432043075562, "learning_rate": 2.6355354276422965e-06, "loss": 0.6709, "step": 15075 }, { "epoch": 17.17094017094017, "grad_norm": 0.24148957431316376, "learning_rate": 2.6334535782198383e-06, "loss": 0.8372, "step": 15076 }, { "epoch": 17.172079772079773, "grad_norm": 0.18178880214691162, "learning_rate": 2.6313725056490462e-06, "loss": 0.6222, "step": 15077 }, { "epoch": 17.173219373219375, "grad_norm": 0.19029007852077484, "learning_rate": 2.6292922100021988e-06, "loss": 0.6022, "step": 15078 }, { "epoch": 17.174358974358974, "grad_norm": 0.23001739382743835, "learning_rate": 2.627212691351555e-06, "loss": 0.7335, "step": 15079 }, { "epoch": 17.175498575498576, "grad_norm": 0.1962716430425644, "learning_rate": 2.6251339497693345e-06, "loss": 0.6243, "step": 15080 }, { "epoch": 17.17663817663818, "grad_norm": 0.19968239963054657, "learning_rate": 2.623055985327752e-06, "loss": 0.735, "step": 15081 }, { "epoch": 17.177777777777777, "grad_norm": 0.1714327186346054, "learning_rate": 2.620978798098961e-06, "loss": 0.6554, "step": 15082 }, { "epoch": 17.17891737891738, "grad_norm": 0.3115695118904114, "learning_rate": 2.618902388155123e-06, "loss": 0.377, "step": 15083 }, { "epoch": 17.180056980056982, "grad_norm": 0.17888757586479187, "learning_rate": 2.6168267555683577e-06, "loss": 0.6587, "step": 15084 }, { "epoch": 17.18119658119658, "grad_norm": 0.19751174747943878, "learning_rate": 2.61475190041075e-06, "loss": 0.4875, "step": 15085 }, { "epoch": 17.182336182336183, "grad_norm": 0.21054057776927948, "learning_rate": 2.61267782275437e-06, "loss": 0.6593, "step": 15086 }, { "epoch": 17.183475783475785, "grad_norm": 0.25037145614624023, "learning_rate": 2.6106045226712505e-06, "loss": 0.5905, "step": 15087 }, { "epoch": 17.184615384615384, "grad_norm": 0.19860246777534485, "learning_rate": 2.6085320002334157e-06, "loss": 0.5893, "step": 15088 }, { "epoch": 17.185754985754986, "grad_norm": 0.17965349555015564, "learning_rate": 2.6064602555128326e-06, "loss": 0.7347, "step": 15089 }, { "epoch": 17.18689458689459, "grad_norm": 0.16771340370178223, "learning_rate": 2.6043892885814687e-06, "loss": 0.6191, "step": 15090 }, { "epoch": 17.188034188034187, "grad_norm": 0.17742687463760376, "learning_rate": 2.602319099511255e-06, "loss": 0.5684, "step": 15091 }, { "epoch": 17.18917378917379, "grad_norm": 0.18416251242160797, "learning_rate": 2.600249688374096e-06, "loss": 0.6517, "step": 15092 }, { "epoch": 17.190313390313392, "grad_norm": 0.1928284615278244, "learning_rate": 2.598181055241855e-06, "loss": 0.5186, "step": 15093 }, { "epoch": 17.19145299145299, "grad_norm": 0.16785964369773865, "learning_rate": 2.596113200186395e-06, "loss": 0.6798, "step": 15094 }, { "epoch": 17.192592592592593, "grad_norm": 0.2011309266090393, "learning_rate": 2.5940461232795422e-06, "loss": 0.782, "step": 15095 }, { "epoch": 17.193732193732195, "grad_norm": 0.15364626049995422, "learning_rate": 2.5919798245930772e-06, "loss": 0.8462, "step": 15096 }, { "epoch": 17.194871794871794, "grad_norm": 0.1840306669473648, "learning_rate": 2.589914304198776e-06, "loss": 0.8393, "step": 15097 }, { "epoch": 17.196011396011396, "grad_norm": 0.21074427664279938, "learning_rate": 2.587849562168379e-06, "loss": 0.5411, "step": 15098 }, { "epoch": 17.197150997151, "grad_norm": 0.17806614935398102, "learning_rate": 2.585785598573601e-06, "loss": 0.5351, "step": 15099 }, { "epoch": 17.198290598290598, "grad_norm": 0.2036043405532837, "learning_rate": 2.583722413486131e-06, "loss": 0.8876, "step": 15100 }, { "epoch": 17.1994301994302, "grad_norm": 0.16868923604488373, "learning_rate": 2.5816600069776236e-06, "loss": 0.8464, "step": 15101 }, { "epoch": 17.200569800569802, "grad_norm": 0.21217770874500275, "learning_rate": 2.579598379119716e-06, "loss": 0.8388, "step": 15102 }, { "epoch": 17.2017094017094, "grad_norm": 0.22023366391658783, "learning_rate": 2.5775375299840117e-06, "loss": 0.8549, "step": 15103 }, { "epoch": 17.202849002849003, "grad_norm": 0.1760704666376114, "learning_rate": 2.5754774596420893e-06, "loss": 0.8507, "step": 15104 }, { "epoch": 17.203988603988606, "grad_norm": 0.1938440203666687, "learning_rate": 2.5734181681655035e-06, "loss": 0.6966, "step": 15105 }, { "epoch": 17.205128205128204, "grad_norm": 0.17305469512939453, "learning_rate": 2.57135965562578e-06, "loss": 0.5743, "step": 15106 }, { "epoch": 17.206267806267807, "grad_norm": 0.22448083758354187, "learning_rate": 2.5693019220944163e-06, "loss": 0.6156, "step": 15107 }, { "epoch": 17.20740740740741, "grad_norm": 0.262983113527298, "learning_rate": 2.567244967642879e-06, "loss": 0.5224, "step": 15108 }, { "epoch": 17.208547008547008, "grad_norm": 0.23758886754512787, "learning_rate": 2.5651887923426155e-06, "loss": 0.5305, "step": 15109 }, { "epoch": 17.20968660968661, "grad_norm": 0.21146656572818756, "learning_rate": 2.5631333962650384e-06, "loss": 0.5773, "step": 15110 }, { "epoch": 17.210826210826212, "grad_norm": 0.156987264752388, "learning_rate": 2.561078779481546e-06, "loss": 0.6917, "step": 15111 }, { "epoch": 17.21196581196581, "grad_norm": 0.21438787877559662, "learning_rate": 2.55902494206349e-06, "loss": 0.6482, "step": 15112 }, { "epoch": 17.213105413105414, "grad_norm": 0.2056760936975479, "learning_rate": 2.55697188408221e-06, "loss": 0.6169, "step": 15113 }, { "epoch": 17.214245014245016, "grad_norm": 0.19529277086257935, "learning_rate": 2.5549196056090156e-06, "loss": 0.8006, "step": 15114 }, { "epoch": 17.215384615384615, "grad_norm": 0.23238149285316467, "learning_rate": 2.5528681067151866e-06, "loss": 0.4384, "step": 15115 }, { "epoch": 17.216524216524217, "grad_norm": 0.15593671798706055, "learning_rate": 2.5508173874719793e-06, "loss": 0.857, "step": 15116 }, { "epoch": 17.21766381766382, "grad_norm": 0.22833791375160217, "learning_rate": 2.5487674479506202e-06, "loss": 0.5225, "step": 15117 }, { "epoch": 17.218803418803418, "grad_norm": 0.1787058562040329, "learning_rate": 2.5467182882223135e-06, "loss": 0.7493, "step": 15118 }, { "epoch": 17.21994301994302, "grad_norm": 0.18769077956676483, "learning_rate": 2.5446699083582213e-06, "loss": 0.7195, "step": 15119 }, { "epoch": 17.221082621082623, "grad_norm": 0.20116661489009857, "learning_rate": 2.5426223084294957e-06, "loss": 0.5683, "step": 15120 }, { "epoch": 17.22222222222222, "grad_norm": 0.1935771405696869, "learning_rate": 2.540575488507255e-06, "loss": 0.7803, "step": 15121 }, { "epoch": 17.223361823361824, "grad_norm": 0.21847252547740936, "learning_rate": 2.5385294486625966e-06, "loss": 0.631, "step": 15122 }, { "epoch": 17.224501424501426, "grad_norm": 0.16176047921180725, "learning_rate": 2.536484188966576e-06, "loss": 0.8931, "step": 15123 }, { "epoch": 17.225641025641025, "grad_norm": 0.17873893678188324, "learning_rate": 2.534439709490233e-06, "loss": 0.7149, "step": 15124 }, { "epoch": 17.226780626780627, "grad_norm": 0.21072816848754883, "learning_rate": 2.53239601030458e-06, "loss": 0.6532, "step": 15125 }, { "epoch": 17.22792022792023, "grad_norm": 0.17184481024742126, "learning_rate": 2.5303530914806e-06, "loss": 0.2553, "step": 15126 }, { "epoch": 17.22905982905983, "grad_norm": 0.22857321798801422, "learning_rate": 2.5283109530892496e-06, "loss": 0.5491, "step": 15127 }, { "epoch": 17.23019943019943, "grad_norm": 0.17435388267040253, "learning_rate": 2.5262695952014556e-06, "loss": 0.7099, "step": 15128 }, { "epoch": 17.231339031339033, "grad_norm": 0.26842939853668213, "learning_rate": 2.524229017888127e-06, "loss": 0.6365, "step": 15129 }, { "epoch": 17.23247863247863, "grad_norm": 0.16448654234409332, "learning_rate": 2.522189221220128e-06, "loss": 0.7441, "step": 15130 }, { "epoch": 17.233618233618234, "grad_norm": 0.1764354705810547, "learning_rate": 2.5201502052683122e-06, "loss": 0.6701, "step": 15131 }, { "epoch": 17.234757834757836, "grad_norm": 0.2524125874042511, "learning_rate": 2.518111970103498e-06, "loss": 0.6656, "step": 15132 }, { "epoch": 17.235897435897435, "grad_norm": 0.200727179646492, "learning_rate": 2.516074515796488e-06, "loss": 0.6161, "step": 15133 }, { "epoch": 17.237037037037037, "grad_norm": 0.16180752217769623, "learning_rate": 2.514037842418035e-06, "loss": 0.7554, "step": 15134 }, { "epoch": 17.23817663817664, "grad_norm": 0.232011616230011, "learning_rate": 2.512001950038884e-06, "loss": 0.7122, "step": 15135 }, { "epoch": 17.23931623931624, "grad_norm": 0.6532576084136963, "learning_rate": 2.5099668387297463e-06, "loss": 0.8184, "step": 15136 }, { "epoch": 17.24045584045584, "grad_norm": 0.18580158054828644, "learning_rate": 2.5079325085613113e-06, "loss": 0.4323, "step": 15137 }, { "epoch": 17.241595441595443, "grad_norm": 0.182058647274971, "learning_rate": 2.5058989596042303e-06, "loss": 0.6386, "step": 15138 }, { "epoch": 17.242735042735042, "grad_norm": 0.19445578753948212, "learning_rate": 2.5038661919291385e-06, "loss": 0.777, "step": 15139 }, { "epoch": 17.243874643874644, "grad_norm": 0.2003912627696991, "learning_rate": 2.50183420560664e-06, "loss": 0.8179, "step": 15140 }, { "epoch": 17.245014245014247, "grad_norm": 0.16480514407157898, "learning_rate": 2.4998030007073124e-06, "loss": 0.7508, "step": 15141 }, { "epoch": 17.246153846153845, "grad_norm": 0.16990984976291656, "learning_rate": 2.497772577301699e-06, "loss": 0.816, "step": 15142 }, { "epoch": 17.247293447293448, "grad_norm": 0.2032969444990158, "learning_rate": 2.495742935460327e-06, "loss": 0.4964, "step": 15143 }, { "epoch": 17.24843304843305, "grad_norm": 0.2081516683101654, "learning_rate": 2.4937140752536862e-06, "loss": 0.5333, "step": 15144 }, { "epoch": 17.24957264957265, "grad_norm": 0.18476413190364838, "learning_rate": 2.491685996752255e-06, "loss": 0.6408, "step": 15145 }, { "epoch": 17.25071225071225, "grad_norm": 0.17959251999855042, "learning_rate": 2.489658700026462e-06, "loss": 0.7507, "step": 15146 }, { "epoch": 17.251851851851853, "grad_norm": 0.23477263748645782, "learning_rate": 2.4876321851467267e-06, "loss": 0.4408, "step": 15147 }, { "epoch": 17.252991452991452, "grad_norm": 0.17450331151485443, "learning_rate": 2.485606452183434e-06, "loss": 0.6646, "step": 15148 }, { "epoch": 17.254131054131054, "grad_norm": 0.2237221598625183, "learning_rate": 2.483581501206944e-06, "loss": 0.6068, "step": 15149 }, { "epoch": 17.255270655270657, "grad_norm": 0.17771928012371063, "learning_rate": 2.48155733228759e-06, "loss": 0.7018, "step": 15150 }, { "epoch": 17.256410256410255, "grad_norm": 0.1876230239868164, "learning_rate": 2.479533945495674e-06, "loss": 0.5195, "step": 15151 }, { "epoch": 17.257549857549858, "grad_norm": 0.24191851913928986, "learning_rate": 2.477511340901481e-06, "loss": 0.3405, "step": 15152 }, { "epoch": 17.25868945868946, "grad_norm": 0.20344719290733337, "learning_rate": 2.4754895185752524e-06, "loss": 0.758, "step": 15153 }, { "epoch": 17.25982905982906, "grad_norm": 0.19998767971992493, "learning_rate": 2.4734684785872152e-06, "loss": 0.6802, "step": 15154 }, { "epoch": 17.26096866096866, "grad_norm": 0.18411147594451904, "learning_rate": 2.471448221007566e-06, "loss": 0.7494, "step": 15155 }, { "epoch": 17.262108262108264, "grad_norm": 0.2152598649263382, "learning_rate": 2.469428745906477e-06, "loss": 0.7002, "step": 15156 }, { "epoch": 17.263247863247862, "grad_norm": 0.19655431807041168, "learning_rate": 2.4674100533540854e-06, "loss": 0.6389, "step": 15157 }, { "epoch": 17.264387464387465, "grad_norm": 0.29243990778923035, "learning_rate": 2.465392143420506e-06, "loss": 0.4292, "step": 15158 }, { "epoch": 17.265527065527067, "grad_norm": 0.1663278341293335, "learning_rate": 2.463375016175826e-06, "loss": 0.7744, "step": 15159 }, { "epoch": 17.266666666666666, "grad_norm": 0.18078675866127014, "learning_rate": 2.4613586716901086e-06, "loss": 0.6191, "step": 15160 }, { "epoch": 17.267806267806268, "grad_norm": 0.18140067160129547, "learning_rate": 2.459343110033385e-06, "loss": 0.8783, "step": 15161 }, { "epoch": 17.26894586894587, "grad_norm": 0.20100940763950348, "learning_rate": 2.4573283312756657e-06, "loss": 0.7135, "step": 15162 }, { "epoch": 17.27008547008547, "grad_norm": 0.1684829294681549, "learning_rate": 2.4553143354869275e-06, "loss": 0.6913, "step": 15163 }, { "epoch": 17.27122507122507, "grad_norm": 0.1947351098060608, "learning_rate": 2.4533011227371172e-06, "loss": 0.6651, "step": 15164 }, { "epoch": 17.272364672364674, "grad_norm": 0.15982107818126678, "learning_rate": 2.4512886930961625e-06, "loss": 0.7237, "step": 15165 }, { "epoch": 17.273504273504273, "grad_norm": 0.22549481689929962, "learning_rate": 2.4492770466339605e-06, "loss": 0.7848, "step": 15166 }, { "epoch": 17.274643874643875, "grad_norm": 0.21762266755104065, "learning_rate": 2.447266183420388e-06, "loss": 0.5598, "step": 15167 }, { "epoch": 17.275783475783477, "grad_norm": 0.20130868256092072, "learning_rate": 2.445256103525273e-06, "loss": 0.6003, "step": 15168 }, { "epoch": 17.276923076923076, "grad_norm": 0.21412499248981476, "learning_rate": 2.4432468070184427e-06, "loss": 0.3147, "step": 15169 }, { "epoch": 17.27806267806268, "grad_norm": 0.19877946376800537, "learning_rate": 2.4412382939696803e-06, "loss": 0.4896, "step": 15170 }, { "epoch": 17.27920227920228, "grad_norm": 0.1650521606206894, "learning_rate": 2.439230564448747e-06, "loss": 0.6814, "step": 15171 }, { "epoch": 17.28034188034188, "grad_norm": 0.20785923302173615, "learning_rate": 2.4372236185253807e-06, "loss": 0.6307, "step": 15172 }, { "epoch": 17.28148148148148, "grad_norm": 0.2041858583688736, "learning_rate": 2.435217456269287e-06, "loss": 0.6081, "step": 15173 }, { "epoch": 17.282621082621084, "grad_norm": 0.16322332620620728, "learning_rate": 2.4332120777501467e-06, "loss": 0.7391, "step": 15174 }, { "epoch": 17.283760683760683, "grad_norm": 0.2151019424200058, "learning_rate": 2.4312074830376064e-06, "loss": 0.6587, "step": 15175 }, { "epoch": 17.284900284900285, "grad_norm": 0.155168816447258, "learning_rate": 2.4292036722012967e-06, "loss": 0.8807, "step": 15176 }, { "epoch": 17.286039886039887, "grad_norm": 0.24426081776618958, "learning_rate": 2.4272006453108094e-06, "loss": 0.1383, "step": 15177 }, { "epoch": 17.287179487179486, "grad_norm": 0.22462360560894012, "learning_rate": 2.425198402435722e-06, "loss": 0.4408, "step": 15178 }, { "epoch": 17.28831908831909, "grad_norm": 0.16213785111904144, "learning_rate": 2.423196943645578e-06, "loss": 0.6757, "step": 15179 }, { "epoch": 17.28945868945869, "grad_norm": 0.1819656491279602, "learning_rate": 2.421196269009887e-06, "loss": 0.6015, "step": 15180 }, { "epoch": 17.29059829059829, "grad_norm": 0.17803218960762024, "learning_rate": 2.4191963785981377e-06, "loss": 0.7926, "step": 15181 }, { "epoch": 17.291737891737892, "grad_norm": 0.17023199796676636, "learning_rate": 2.4171972724798016e-06, "loss": 0.5545, "step": 15182 }, { "epoch": 17.292877492877494, "grad_norm": 0.18583688139915466, "learning_rate": 2.4151989507243067e-06, "loss": 0.5112, "step": 15183 }, { "epoch": 17.294017094017093, "grad_norm": 0.18971213698387146, "learning_rate": 2.413201413401059e-06, "loss": 0.8555, "step": 15184 }, { "epoch": 17.295156695156695, "grad_norm": 0.24449138343334198, "learning_rate": 2.4112046605794386e-06, "loss": 0.5715, "step": 15185 }, { "epoch": 17.296296296296298, "grad_norm": 0.24402859807014465, "learning_rate": 2.409208692328804e-06, "loss": 0.6896, "step": 15186 }, { "epoch": 17.297435897435896, "grad_norm": 0.20627640187740326, "learning_rate": 2.4072135087184757e-06, "loss": 0.7722, "step": 15187 }, { "epoch": 17.2985754985755, "grad_norm": 0.24078132212162018, "learning_rate": 2.4052191098177494e-06, "loss": 0.4887, "step": 15188 }, { "epoch": 17.2997150997151, "grad_norm": 0.2839394807815552, "learning_rate": 2.4032254956959015e-06, "loss": 0.6151, "step": 15189 }, { "epoch": 17.3008547008547, "grad_norm": 0.16072551906108856, "learning_rate": 2.401232666422176e-06, "loss": 0.7992, "step": 15190 }, { "epoch": 17.301994301994302, "grad_norm": 0.22132433950901031, "learning_rate": 2.399240622065782e-06, "loss": 0.5885, "step": 15191 }, { "epoch": 17.303133903133904, "grad_norm": 0.24359983205795288, "learning_rate": 2.3972493626959106e-06, "loss": 0.6215, "step": 15192 }, { "epoch": 17.304273504273503, "grad_norm": 0.21864564716815948, "learning_rate": 2.395258888381735e-06, "loss": 0.7649, "step": 15193 }, { "epoch": 17.305413105413106, "grad_norm": 0.18766537308692932, "learning_rate": 2.393269199192377e-06, "loss": 0.6378, "step": 15194 }, { "epoch": 17.306552706552708, "grad_norm": 0.2603372633457184, "learning_rate": 2.3912802951969488e-06, "loss": 0.5515, "step": 15195 }, { "epoch": 17.307692307692307, "grad_norm": 0.2291465848684311, "learning_rate": 2.3892921764645304e-06, "loss": 0.6621, "step": 15196 }, { "epoch": 17.30883190883191, "grad_norm": 0.22870177030563354, "learning_rate": 2.3873048430641783e-06, "loss": 0.5992, "step": 15197 }, { "epoch": 17.30997150997151, "grad_norm": 0.18157687783241272, "learning_rate": 2.3853182950649118e-06, "loss": 0.688, "step": 15198 }, { "epoch": 17.31111111111111, "grad_norm": 0.27913182973861694, "learning_rate": 2.383332532535729e-06, "loss": 0.4714, "step": 15199 }, { "epoch": 17.312250712250712, "grad_norm": 0.17969077825546265, "learning_rate": 2.381347555545604e-06, "loss": 0.7142, "step": 15200 }, { "epoch": 17.313390313390315, "grad_norm": 0.1802387237548828, "learning_rate": 2.379363364163484e-06, "loss": 0.6974, "step": 15201 }, { "epoch": 17.314529914529913, "grad_norm": 0.17316658794879913, "learning_rate": 2.3773799584582756e-06, "loss": 0.9818, "step": 15202 }, { "epoch": 17.315669515669516, "grad_norm": 0.15264250338077545, "learning_rate": 2.375397338498872e-06, "loss": 0.7006, "step": 15203 }, { "epoch": 17.316809116809118, "grad_norm": 0.16245713829994202, "learning_rate": 2.3734155043541457e-06, "loss": 0.7195, "step": 15204 }, { "epoch": 17.317948717948717, "grad_norm": 0.18768909573554993, "learning_rate": 2.3714344560929165e-06, "loss": 0.7107, "step": 15205 }, { "epoch": 17.31908831908832, "grad_norm": 0.20597070455551147, "learning_rate": 2.369454193783996e-06, "loss": 0.67, "step": 15206 }, { "epoch": 17.32022792022792, "grad_norm": 0.20749664306640625, "learning_rate": 2.367474717496168e-06, "loss": 0.5346, "step": 15207 }, { "epoch": 17.32136752136752, "grad_norm": 0.20586107671260834, "learning_rate": 2.3654960272981862e-06, "loss": 0.6622, "step": 15208 }, { "epoch": 17.322507122507123, "grad_norm": 0.16947408020496368, "learning_rate": 2.363518123258768e-06, "loss": 0.6493, "step": 15209 }, { "epoch": 17.323646723646725, "grad_norm": 0.18709897994995117, "learning_rate": 2.3615410054466156e-06, "loss": 0.7421, "step": 15210 }, { "epoch": 17.324786324786324, "grad_norm": 0.18042375147342682, "learning_rate": 2.3595646739304008e-06, "loss": 0.9068, "step": 15211 }, { "epoch": 17.325925925925926, "grad_norm": 0.3071350157260895, "learning_rate": 2.3575891287787727e-06, "loss": 0.4679, "step": 15212 }, { "epoch": 17.32706552706553, "grad_norm": 0.1570240706205368, "learning_rate": 2.3556143700603327e-06, "loss": 0.8679, "step": 15213 }, { "epoch": 17.328205128205127, "grad_norm": 0.21524712443351746, "learning_rate": 2.3536403978436763e-06, "loss": 0.7099, "step": 15214 }, { "epoch": 17.32934472934473, "grad_norm": 0.2164105623960495, "learning_rate": 2.35166721219737e-06, "loss": 0.7441, "step": 15215 }, { "epoch": 17.33048433048433, "grad_norm": 0.19767627120018005, "learning_rate": 2.3496948131899483e-06, "loss": 0.8, "step": 15216 }, { "epoch": 17.33162393162393, "grad_norm": 0.1692846268415451, "learning_rate": 2.3477232008899117e-06, "loss": 0.6675, "step": 15217 }, { "epoch": 17.332763532763533, "grad_norm": 0.19805654883384705, "learning_rate": 2.3457523753657422e-06, "loss": 0.8357, "step": 15218 }, { "epoch": 17.333903133903135, "grad_norm": 0.19935043156147003, "learning_rate": 2.3437823366858902e-06, "loss": 0.5921, "step": 15219 }, { "epoch": 17.335042735042734, "grad_norm": 0.17238126695156097, "learning_rate": 2.341813084918787e-06, "loss": 0.464, "step": 15220 }, { "epoch": 17.336182336182336, "grad_norm": 0.21349164843559265, "learning_rate": 2.339844620132822e-06, "loss": 0.2943, "step": 15221 }, { "epoch": 17.33732193732194, "grad_norm": 0.16946952044963837, "learning_rate": 2.337876942396369e-06, "loss": 0.6608, "step": 15222 }, { "epoch": 17.338461538461537, "grad_norm": 0.18029257655143738, "learning_rate": 2.3359100517777733e-06, "loss": 0.9875, "step": 15223 }, { "epoch": 17.33960113960114, "grad_norm": 0.16952665150165558, "learning_rate": 2.333943948345349e-06, "loss": 0.8551, "step": 15224 }, { "epoch": 17.340740740740742, "grad_norm": 0.336750328540802, "learning_rate": 2.3319786321673753e-06, "loss": 0.4064, "step": 15225 }, { "epoch": 17.34188034188034, "grad_norm": 0.19412781298160553, "learning_rate": 2.3300141033121254e-06, "loss": 0.6186, "step": 15226 }, { "epoch": 17.343019943019943, "grad_norm": 0.19354306161403656, "learning_rate": 2.3280503618478334e-06, "loss": 0.7205, "step": 15227 }, { "epoch": 17.344159544159545, "grad_norm": 0.21017666161060333, "learning_rate": 2.3260874078426947e-06, "loss": 0.6777, "step": 15228 }, { "epoch": 17.345299145299144, "grad_norm": 0.17014797031879425, "learning_rate": 2.3241252413648966e-06, "loss": 0.5468, "step": 15229 }, { "epoch": 17.346438746438746, "grad_norm": 0.2159450501203537, "learning_rate": 2.3221638624825847e-06, "loss": 0.4806, "step": 15230 }, { "epoch": 17.34757834757835, "grad_norm": 0.17926137149333954, "learning_rate": 2.320203271263893e-06, "loss": 0.7847, "step": 15231 }, { "epoch": 17.348717948717947, "grad_norm": 0.1916458010673523, "learning_rate": 2.3182434677769066e-06, "loss": 0.6717, "step": 15232 }, { "epoch": 17.34985754985755, "grad_norm": 0.19014115631580353, "learning_rate": 2.3162844520896983e-06, "loss": 0.9252, "step": 15233 }, { "epoch": 17.350997150997152, "grad_norm": 0.17423704266548157, "learning_rate": 2.3143262242703105e-06, "loss": 0.6787, "step": 15234 }, { "epoch": 17.35213675213675, "grad_norm": 0.19930385053157806, "learning_rate": 2.312368784386765e-06, "loss": 0.8409, "step": 15235 }, { "epoch": 17.353276353276353, "grad_norm": 0.16899840533733368, "learning_rate": 2.310412132507034e-06, "loss": 0.6438, "step": 15236 }, { "epoch": 17.354415954415956, "grad_norm": 0.17552229762077332, "learning_rate": 2.308456268699091e-06, "loss": 0.5095, "step": 15237 }, { "epoch": 17.355555555555554, "grad_norm": 0.23030370473861694, "learning_rate": 2.306501193030866e-06, "loss": 0.9334, "step": 15238 }, { "epoch": 17.356695156695157, "grad_norm": 0.21691684424877167, "learning_rate": 2.3045469055702593e-06, "loss": 0.5538, "step": 15239 }, { "epoch": 17.35783475783476, "grad_norm": 0.20816679298877716, "learning_rate": 2.3025934063851533e-06, "loss": 0.558, "step": 15240 }, { "epoch": 17.358974358974358, "grad_norm": 0.20304793119430542, "learning_rate": 2.3006406955433936e-06, "loss": 0.7394, "step": 15241 }, { "epoch": 17.36011396011396, "grad_norm": 0.1755012720823288, "learning_rate": 2.2986887731128116e-06, "loss": 0.6438, "step": 15242 }, { "epoch": 17.361253561253562, "grad_norm": 0.189613476395607, "learning_rate": 2.2967376391611923e-06, "loss": 0.5026, "step": 15243 }, { "epoch": 17.36239316239316, "grad_norm": 0.24741695821285248, "learning_rate": 2.2947872937563094e-06, "loss": 0.3569, "step": 15244 }, { "epoch": 17.363532763532763, "grad_norm": 0.1851482093334198, "learning_rate": 2.2928377369659026e-06, "loss": 0.7729, "step": 15245 }, { "epoch": 17.364672364672366, "grad_norm": 1.4638245105743408, "learning_rate": 2.2908889688576905e-06, "loss": 0.3429, "step": 15246 }, { "epoch": 17.365811965811965, "grad_norm": 0.16212010383605957, "learning_rate": 2.288940989499347e-06, "loss": 0.6244, "step": 15247 }, { "epoch": 17.366951566951567, "grad_norm": 0.21030201017856598, "learning_rate": 2.2869937989585423e-06, "loss": 0.5332, "step": 15248 }, { "epoch": 17.36809116809117, "grad_norm": 0.20888923108577728, "learning_rate": 2.2850473973029083e-06, "loss": 0.8135, "step": 15249 }, { "epoch": 17.369230769230768, "grad_norm": 0.22561891376972198, "learning_rate": 2.283101784600039e-06, "loss": 0.8256, "step": 15250 }, { "epoch": 17.37037037037037, "grad_norm": 0.20610755681991577, "learning_rate": 2.281156960917519e-06, "loss": 0.5839, "step": 15251 }, { "epoch": 17.371509971509973, "grad_norm": 0.1993379443883896, "learning_rate": 2.2792129263228934e-06, "loss": 0.5128, "step": 15252 }, { "epoch": 17.37264957264957, "grad_norm": 0.1732664853334427, "learning_rate": 2.2772696808836895e-06, "loss": 0.7115, "step": 15253 }, { "epoch": 17.373789173789174, "grad_norm": 0.23456768691539764, "learning_rate": 2.275327224667392e-06, "loss": 0.5868, "step": 15254 }, { "epoch": 17.374928774928776, "grad_norm": 0.18440523743629456, "learning_rate": 2.2733855577414754e-06, "loss": 0.7298, "step": 15255 }, { "epoch": 17.376068376068375, "grad_norm": 0.20289857685565948, "learning_rate": 2.2714446801733765e-06, "loss": 0.3849, "step": 15256 }, { "epoch": 17.377207977207977, "grad_norm": 0.17441540956497192, "learning_rate": 2.2695045920305052e-06, "loss": 0.6876, "step": 15257 }, { "epoch": 17.37834757834758, "grad_norm": 0.17561806738376617, "learning_rate": 2.267565293380253e-06, "loss": 0.4977, "step": 15258 }, { "epoch": 17.379487179487178, "grad_norm": 0.18843214213848114, "learning_rate": 2.2656267842899675e-06, "loss": 0.3904, "step": 15259 }, { "epoch": 17.38062678062678, "grad_norm": 0.20129486918449402, "learning_rate": 2.2636890648269877e-06, "loss": 0.6357, "step": 15260 }, { "epoch": 17.381766381766383, "grad_norm": 0.18350058794021606, "learning_rate": 2.2617521350586145e-06, "loss": 0.6551, "step": 15261 }, { "epoch": 17.38290598290598, "grad_norm": 0.20070157945156097, "learning_rate": 2.259815995052114e-06, "loss": 0.761, "step": 15262 }, { "epoch": 17.384045584045584, "grad_norm": 0.21926534175872803, "learning_rate": 2.2578806448747408e-06, "loss": 0.6202, "step": 15263 }, { "epoch": 17.385185185185186, "grad_norm": 0.18719224631786346, "learning_rate": 2.2559460845937156e-06, "loss": 0.794, "step": 15264 }, { "epoch": 17.386324786324785, "grad_norm": 0.1868806928396225, "learning_rate": 2.2540123142762326e-06, "loss": 0.5571, "step": 15265 }, { "epoch": 17.387464387464387, "grad_norm": 0.21154086291790009, "learning_rate": 2.2520793339894487e-06, "loss": 0.6729, "step": 15266 }, { "epoch": 17.38860398860399, "grad_norm": 0.1811610460281372, "learning_rate": 2.2501471438005074e-06, "loss": 0.7013, "step": 15267 }, { "epoch": 17.38974358974359, "grad_norm": 0.2186114490032196, "learning_rate": 2.248215743776516e-06, "loss": 0.6498, "step": 15268 }, { "epoch": 17.39088319088319, "grad_norm": 0.18534058332443237, "learning_rate": 2.246285133984563e-06, "loss": 0.5917, "step": 15269 }, { "epoch": 17.392022792022793, "grad_norm": 0.22112229466438293, "learning_rate": 2.2443553144916975e-06, "loss": 0.5508, "step": 15270 }, { "epoch": 17.39316239316239, "grad_norm": 0.18932108581066132, "learning_rate": 2.242426285364951e-06, "loss": 0.7679, "step": 15271 }, { "epoch": 17.394301994301994, "grad_norm": 0.20873083174228668, "learning_rate": 2.2404980466713273e-06, "loss": 0.6958, "step": 15272 }, { "epoch": 17.395441595441596, "grad_norm": 0.1730998009443283, "learning_rate": 2.2385705984777934e-06, "loss": 0.787, "step": 15273 }, { "epoch": 17.396581196581195, "grad_norm": 0.22182199358940125, "learning_rate": 2.2366439408512936e-06, "loss": 0.583, "step": 15274 }, { "epoch": 17.397720797720797, "grad_norm": 0.20717033743858337, "learning_rate": 2.2347180738587493e-06, "loss": 0.5336, "step": 15275 }, { "epoch": 17.3988603988604, "grad_norm": 0.1839270144701004, "learning_rate": 2.232792997567057e-06, "loss": 0.6468, "step": 15276 }, { "epoch": 17.4, "grad_norm": 0.19520770013332367, "learning_rate": 2.2308687120430688e-06, "loss": 0.7095, "step": 15277 }, { "epoch": 17.4011396011396, "grad_norm": 0.17511405050754547, "learning_rate": 2.2289452173536256e-06, "loss": 0.8228, "step": 15278 }, { "epoch": 17.402279202279203, "grad_norm": 0.2063295543193817, "learning_rate": 2.2270225135655354e-06, "loss": 0.6654, "step": 15279 }, { "epoch": 17.403418803418802, "grad_norm": 0.21008919179439545, "learning_rate": 2.2251006007455806e-06, "loss": 0.608, "step": 15280 }, { "epoch": 17.404558404558404, "grad_norm": 0.21195709705352783, "learning_rate": 2.223179478960513e-06, "loss": 0.736, "step": 15281 }, { "epoch": 17.405698005698007, "grad_norm": 0.19682569801807404, "learning_rate": 2.221259148277058e-06, "loss": 0.4586, "step": 15282 }, { "epoch": 17.406837606837605, "grad_norm": 0.1939670443534851, "learning_rate": 2.219339608761917e-06, "loss": 0.703, "step": 15283 }, { "epoch": 17.407977207977208, "grad_norm": 0.18122750520706177, "learning_rate": 2.217420860481756e-06, "loss": 0.7662, "step": 15284 }, { "epoch": 17.40911680911681, "grad_norm": 0.18972159922122955, "learning_rate": 2.215502903503222e-06, "loss": 0.6504, "step": 15285 }, { "epoch": 17.41025641025641, "grad_norm": 0.23849743604660034, "learning_rate": 2.2135857378929287e-06, "loss": 0.5109, "step": 15286 }, { "epoch": 17.41139601139601, "grad_norm": 0.2310958057641983, "learning_rate": 2.2116693637174694e-06, "loss": 0.8016, "step": 15287 }, { "epoch": 17.412535612535613, "grad_norm": 0.18404515087604523, "learning_rate": 2.2097537810433973e-06, "loss": 0.9114, "step": 15288 }, { "epoch": 17.413675213675212, "grad_norm": 0.2456667721271515, "learning_rate": 2.2078389899372497e-06, "loss": 0.2226, "step": 15289 }, { "epoch": 17.414814814814815, "grad_norm": 0.17694532871246338, "learning_rate": 2.205924990465533e-06, "loss": 0.7566, "step": 15290 }, { "epoch": 17.415954415954417, "grad_norm": 0.17129088938236237, "learning_rate": 2.204011782694723e-06, "loss": 0.8028, "step": 15291 }, { "epoch": 17.417094017094016, "grad_norm": 0.22568799555301666, "learning_rate": 2.2020993666912764e-06, "loss": 0.5809, "step": 15292 }, { "epoch": 17.418233618233618, "grad_norm": 0.17860203981399536, "learning_rate": 2.2001877425216117e-06, "loss": 0.636, "step": 15293 }, { "epoch": 17.41937321937322, "grad_norm": 0.23044751584529877, "learning_rate": 2.1982769102521254e-06, "loss": 0.5667, "step": 15294 }, { "epoch": 17.42051282051282, "grad_norm": 0.20839495956897736, "learning_rate": 2.196366869949193e-06, "loss": 0.6226, "step": 15295 }, { "epoch": 17.42165242165242, "grad_norm": 0.18788960576057434, "learning_rate": 2.194457621679144e-06, "loss": 0.7101, "step": 15296 }, { "epoch": 17.422792022792024, "grad_norm": 0.167411208152771, "learning_rate": 2.1925491655082982e-06, "loss": 0.6827, "step": 15297 }, { "epoch": 17.423931623931622, "grad_norm": 0.19627101719379425, "learning_rate": 2.190641501502941e-06, "loss": 0.5104, "step": 15298 }, { "epoch": 17.425071225071225, "grad_norm": 0.18951691687107086, "learning_rate": 2.188734629729333e-06, "loss": 0.5709, "step": 15299 }, { "epoch": 17.426210826210827, "grad_norm": 0.2600310742855072, "learning_rate": 2.1868285502537e-06, "loss": 0.4652, "step": 15300 }, { "epoch": 17.427350427350426, "grad_norm": 0.1929951161146164, "learning_rate": 2.1849232631422467e-06, "loss": 0.7723, "step": 15301 }, { "epoch": 17.428490028490028, "grad_norm": 0.23146596550941467, "learning_rate": 2.183018768461151e-06, "loss": 0.393, "step": 15302 }, { "epoch": 17.42962962962963, "grad_norm": 0.19300377368927002, "learning_rate": 2.1811150662765596e-06, "loss": 0.6173, "step": 15303 }, { "epoch": 17.43076923076923, "grad_norm": 0.19949676096439362, "learning_rate": 2.179212156654595e-06, "loss": 0.7441, "step": 15304 }, { "epoch": 17.43190883190883, "grad_norm": 0.1906082034111023, "learning_rate": 2.177310039661348e-06, "loss": 0.5934, "step": 15305 }, { "epoch": 17.433048433048434, "grad_norm": 0.2418171465396881, "learning_rate": 2.175408715362892e-06, "loss": 0.4327, "step": 15306 }, { "epoch": 17.434188034188033, "grad_norm": 0.1856294721364975, "learning_rate": 2.173508183825254e-06, "loss": 0.7891, "step": 15307 }, { "epoch": 17.435327635327635, "grad_norm": 0.2543734908103943, "learning_rate": 2.171608445114451e-06, "loss": 0.6179, "step": 15308 }, { "epoch": 17.436467236467237, "grad_norm": 0.18569955229759216, "learning_rate": 2.169709499296463e-06, "loss": 0.7025, "step": 15309 }, { "epoch": 17.437606837606836, "grad_norm": 0.23999355733394623, "learning_rate": 2.167811346437254e-06, "loss": 0.4552, "step": 15310 }, { "epoch": 17.43874643874644, "grad_norm": 0.16314929723739624, "learning_rate": 2.165913986602741e-06, "loss": 0.5465, "step": 15311 }, { "epoch": 17.43988603988604, "grad_norm": 0.21630598604679108, "learning_rate": 2.1640174198588297e-06, "loss": 0.6926, "step": 15312 }, { "epoch": 17.44102564102564, "grad_norm": 0.21854954957962036, "learning_rate": 2.1621216462713893e-06, "loss": 0.6521, "step": 15313 }, { "epoch": 17.442165242165242, "grad_norm": 0.20090152323246002, "learning_rate": 2.160226665906273e-06, "loss": 0.6718, "step": 15314 }, { "epoch": 17.443304843304844, "grad_norm": 0.1926439106464386, "learning_rate": 2.158332478829292e-06, "loss": 0.4476, "step": 15315 }, { "epoch": 17.444444444444443, "grad_norm": 0.2181670367717743, "learning_rate": 2.156439085106238e-06, "loss": 0.6576, "step": 15316 }, { "epoch": 17.445584045584045, "grad_norm": 0.22731441259384155, "learning_rate": 2.15454648480288e-06, "loss": 0.667, "step": 15317 }, { "epoch": 17.446723646723648, "grad_norm": 0.1826423853635788, "learning_rate": 2.1526546779849443e-06, "loss": 0.5988, "step": 15318 }, { "epoch": 17.447863247863246, "grad_norm": 0.20163589715957642, "learning_rate": 2.1507636647181418e-06, "loss": 0.7636, "step": 15319 }, { "epoch": 17.44900284900285, "grad_norm": 0.18035119771957397, "learning_rate": 2.14887344506815e-06, "loss": 0.7143, "step": 15320 }, { "epoch": 17.45014245014245, "grad_norm": 0.1917208731174469, "learning_rate": 2.1469840191006314e-06, "loss": 0.5918, "step": 15321 }, { "epoch": 17.45128205128205, "grad_norm": 0.20104704797267914, "learning_rate": 2.145095386881199e-06, "loss": 0.9115, "step": 15322 }, { "epoch": 17.452421652421652, "grad_norm": 0.3120897114276886, "learning_rate": 2.1432075484754565e-06, "loss": 0.4841, "step": 15323 }, { "epoch": 17.453561253561254, "grad_norm": 0.17598046362400055, "learning_rate": 2.141320503948971e-06, "loss": 0.8, "step": 15324 }, { "epoch": 17.454700854700853, "grad_norm": 0.18346905708312988, "learning_rate": 2.139434253367284e-06, "loss": 0.8112, "step": 15325 }, { "epoch": 17.455840455840455, "grad_norm": 0.2171083241701126, "learning_rate": 2.1375487967959156e-06, "loss": 0.6508, "step": 15326 }, { "epoch": 17.456980056980058, "grad_norm": 0.1841529756784439, "learning_rate": 2.135664134300347e-06, "loss": 0.7771, "step": 15327 }, { "epoch": 17.458119658119656, "grad_norm": 0.18029426038265228, "learning_rate": 2.133780265946045e-06, "loss": 0.8219, "step": 15328 }, { "epoch": 17.45925925925926, "grad_norm": 0.22011998295783997, "learning_rate": 2.1318971917984323e-06, "loss": 0.4506, "step": 15329 }, { "epoch": 17.46039886039886, "grad_norm": 0.21217116713523865, "learning_rate": 2.1300149119229173e-06, "loss": 0.5719, "step": 15330 }, { "epoch": 17.46153846153846, "grad_norm": 0.1769542396068573, "learning_rate": 2.128133426384879e-06, "loss": 0.6893, "step": 15331 }, { "epoch": 17.462678062678062, "grad_norm": 0.2163042426109314, "learning_rate": 2.1262527352496677e-06, "loss": 0.6376, "step": 15332 }, { "epoch": 17.463817663817665, "grad_norm": 0.17681114375591278, "learning_rate": 2.124372838582597e-06, "loss": 0.677, "step": 15333 }, { "epoch": 17.464957264957263, "grad_norm": 0.21395109593868256, "learning_rate": 2.1224937364489687e-06, "loss": 0.6916, "step": 15334 }, { "epoch": 17.466096866096866, "grad_norm": 0.23505502939224243, "learning_rate": 2.120615428914044e-06, "loss": 0.5663, "step": 15335 }, { "epoch": 17.467236467236468, "grad_norm": 0.2834552824497223, "learning_rate": 2.118737916043065e-06, "loss": 0.672, "step": 15336 }, { "epoch": 17.468376068376067, "grad_norm": 0.20520302653312683, "learning_rate": 2.1168611979012403e-06, "loss": 0.6569, "step": 15337 }, { "epoch": 17.46951566951567, "grad_norm": 0.21140910685062408, "learning_rate": 2.114985274553755e-06, "loss": 0.4709, "step": 15338 }, { "epoch": 17.47065527065527, "grad_norm": 0.23354817926883698, "learning_rate": 2.1131101460657665e-06, "loss": 0.659, "step": 15339 }, { "epoch": 17.47179487179487, "grad_norm": 0.17846237123012543, "learning_rate": 2.111235812502407e-06, "loss": 0.7709, "step": 15340 }, { "epoch": 17.472934472934472, "grad_norm": 0.1721949428319931, "learning_rate": 2.1093622739287665e-06, "loss": 0.6594, "step": 15341 }, { "epoch": 17.474074074074075, "grad_norm": 0.1907200664281845, "learning_rate": 2.107489530409923e-06, "loss": 0.6038, "step": 15342 }, { "epoch": 17.475213675213674, "grad_norm": 0.23151063919067383, "learning_rate": 2.105617582010924e-06, "loss": 0.6874, "step": 15343 }, { "epoch": 17.476353276353276, "grad_norm": 0.1753738522529602, "learning_rate": 2.103746428796788e-06, "loss": 0.9033, "step": 15344 }, { "epoch": 17.477492877492878, "grad_norm": 0.17159105837345123, "learning_rate": 2.1018760708325037e-06, "loss": 0.6414, "step": 15345 }, { "epoch": 17.478632478632477, "grad_norm": 0.3715958297252655, "learning_rate": 2.1000065081830304e-06, "loss": 0.6692, "step": 15346 }, { "epoch": 17.47977207977208, "grad_norm": 0.2088504582643509, "learning_rate": 2.0981377409133056e-06, "loss": 0.6677, "step": 15347 }, { "epoch": 17.48091168091168, "grad_norm": 0.16269053518772125, "learning_rate": 2.096269769088238e-06, "loss": 0.5255, "step": 15348 }, { "epoch": 17.48205128205128, "grad_norm": 0.16256828606128693, "learning_rate": 2.0944025927727095e-06, "loss": 0.7041, "step": 15349 }, { "epoch": 17.483190883190883, "grad_norm": 0.1829254925251007, "learning_rate": 2.092536212031568e-06, "loss": 0.6186, "step": 15350 }, { "epoch": 17.484330484330485, "grad_norm": 0.1834312081336975, "learning_rate": 2.0906706269296416e-06, "loss": 0.5941, "step": 15351 }, { "epoch": 17.485470085470084, "grad_norm": 0.18137308955192566, "learning_rate": 2.0888058375317238e-06, "loss": 0.648, "step": 15352 }, { "epoch": 17.486609686609686, "grad_norm": 0.21743400394916534, "learning_rate": 2.0869418439025845e-06, "loss": 0.5386, "step": 15353 }, { "epoch": 17.48774928774929, "grad_norm": 0.21215492486953735, "learning_rate": 2.0850786461069667e-06, "loss": 0.6154, "step": 15354 }, { "epoch": 17.488888888888887, "grad_norm": 0.23170483112335205, "learning_rate": 2.083216244209585e-06, "loss": 0.7102, "step": 15355 }, { "epoch": 17.49002849002849, "grad_norm": 0.15444518625736237, "learning_rate": 2.0813546382751236e-06, "loss": 0.7619, "step": 15356 }, { "epoch": 17.491168091168092, "grad_norm": 0.19130298495292664, "learning_rate": 2.0794938283682423e-06, "loss": 0.6027, "step": 15357 }, { "epoch": 17.49230769230769, "grad_norm": 0.20855511724948883, "learning_rate": 2.07763381455357e-06, "loss": 0.7724, "step": 15358 }, { "epoch": 17.493447293447293, "grad_norm": 0.1930067092180252, "learning_rate": 2.075774596895713e-06, "loss": 0.6319, "step": 15359 }, { "epoch": 17.494586894586895, "grad_norm": 0.19758287072181702, "learning_rate": 2.0739161754592452e-06, "loss": 0.5473, "step": 15360 }, { "epoch": 17.495726495726494, "grad_norm": 0.15608827769756317, "learning_rate": 2.072058550308717e-06, "loss": 0.6884, "step": 15361 }, { "epoch": 17.496866096866096, "grad_norm": 0.2612965404987335, "learning_rate": 2.0702017215086495e-06, "loss": 0.6141, "step": 15362 }, { "epoch": 17.4980056980057, "grad_norm": 0.21490062773227692, "learning_rate": 2.0683456891235323e-06, "loss": 0.5766, "step": 15363 }, { "epoch": 17.499145299145297, "grad_norm": 0.20124967396259308, "learning_rate": 2.0664904532178285e-06, "loss": 0.6135, "step": 15364 }, { "epoch": 17.5002849002849, "grad_norm": 0.15642400085926056, "learning_rate": 2.0646360138559808e-06, "loss": 0.6718, "step": 15365 }, { "epoch": 17.501424501424502, "grad_norm": 0.20701226592063904, "learning_rate": 2.062782371102401e-06, "loss": 0.6231, "step": 15366 }, { "epoch": 17.5025641025641, "grad_norm": 0.16767756640911102, "learning_rate": 2.060929525021463e-06, "loss": 0.7096, "step": 15367 }, { "epoch": 17.503703703703703, "grad_norm": 0.18611721694469452, "learning_rate": 2.0590774756775235e-06, "loss": 0.642, "step": 15368 }, { "epoch": 17.504843304843305, "grad_norm": 0.20109455287456512, "learning_rate": 2.057226223134909e-06, "loss": 0.5996, "step": 15369 }, { "epoch": 17.505982905982904, "grad_norm": 0.19398508965969086, "learning_rate": 2.055375767457926e-06, "loss": 0.7829, "step": 15370 }, { "epoch": 17.507122507122507, "grad_norm": 0.19312256574630737, "learning_rate": 2.0535261087108404e-06, "loss": 0.5825, "step": 15371 }, { "epoch": 17.50826210826211, "grad_norm": 0.2314954400062561, "learning_rate": 2.0516772469578922e-06, "loss": 0.545, "step": 15372 }, { "epoch": 17.509401709401708, "grad_norm": 0.2093094289302826, "learning_rate": 2.049829182263305e-06, "loss": 0.5974, "step": 15373 }, { "epoch": 17.51054131054131, "grad_norm": 0.21281267702579498, "learning_rate": 2.047981914691266e-06, "loss": 0.5286, "step": 15374 }, { "epoch": 17.511680911680912, "grad_norm": 0.2425657957792282, "learning_rate": 2.046135444305927e-06, "loss": 0.6006, "step": 15375 }, { "epoch": 17.51282051282051, "grad_norm": 0.22368358075618744, "learning_rate": 2.0442897711714313e-06, "loss": 0.6731, "step": 15376 }, { "epoch": 17.513960113960113, "grad_norm": 0.17175684869289398, "learning_rate": 2.042444895351878e-06, "loss": 0.5703, "step": 15377 }, { "epoch": 17.515099715099716, "grad_norm": 0.2873460650444031, "learning_rate": 2.0406008169113507e-06, "loss": 0.3727, "step": 15378 }, { "epoch": 17.516239316239318, "grad_norm": 0.19145934283733368, "learning_rate": 2.038757535913893e-06, "loss": 0.5564, "step": 15379 }, { "epoch": 17.517378917378917, "grad_norm": 0.15687519311904907, "learning_rate": 2.0369150524235244e-06, "loss": 0.7346, "step": 15380 }, { "epoch": 17.51851851851852, "grad_norm": 0.21336393058300018, "learning_rate": 2.035073366504253e-06, "loss": 0.5313, "step": 15381 }, { "epoch": 17.51965811965812, "grad_norm": 0.17659983038902283, "learning_rate": 2.0332324782200347e-06, "loss": 0.7484, "step": 15382 }, { "epoch": 17.52079772079772, "grad_norm": 0.19936230778694153, "learning_rate": 2.031392387634809e-06, "loss": 0.7472, "step": 15383 }, { "epoch": 17.521937321937322, "grad_norm": 0.21555398404598236, "learning_rate": 2.0295530948124917e-06, "loss": 0.6158, "step": 15384 }, { "epoch": 17.523076923076925, "grad_norm": 0.47986331582069397, "learning_rate": 2.027714599816968e-06, "loss": 0.6851, "step": 15385 }, { "epoch": 17.524216524216524, "grad_norm": 0.21136222779750824, "learning_rate": 2.0258769027120873e-06, "loss": 0.6985, "step": 15386 }, { "epoch": 17.525356125356126, "grad_norm": 0.17603352665901184, "learning_rate": 2.024040003561678e-06, "loss": 0.7078, "step": 15387 }, { "epoch": 17.526495726495728, "grad_norm": 0.25389912724494934, "learning_rate": 2.022203902429545e-06, "loss": 0.5969, "step": 15388 }, { "epoch": 17.527635327635327, "grad_norm": 0.18012681603431702, "learning_rate": 2.020368599379466e-06, "loss": 0.7431, "step": 15389 }, { "epoch": 17.52877492877493, "grad_norm": 0.20365744829177856, "learning_rate": 2.018534094475172e-06, "loss": 0.7093, "step": 15390 }, { "epoch": 17.52991452991453, "grad_norm": 0.18745318055152893, "learning_rate": 2.016700387780385e-06, "loss": 0.3249, "step": 15391 }, { "epoch": 17.53105413105413, "grad_norm": 0.19238592684268951, "learning_rate": 2.0148674793588064e-06, "loss": 0.5874, "step": 15392 }, { "epoch": 17.532193732193733, "grad_norm": 0.1820547729730606, "learning_rate": 2.0130353692740854e-06, "loss": 0.8376, "step": 15393 }, { "epoch": 17.533333333333335, "grad_norm": 0.160737544298172, "learning_rate": 2.0112040575898607e-06, "loss": 0.7618, "step": 15394 }, { "epoch": 17.534472934472934, "grad_norm": 0.21993643045425415, "learning_rate": 2.0093735443697387e-06, "loss": 0.6073, "step": 15395 }, { "epoch": 17.535612535612536, "grad_norm": 0.20880837738513947, "learning_rate": 2.0075438296773023e-06, "loss": 0.3951, "step": 15396 }, { "epoch": 17.53675213675214, "grad_norm": 0.21602734923362732, "learning_rate": 2.0057149135760954e-06, "loss": 0.5836, "step": 15397 }, { "epoch": 17.537891737891737, "grad_norm": 0.1663624346256256, "learning_rate": 2.003886796129642e-06, "loss": 0.8863, "step": 15398 }, { "epoch": 17.53903133903134, "grad_norm": 0.21745102107524872, "learning_rate": 2.0020594774014383e-06, "loss": 0.794, "step": 15399 }, { "epoch": 17.540170940170942, "grad_norm": 0.22152374684810638, "learning_rate": 2.0002329574549617e-06, "loss": 0.6123, "step": 15400 }, { "epoch": 17.54131054131054, "grad_norm": 0.22281567752361298, "learning_rate": 1.998407236353636e-06, "loss": 0.647, "step": 15401 }, { "epoch": 17.542450142450143, "grad_norm": 0.17523208260536194, "learning_rate": 1.9965823141608804e-06, "loss": 0.637, "step": 15402 }, { "epoch": 17.543589743589745, "grad_norm": 0.21158164739608765, "learning_rate": 1.994758190940088e-06, "loss": 0.629, "step": 15403 }, { "epoch": 17.544729344729344, "grad_norm": 0.2418418675661087, "learning_rate": 1.992934866754603e-06, "loss": 0.5566, "step": 15404 }, { "epoch": 17.545868945868946, "grad_norm": 0.1990119069814682, "learning_rate": 1.9911123416677615e-06, "loss": 0.5334, "step": 15405 }, { "epoch": 17.54700854700855, "grad_norm": 0.19692152738571167, "learning_rate": 1.9892906157428616e-06, "loss": 0.6778, "step": 15406 }, { "epoch": 17.548148148148147, "grad_norm": 0.18656007945537567, "learning_rate": 1.987469689043184e-06, "loss": 0.5933, "step": 15407 }, { "epoch": 17.54928774928775, "grad_norm": 0.16103674471378326, "learning_rate": 1.985649561631961e-06, "loss": 0.825, "step": 15408 }, { "epoch": 17.550427350427352, "grad_norm": 0.17847539484500885, "learning_rate": 1.983830233572423e-06, "loss": 0.8594, "step": 15409 }, { "epoch": 17.55156695156695, "grad_norm": 0.16353803873062134, "learning_rate": 1.9820117049277526e-06, "loss": 0.5623, "step": 15410 }, { "epoch": 17.552706552706553, "grad_norm": 0.19815440475940704, "learning_rate": 1.9801939757611213e-06, "loss": 0.6037, "step": 15411 }, { "epoch": 17.553846153846155, "grad_norm": 0.18016420304775238, "learning_rate": 1.9783770461356544e-06, "loss": 0.4899, "step": 15412 }, { "epoch": 17.554985754985754, "grad_norm": 0.2547997534275055, "learning_rate": 1.976560916114456e-06, "loss": 0.8854, "step": 15413 }, { "epoch": 17.556125356125357, "grad_norm": 0.21617785096168518, "learning_rate": 1.974745585760618e-06, "loss": 0.613, "step": 15414 }, { "epoch": 17.55726495726496, "grad_norm": 0.21860112249851227, "learning_rate": 1.9729310551371866e-06, "loss": 0.548, "step": 15415 }, { "epoch": 17.558404558404558, "grad_norm": 0.19205626845359802, "learning_rate": 1.971117324307184e-06, "loss": 0.6271, "step": 15416 }, { "epoch": 17.55954415954416, "grad_norm": 0.21441836655139923, "learning_rate": 1.969304393333604e-06, "loss": 0.6238, "step": 15417 }, { "epoch": 17.560683760683762, "grad_norm": 0.19718217849731445, "learning_rate": 1.967492262279416e-06, "loss": 0.5062, "step": 15418 }, { "epoch": 17.56182336182336, "grad_norm": 0.21474851667881012, "learning_rate": 1.9656809312075697e-06, "loss": 0.7898, "step": 15419 }, { "epoch": 17.562962962962963, "grad_norm": 0.19963958859443665, "learning_rate": 1.9638704001809615e-06, "loss": 0.7924, "step": 15420 }, { "epoch": 17.564102564102566, "grad_norm": 0.19876530766487122, "learning_rate": 1.962060669262486e-06, "loss": 0.6621, "step": 15421 }, { "epoch": 17.565242165242164, "grad_norm": 0.15920500457286835, "learning_rate": 1.9602517385149953e-06, "loss": 0.6871, "step": 15422 }, { "epoch": 17.566381766381767, "grad_norm": 0.16280314326286316, "learning_rate": 1.9584436080013285e-06, "loss": 0.781, "step": 15423 }, { "epoch": 17.56752136752137, "grad_norm": 0.19113875925540924, "learning_rate": 1.9566362777842685e-06, "loss": 0.6086, "step": 15424 }, { "epoch": 17.568660968660968, "grad_norm": 0.1675650179386139, "learning_rate": 1.9548297479266065e-06, "loss": 0.7763, "step": 15425 }, { "epoch": 17.56980056980057, "grad_norm": 0.19173195958137512, "learning_rate": 1.953024018491087e-06, "loss": 0.937, "step": 15426 }, { "epoch": 17.570940170940172, "grad_norm": 0.1962994784116745, "learning_rate": 1.9512190895404152e-06, "loss": 0.7101, "step": 15427 }, { "epoch": 17.57207977207977, "grad_norm": 0.16905248165130615, "learning_rate": 1.949414961137291e-06, "loss": 0.7945, "step": 15428 }, { "epoch": 17.573219373219374, "grad_norm": 0.21983909606933594, "learning_rate": 1.9476116333443735e-06, "loss": 0.7035, "step": 15429 }, { "epoch": 17.574358974358976, "grad_norm": 0.23587347567081451, "learning_rate": 1.9458091062243027e-06, "loss": 0.8793, "step": 15430 }, { "epoch": 17.575498575498575, "grad_norm": 0.16407811641693115, "learning_rate": 1.944007379839677e-06, "loss": 0.6725, "step": 15431 }, { "epoch": 17.576638176638177, "grad_norm": 0.170736625790596, "learning_rate": 1.9422064542530817e-06, "loss": 0.673, "step": 15432 }, { "epoch": 17.57777777777778, "grad_norm": 0.2096918821334839, "learning_rate": 1.9404063295270612e-06, "loss": 0.7357, "step": 15433 }, { "epoch": 17.578917378917378, "grad_norm": 0.1520949751138687, "learning_rate": 1.9386070057241495e-06, "loss": 0.7768, "step": 15434 }, { "epoch": 17.58005698005698, "grad_norm": 0.17503471672534943, "learning_rate": 1.9368084829068266e-06, "loss": 0.7439, "step": 15435 }, { "epoch": 17.581196581196583, "grad_norm": 0.21332886815071106, "learning_rate": 1.9350107611375735e-06, "loss": 0.4735, "step": 15436 }, { "epoch": 17.58233618233618, "grad_norm": 0.17435243725776672, "learning_rate": 1.9332138404788288e-06, "loss": 0.7822, "step": 15437 }, { "epoch": 17.583475783475784, "grad_norm": 0.21956662833690643, "learning_rate": 1.931417720992998e-06, "loss": 0.628, "step": 15438 }, { "epoch": 17.584615384615386, "grad_norm": 0.17011047899723053, "learning_rate": 1.9296224027424704e-06, "loss": 0.7007, "step": 15439 }, { "epoch": 17.585754985754985, "grad_norm": 0.16453446447849274, "learning_rate": 1.927827885789599e-06, "loss": 0.6905, "step": 15440 }, { "epoch": 17.586894586894587, "grad_norm": 0.1648949235677719, "learning_rate": 1.9260341701967166e-06, "loss": 0.6129, "step": 15441 }, { "epoch": 17.58803418803419, "grad_norm": 0.18956655263900757, "learning_rate": 1.9242412560261186e-06, "loss": 0.6696, "step": 15442 }, { "epoch": 17.58917378917379, "grad_norm": 0.18419325351715088, "learning_rate": 1.92244914334008e-06, "loss": 0.8715, "step": 15443 }, { "epoch": 17.59031339031339, "grad_norm": 0.20067398250102997, "learning_rate": 1.920657832200845e-06, "loss": 0.6447, "step": 15444 }, { "epoch": 17.591452991452993, "grad_norm": 0.15579760074615479, "learning_rate": 1.9188673226706364e-06, "loss": 0.6549, "step": 15445 }, { "epoch": 17.59259259259259, "grad_norm": 0.22809506952762604, "learning_rate": 1.9170776148116326e-06, "loss": 0.8009, "step": 15446 }, { "epoch": 17.593732193732194, "grad_norm": 0.1710824966430664, "learning_rate": 1.9152887086860054e-06, "loss": 0.4306, "step": 15447 }, { "epoch": 17.594871794871796, "grad_norm": 0.19887270033359528, "learning_rate": 1.9135006043558835e-06, "loss": 0.7019, "step": 15448 }, { "epoch": 17.596011396011395, "grad_norm": 0.18668244779109955, "learning_rate": 1.9117133018833776e-06, "loss": 0.6674, "step": 15449 }, { "epoch": 17.597150997150997, "grad_norm": 0.2371102124452591, "learning_rate": 1.9099268013305583e-06, "loss": 0.7222, "step": 15450 }, { "epoch": 17.5982905982906, "grad_norm": 0.17918461561203003, "learning_rate": 1.908141102759478e-06, "loss": 0.5778, "step": 15451 }, { "epoch": 17.5994301994302, "grad_norm": 0.19559653103351593, "learning_rate": 1.9063562062321627e-06, "loss": 0.8688, "step": 15452 }, { "epoch": 17.6005698005698, "grad_norm": 0.18899329006671906, "learning_rate": 1.904572111810604e-06, "loss": 0.7753, "step": 15453 }, { "epoch": 17.601709401709403, "grad_norm": 0.18411612510681152, "learning_rate": 1.9027888195567694e-06, "loss": 0.8394, "step": 15454 }, { "epoch": 17.602849002849002, "grad_norm": 0.19840501248836517, "learning_rate": 1.9010063295325926e-06, "loss": 0.6398, "step": 15455 }, { "epoch": 17.603988603988604, "grad_norm": 0.21453194320201874, "learning_rate": 1.8992246417999903e-06, "loss": 0.5722, "step": 15456 }, { "epoch": 17.605128205128207, "grad_norm": 0.301025927066803, "learning_rate": 1.8974437564208442e-06, "loss": 0.5245, "step": 15457 }, { "epoch": 17.606267806267805, "grad_norm": 0.167201429605484, "learning_rate": 1.895663673457007e-06, "loss": 0.6582, "step": 15458 }, { "epoch": 17.607407407407408, "grad_norm": 0.17727838456630707, "learning_rate": 1.8938843929703077e-06, "loss": 0.6775, "step": 15459 }, { "epoch": 17.60854700854701, "grad_norm": 0.23858334124088287, "learning_rate": 1.8921059150225516e-06, "loss": 0.5558, "step": 15460 }, { "epoch": 17.60968660968661, "grad_norm": 0.2909860908985138, "learning_rate": 1.8903282396754984e-06, "loss": 0.3752, "step": 15461 }, { "epoch": 17.61082621082621, "grad_norm": 0.241691455245018, "learning_rate": 1.8885513669908983e-06, "loss": 0.7017, "step": 15462 }, { "epoch": 17.611965811965813, "grad_norm": 0.1912352293729782, "learning_rate": 1.886775297030463e-06, "loss": 0.7422, "step": 15463 }, { "epoch": 17.613105413105412, "grad_norm": 0.18150320649147034, "learning_rate": 1.8850000298558905e-06, "loss": 0.6309, "step": 15464 }, { "epoch": 17.614245014245014, "grad_norm": 0.19993852078914642, "learning_rate": 1.8832255655288288e-06, "loss": 0.4911, "step": 15465 }, { "epoch": 17.615384615384617, "grad_norm": 0.2244868129491806, "learning_rate": 1.8814519041109147e-06, "loss": 0.7259, "step": 15466 }, { "epoch": 17.616524216524216, "grad_norm": 0.1799953728914261, "learning_rate": 1.8796790456637514e-06, "loss": 0.7384, "step": 15467 }, { "epoch": 17.617663817663818, "grad_norm": 0.2096245288848877, "learning_rate": 1.8779069902489148e-06, "loss": 0.6046, "step": 15468 }, { "epoch": 17.61880341880342, "grad_norm": 0.20663857460021973, "learning_rate": 1.876135737927956e-06, "loss": 0.5447, "step": 15469 }, { "epoch": 17.61994301994302, "grad_norm": 0.18800821900367737, "learning_rate": 1.8743652887623947e-06, "loss": 0.6231, "step": 15470 }, { "epoch": 17.62108262108262, "grad_norm": 0.18880976736545563, "learning_rate": 1.8725956428137265e-06, "loss": 0.6914, "step": 15471 }, { "epoch": 17.622222222222224, "grad_norm": 0.19186227023601532, "learning_rate": 1.8708268001434075e-06, "loss": 0.6203, "step": 15472 }, { "epoch": 17.623361823361822, "grad_norm": 0.19230565428733826, "learning_rate": 1.869058760812878e-06, "loss": 0.6205, "step": 15473 }, { "epoch": 17.624501424501425, "grad_norm": 0.21398407220840454, "learning_rate": 1.8672915248835492e-06, "loss": 0.8591, "step": 15474 }, { "epoch": 17.625641025641027, "grad_norm": 0.3133992850780487, "learning_rate": 1.865525092416806e-06, "loss": 0.7318, "step": 15475 }, { "epoch": 17.626780626780626, "grad_norm": 0.21800687909126282, "learning_rate": 1.8637594634739908e-06, "loss": 0.7356, "step": 15476 }, { "epoch": 17.627920227920228, "grad_norm": 0.19687959551811218, "learning_rate": 1.861994638116435e-06, "loss": 0.3595, "step": 15477 }, { "epoch": 17.62905982905983, "grad_norm": 0.23806995153427124, "learning_rate": 1.8602306164054366e-06, "loss": 0.6662, "step": 15478 }, { "epoch": 17.63019943019943, "grad_norm": 0.2590540945529938, "learning_rate": 1.858467398402261e-06, "loss": 0.6737, "step": 15479 }, { "epoch": 17.63133903133903, "grad_norm": 0.24640680849552155, "learning_rate": 1.8567049841681532e-06, "loss": 0.4767, "step": 15480 }, { "epoch": 17.632478632478634, "grad_norm": 0.18963520228862762, "learning_rate": 1.8549433737643256e-06, "loss": 0.6836, "step": 15481 }, { "epoch": 17.633618233618233, "grad_norm": 0.1933334469795227, "learning_rate": 1.8531825672519682e-06, "loss": 0.6558, "step": 15482 }, { "epoch": 17.634757834757835, "grad_norm": 0.20356690883636475, "learning_rate": 1.8514225646922289e-06, "loss": 0.7608, "step": 15483 }, { "epoch": 17.635897435897437, "grad_norm": 0.2365938127040863, "learning_rate": 1.8496633661462453e-06, "loss": 0.8029, "step": 15484 }, { "epoch": 17.637037037037036, "grad_norm": 0.16450777649879456, "learning_rate": 1.8479049716751128e-06, "loss": 0.7396, "step": 15485 }, { "epoch": 17.63817663817664, "grad_norm": 0.23959478735923767, "learning_rate": 1.846147381339916e-06, "loss": 0.5964, "step": 15486 }, { "epoch": 17.63931623931624, "grad_norm": 0.2136934995651245, "learning_rate": 1.8443905952016893e-06, "loss": 0.4586, "step": 15487 }, { "epoch": 17.64045584045584, "grad_norm": 0.1984747350215912, "learning_rate": 1.8426346133214562e-06, "loss": 0.5861, "step": 15488 }, { "epoch": 17.64159544159544, "grad_norm": 0.18648794293403625, "learning_rate": 1.8408794357602039e-06, "loss": 0.7181, "step": 15489 }, { "epoch": 17.642735042735044, "grad_norm": 0.235711470246315, "learning_rate": 1.8391250625789002e-06, "loss": 0.8174, "step": 15490 }, { "epoch": 17.643874643874643, "grad_norm": 0.17229218780994415, "learning_rate": 1.8373714938384718e-06, "loss": 0.7393, "step": 15491 }, { "epoch": 17.645014245014245, "grad_norm": 0.20683033764362335, "learning_rate": 1.8356187295998305e-06, "loss": 0.7333, "step": 15492 }, { "epoch": 17.646153846153847, "grad_norm": 0.2287161648273468, "learning_rate": 1.8338667699238533e-06, "loss": 0.5783, "step": 15493 }, { "epoch": 17.647293447293446, "grad_norm": 0.19930461049079895, "learning_rate": 1.8321156148713935e-06, "loss": 0.5555, "step": 15494 }, { "epoch": 17.64843304843305, "grad_norm": 0.21522825956344604, "learning_rate": 1.830365264503267e-06, "loss": 0.694, "step": 15495 }, { "epoch": 17.64957264957265, "grad_norm": 0.25273633003234863, "learning_rate": 1.8286157188802722e-06, "loss": 0.5542, "step": 15496 }, { "epoch": 17.65071225071225, "grad_norm": 0.15592874586582184, "learning_rate": 1.8268669780631741e-06, "loss": 0.8216, "step": 15497 }, { "epoch": 17.651851851851852, "grad_norm": 0.22126252949237823, "learning_rate": 1.8251190421127163e-06, "loss": 0.6417, "step": 15498 }, { "epoch": 17.652991452991454, "grad_norm": 0.18743862211704254, "learning_rate": 1.8233719110896026e-06, "loss": 0.6671, "step": 15499 }, { "epoch": 17.654131054131053, "grad_norm": 0.17891569435596466, "learning_rate": 1.8216255850545178e-06, "loss": 0.633, "step": 15500 }, { "epoch": 17.655270655270655, "grad_norm": 0.23442067205905914, "learning_rate": 1.8198800640681163e-06, "loss": 0.4959, "step": 15501 }, { "epoch": 17.656410256410258, "grad_norm": 0.19340966641902924, "learning_rate": 1.8181353481910247e-06, "loss": 0.8506, "step": 15502 }, { "epoch": 17.657549857549856, "grad_norm": 0.19096554815769196, "learning_rate": 1.8163914374838441e-06, "loss": 0.6534, "step": 15503 }, { "epoch": 17.65868945868946, "grad_norm": 0.22529169917106628, "learning_rate": 1.8146483320071462e-06, "loss": 0.6786, "step": 15504 }, { "epoch": 17.65982905982906, "grad_norm": 0.18433967232704163, "learning_rate": 1.8129060318214735e-06, "loss": 0.5319, "step": 15505 }, { "epoch": 17.66096866096866, "grad_norm": 0.19485558569431305, "learning_rate": 1.8111645369873336e-06, "loss": 0.5592, "step": 15506 }, { "epoch": 17.662108262108262, "grad_norm": 0.1785300076007843, "learning_rate": 1.8094238475652225e-06, "loss": 0.6756, "step": 15507 }, { "epoch": 17.663247863247864, "grad_norm": 0.23500244319438934, "learning_rate": 1.8076839636155918e-06, "loss": 0.5598, "step": 15508 }, { "epoch": 17.664387464387463, "grad_norm": 0.16340969502925873, "learning_rate": 1.8059448851988819e-06, "loss": 0.9245, "step": 15509 }, { "epoch": 17.665527065527066, "grad_norm": 0.2364928424358368, "learning_rate": 1.8042066123754864e-06, "loss": 0.5783, "step": 15510 }, { "epoch": 17.666666666666668, "grad_norm": 0.19518473744392395, "learning_rate": 1.8024691452057846e-06, "loss": 0.5705, "step": 15511 }, { "epoch": 17.667806267806267, "grad_norm": 0.18936499953269958, "learning_rate": 1.8007324837501198e-06, "loss": 0.7235, "step": 15512 }, { "epoch": 17.66894586894587, "grad_norm": 0.22147583961486816, "learning_rate": 1.7989966280688165e-06, "loss": 0.6459, "step": 15513 }, { "epoch": 17.67008547008547, "grad_norm": 0.17571549117565155, "learning_rate": 1.7972615782221648e-06, "loss": 0.8446, "step": 15514 }, { "epoch": 17.67122507122507, "grad_norm": 0.26569825410842896, "learning_rate": 1.795527334270422e-06, "loss": 0.5387, "step": 15515 }, { "epoch": 17.672364672364672, "grad_norm": 0.2654166519641876, "learning_rate": 1.793793896273835e-06, "loss": 0.3612, "step": 15516 }, { "epoch": 17.673504273504275, "grad_norm": 0.22360172867774963, "learning_rate": 1.7920612642925993e-06, "loss": 0.6004, "step": 15517 }, { "epoch": 17.674643874643873, "grad_norm": 0.15204519033432007, "learning_rate": 1.790329438386895e-06, "loss": 0.6687, "step": 15518 }, { "epoch": 17.675783475783476, "grad_norm": 0.20888058841228485, "learning_rate": 1.7885984186168798e-06, "loss": 0.6181, "step": 15519 }, { "epoch": 17.676923076923078, "grad_norm": 0.19444310665130615, "learning_rate": 1.7868682050426743e-06, "loss": 0.7009, "step": 15520 }, { "epoch": 17.678062678062677, "grad_norm": 0.2132650762796402, "learning_rate": 1.78513879772437e-06, "loss": 0.7071, "step": 15521 }, { "epoch": 17.67920227920228, "grad_norm": 0.1768898367881775, "learning_rate": 1.7834101967220351e-06, "loss": 0.64, "step": 15522 }, { "epoch": 17.68034188034188, "grad_norm": 0.1777723878622055, "learning_rate": 1.7816824020957106e-06, "loss": 0.6448, "step": 15523 }, { "epoch": 17.68148148148148, "grad_norm": 0.24505949020385742, "learning_rate": 1.779955413905407e-06, "loss": 0.5736, "step": 15524 }, { "epoch": 17.682621082621083, "grad_norm": 0.18339896202087402, "learning_rate": 1.7782292322111066e-06, "loss": 0.7177, "step": 15525 }, { "epoch": 17.683760683760685, "grad_norm": 0.17547276616096497, "learning_rate": 1.7765038570727643e-06, "loss": 0.791, "step": 15526 }, { "epoch": 17.684900284900284, "grad_norm": 0.23864220082759857, "learning_rate": 1.77477928855031e-06, "loss": 0.4275, "step": 15527 }, { "epoch": 17.686039886039886, "grad_norm": 0.20151357352733612, "learning_rate": 1.773055526703643e-06, "loss": 0.6343, "step": 15528 }, { "epoch": 17.68717948717949, "grad_norm": 0.18393632769584656, "learning_rate": 1.7713325715926293e-06, "loss": 0.5443, "step": 15529 }, { "epoch": 17.688319088319087, "grad_norm": 0.2020386904478073, "learning_rate": 1.7696104232771155e-06, "loss": 0.6255, "step": 15530 }, { "epoch": 17.68945868945869, "grad_norm": 0.21631336212158203, "learning_rate": 1.767889081816912e-06, "loss": 0.6578, "step": 15531 }, { "epoch": 17.69059829059829, "grad_norm": 0.2140851616859436, "learning_rate": 1.7661685472718153e-06, "loss": 0.4453, "step": 15532 }, { "epoch": 17.69173789173789, "grad_norm": 0.17820830643177032, "learning_rate": 1.7644488197015751e-06, "loss": 0.3786, "step": 15533 }, { "epoch": 17.692877492877493, "grad_norm": 0.1866726577281952, "learning_rate": 1.7627298991659185e-06, "loss": 0.5997, "step": 15534 }, { "epoch": 17.694017094017095, "grad_norm": 0.18119822442531586, "learning_rate": 1.7610117857245673e-06, "loss": 0.4361, "step": 15535 }, { "epoch": 17.695156695156694, "grad_norm": 0.22917959094047546, "learning_rate": 1.7592944794371764e-06, "loss": 0.4815, "step": 15536 }, { "epoch": 17.696296296296296, "grad_norm": 0.1893644481897354, "learning_rate": 1.7575779803634035e-06, "loss": 0.6566, "step": 15537 }, { "epoch": 17.6974358974359, "grad_norm": 0.2197505682706833, "learning_rate": 1.7558622885628623e-06, "loss": 0.6209, "step": 15538 }, { "epoch": 17.698575498575497, "grad_norm": 0.292765736579895, "learning_rate": 1.7541474040951494e-06, "loss": 0.5363, "step": 15539 }, { "epoch": 17.6997150997151, "grad_norm": 0.17893919348716736, "learning_rate": 1.7524333270198202e-06, "loss": 0.4011, "step": 15540 }, { "epoch": 17.700854700854702, "grad_norm": 0.2205260545015335, "learning_rate": 1.7507200573964127e-06, "loss": 0.7413, "step": 15541 }, { "epoch": 17.7019943019943, "grad_norm": 0.2102389633655548, "learning_rate": 1.7490075952844326e-06, "loss": 0.8024, "step": 15542 }, { "epoch": 17.703133903133903, "grad_norm": 0.2010532170534134, "learning_rate": 1.7472959407433653e-06, "loss": 0.6227, "step": 15543 }, { "epoch": 17.704273504273505, "grad_norm": 0.19559378921985626, "learning_rate": 1.7455850938326496e-06, "loss": 0.7316, "step": 15544 }, { "epoch": 17.705413105413104, "grad_norm": 0.19908377528190613, "learning_rate": 1.7438750546117127e-06, "loss": 0.6025, "step": 15545 }, { "epoch": 17.706552706552706, "grad_norm": 0.21301917731761932, "learning_rate": 1.742165823139949e-06, "loss": 0.5778, "step": 15546 }, { "epoch": 17.70769230769231, "grad_norm": 0.21259737014770508, "learning_rate": 1.7404573994767276e-06, "loss": 0.4874, "step": 15547 }, { "epoch": 17.708831908831907, "grad_norm": 0.25575268268585205, "learning_rate": 1.7387497836813843e-06, "loss": 0.5198, "step": 15548 }, { "epoch": 17.70997150997151, "grad_norm": 0.179165318608284, "learning_rate": 1.73704297581323e-06, "loss": 0.5788, "step": 15549 }, { "epoch": 17.711111111111112, "grad_norm": 0.22417140007019043, "learning_rate": 1.7353369759315509e-06, "loss": 0.5924, "step": 15550 }, { "epoch": 17.71225071225071, "grad_norm": 0.18318435549736023, "learning_rate": 1.7336317840955907e-06, "loss": 0.5719, "step": 15551 }, { "epoch": 17.713390313390313, "grad_norm": 0.21470986306667328, "learning_rate": 1.731927400364583e-06, "loss": 0.6964, "step": 15552 }, { "epoch": 17.714529914529916, "grad_norm": 0.1701890379190445, "learning_rate": 1.7302238247977248e-06, "loss": 0.6671, "step": 15553 }, { "epoch": 17.715669515669514, "grad_norm": 0.20296506583690643, "learning_rate": 1.7285210574541883e-06, "loss": 0.61, "step": 15554 }, { "epoch": 17.716809116809117, "grad_norm": 0.1775161325931549, "learning_rate": 1.7268190983931065e-06, "loss": 0.793, "step": 15555 }, { "epoch": 17.71794871794872, "grad_norm": 0.1933393031358719, "learning_rate": 1.7251179476736019e-06, "loss": 0.5851, "step": 15556 }, { "epoch": 17.719088319088318, "grad_norm": 0.2070257067680359, "learning_rate": 1.7234176053547547e-06, "loss": 0.4786, "step": 15557 }, { "epoch": 17.72022792022792, "grad_norm": 0.1920633316040039, "learning_rate": 1.721718071495626e-06, "loss": 0.6262, "step": 15558 }, { "epoch": 17.721367521367522, "grad_norm": 0.1719004213809967, "learning_rate": 1.720019346155244e-06, "loss": 0.6826, "step": 15559 }, { "epoch": 17.72250712250712, "grad_norm": 0.20648297667503357, "learning_rate": 1.7183214293926108e-06, "loss": 0.5219, "step": 15560 }, { "epoch": 17.723646723646723, "grad_norm": 0.16962285339832306, "learning_rate": 1.7166243212667049e-06, "loss": 0.8749, "step": 15561 }, { "epoch": 17.724786324786326, "grad_norm": 0.2371315211057663, "learning_rate": 1.7149280218364594e-06, "loss": 0.478, "step": 15562 }, { "epoch": 17.725925925925925, "grad_norm": 0.1887923628091812, "learning_rate": 1.7132325311607966e-06, "loss": 0.7168, "step": 15563 }, { "epoch": 17.727065527065527, "grad_norm": 0.17830583453178406, "learning_rate": 1.7115378492986084e-06, "loss": 0.7262, "step": 15564 }, { "epoch": 17.72820512820513, "grad_norm": 0.2064090520143509, "learning_rate": 1.7098439763087587e-06, "loss": 0.5849, "step": 15565 }, { "epoch": 17.729344729344728, "grad_norm": 0.23627474904060364, "learning_rate": 1.7081509122500727e-06, "loss": 0.5145, "step": 15566 }, { "epoch": 17.73048433048433, "grad_norm": 0.24249598383903503, "learning_rate": 1.7064586571813563e-06, "loss": 0.7986, "step": 15567 }, { "epoch": 17.731623931623933, "grad_norm": 0.2103555053472519, "learning_rate": 1.7047672111613872e-06, "loss": 0.674, "step": 15568 }, { "epoch": 17.73276353276353, "grad_norm": 0.19475817680358887, "learning_rate": 1.7030765742489214e-06, "loss": 0.7642, "step": 15569 }, { "epoch": 17.733903133903134, "grad_norm": 0.2143593579530716, "learning_rate": 1.7013867465026672e-06, "loss": 0.7425, "step": 15570 }, { "epoch": 17.735042735042736, "grad_norm": 0.19551685452461243, "learning_rate": 1.6996977279813253e-06, "loss": 0.5916, "step": 15571 }, { "epoch": 17.736182336182335, "grad_norm": 0.1768016666173935, "learning_rate": 1.698009518743554e-06, "loss": 0.6166, "step": 15572 }, { "epoch": 17.737321937321937, "grad_norm": 0.19153043627738953, "learning_rate": 1.696322118848001e-06, "loss": 0.7695, "step": 15573 }, { "epoch": 17.73846153846154, "grad_norm": 0.2019730657339096, "learning_rate": 1.6946355283532584e-06, "loss": 0.6971, "step": 15574 }, { "epoch": 17.739601139601138, "grad_norm": 0.17428846657276154, "learning_rate": 1.6929497473179178e-06, "loss": 0.4945, "step": 15575 }, { "epoch": 17.74074074074074, "grad_norm": 0.2166702300310135, "learning_rate": 1.6912647758005245e-06, "loss": 0.502, "step": 15576 }, { "epoch": 17.741880341880343, "grad_norm": 0.19433961808681488, "learning_rate": 1.6895806138596092e-06, "loss": 0.6245, "step": 15577 }, { "epoch": 17.74301994301994, "grad_norm": 0.1903829723596573, "learning_rate": 1.6878972615536587e-06, "loss": 0.7285, "step": 15578 }, { "epoch": 17.744159544159544, "grad_norm": 0.17509286105632782, "learning_rate": 1.6862147189411426e-06, "loss": 0.6913, "step": 15579 }, { "epoch": 17.745299145299146, "grad_norm": 0.1851557195186615, "learning_rate": 1.6845329860805087e-06, "loss": 0.705, "step": 15580 }, { "epoch": 17.746438746438745, "grad_norm": 0.17817610502243042, "learning_rate": 1.6828520630301574e-06, "loss": 0.5326, "step": 15581 }, { "epoch": 17.747578347578347, "grad_norm": 0.18409055471420288, "learning_rate": 1.6811719498484785e-06, "loss": 0.7783, "step": 15582 }, { "epoch": 17.74871794871795, "grad_norm": 0.26104357838630676, "learning_rate": 1.679492646593825e-06, "loss": 0.5521, "step": 15583 }, { "epoch": 17.74985754985755, "grad_norm": 0.24983125925064087, "learning_rate": 1.677814153324525e-06, "loss": 0.5487, "step": 15584 }, { "epoch": 17.75099715099715, "grad_norm": 0.19891224801540375, "learning_rate": 1.676136470098874e-06, "loss": 0.5938, "step": 15585 }, { "epoch": 17.752136752136753, "grad_norm": 0.16363197565078735, "learning_rate": 1.674459596975142e-06, "loss": 0.5812, "step": 15586 }, { "epoch": 17.753276353276352, "grad_norm": 0.1892014592885971, "learning_rate": 1.6727835340115737e-06, "loss": 0.7943, "step": 15587 }, { "epoch": 17.754415954415954, "grad_norm": 0.18088212609291077, "learning_rate": 1.6711082812663898e-06, "loss": 0.684, "step": 15588 }, { "epoch": 17.755555555555556, "grad_norm": 0.2498263418674469, "learning_rate": 1.6694338387977655e-06, "loss": 0.5798, "step": 15589 }, { "epoch": 17.756695156695155, "grad_norm": 0.19375599920749664, "learning_rate": 1.667760206663857e-06, "loss": 0.5276, "step": 15590 }, { "epoch": 17.757834757834758, "grad_norm": 0.1859721541404724, "learning_rate": 1.6660873849228125e-06, "loss": 0.8522, "step": 15591 }, { "epoch": 17.75897435897436, "grad_norm": 0.21408973634243011, "learning_rate": 1.6644153736327134e-06, "loss": 0.5591, "step": 15592 }, { "epoch": 17.76011396011396, "grad_norm": 0.1931608021259308, "learning_rate": 1.6627441728516435e-06, "loss": 0.8221, "step": 15593 }, { "epoch": 17.76125356125356, "grad_norm": 0.16885171830654144, "learning_rate": 1.6610737826376454e-06, "loss": 0.7237, "step": 15594 }, { "epoch": 17.762393162393163, "grad_norm": 0.19227465987205505, "learning_rate": 1.6594042030487421e-06, "loss": 0.4849, "step": 15595 }, { "epoch": 17.763532763532762, "grad_norm": 0.2134411633014679, "learning_rate": 1.6577354341429125e-06, "loss": 0.4955, "step": 15596 }, { "epoch": 17.764672364672364, "grad_norm": 0.22480376064777374, "learning_rate": 1.6560674759781236e-06, "loss": 0.8602, "step": 15597 }, { "epoch": 17.765811965811967, "grad_norm": 0.19876371324062347, "learning_rate": 1.6544003286123071e-06, "loss": 0.6761, "step": 15598 }, { "epoch": 17.766951566951565, "grad_norm": 0.1717088222503662, "learning_rate": 1.6527339921033725e-06, "loss": 0.7948, "step": 15599 }, { "epoch": 17.768091168091168, "grad_norm": 0.24230657517910004, "learning_rate": 1.651068466509187e-06, "loss": 0.5212, "step": 15600 }, { "epoch": 17.76923076923077, "grad_norm": 0.24282784759998322, "learning_rate": 1.6494037518875988e-06, "loss": 0.5624, "step": 15601 }, { "epoch": 17.77037037037037, "grad_norm": 0.25488370656967163, "learning_rate": 1.6477398482964423e-06, "loss": 0.4, "step": 15602 }, { "epoch": 17.77150997150997, "grad_norm": 0.16509465873241425, "learning_rate": 1.6460767557934965e-06, "loss": 0.879, "step": 15603 }, { "epoch": 17.772649572649573, "grad_norm": 0.17876017093658447, "learning_rate": 1.644414474436526e-06, "loss": 0.7514, "step": 15604 }, { "epoch": 17.773789173789172, "grad_norm": 0.190785214304924, "learning_rate": 1.6427530042832706e-06, "loss": 0.8318, "step": 15605 }, { "epoch": 17.774928774928775, "grad_norm": 0.19601422548294067, "learning_rate": 1.6410923453914346e-06, "loss": 0.7242, "step": 15606 }, { "epoch": 17.776068376068377, "grad_norm": 0.19514648616313934, "learning_rate": 1.639432497818702e-06, "loss": 0.4328, "step": 15607 }, { "epoch": 17.777207977207976, "grad_norm": 0.16870303452014923, "learning_rate": 1.6377734616227187e-06, "loss": 0.8108, "step": 15608 }, { "epoch": 17.778347578347578, "grad_norm": 0.23923449218273163, "learning_rate": 1.6361152368611078e-06, "loss": 0.6936, "step": 15609 }, { "epoch": 17.77948717948718, "grad_norm": 0.16083301603794098, "learning_rate": 1.6344578235914676e-06, "loss": 0.5575, "step": 15610 }, { "epoch": 17.78062678062678, "grad_norm": 0.2206811010837555, "learning_rate": 1.6328012218713662e-06, "loss": 0.5659, "step": 15611 }, { "epoch": 17.78176638176638, "grad_norm": 0.17298242449760437, "learning_rate": 1.6311454317583296e-06, "loss": 0.9719, "step": 15612 }, { "epoch": 17.782905982905984, "grad_norm": 0.22341564297676086, "learning_rate": 1.6294904533098815e-06, "loss": 0.7255, "step": 15613 }, { "epoch": 17.784045584045582, "grad_norm": 0.22084711492061615, "learning_rate": 1.6278362865835007e-06, "loss": 0.6097, "step": 15614 }, { "epoch": 17.785185185185185, "grad_norm": 0.16300833225250244, "learning_rate": 1.6261829316366384e-06, "loss": 0.6971, "step": 15615 }, { "epoch": 17.786324786324787, "grad_norm": 0.17927750945091248, "learning_rate": 1.624530388526721e-06, "loss": 0.734, "step": 15616 }, { "epoch": 17.787464387464386, "grad_norm": 0.23114407062530518, "learning_rate": 1.6228786573111444e-06, "loss": 0.574, "step": 15617 }, { "epoch": 17.788603988603988, "grad_norm": 0.21871796250343323, "learning_rate": 1.6212277380472846e-06, "loss": 0.6249, "step": 15618 }, { "epoch": 17.78974358974359, "grad_norm": 0.16673311591148376, "learning_rate": 1.619577630792471e-06, "loss": 0.7506, "step": 15619 }, { "epoch": 17.79088319088319, "grad_norm": 0.18423733115196228, "learning_rate": 1.6179283356040243e-06, "loss": 0.536, "step": 15620 }, { "epoch": 17.79202279202279, "grad_norm": 0.21031668782234192, "learning_rate": 1.6162798525392293e-06, "loss": 0.4542, "step": 15621 }, { "epoch": 17.793162393162394, "grad_norm": 0.1846408098936081, "learning_rate": 1.61463218165534e-06, "loss": 0.3919, "step": 15622 }, { "epoch": 17.794301994301993, "grad_norm": 0.1976088583469391, "learning_rate": 1.6129853230095804e-06, "loss": 0.5886, "step": 15623 }, { "epoch": 17.795441595441595, "grad_norm": 0.17543578147888184, "learning_rate": 1.61133927665916e-06, "loss": 0.8148, "step": 15624 }, { "epoch": 17.796581196581197, "grad_norm": 0.17480209469795227, "learning_rate": 1.6096940426612473e-06, "loss": 0.7421, "step": 15625 }, { "epoch": 17.797720797720796, "grad_norm": 0.22213301062583923, "learning_rate": 1.6080496210729795e-06, "loss": 0.4293, "step": 15626 }, { "epoch": 17.7988603988604, "grad_norm": 0.21744494140148163, "learning_rate": 1.606406011951478e-06, "loss": 0.6143, "step": 15627 }, { "epoch": 17.8, "grad_norm": 0.19951052963733673, "learning_rate": 1.60476321535383e-06, "loss": 0.6161, "step": 15628 }, { "epoch": 17.8011396011396, "grad_norm": 0.1538543403148651, "learning_rate": 1.603121231337093e-06, "loss": 0.7263, "step": 15629 }, { "epoch": 17.802279202279202, "grad_norm": 0.2472684532403946, "learning_rate": 1.6014800599582963e-06, "loss": 0.555, "step": 15630 }, { "epoch": 17.803418803418804, "grad_norm": 0.19252094626426697, "learning_rate": 1.5998397012744415e-06, "loss": 0.5233, "step": 15631 }, { "epoch": 17.804558404558403, "grad_norm": 0.20167063176631927, "learning_rate": 1.5982001553425052e-06, "loss": 0.7915, "step": 15632 }, { "epoch": 17.805698005698005, "grad_norm": 0.1946546733379364, "learning_rate": 1.5965614222194363e-06, "loss": 0.5434, "step": 15633 }, { "epoch": 17.806837606837608, "grad_norm": 0.26714178919792175, "learning_rate": 1.5949235019621422e-06, "loss": 0.4186, "step": 15634 }, { "epoch": 17.807977207977206, "grad_norm": 0.22108449041843414, "learning_rate": 1.5932863946275216e-06, "loss": 0.6067, "step": 15635 }, { "epoch": 17.80911680911681, "grad_norm": 0.1902119368314743, "learning_rate": 1.5916501002724378e-06, "loss": 0.5094, "step": 15636 }, { "epoch": 17.81025641025641, "grad_norm": 0.18795819580554962, "learning_rate": 1.590014618953714e-06, "loss": 0.8118, "step": 15637 }, { "epoch": 17.81139601139601, "grad_norm": 0.17217494547367096, "learning_rate": 1.5883799507281637e-06, "loss": 0.5974, "step": 15638 }, { "epoch": 17.812535612535612, "grad_norm": 0.2681303918361664, "learning_rate": 1.5867460956525555e-06, "loss": 0.4903, "step": 15639 }, { "epoch": 17.813675213675214, "grad_norm": 0.2004638910293579, "learning_rate": 1.585113053783649e-06, "loss": 0.6443, "step": 15640 }, { "epoch": 17.814814814814813, "grad_norm": 0.15981297194957733, "learning_rate": 1.583480825178152e-06, "loss": 0.7571, "step": 15641 }, { "epoch": 17.815954415954415, "grad_norm": 0.233234241604805, "learning_rate": 1.5818494098927632e-06, "loss": 0.6427, "step": 15642 }, { "epoch": 17.817094017094018, "grad_norm": 0.15844886004924774, "learning_rate": 1.5802188079841435e-06, "loss": 0.6918, "step": 15643 }, { "epoch": 17.81823361823362, "grad_norm": 0.21250970661640167, "learning_rate": 1.578589019508933e-06, "loss": 0.5825, "step": 15644 }, { "epoch": 17.81937321937322, "grad_norm": 0.22062163054943085, "learning_rate": 1.576960044523726e-06, "loss": 0.4389, "step": 15645 }, { "epoch": 17.82051282051282, "grad_norm": 0.2203861027956009, "learning_rate": 1.5753318830851155e-06, "loss": 0.5551, "step": 15646 }, { "epoch": 17.821652421652423, "grad_norm": 0.1712387502193451, "learning_rate": 1.5737045352496482e-06, "loss": 0.7451, "step": 15647 }, { "epoch": 17.822792022792022, "grad_norm": 0.17983700335025787, "learning_rate": 1.5720780010738484e-06, "loss": 0.7685, "step": 15648 }, { "epoch": 17.823931623931625, "grad_norm": 0.1772400438785553, "learning_rate": 1.5704522806142013e-06, "loss": 0.482, "step": 15649 }, { "epoch": 17.825071225071227, "grad_norm": 0.22852641344070435, "learning_rate": 1.5688273739271786e-06, "loss": 0.4757, "step": 15650 }, { "epoch": 17.826210826210826, "grad_norm": 0.17057965695858002, "learning_rate": 1.5672032810692155e-06, "loss": 0.6854, "step": 15651 }, { "epoch": 17.827350427350428, "grad_norm": 0.19366249442100525, "learning_rate": 1.5655800020967282e-06, "loss": 0.8097, "step": 15652 }, { "epoch": 17.82849002849003, "grad_norm": 0.197560653090477, "learning_rate": 1.5639575370660908e-06, "loss": 0.5299, "step": 15653 }, { "epoch": 17.82962962962963, "grad_norm": 0.15306705236434937, "learning_rate": 1.5623358860336528e-06, "loss": 0.7513, "step": 15654 }, { "epoch": 17.83076923076923, "grad_norm": 0.16111943125724792, "learning_rate": 1.560715049055747e-06, "loss": 0.6649, "step": 15655 }, { "epoch": 17.831908831908834, "grad_norm": 0.1969628632068634, "learning_rate": 1.5590950261886645e-06, "loss": 0.5163, "step": 15656 }, { "epoch": 17.833048433048432, "grad_norm": 0.16821090877056122, "learning_rate": 1.5574758174886738e-06, "loss": 0.4574, "step": 15657 }, { "epoch": 17.834188034188035, "grad_norm": 0.22840365767478943, "learning_rate": 1.5558574230120165e-06, "loss": 0.6578, "step": 15658 }, { "epoch": 17.835327635327637, "grad_norm": 0.17995165288448334, "learning_rate": 1.5542398428149058e-06, "loss": 0.6351, "step": 15659 }, { "epoch": 17.836467236467236, "grad_norm": 0.17947416007518768, "learning_rate": 1.5526230769535188e-06, "loss": 0.7299, "step": 15660 }, { "epoch": 17.837606837606838, "grad_norm": 0.2007407546043396, "learning_rate": 1.551007125484011e-06, "loss": 0.5662, "step": 15661 }, { "epoch": 17.83874643874644, "grad_norm": 0.20293985307216644, "learning_rate": 1.5493919884625118e-06, "loss": 0.4846, "step": 15662 }, { "epoch": 17.83988603988604, "grad_norm": 0.23599407076835632, "learning_rate": 1.5477776659451215e-06, "loss": 0.552, "step": 15663 }, { "epoch": 17.84102564102564, "grad_norm": 0.16593383252620697, "learning_rate": 1.5461641579879032e-06, "loss": 0.9204, "step": 15664 }, { "epoch": 17.842165242165244, "grad_norm": 0.2062879502773285, "learning_rate": 1.5445514646469012e-06, "loss": 0.6868, "step": 15665 }, { "epoch": 17.843304843304843, "grad_norm": 0.2270360291004181, "learning_rate": 1.5429395859781314e-06, "loss": 0.8417, "step": 15666 }, { "epoch": 17.844444444444445, "grad_norm": 0.1631319373846054, "learning_rate": 1.5413285220375745e-06, "loss": 0.5998, "step": 15667 }, { "epoch": 17.845584045584047, "grad_norm": 0.21692709624767303, "learning_rate": 1.5397182728811909e-06, "loss": 0.4068, "step": 15668 }, { "epoch": 17.846723646723646, "grad_norm": 0.23554235696792603, "learning_rate": 1.5381088385649083e-06, "loss": 0.6377, "step": 15669 }, { "epoch": 17.84786324786325, "grad_norm": 0.17600174248218536, "learning_rate": 1.5365002191446293e-06, "loss": 0.8951, "step": 15670 }, { "epoch": 17.84900284900285, "grad_norm": 0.22851988673210144, "learning_rate": 1.53489241467622e-06, "loss": 0.6134, "step": 15671 }, { "epoch": 17.85014245014245, "grad_norm": 0.18456017971038818, "learning_rate": 1.533285425215525e-06, "loss": 0.9537, "step": 15672 }, { "epoch": 17.851282051282052, "grad_norm": 0.20807267725467682, "learning_rate": 1.5316792508183602e-06, "loss": 0.6913, "step": 15673 }, { "epoch": 17.852421652421654, "grad_norm": 0.21124258637428284, "learning_rate": 1.5300738915405205e-06, "loss": 0.5291, "step": 15674 }, { "epoch": 17.853561253561253, "grad_norm": 0.18766073882579803, "learning_rate": 1.5284693474377497e-06, "loss": 0.6316, "step": 15675 }, { "epoch": 17.854700854700855, "grad_norm": 0.2111806571483612, "learning_rate": 1.526865618565787e-06, "loss": 0.5725, "step": 15676 }, { "epoch": 17.855840455840458, "grad_norm": 0.19177082180976868, "learning_rate": 1.525262704980332e-06, "loss": 0.6037, "step": 15677 }, { "epoch": 17.856980056980056, "grad_norm": 0.21159958839416504, "learning_rate": 1.5236606067370596e-06, "loss": 0.7083, "step": 15678 }, { "epoch": 17.85811965811966, "grad_norm": 0.21092787384986877, "learning_rate": 1.5220593238916141e-06, "loss": 0.677, "step": 15679 }, { "epoch": 17.85925925925926, "grad_norm": 0.23395919799804688, "learning_rate": 1.5204588564996126e-06, "loss": 0.6052, "step": 15680 }, { "epoch": 17.86039886039886, "grad_norm": 0.1797294318675995, "learning_rate": 1.5188592046166489e-06, "loss": 0.5012, "step": 15681 }, { "epoch": 17.861538461538462, "grad_norm": 0.19395455718040466, "learning_rate": 1.5172603682982734e-06, "loss": 0.9367, "step": 15682 }, { "epoch": 17.862678062678064, "grad_norm": 0.18395520746707916, "learning_rate": 1.515662347600022e-06, "loss": 0.8208, "step": 15683 }, { "epoch": 17.863817663817663, "grad_norm": 0.17953245341777802, "learning_rate": 1.5140651425774033e-06, "loss": 0.6289, "step": 15684 }, { "epoch": 17.864957264957265, "grad_norm": 0.2031773030757904, "learning_rate": 1.512468753285884e-06, "loss": 0.5615, "step": 15685 }, { "epoch": 17.866096866096868, "grad_norm": 0.17379479110240936, "learning_rate": 1.5108731797809223e-06, "loss": 0.8522, "step": 15686 }, { "epoch": 17.867236467236467, "grad_norm": 0.19298098981380463, "learning_rate": 1.5092784221179269e-06, "loss": 0.6752, "step": 15687 }, { "epoch": 17.86837606837607, "grad_norm": 0.18998976051807404, "learning_rate": 1.5076844803522922e-06, "loss": 0.77, "step": 15688 }, { "epoch": 17.86951566951567, "grad_norm": 0.17749148607254028, "learning_rate": 1.5060913545393796e-06, "loss": 0.4653, "step": 15689 }, { "epoch": 17.87065527065527, "grad_norm": 0.20059289038181305, "learning_rate": 1.5044990447345197e-06, "loss": 0.7338, "step": 15690 }, { "epoch": 17.871794871794872, "grad_norm": 0.1818057745695114, "learning_rate": 1.502907550993024e-06, "loss": 0.6244, "step": 15691 }, { "epoch": 17.872934472934475, "grad_norm": 0.18002621829509735, "learning_rate": 1.5013168733701649e-06, "loss": 0.7185, "step": 15692 }, { "epoch": 17.874074074074073, "grad_norm": 0.2134348303079605, "learning_rate": 1.4997270119211954e-06, "loss": 0.6021, "step": 15693 }, { "epoch": 17.875213675213676, "grad_norm": 0.23193307220935822, "learning_rate": 1.4981379667013322e-06, "loss": 0.5269, "step": 15694 }, { "epoch": 17.876353276353278, "grad_norm": 0.17786921560764313, "learning_rate": 1.496549737765765e-06, "loss": 0.8064, "step": 15695 }, { "epoch": 17.877492877492877, "grad_norm": 0.17929306626319885, "learning_rate": 1.4949623251696604e-06, "loss": 0.7414, "step": 15696 }, { "epoch": 17.87863247863248, "grad_norm": 0.17453618347644806, "learning_rate": 1.4933757289681576e-06, "loss": 0.7455, "step": 15697 }, { "epoch": 17.87977207977208, "grad_norm": 0.17964501678943634, "learning_rate": 1.4917899492163546e-06, "loss": 0.4788, "step": 15698 }, { "epoch": 17.88091168091168, "grad_norm": 0.18611229956150055, "learning_rate": 1.4902049859693374e-06, "loss": 0.807, "step": 15699 }, { "epoch": 17.882051282051282, "grad_norm": 0.18048332631587982, "learning_rate": 1.488620839282151e-06, "loss": 0.7006, "step": 15700 }, { "epoch": 17.883190883190885, "grad_norm": 0.23939697444438934, "learning_rate": 1.4870375092098182e-06, "loss": 0.5045, "step": 15701 }, { "epoch": 17.884330484330484, "grad_norm": 0.29259318113327026, "learning_rate": 1.485454995807334e-06, "loss": 0.33, "step": 15702 }, { "epoch": 17.885470085470086, "grad_norm": 0.22974389791488647, "learning_rate": 1.483873299129665e-06, "loss": 0.557, "step": 15703 }, { "epoch": 17.886609686609688, "grad_norm": 0.24285724759101868, "learning_rate": 1.4822924192317483e-06, "loss": 0.4113, "step": 15704 }, { "epoch": 17.887749287749287, "grad_norm": 0.16974323987960815, "learning_rate": 1.4807123561684871e-06, "loss": 0.7404, "step": 15705 }, { "epoch": 17.88888888888889, "grad_norm": 0.20796890556812286, "learning_rate": 1.4791331099947626e-06, "loss": 0.549, "step": 15706 }, { "epoch": 17.89002849002849, "grad_norm": 0.1859317421913147, "learning_rate": 1.4775546807654279e-06, "loss": 0.7613, "step": 15707 }, { "epoch": 17.89116809116809, "grad_norm": 0.22831900417804718, "learning_rate": 1.4759770685353114e-06, "loss": 0.3131, "step": 15708 }, { "epoch": 17.892307692307693, "grad_norm": 0.22934114933013916, "learning_rate": 1.474400273359197e-06, "loss": 0.6561, "step": 15709 }, { "epoch": 17.893447293447295, "grad_norm": 0.1744450479745865, "learning_rate": 1.4728242952918582e-06, "loss": 0.8713, "step": 15710 }, { "epoch": 17.894586894586894, "grad_norm": 0.18341250717639923, "learning_rate": 1.4712491343880308e-06, "loss": 0.8197, "step": 15711 }, { "epoch": 17.895726495726496, "grad_norm": 0.24606621265411377, "learning_rate": 1.4696747907024272e-06, "loss": 0.6444, "step": 15712 }, { "epoch": 17.8968660968661, "grad_norm": 0.20632360875606537, "learning_rate": 1.4681012642897258e-06, "loss": 0.5589, "step": 15713 }, { "epoch": 17.898005698005697, "grad_norm": 0.23047134280204773, "learning_rate": 1.4665285552045798e-06, "loss": 0.5622, "step": 15714 }, { "epoch": 17.8991452991453, "grad_norm": 0.1647486686706543, "learning_rate": 1.4649566635016182e-06, "loss": 0.9014, "step": 15715 }, { "epoch": 17.900284900284902, "grad_norm": 0.21783824265003204, "learning_rate": 1.4633855892354275e-06, "loss": 0.5439, "step": 15716 }, { "epoch": 17.9014245014245, "grad_norm": 0.1910092532634735, "learning_rate": 1.4618153324605838e-06, "loss": 0.5971, "step": 15717 }, { "epoch": 17.902564102564103, "grad_norm": 0.19612379372119904, "learning_rate": 1.4602458932316239e-06, "loss": 0.4906, "step": 15718 }, { "epoch": 17.903703703703705, "grad_norm": 0.23693189024925232, "learning_rate": 1.4586772716030622e-06, "loss": 0.6419, "step": 15719 }, { "epoch": 17.904843304843304, "grad_norm": 0.2047819048166275, "learning_rate": 1.457109467629375e-06, "loss": 0.6399, "step": 15720 }, { "epoch": 17.905982905982906, "grad_norm": 0.23813176155090332, "learning_rate": 1.4555424813650158e-06, "loss": 0.6639, "step": 15721 }, { "epoch": 17.90712250712251, "grad_norm": 0.18176306784152985, "learning_rate": 1.4539763128644134e-06, "loss": 0.7799, "step": 15722 }, { "epoch": 17.908262108262107, "grad_norm": 0.2089502066373825, "learning_rate": 1.4524109621819715e-06, "loss": 0.665, "step": 15723 }, { "epoch": 17.90940170940171, "grad_norm": 0.1719292402267456, "learning_rate": 1.4508464293720492e-06, "loss": 0.886, "step": 15724 }, { "epoch": 17.910541310541312, "grad_norm": 0.23162811994552612, "learning_rate": 1.449282714488992e-06, "loss": 0.5855, "step": 15725 }, { "epoch": 17.91168091168091, "grad_norm": 0.21349689364433289, "learning_rate": 1.4477198175871094e-06, "loss": 0.7581, "step": 15726 }, { "epoch": 17.912820512820513, "grad_norm": 0.1813352406024933, "learning_rate": 1.446157738720691e-06, "loss": 0.6351, "step": 15727 }, { "epoch": 17.913960113960115, "grad_norm": 0.15334013104438782, "learning_rate": 1.4445964779439824e-06, "loss": 0.7737, "step": 15728 }, { "epoch": 17.915099715099714, "grad_norm": 0.2020421177148819, "learning_rate": 1.4430360353112183e-06, "loss": 0.5556, "step": 15729 }, { "epoch": 17.916239316239317, "grad_norm": 0.18615838885307312, "learning_rate": 1.441476410876591e-06, "loss": 0.7083, "step": 15730 }, { "epoch": 17.91737891737892, "grad_norm": 0.188862144947052, "learning_rate": 1.4399176046942825e-06, "loss": 0.6021, "step": 15731 }, { "epoch": 17.918518518518518, "grad_norm": 0.1673618108034134, "learning_rate": 1.4383596168184188e-06, "loss": 0.5754, "step": 15732 }, { "epoch": 17.91965811965812, "grad_norm": 0.20038911700248718, "learning_rate": 1.4368024473031178e-06, "loss": 0.7084, "step": 15733 }, { "epoch": 17.920797720797722, "grad_norm": 0.17062163352966309, "learning_rate": 1.435246096202475e-06, "loss": 0.609, "step": 15734 }, { "epoch": 17.92193732193732, "grad_norm": 0.22755929827690125, "learning_rate": 1.4336905635705333e-06, "loss": 0.6411, "step": 15735 }, { "epoch": 17.923076923076923, "grad_norm": 0.16928339004516602, "learning_rate": 1.4321358494613273e-06, "loss": 0.7167, "step": 15736 }, { "epoch": 17.924216524216526, "grad_norm": 0.20951497554779053, "learning_rate": 1.4305819539288557e-06, "loss": 0.7137, "step": 15737 }, { "epoch": 17.925356125356124, "grad_norm": 0.19941192865371704, "learning_rate": 1.4290288770270915e-06, "loss": 0.8405, "step": 15738 }, { "epoch": 17.926495726495727, "grad_norm": 0.21432682871818542, "learning_rate": 1.4274766188099697e-06, "loss": 0.6568, "step": 15739 }, { "epoch": 17.92763532763533, "grad_norm": 0.16878537833690643, "learning_rate": 1.4259251793314111e-06, "loss": 0.7838, "step": 15740 }, { "epoch": 17.928774928774928, "grad_norm": 0.18712733685970306, "learning_rate": 1.4243745586453e-06, "loss": 0.5487, "step": 15741 }, { "epoch": 17.92991452991453, "grad_norm": 0.19850511848926544, "learning_rate": 1.4228247568054993e-06, "loss": 0.8193, "step": 15742 }, { "epoch": 17.931054131054132, "grad_norm": 0.2295110672712326, "learning_rate": 1.4212757738658266e-06, "loss": 0.7032, "step": 15743 }, { "epoch": 17.93219373219373, "grad_norm": 0.22638626396656036, "learning_rate": 1.4197276098800838e-06, "loss": 0.6077, "step": 15744 }, { "epoch": 17.933333333333334, "grad_norm": 0.2030586153268814, "learning_rate": 1.4181802649020554e-06, "loss": 0.7718, "step": 15745 }, { "epoch": 17.934472934472936, "grad_norm": 0.22802267968654633, "learning_rate": 1.4166337389854734e-06, "loss": 0.425, "step": 15746 }, { "epoch": 17.935612535612535, "grad_norm": 0.17927877604961395, "learning_rate": 1.415088032184056e-06, "loss": 0.6523, "step": 15747 }, { "epoch": 17.936752136752137, "grad_norm": 0.19029876589775085, "learning_rate": 1.413543144551488e-06, "loss": 0.6057, "step": 15748 }, { "epoch": 17.93789173789174, "grad_norm": 0.1827959418296814, "learning_rate": 1.4119990761414348e-06, "loss": 0.771, "step": 15749 }, { "epoch": 17.939031339031338, "grad_norm": 0.20706240832805634, "learning_rate": 1.4104558270075175e-06, "loss": 0.6524, "step": 15750 }, { "epoch": 17.94017094017094, "grad_norm": 0.183136984705925, "learning_rate": 1.4089133972033402e-06, "loss": 0.578, "step": 15751 }, { "epoch": 17.941310541310543, "grad_norm": 0.18875323235988617, "learning_rate": 1.407371786782477e-06, "loss": 0.547, "step": 15752 }, { "epoch": 17.94245014245014, "grad_norm": 0.2019789069890976, "learning_rate": 1.4058309957984739e-06, "loss": 0.5378, "step": 15753 }, { "epoch": 17.943589743589744, "grad_norm": 0.176071897149086, "learning_rate": 1.4042910243048434e-06, "loss": 0.7113, "step": 15754 }, { "epoch": 17.944729344729346, "grad_norm": 0.25191131234169006, "learning_rate": 1.402751872355068e-06, "loss": 0.4129, "step": 15755 }, { "epoch": 17.945868945868945, "grad_norm": 0.1877506822347641, "learning_rate": 1.4012135400026216e-06, "loss": 0.6665, "step": 15756 }, { "epoch": 17.947008547008547, "grad_norm": 0.17223647236824036, "learning_rate": 1.3996760273009223e-06, "loss": 0.6929, "step": 15757 }, { "epoch": 17.94814814814815, "grad_norm": 0.1804594099521637, "learning_rate": 1.398139334303375e-06, "loss": 0.541, "step": 15758 }, { "epoch": 17.94928774928775, "grad_norm": 0.2069043070077896, "learning_rate": 1.3966034610633533e-06, "loss": 0.7153, "step": 15759 }, { "epoch": 17.95042735042735, "grad_norm": 0.1842239648103714, "learning_rate": 1.3950684076342092e-06, "loss": 0.749, "step": 15760 }, { "epoch": 17.951566951566953, "grad_norm": 0.23782020807266235, "learning_rate": 1.393534174069247e-06, "loss": 0.614, "step": 15761 }, { "epoch": 17.95270655270655, "grad_norm": 0.2035999298095703, "learning_rate": 1.3920007604217605e-06, "loss": 0.5895, "step": 15762 }, { "epoch": 17.953846153846154, "grad_norm": 0.20039676129817963, "learning_rate": 1.3904681667450125e-06, "loss": 0.7869, "step": 15763 }, { "epoch": 17.954985754985756, "grad_norm": 0.21923471987247467, "learning_rate": 1.38893639309223e-06, "loss": 0.7398, "step": 15764 }, { "epoch": 17.956125356125355, "grad_norm": 0.23911401629447937, "learning_rate": 1.3874054395166202e-06, "loss": 0.4584, "step": 15765 }, { "epoch": 17.957264957264957, "grad_norm": 0.2090776115655899, "learning_rate": 1.3858753060713464e-06, "loss": 0.5422, "step": 15766 }, { "epoch": 17.95840455840456, "grad_norm": 0.14810439944267273, "learning_rate": 1.3843459928095687e-06, "loss": 0.5826, "step": 15767 }, { "epoch": 17.95954415954416, "grad_norm": 0.1938624531030655, "learning_rate": 1.3828174997844001e-06, "loss": 0.6167, "step": 15768 }, { "epoch": 17.96068376068376, "grad_norm": 0.24723225831985474, "learning_rate": 1.3812898270489232e-06, "loss": 0.7233, "step": 15769 }, { "epoch": 17.961823361823363, "grad_norm": 0.21326233446598053, "learning_rate": 1.379762974656204e-06, "loss": 0.6627, "step": 15770 }, { "epoch": 17.962962962962962, "grad_norm": 0.1856732815504074, "learning_rate": 1.3782369426592694e-06, "loss": 0.7113, "step": 15771 }, { "epoch": 17.964102564102564, "grad_norm": 0.19795556366443634, "learning_rate": 1.3767117311111328e-06, "loss": 0.6747, "step": 15772 }, { "epoch": 17.965242165242167, "grad_norm": 0.21814678609371185, "learning_rate": 1.375187340064757e-06, "loss": 0.7673, "step": 15773 }, { "epoch": 17.966381766381765, "grad_norm": 0.2197718769311905, "learning_rate": 1.373663769573094e-06, "loss": 0.7267, "step": 15774 }, { "epoch": 17.967521367521368, "grad_norm": 0.19596217572689056, "learning_rate": 1.3721410196890604e-06, "loss": 0.903, "step": 15775 }, { "epoch": 17.96866096866097, "grad_norm": 0.17794634401798248, "learning_rate": 1.3706190904655497e-06, "loss": 0.9005, "step": 15776 }, { "epoch": 17.96980056980057, "grad_norm": 0.17554956674575806, "learning_rate": 1.3690979819554112e-06, "loss": 0.7723, "step": 15777 }, { "epoch": 17.97094017094017, "grad_norm": 0.18461477756500244, "learning_rate": 1.3675776942114914e-06, "loss": 0.7401, "step": 15778 }, { "epoch": 17.972079772079773, "grad_norm": 0.16596505045890808, "learning_rate": 1.3660582272865874e-06, "loss": 0.6513, "step": 15779 }, { "epoch": 17.973219373219372, "grad_norm": 0.17433393001556396, "learning_rate": 1.3645395812334733e-06, "loss": 0.5522, "step": 15780 }, { "epoch": 17.974358974358974, "grad_norm": 0.21385356783866882, "learning_rate": 1.3630217561048985e-06, "loss": 0.5244, "step": 15781 }, { "epoch": 17.975498575498577, "grad_norm": 0.1666141003370285, "learning_rate": 1.3615047519535768e-06, "loss": 0.7708, "step": 15782 }, { "epoch": 17.976638176638176, "grad_norm": 0.2162417769432068, "learning_rate": 1.3599885688322073e-06, "loss": 0.5591, "step": 15783 }, { "epoch": 17.977777777777778, "grad_norm": 0.18287643790245056, "learning_rate": 1.3584732067934397e-06, "loss": 0.8681, "step": 15784 }, { "epoch": 17.97891737891738, "grad_norm": 0.20362326502799988, "learning_rate": 1.3569586658899152e-06, "loss": 0.6579, "step": 15785 }, { "epoch": 17.98005698005698, "grad_norm": 0.20295435190200806, "learning_rate": 1.3554449461742308e-06, "loss": 0.8604, "step": 15786 }, { "epoch": 17.98119658119658, "grad_norm": 0.2299310564994812, "learning_rate": 1.353932047698972e-06, "loss": 0.7428, "step": 15787 }, { "epoch": 17.982336182336184, "grad_norm": 0.20330417156219482, "learning_rate": 1.3524199705166774e-06, "loss": 0.7127, "step": 15788 }, { "epoch": 17.983475783475782, "grad_norm": 0.23929935693740845, "learning_rate": 1.3509087146798633e-06, "loss": 0.7436, "step": 15789 }, { "epoch": 17.984615384615385, "grad_norm": 0.17229998111724854, "learning_rate": 1.3493982802410322e-06, "loss": 0.5942, "step": 15790 }, { "epoch": 17.985754985754987, "grad_norm": 0.20117415487766266, "learning_rate": 1.3478886672526336e-06, "loss": 0.6428, "step": 15791 }, { "epoch": 17.986894586894586, "grad_norm": 0.15806029736995697, "learning_rate": 1.3463798757671064e-06, "loss": 0.6213, "step": 15792 }, { "epoch": 17.988034188034188, "grad_norm": 0.2126135379076004, "learning_rate": 1.3448719058368532e-06, "loss": 0.6405, "step": 15793 }, { "epoch": 17.98917378917379, "grad_norm": 0.16452065110206604, "learning_rate": 1.3433647575142567e-06, "loss": 0.7101, "step": 15794 }, { "epoch": 17.99031339031339, "grad_norm": 0.21072053909301758, "learning_rate": 1.3418584308516529e-06, "loss": 0.7111, "step": 15795 }, { "epoch": 17.99145299145299, "grad_norm": 0.16452158987522125, "learning_rate": 1.3403529259013641e-06, "loss": 0.5449, "step": 15796 }, { "epoch": 17.992592592592594, "grad_norm": 0.2283734679222107, "learning_rate": 1.3388482427156845e-06, "loss": 0.5565, "step": 15797 }, { "epoch": 17.993732193732193, "grad_norm": 0.1818462759256363, "learning_rate": 1.3373443813468778e-06, "loss": 0.899, "step": 15798 }, { "epoch": 17.994871794871795, "grad_norm": 0.21215508878231049, "learning_rate": 1.335841341847166e-06, "loss": 0.6596, "step": 15799 }, { "epoch": 17.996011396011397, "grad_norm": 0.18539436161518097, "learning_rate": 1.3343391242687603e-06, "loss": 0.5004, "step": 15800 }, { "epoch": 17.997150997150996, "grad_norm": 0.17485128343105316, "learning_rate": 1.3328377286638439e-06, "loss": 0.6803, "step": 15801 }, { "epoch": 17.9982905982906, "grad_norm": 0.16342106461524963, "learning_rate": 1.3313371550845583e-06, "loss": 0.7169, "step": 15802 }, { "epoch": 17.9994301994302, "grad_norm": 0.19986993074417114, "learning_rate": 1.3298374035830174e-06, "loss": 0.5385, "step": 15803 }, { "epoch": 18.0, "grad_norm": 0.3066229224205017, "learning_rate": 1.3283384742113215e-06, "loss": 0.5134, "step": 15804 }, { "epoch": 18.001139601139602, "grad_norm": 0.18824172019958496, "learning_rate": 1.3268403670215228e-06, "loss": 0.566, "step": 15805 }, { "epoch": 18.0022792022792, "grad_norm": 0.2161741405725479, "learning_rate": 1.3253430820656665e-06, "loss": 0.5104, "step": 15806 }, { "epoch": 18.003418803418803, "grad_norm": 0.20008744299411774, "learning_rate": 1.3238466193957467e-06, "loss": 0.8661, "step": 15807 }, { "epoch": 18.004558404558406, "grad_norm": 0.18964844942092896, "learning_rate": 1.3223509790637411e-06, "loss": 0.6734, "step": 15808 }, { "epoch": 18.005698005698004, "grad_norm": 0.23607204854488373, "learning_rate": 1.3208561611216003e-06, "loss": 0.6106, "step": 15809 }, { "epoch": 18.006837606837607, "grad_norm": 0.20779858529567719, "learning_rate": 1.319362165621249e-06, "loss": 0.6712, "step": 15810 }, { "epoch": 18.00797720797721, "grad_norm": 0.20699867606163025, "learning_rate": 1.3178689926145627e-06, "loss": 0.5852, "step": 15811 }, { "epoch": 18.009116809116808, "grad_norm": 0.1768336296081543, "learning_rate": 1.3163766421534163e-06, "loss": 0.5923, "step": 15812 }, { "epoch": 18.01025641025641, "grad_norm": 0.18291160464286804, "learning_rate": 1.3148851142896434e-06, "loss": 0.8139, "step": 15813 }, { "epoch": 18.011396011396013, "grad_norm": 0.16323508322238922, "learning_rate": 1.3133944090750388e-06, "loss": 0.6649, "step": 15814 }, { "epoch": 18.01253561253561, "grad_norm": 0.2666858434677124, "learning_rate": 1.3119045265613889e-06, "loss": 0.5979, "step": 15815 }, { "epoch": 18.013675213675214, "grad_norm": 0.23866496980190277, "learning_rate": 1.3104154668004353e-06, "loss": 0.4313, "step": 15816 }, { "epoch": 18.014814814814816, "grad_norm": 0.26230302453041077, "learning_rate": 1.308927229843901e-06, "loss": 0.283, "step": 15817 }, { "epoch": 18.015954415954415, "grad_norm": 0.19195322692394257, "learning_rate": 1.307439815743472e-06, "loss": 0.6843, "step": 15818 }, { "epoch": 18.017094017094017, "grad_norm": 0.2123197317123413, "learning_rate": 1.3059532245508154e-06, "loss": 0.6105, "step": 15819 }, { "epoch": 18.01823361823362, "grad_norm": 0.17628802359104156, "learning_rate": 1.3044674563175597e-06, "loss": 0.8107, "step": 15820 }, { "epoch": 18.019373219373218, "grad_norm": 0.20673112571239471, "learning_rate": 1.3029825110953158e-06, "loss": 0.6474, "step": 15821 }, { "epoch": 18.02051282051282, "grad_norm": 0.304705411195755, "learning_rate": 1.3014983889356513e-06, "loss": 0.7788, "step": 15822 }, { "epoch": 18.021652421652423, "grad_norm": 0.2333732545375824, "learning_rate": 1.3000150898901192e-06, "loss": 0.5934, "step": 15823 }, { "epoch": 18.02279202279202, "grad_norm": 0.21590343117713928, "learning_rate": 1.298532614010245e-06, "loss": 0.6026, "step": 15824 }, { "epoch": 18.023931623931624, "grad_norm": 0.18734954297542572, "learning_rate": 1.2970509613475068e-06, "loss": 0.7734, "step": 15825 }, { "epoch": 18.025071225071226, "grad_norm": 0.20928458869457245, "learning_rate": 1.295570131953372e-06, "loss": 0.3744, "step": 15826 }, { "epoch": 18.026210826210825, "grad_norm": 0.20024080574512482, "learning_rate": 1.294090125879277e-06, "loss": 0.592, "step": 15827 }, { "epoch": 18.027350427350427, "grad_norm": 0.17945152521133423, "learning_rate": 1.2926109431766226e-06, "loss": 0.6757, "step": 15828 }, { "epoch": 18.02849002849003, "grad_norm": 0.17156749963760376, "learning_rate": 1.2911325838967842e-06, "loss": 0.7507, "step": 15829 }, { "epoch": 18.02962962962963, "grad_norm": 0.1979169100522995, "learning_rate": 1.2896550480911123e-06, "loss": 0.6886, "step": 15830 }, { "epoch": 18.03076923076923, "grad_norm": 0.19085948169231415, "learning_rate": 1.2881783358109217e-06, "loss": 0.7854, "step": 15831 }, { "epoch": 18.031908831908833, "grad_norm": 0.23247428238391876, "learning_rate": 1.28670244710751e-06, "loss": 0.6149, "step": 15832 }, { "epoch": 18.03304843304843, "grad_norm": 0.19736453890800476, "learning_rate": 1.2852273820321282e-06, "loss": 0.6389, "step": 15833 }, { "epoch": 18.034188034188034, "grad_norm": 0.21781659126281738, "learning_rate": 1.2837531406360181e-06, "loss": 0.475, "step": 15834 }, { "epoch": 18.035327635327636, "grad_norm": 0.2099636048078537, "learning_rate": 1.2822797229703837e-06, "loss": 0.8111, "step": 15835 }, { "epoch": 18.036467236467235, "grad_norm": 0.19832612574100494, "learning_rate": 1.2808071290863948e-06, "loss": 0.5439, "step": 15836 }, { "epoch": 18.037606837606837, "grad_norm": 0.19173333048820496, "learning_rate": 1.2793353590352052e-06, "loss": 0.7532, "step": 15837 }, { "epoch": 18.03874643874644, "grad_norm": 0.21488866209983826, "learning_rate": 1.2778644128679269e-06, "loss": 0.6298, "step": 15838 }, { "epoch": 18.03988603988604, "grad_norm": 0.1884155124425888, "learning_rate": 1.2763942906356601e-06, "loss": 0.5331, "step": 15839 }, { "epoch": 18.04102564102564, "grad_norm": 0.1631074994802475, "learning_rate": 1.2749249923894536e-06, "loss": 0.8695, "step": 15840 }, { "epoch": 18.042165242165243, "grad_norm": 0.19923914968967438, "learning_rate": 1.2734565181803492e-06, "loss": 0.6837, "step": 15841 }, { "epoch": 18.043304843304842, "grad_norm": 0.1920376569032669, "learning_rate": 1.2719888680593456e-06, "loss": 0.408, "step": 15842 }, { "epoch": 18.044444444444444, "grad_norm": 0.20901142060756683, "learning_rate": 1.270522042077424e-06, "loss": 0.456, "step": 15843 }, { "epoch": 18.045584045584047, "grad_norm": 0.1856703907251358, "learning_rate": 1.2690560402855238e-06, "loss": 0.7221, "step": 15844 }, { "epoch": 18.046723646723645, "grad_norm": 0.15371152758598328, "learning_rate": 1.2675908627345718e-06, "loss": 0.6246, "step": 15845 }, { "epoch": 18.047863247863248, "grad_norm": 0.16257111728191376, "learning_rate": 1.2661265094754516e-06, "loss": 0.7655, "step": 15846 }, { "epoch": 18.04900284900285, "grad_norm": 0.22813346982002258, "learning_rate": 1.264662980559031e-06, "loss": 0.4758, "step": 15847 }, { "epoch": 18.05014245014245, "grad_norm": 0.17259332537651062, "learning_rate": 1.2632002760361333e-06, "loss": 0.709, "step": 15848 }, { "epoch": 18.05128205128205, "grad_norm": 0.19387666881084442, "learning_rate": 1.2617383959575652e-06, "loss": 0.631, "step": 15849 }, { "epoch": 18.052421652421653, "grad_norm": 0.19855740666389465, "learning_rate": 1.2602773403741025e-06, "loss": 0.7316, "step": 15850 }, { "epoch": 18.053561253561252, "grad_norm": 0.1869831383228302, "learning_rate": 1.2588171093364991e-06, "loss": 0.6693, "step": 15851 }, { "epoch": 18.054700854700855, "grad_norm": 0.18067550659179688, "learning_rate": 1.2573577028954592e-06, "loss": 0.7039, "step": 15852 }, { "epoch": 18.055840455840457, "grad_norm": 0.1634341925382614, "learning_rate": 1.2558991211016808e-06, "loss": 0.5873, "step": 15853 }, { "epoch": 18.056980056980056, "grad_norm": 0.1706697791814804, "learning_rate": 1.2544413640058233e-06, "loss": 0.6032, "step": 15854 }, { "epoch": 18.058119658119658, "grad_norm": 0.20070824027061462, "learning_rate": 1.2529844316585159e-06, "loss": 0.5887, "step": 15855 }, { "epoch": 18.05925925925926, "grad_norm": 0.18240754306316376, "learning_rate": 1.2515283241103625e-06, "loss": 0.6586, "step": 15856 }, { "epoch": 18.06039886039886, "grad_norm": 0.2466755360364914, "learning_rate": 1.2500730414119422e-06, "loss": 0.6891, "step": 15857 }, { "epoch": 18.06153846153846, "grad_norm": 0.18024280667304993, "learning_rate": 1.2486185836138003e-06, "loss": 0.6849, "step": 15858 }, { "epoch": 18.062678062678064, "grad_norm": 0.19057750701904297, "learning_rate": 1.2471649507664469e-06, "loss": 0.6037, "step": 15859 }, { "epoch": 18.063817663817662, "grad_norm": 0.18585965037345886, "learning_rate": 1.2457121429203743e-06, "loss": 0.6832, "step": 15860 }, { "epoch": 18.064957264957265, "grad_norm": 0.18791721761226654, "learning_rate": 1.2442601601260457e-06, "loss": 0.8144, "step": 15861 }, { "epoch": 18.066096866096867, "grad_norm": 0.16386424005031586, "learning_rate": 1.2428090024338922e-06, "loss": 0.7695, "step": 15862 }, { "epoch": 18.067236467236466, "grad_norm": 0.22035211324691772, "learning_rate": 1.241358669894313e-06, "loss": 0.5671, "step": 15863 }, { "epoch": 18.068376068376068, "grad_norm": 0.18681329488754272, "learning_rate": 1.2399091625576814e-06, "loss": 0.5536, "step": 15864 }, { "epoch": 18.06951566951567, "grad_norm": 0.17148402333259583, "learning_rate": 1.238460480474349e-06, "loss": 0.5449, "step": 15865 }, { "epoch": 18.07065527065527, "grad_norm": 0.1965188980102539, "learning_rate": 1.237012623694625e-06, "loss": 0.654, "step": 15866 }, { "epoch": 18.07179487179487, "grad_norm": 0.19194090366363525, "learning_rate": 1.2355655922688004e-06, "loss": 0.5753, "step": 15867 }, { "epoch": 18.072934472934474, "grad_norm": 0.2225450724363327, "learning_rate": 1.2341193862471373e-06, "loss": 0.6352, "step": 15868 }, { "epoch": 18.074074074074073, "grad_norm": 0.17703381180763245, "learning_rate": 1.232674005679868e-06, "loss": 0.5726, "step": 15869 }, { "epoch": 18.075213675213675, "grad_norm": 0.18432699143886566, "learning_rate": 1.2312294506171855e-06, "loss": 0.7207, "step": 15870 }, { "epoch": 18.076353276353277, "grad_norm": 0.21086126565933228, "learning_rate": 1.2297857211092722e-06, "loss": 0.5844, "step": 15871 }, { "epoch": 18.077492877492876, "grad_norm": 0.21550235152244568, "learning_rate": 1.2283428172062656e-06, "loss": 0.6006, "step": 15872 }, { "epoch": 18.07863247863248, "grad_norm": 0.3013046085834503, "learning_rate": 1.2269007389582892e-06, "loss": 0.3329, "step": 15873 }, { "epoch": 18.07977207977208, "grad_norm": 0.1899062544107437, "learning_rate": 1.2254594864154256e-06, "loss": 0.701, "step": 15874 }, { "epoch": 18.08091168091168, "grad_norm": 0.20286870002746582, "learning_rate": 1.2240190596277317e-06, "loss": 0.4609, "step": 15875 }, { "epoch": 18.08205128205128, "grad_norm": 0.2039937525987625, "learning_rate": 1.22257945864524e-06, "loss": 0.469, "step": 15876 }, { "epoch": 18.083190883190884, "grad_norm": 0.20729224383831024, "learning_rate": 1.2211406835179517e-06, "loss": 0.8769, "step": 15877 }, { "epoch": 18.084330484330483, "grad_norm": 0.1667632758617401, "learning_rate": 1.2197027342958412e-06, "loss": 0.6936, "step": 15878 }, { "epoch": 18.085470085470085, "grad_norm": 0.23237772285938263, "learning_rate": 1.2182656110288516e-06, "loss": 0.4519, "step": 15879 }, { "epoch": 18.086609686609687, "grad_norm": 0.20046734809875488, "learning_rate": 1.2168293137668957e-06, "loss": 0.726, "step": 15880 }, { "epoch": 18.087749287749286, "grad_norm": 0.2263612598180771, "learning_rate": 1.2153938425598644e-06, "loss": 0.5326, "step": 15881 }, { "epoch": 18.08888888888889, "grad_norm": 0.1870729625225067, "learning_rate": 1.2139591974576092e-06, "loss": 0.6429, "step": 15882 }, { "epoch": 18.09002849002849, "grad_norm": 0.1688375025987625, "learning_rate": 1.2125253785099655e-06, "loss": 0.7038, "step": 15883 }, { "epoch": 18.09116809116809, "grad_norm": 0.15771566331386566, "learning_rate": 1.2110923857667295e-06, "loss": 0.5643, "step": 15884 }, { "epoch": 18.092307692307692, "grad_norm": 0.1544315665960312, "learning_rate": 1.209660219277678e-06, "loss": 0.7892, "step": 15885 }, { "epoch": 18.093447293447294, "grad_norm": 0.16746006906032562, "learning_rate": 1.208228879092549e-06, "loss": 0.8208, "step": 15886 }, { "epoch": 18.094586894586893, "grad_norm": 0.21814511716365814, "learning_rate": 1.2067983652610555e-06, "loss": 0.8497, "step": 15887 }, { "epoch": 18.095726495726495, "grad_norm": 0.20921318233013153, "learning_rate": 1.2053686778328883e-06, "loss": 0.4532, "step": 15888 }, { "epoch": 18.096866096866098, "grad_norm": 0.2083912193775177, "learning_rate": 1.203939816857702e-06, "loss": 0.6343, "step": 15889 }, { "epoch": 18.098005698005696, "grad_norm": 0.22173093259334564, "learning_rate": 1.2025117823851267e-06, "loss": 0.6096, "step": 15890 }, { "epoch": 18.0991452991453, "grad_norm": 0.24359305202960968, "learning_rate": 1.2010845744647587e-06, "loss": 0.4292, "step": 15891 }, { "epoch": 18.1002849002849, "grad_norm": 0.16970635950565338, "learning_rate": 1.1996581931461748e-06, "loss": 0.7707, "step": 15892 }, { "epoch": 18.1014245014245, "grad_norm": 0.16749995946884155, "learning_rate": 1.1982326384789106e-06, "loss": 0.8025, "step": 15893 }, { "epoch": 18.102564102564102, "grad_norm": 0.1994680017232895, "learning_rate": 1.196807910512479e-06, "loss": 0.6891, "step": 15894 }, { "epoch": 18.103703703703705, "grad_norm": 0.18482127785682678, "learning_rate": 1.1953840092963715e-06, "loss": 0.7436, "step": 15895 }, { "epoch": 18.104843304843303, "grad_norm": 0.1713782399892807, "learning_rate": 1.1939609348800423e-06, "loss": 0.603, "step": 15896 }, { "epoch": 18.105982905982906, "grad_norm": 0.1746223419904709, "learning_rate": 1.1925386873129135e-06, "loss": 0.5961, "step": 15897 }, { "epoch": 18.107122507122508, "grad_norm": 0.17016907036304474, "learning_rate": 1.1911172666443842e-06, "loss": 0.8368, "step": 15898 }, { "epoch": 18.108262108262107, "grad_norm": 0.20107758045196533, "learning_rate": 1.189696672923829e-06, "loss": 0.4812, "step": 15899 }, { "epoch": 18.10940170940171, "grad_norm": 0.19996224343776703, "learning_rate": 1.1882769062005888e-06, "loss": 0.7433, "step": 15900 }, { "epoch": 18.11054131054131, "grad_norm": 0.1794893443584442, "learning_rate": 1.1868579665239716e-06, "loss": 0.8003, "step": 15901 }, { "epoch": 18.11168091168091, "grad_norm": 0.17528975009918213, "learning_rate": 1.1854398539432626e-06, "loss": 0.556, "step": 15902 }, { "epoch": 18.112820512820512, "grad_norm": 0.20127664506435394, "learning_rate": 1.1840225685077227e-06, "loss": 0.5355, "step": 15903 }, { "epoch": 18.113960113960115, "grad_norm": 0.1681235134601593, "learning_rate": 1.1826061102665708e-06, "loss": 0.7987, "step": 15904 }, { "epoch": 18.115099715099714, "grad_norm": 0.17833998799324036, "learning_rate": 1.1811904792690065e-06, "loss": 0.7327, "step": 15905 }, { "epoch": 18.116239316239316, "grad_norm": 0.20956158638000488, "learning_rate": 1.1797756755641986e-06, "loss": 0.4477, "step": 15906 }, { "epoch": 18.117378917378918, "grad_norm": 0.17220225930213928, "learning_rate": 1.1783616992012913e-06, "loss": 0.947, "step": 15907 }, { "epoch": 18.118518518518517, "grad_norm": 0.2909105122089386, "learning_rate": 1.176948550229387e-06, "loss": 0.5673, "step": 15908 }, { "epoch": 18.11965811965812, "grad_norm": 0.16907162964344025, "learning_rate": 1.1755362286975741e-06, "loss": 0.7082, "step": 15909 }, { "epoch": 18.12079772079772, "grad_norm": 0.221530020236969, "learning_rate": 1.174124734654905e-06, "loss": 0.6505, "step": 15910 }, { "epoch": 18.12193732193732, "grad_norm": 0.15925301611423492, "learning_rate": 1.1727140681504045e-06, "loss": 0.5782, "step": 15911 }, { "epoch": 18.123076923076923, "grad_norm": 0.22340020537376404, "learning_rate": 1.1713042292330722e-06, "loss": 0.4607, "step": 15912 }, { "epoch": 18.124216524216525, "grad_norm": 0.19746434688568115, "learning_rate": 1.1698952179518718e-06, "loss": 0.4612, "step": 15913 }, { "epoch": 18.125356125356124, "grad_norm": 0.169794499874115, "learning_rate": 1.1684870343557446e-06, "loss": 0.9059, "step": 15914 }, { "epoch": 18.126495726495726, "grad_norm": 0.19517719745635986, "learning_rate": 1.167079678493599e-06, "loss": 0.4896, "step": 15915 }, { "epoch": 18.12763532763533, "grad_norm": 0.192497119307518, "learning_rate": 1.1656731504143176e-06, "loss": 0.6869, "step": 15916 }, { "epoch": 18.128774928774927, "grad_norm": 0.18236230313777924, "learning_rate": 1.1642674501667506e-06, "loss": 0.6197, "step": 15917 }, { "epoch": 18.12991452991453, "grad_norm": 0.23407305777072906, "learning_rate": 1.1628625777997283e-06, "loss": 0.6292, "step": 15918 }, { "epoch": 18.13105413105413, "grad_norm": 0.18812473118305206, "learning_rate": 1.1614585333620365e-06, "loss": 0.8725, "step": 15919 }, { "epoch": 18.13219373219373, "grad_norm": 0.2166595607995987, "learning_rate": 1.1600553169024448e-06, "loss": 0.472, "step": 15920 }, { "epoch": 18.133333333333333, "grad_norm": 0.20252515375614166, "learning_rate": 1.1586529284696918e-06, "loss": 0.4766, "step": 15921 }, { "epoch": 18.134472934472935, "grad_norm": 0.1772758662700653, "learning_rate": 1.1572513681124914e-06, "loss": 0.5942, "step": 15922 }, { "epoch": 18.135612535612534, "grad_norm": 0.19498567283153534, "learning_rate": 1.1558506358795156e-06, "loss": 0.4519, "step": 15923 }, { "epoch": 18.136752136752136, "grad_norm": 0.19647033512592316, "learning_rate": 1.1544507318194203e-06, "loss": 0.5228, "step": 15924 }, { "epoch": 18.13789173789174, "grad_norm": 0.19874857366085052, "learning_rate": 1.1530516559808246e-06, "loss": 0.5935, "step": 15925 }, { "epoch": 18.139031339031337, "grad_norm": 0.22173920273780823, "learning_rate": 1.1516534084123288e-06, "loss": 0.56, "step": 15926 }, { "epoch": 18.14017094017094, "grad_norm": 0.21998946368694305, "learning_rate": 1.1502559891624882e-06, "loss": 0.7126, "step": 15927 }, { "epoch": 18.141310541310542, "grad_norm": 0.20598110556602478, "learning_rate": 1.1488593982798474e-06, "loss": 0.5714, "step": 15928 }, { "epoch": 18.14245014245014, "grad_norm": 0.26100605726242065, "learning_rate": 1.1474636358129066e-06, "loss": 0.1406, "step": 15929 }, { "epoch": 18.143589743589743, "grad_norm": 0.24370066821575165, "learning_rate": 1.1460687018101546e-06, "loss": 0.503, "step": 15930 }, { "epoch": 18.144729344729345, "grad_norm": 0.20355720818042755, "learning_rate": 1.1446745963200306e-06, "loss": 0.6663, "step": 15931 }, { "epoch": 18.145868945868944, "grad_norm": 0.1552165448665619, "learning_rate": 1.1432813193909597e-06, "loss": 0.3006, "step": 15932 }, { "epoch": 18.147008547008546, "grad_norm": 0.1596253663301468, "learning_rate": 1.1418888710713394e-06, "loss": 0.6474, "step": 15933 }, { "epoch": 18.14814814814815, "grad_norm": 0.19343087077140808, "learning_rate": 1.1404972514095252e-06, "loss": 0.5742, "step": 15934 }, { "epoch": 18.149287749287748, "grad_norm": 0.17367888987064362, "learning_rate": 1.1391064604538538e-06, "loss": 0.6066, "step": 15935 }, { "epoch": 18.15042735042735, "grad_norm": 0.23630227148532867, "learning_rate": 1.1377164982526333e-06, "loss": 0.7701, "step": 15936 }, { "epoch": 18.151566951566952, "grad_norm": 0.1966976374387741, "learning_rate": 1.136327364854145e-06, "loss": 0.7617, "step": 15937 }, { "epoch": 18.15270655270655, "grad_norm": 0.15290910005569458, "learning_rate": 1.1349390603066307e-06, "loss": 0.7526, "step": 15938 }, { "epoch": 18.153846153846153, "grad_norm": 0.22119775414466858, "learning_rate": 1.13355158465831e-06, "loss": 0.5943, "step": 15939 }, { "epoch": 18.154985754985756, "grad_norm": 0.21148540079593658, "learning_rate": 1.1321649379573752e-06, "loss": 0.4899, "step": 15940 }, { "epoch": 18.156125356125354, "grad_norm": 0.21941092610359192, "learning_rate": 1.130779120251993e-06, "loss": 0.558, "step": 15941 }, { "epoch": 18.157264957264957, "grad_norm": 0.19267788529396057, "learning_rate": 1.129394131590289e-06, "loss": 0.4554, "step": 15942 }, { "epoch": 18.15840455840456, "grad_norm": 0.18841686844825745, "learning_rate": 1.128009972020369e-06, "loss": 0.7152, "step": 15943 }, { "epoch": 18.159544159544158, "grad_norm": 0.19724957644939423, "learning_rate": 1.126626641590317e-06, "loss": 0.7831, "step": 15944 }, { "epoch": 18.16068376068376, "grad_norm": 0.20876359939575195, "learning_rate": 1.1252441403481696e-06, "loss": 0.5887, "step": 15945 }, { "epoch": 18.161823361823362, "grad_norm": 0.18697749078273773, "learning_rate": 1.123862468341949e-06, "loss": 0.7821, "step": 15946 }, { "epoch": 18.162962962962965, "grad_norm": 0.21110433340072632, "learning_rate": 1.1224816256196453e-06, "loss": 0.5808, "step": 15947 }, { "epoch": 18.164102564102564, "grad_norm": 0.17982840538024902, "learning_rate": 1.1211016122292222e-06, "loss": 0.5423, "step": 15948 }, { "epoch": 18.165242165242166, "grad_norm": 0.15705281496047974, "learning_rate": 1.119722428218603e-06, "loss": 0.9529, "step": 15949 }, { "epoch": 18.166381766381768, "grad_norm": 0.16538876295089722, "learning_rate": 1.1183440736356966e-06, "loss": 0.5573, "step": 15950 }, { "epoch": 18.167521367521367, "grad_norm": 0.2571662366390228, "learning_rate": 1.1169665485283726e-06, "loss": 0.7858, "step": 15951 }, { "epoch": 18.16866096866097, "grad_norm": 0.16697154939174652, "learning_rate": 1.115589852944482e-06, "loss": 0.8273, "step": 15952 }, { "epoch": 18.16980056980057, "grad_norm": 0.20390774309635162, "learning_rate": 1.1142139869318364e-06, "loss": 0.7989, "step": 15953 }, { "epoch": 18.17094017094017, "grad_norm": 0.17538999021053314, "learning_rate": 1.1128389505382225e-06, "loss": 0.5698, "step": 15954 }, { "epoch": 18.172079772079773, "grad_norm": 0.18313254415988922, "learning_rate": 1.1114647438114078e-06, "loss": 0.5203, "step": 15955 }, { "epoch": 18.173219373219375, "grad_norm": 0.17053240537643433, "learning_rate": 1.1100913667991125e-06, "loss": 0.591, "step": 15956 }, { "epoch": 18.174358974358974, "grad_norm": 0.19222436845302582, "learning_rate": 1.1087188195490428e-06, "loss": 0.7421, "step": 15957 }, { "epoch": 18.175498575498576, "grad_norm": 0.1683763563632965, "learning_rate": 1.1073471021088689e-06, "loss": 0.6887, "step": 15958 }, { "epoch": 18.17663817663818, "grad_norm": 0.20859429240226746, "learning_rate": 1.105976214526236e-06, "loss": 0.4182, "step": 15959 }, { "epoch": 18.177777777777777, "grad_norm": 0.20938421785831451, "learning_rate": 1.1046061568487586e-06, "loss": 0.4057, "step": 15960 }, { "epoch": 18.17891737891738, "grad_norm": 0.18100666999816895, "learning_rate": 1.1032369291240214e-06, "loss": 0.723, "step": 15961 }, { "epoch": 18.180056980056982, "grad_norm": 0.21834257245063782, "learning_rate": 1.1018685313995802e-06, "loss": 0.6354, "step": 15962 }, { "epoch": 18.18119658119658, "grad_norm": 0.19915148615837097, "learning_rate": 1.1005009637229669e-06, "loss": 0.6653, "step": 15963 }, { "epoch": 18.182336182336183, "grad_norm": 0.21545255184173584, "learning_rate": 1.099134226141682e-06, "loss": 0.4773, "step": 15964 }, { "epoch": 18.183475783475785, "grad_norm": 0.1650085300207138, "learning_rate": 1.097768318703185e-06, "loss": 0.5612, "step": 15965 }, { "epoch": 18.184615384615384, "grad_norm": 0.1827668994665146, "learning_rate": 1.0964032414549298e-06, "loss": 0.6094, "step": 15966 }, { "epoch": 18.185754985754986, "grad_norm": 0.217435821890831, "learning_rate": 1.095038994444328e-06, "loss": 0.6947, "step": 15967 }, { "epoch": 18.18689458689459, "grad_norm": 0.16685186326503754, "learning_rate": 1.0936755777187585e-06, "loss": 0.9233, "step": 15968 }, { "epoch": 18.188034188034187, "grad_norm": 0.1902848482131958, "learning_rate": 1.092312991325578e-06, "loss": 0.5463, "step": 15969 }, { "epoch": 18.18917378917379, "grad_norm": 0.2106180489063263, "learning_rate": 1.0909512353121154e-06, "loss": 0.4301, "step": 15970 }, { "epoch": 18.190313390313392, "grad_norm": 0.2520906925201416, "learning_rate": 1.0895903097256688e-06, "loss": 0.3797, "step": 15971 }, { "epoch": 18.19145299145299, "grad_norm": 0.20214390754699707, "learning_rate": 1.0882302146135004e-06, "loss": 0.6817, "step": 15972 }, { "epoch": 18.192592592592593, "grad_norm": 0.1918715387582779, "learning_rate": 1.0868709500228557e-06, "loss": 0.6998, "step": 15973 }, { "epoch": 18.193732193732195, "grad_norm": 0.22887898981571198, "learning_rate": 1.0855125160009416e-06, "loss": 0.6342, "step": 15974 }, { "epoch": 18.194871794871794, "grad_norm": 0.21144236624240875, "learning_rate": 1.084154912594948e-06, "loss": 0.4033, "step": 15975 }, { "epoch": 18.196011396011396, "grad_norm": 0.2070854902267456, "learning_rate": 1.0827981398520177e-06, "loss": 0.7286, "step": 15976 }, { "epoch": 18.197150997151, "grad_norm": 0.20363754034042358, "learning_rate": 1.0814421978192825e-06, "loss": 0.8095, "step": 15977 }, { "epoch": 18.198290598290598, "grad_norm": 0.22051745653152466, "learning_rate": 1.0800870865438407e-06, "loss": 0.7489, "step": 15978 }, { "epoch": 18.1994301994302, "grad_norm": 0.2056119292974472, "learning_rate": 1.0787328060727493e-06, "loss": 0.5298, "step": 15979 }, { "epoch": 18.200569800569802, "grad_norm": 0.1733495444059372, "learning_rate": 1.077379356453051e-06, "loss": 0.6862, "step": 15980 }, { "epoch": 18.2017094017094, "grad_norm": 0.19605806469917297, "learning_rate": 1.0760267377317556e-06, "loss": 0.6083, "step": 15981 }, { "epoch": 18.202849002849003, "grad_norm": 0.27621230483055115, "learning_rate": 1.0746749499558478e-06, "loss": 0.6252, "step": 15982 }, { "epoch": 18.203988603988606, "grad_norm": 0.22263580560684204, "learning_rate": 1.0733239931722705e-06, "loss": 0.7787, "step": 15983 }, { "epoch": 18.205128205128204, "grad_norm": 0.19939032196998596, "learning_rate": 1.0719738674279473e-06, "loss": 0.5776, "step": 15984 }, { "epoch": 18.206267806267807, "grad_norm": 0.27290910482406616, "learning_rate": 1.070624572769774e-06, "loss": 0.5814, "step": 15985 }, { "epoch": 18.20740740740741, "grad_norm": 0.16844192147254944, "learning_rate": 1.0692761092446213e-06, "loss": 0.6483, "step": 15986 }, { "epoch": 18.208547008547008, "grad_norm": 0.19200433790683746, "learning_rate": 1.0679284768993103e-06, "loss": 0.7458, "step": 15987 }, { "epoch": 18.20968660968661, "grad_norm": 0.22204038500785828, "learning_rate": 1.0665816757806618e-06, "loss": 0.5282, "step": 15988 }, { "epoch": 18.210826210826212, "grad_norm": 0.15707382559776306, "learning_rate": 1.0652357059354494e-06, "loss": 0.8963, "step": 15989 }, { "epoch": 18.21196581196581, "grad_norm": 0.23402263224124908, "learning_rate": 1.0638905674104193e-06, "loss": 0.8085, "step": 15990 }, { "epoch": 18.213105413105414, "grad_norm": 0.1872255951166153, "learning_rate": 1.062546260252295e-06, "loss": 0.701, "step": 15991 }, { "epoch": 18.214245014245016, "grad_norm": 0.2242417186498642, "learning_rate": 1.0612027845077698e-06, "loss": 0.7828, "step": 15992 }, { "epoch": 18.215384615384615, "grad_norm": 0.1795329749584198, "learning_rate": 1.0598601402235037e-06, "loss": 0.6698, "step": 15993 }, { "epoch": 18.216524216524217, "grad_norm": 0.24393922090530396, "learning_rate": 1.0585183274461287e-06, "loss": 0.7277, "step": 15994 }, { "epoch": 18.21766381766382, "grad_norm": 0.2110724300146103, "learning_rate": 1.0571773462222517e-06, "loss": 0.6454, "step": 15995 }, { "epoch": 18.218803418803418, "grad_norm": 0.21045121550559998, "learning_rate": 1.055837196598447e-06, "loss": 0.6849, "step": 15996 }, { "epoch": 18.21994301994302, "grad_norm": 0.1899309754371643, "learning_rate": 1.0544978786212662e-06, "loss": 0.7113, "step": 15997 }, { "epoch": 18.221082621082623, "grad_norm": 0.19039730727672577, "learning_rate": 1.0531593923372218e-06, "loss": 0.7073, "step": 15998 }, { "epoch": 18.22222222222222, "grad_norm": 0.16976770758628845, "learning_rate": 1.0518217377928046e-06, "loss": 0.7008, "step": 15999 }, { "epoch": 18.223361823361824, "grad_norm": 0.17399312555789948, "learning_rate": 1.0504849150344776e-06, "loss": 0.6882, "step": 16000 }, { "epoch": 18.224501424501426, "grad_norm": 0.19433003664016724, "learning_rate": 1.0491489241086754e-06, "loss": 0.7706, "step": 16001 }, { "epoch": 18.225641025641025, "grad_norm": 0.20473644137382507, "learning_rate": 1.047813765061792e-06, "loss": 0.7283, "step": 16002 }, { "epoch": 18.226780626780627, "grad_norm": 0.3713143467903137, "learning_rate": 1.0464794379402065e-06, "loss": 0.7069, "step": 16003 }, { "epoch": 18.22792022792023, "grad_norm": 0.21117597818374634, "learning_rate": 1.0451459427902599e-06, "loss": 0.5685, "step": 16004 }, { "epoch": 18.22905982905983, "grad_norm": 0.21739910542964935, "learning_rate": 1.0438132796582762e-06, "loss": 0.6592, "step": 16005 }, { "epoch": 18.23019943019943, "grad_norm": 0.15645365417003632, "learning_rate": 1.0424814485905321e-06, "loss": 0.7862, "step": 16006 }, { "epoch": 18.231339031339033, "grad_norm": 0.24789197742938995, "learning_rate": 1.0411504496332935e-06, "loss": 0.3764, "step": 16007 }, { "epoch": 18.23247863247863, "grad_norm": 0.1837555170059204, "learning_rate": 1.0398202828327847e-06, "loss": 0.5905, "step": 16008 }, { "epoch": 18.233618233618234, "grad_norm": 0.16961321234703064, "learning_rate": 1.0384909482352074e-06, "loss": 0.7501, "step": 16009 }, { "epoch": 18.234757834757836, "grad_norm": 0.16449250280857086, "learning_rate": 1.0371624458867357e-06, "loss": 0.7763, "step": 16010 }, { "epoch": 18.235897435897435, "grad_norm": 0.17696818709373474, "learning_rate": 1.0358347758335106e-06, "loss": 0.6562, "step": 16011 }, { "epoch": 18.237037037037037, "grad_norm": 0.2112455666065216, "learning_rate": 1.0345079381216483e-06, "loss": 0.686, "step": 16012 }, { "epoch": 18.23817663817664, "grad_norm": 0.1881578415632248, "learning_rate": 1.0331819327972253e-06, "loss": 0.8405, "step": 16013 }, { "epoch": 18.23931623931624, "grad_norm": 0.21206188201904297, "learning_rate": 1.0318567599063051e-06, "loss": 0.545, "step": 16014 }, { "epoch": 18.24045584045584, "grad_norm": 0.21515263617038727, "learning_rate": 1.0305324194949117e-06, "loss": 0.576, "step": 16015 }, { "epoch": 18.241595441595443, "grad_norm": 0.1821409910917282, "learning_rate": 1.0292089116090475e-06, "loss": 0.6085, "step": 16016 }, { "epoch": 18.242735042735042, "grad_norm": 0.22497190535068512, "learning_rate": 1.0278862362946728e-06, "loss": 0.5394, "step": 16017 }, { "epoch": 18.243874643874644, "grad_norm": 0.1855476349592209, "learning_rate": 1.0265643935977342e-06, "loss": 0.7609, "step": 16018 }, { "epoch": 18.245014245014247, "grad_norm": 0.2256733924150467, "learning_rate": 1.0252433835641422e-06, "loss": 0.7753, "step": 16019 }, { "epoch": 18.246153846153845, "grad_norm": 0.17753781378269196, "learning_rate": 1.0239232062397797e-06, "loss": 0.7024, "step": 16020 }, { "epoch": 18.247293447293448, "grad_norm": 0.18134057521820068, "learning_rate": 1.022603861670493e-06, "loss": 0.7823, "step": 16021 }, { "epoch": 18.24843304843305, "grad_norm": 0.18681129813194275, "learning_rate": 1.0212853499021153e-06, "loss": 0.7798, "step": 16022 }, { "epoch": 18.24957264957265, "grad_norm": 0.24428966641426086, "learning_rate": 1.019967670980443e-06, "loss": 0.6506, "step": 16023 }, { "epoch": 18.25071225071225, "grad_norm": 0.16551734507083893, "learning_rate": 1.0186508249512339e-06, "loss": 0.5754, "step": 16024 }, { "epoch": 18.251851851851853, "grad_norm": 0.21288277208805084, "learning_rate": 1.0173348118602322e-06, "loss": 0.544, "step": 16025 }, { "epoch": 18.252991452991452, "grad_norm": 0.2549050748348236, "learning_rate": 1.0160196317531428e-06, "loss": 0.4273, "step": 16026 }, { "epoch": 18.254131054131054, "grad_norm": 0.1870443969964981, "learning_rate": 1.0147052846756544e-06, "loss": 0.5073, "step": 16027 }, { "epoch": 18.255270655270657, "grad_norm": 0.22737272083759308, "learning_rate": 1.0133917706734053e-06, "loss": 0.747, "step": 16028 }, { "epoch": 18.256410256410255, "grad_norm": 0.164210706949234, "learning_rate": 1.0120790897920256e-06, "loss": 0.5032, "step": 16029 }, { "epoch": 18.257549857549858, "grad_norm": 0.20671352744102478, "learning_rate": 1.0107672420771042e-06, "loss": 0.5299, "step": 16030 }, { "epoch": 18.25868945868946, "grad_norm": 0.18295066058635712, "learning_rate": 1.0094562275742125e-06, "loss": 0.8214, "step": 16031 }, { "epoch": 18.25982905982906, "grad_norm": 0.1918344795703888, "learning_rate": 1.0081460463288727e-06, "loss": 0.6617, "step": 16032 }, { "epoch": 18.26096866096866, "grad_norm": 0.19031167030334473, "learning_rate": 1.0068366983866013e-06, "loss": 0.5399, "step": 16033 }, { "epoch": 18.262108262108264, "grad_norm": 0.18421906232833862, "learning_rate": 1.0055281837928754e-06, "loss": 0.7753, "step": 16034 }, { "epoch": 18.263247863247862, "grad_norm": 0.18231862783432007, "learning_rate": 1.0042205025931396e-06, "loss": 0.8264, "step": 16035 }, { "epoch": 18.264387464387465, "grad_norm": 0.27754512429237366, "learning_rate": 1.0029136548328127e-06, "loss": 0.4332, "step": 16036 }, { "epoch": 18.265527065527067, "grad_norm": 0.25456663966178894, "learning_rate": 1.0016076405572865e-06, "loss": 0.5675, "step": 16037 }, { "epoch": 18.266666666666666, "grad_norm": 0.2550671100616455, "learning_rate": 1.0003024598119248e-06, "loss": 0.5403, "step": 16038 }, { "epoch": 18.267806267806268, "grad_norm": 0.23232153058052063, "learning_rate": 9.989981126420605e-07, "loss": 0.3538, "step": 16039 }, { "epoch": 18.26894586894587, "grad_norm": 0.1822681427001953, "learning_rate": 9.976945990929909e-07, "loss": 0.6123, "step": 16040 }, { "epoch": 18.27008547008547, "grad_norm": 0.20538312196731567, "learning_rate": 9.963919192099963e-07, "loss": 0.8579, "step": 16041 }, { "epoch": 18.27122507122507, "grad_norm": 0.19148221611976624, "learning_rate": 9.950900730383184e-07, "loss": 0.415, "step": 16042 }, { "epoch": 18.272364672364674, "grad_norm": 0.199871227145195, "learning_rate": 9.937890606231764e-07, "loss": 0.6397, "step": 16043 }, { "epoch": 18.273504273504273, "grad_norm": 0.19247028231620789, "learning_rate": 9.924888820097567e-07, "loss": 0.606, "step": 16044 }, { "epoch": 18.274643874643875, "grad_norm": 0.20238713920116425, "learning_rate": 9.911895372432227e-07, "loss": 0.5232, "step": 16045 }, { "epoch": 18.275783475783477, "grad_norm": 0.19953593611717224, "learning_rate": 9.898910263687e-07, "loss": 0.8065, "step": 16046 }, { "epoch": 18.276923076923076, "grad_norm": 0.1754782646894455, "learning_rate": 9.885933494312883e-07, "loss": 0.443, "step": 16047 }, { "epoch": 18.27806267806268, "grad_norm": 0.20692458748817444, "learning_rate": 9.872965064760597e-07, "loss": 0.5389, "step": 16048 }, { "epoch": 18.27920227920228, "grad_norm": 0.25308653712272644, "learning_rate": 9.860004975480618e-07, "loss": 0.8961, "step": 16049 }, { "epoch": 18.28034188034188, "grad_norm": 0.19349783658981323, "learning_rate": 9.847053226923058e-07, "loss": 0.6028, "step": 16050 }, { "epoch": 18.28148148148148, "grad_norm": 0.20703546702861786, "learning_rate": 9.83410981953775e-07, "loss": 0.6376, "step": 16051 }, { "epoch": 18.282621082621084, "grad_norm": 0.19268111884593964, "learning_rate": 9.821174753774254e-07, "loss": 0.574, "step": 16052 }, { "epoch": 18.283760683760683, "grad_norm": 0.19388440251350403, "learning_rate": 9.808248030081845e-07, "loss": 0.573, "step": 16053 }, { "epoch": 18.284900284900285, "grad_norm": 0.23397889733314514, "learning_rate": 9.79532964890953e-07, "loss": 0.806, "step": 16054 }, { "epoch": 18.286039886039887, "grad_norm": 0.23620925843715668, "learning_rate": 9.782419610705973e-07, "loss": 0.6473, "step": 16055 }, { "epoch": 18.287179487179486, "grad_norm": 0.18553614616394043, "learning_rate": 9.769517915919596e-07, "loss": 0.7546, "step": 16056 }, { "epoch": 18.28831908831909, "grad_norm": 0.17249667644500732, "learning_rate": 9.756624564998513e-07, "loss": 0.7413, "step": 16057 }, { "epoch": 18.28945868945869, "grad_norm": 0.19826942682266235, "learning_rate": 9.743739558390503e-07, "loss": 0.809, "step": 16058 }, { "epoch": 18.29059829059829, "grad_norm": 0.18703773617744446, "learning_rate": 9.730862896543124e-07, "loss": 0.6999, "step": 16059 }, { "epoch": 18.291737891737892, "grad_norm": 0.1930650770664215, "learning_rate": 9.717994579903634e-07, "loss": 0.4993, "step": 16060 }, { "epoch": 18.292877492877494, "grad_norm": 0.18729305267333984, "learning_rate": 9.705134608918975e-07, "loss": 0.7114, "step": 16061 }, { "epoch": 18.294017094017093, "grad_norm": 0.22105178236961365, "learning_rate": 9.692282984035794e-07, "loss": 0.5916, "step": 16062 }, { "epoch": 18.295156695156695, "grad_norm": 0.23766843974590302, "learning_rate": 9.679439705700482e-07, "loss": 0.637, "step": 16063 }, { "epoch": 18.296296296296298, "grad_norm": 0.1865086406469345, "learning_rate": 9.666604774359101e-07, "loss": 0.8526, "step": 16064 }, { "epoch": 18.297435897435896, "grad_norm": 0.19988276064395905, "learning_rate": 9.65377819045743e-07, "loss": 0.8234, "step": 16065 }, { "epoch": 18.2985754985755, "grad_norm": 0.17610132694244385, "learning_rate": 9.640959954441032e-07, "loss": 0.6586, "step": 16066 }, { "epoch": 18.2997150997151, "grad_norm": 0.21880441904067993, "learning_rate": 9.628150066755076e-07, "loss": 0.7334, "step": 16067 }, { "epoch": 18.3008547008547, "grad_norm": 0.19010087847709656, "learning_rate": 9.615348527844514e-07, "loss": 0.8148, "step": 16068 }, { "epoch": 18.301994301994302, "grad_norm": 0.21880176663398743, "learning_rate": 9.602555338153934e-07, "loss": 0.6192, "step": 16069 }, { "epoch": 18.303133903133904, "grad_norm": 0.20157532393932343, "learning_rate": 9.589770498127704e-07, "loss": 0.7407, "step": 16070 }, { "epoch": 18.304273504273503, "grad_norm": 0.1672639548778534, "learning_rate": 9.576994008209883e-07, "loss": 0.4369, "step": 16071 }, { "epoch": 18.305413105413106, "grad_norm": 0.1705726683139801, "learning_rate": 9.564225868844257e-07, "loss": 0.6773, "step": 16072 }, { "epoch": 18.306552706552708, "grad_norm": 0.18530426919460297, "learning_rate": 9.55146608047422e-07, "loss": 0.5391, "step": 16073 }, { "epoch": 18.307692307692307, "grad_norm": 0.25975337624549866, "learning_rate": 9.538714643543e-07, "loss": 0.6085, "step": 16074 }, { "epoch": 18.30883190883191, "grad_norm": 0.2506848871707916, "learning_rate": 9.525971558493524e-07, "loss": 0.5404, "step": 16075 }, { "epoch": 18.30997150997151, "grad_norm": 0.1733851432800293, "learning_rate": 9.513236825768323e-07, "loss": 0.5343, "step": 16076 }, { "epoch": 18.31111111111111, "grad_norm": 0.17692172527313232, "learning_rate": 9.500510445809768e-07, "loss": 0.5883, "step": 16077 }, { "epoch": 18.312250712250712, "grad_norm": 0.16592204570770264, "learning_rate": 9.487792419059865e-07, "loss": 0.8538, "step": 16078 }, { "epoch": 18.313390313390315, "grad_norm": 0.18896038830280304, "learning_rate": 9.475082745960345e-07, "loss": 0.7501, "step": 16079 }, { "epoch": 18.314529914529913, "grad_norm": 0.21196994185447693, "learning_rate": 9.46238142695266e-07, "loss": 0.7132, "step": 16080 }, { "epoch": 18.315669515669516, "grad_norm": 0.18229085206985474, "learning_rate": 9.449688462477929e-07, "loss": 0.641, "step": 16081 }, { "epoch": 18.316809116809118, "grad_norm": 0.2193417251110077, "learning_rate": 9.437003852977022e-07, "loss": 0.7161, "step": 16082 }, { "epoch": 18.317948717948717, "grad_norm": 0.2101801633834839, "learning_rate": 9.424327598890531e-07, "loss": 0.5073, "step": 16083 }, { "epoch": 18.31908831908832, "grad_norm": 0.21685943007469177, "learning_rate": 9.41165970065877e-07, "loss": 0.5648, "step": 16084 }, { "epoch": 18.32022792022792, "grad_norm": 0.23681126534938812, "learning_rate": 9.399000158721638e-07, "loss": 0.6195, "step": 16085 }, { "epoch": 18.32136752136752, "grad_norm": 0.16676515340805054, "learning_rate": 9.386348973518893e-07, "loss": 0.8512, "step": 16086 }, { "epoch": 18.322507122507123, "grad_norm": 0.20879307389259338, "learning_rate": 9.373706145489935e-07, "loss": 0.5226, "step": 16087 }, { "epoch": 18.323646723646725, "grad_norm": 0.2310570925474167, "learning_rate": 9.361071675073912e-07, "loss": 0.5879, "step": 16088 }, { "epoch": 18.324786324786324, "grad_norm": 0.25332340598106384, "learning_rate": 9.348445562709613e-07, "loss": 0.4888, "step": 16089 }, { "epoch": 18.325925925925926, "grad_norm": 0.18049627542495728, "learning_rate": 9.335827808835574e-07, "loss": 0.816, "step": 16090 }, { "epoch": 18.32706552706553, "grad_norm": 0.2308613657951355, "learning_rate": 9.323218413890111e-07, "loss": 0.6842, "step": 16091 }, { "epoch": 18.328205128205127, "grad_norm": 0.1703282594680786, "learning_rate": 9.310617378311126e-07, "loss": 0.7388, "step": 16092 }, { "epoch": 18.32934472934473, "grad_norm": 0.20733553171157837, "learning_rate": 9.298024702536268e-07, "loss": 0.515, "step": 16093 }, { "epoch": 18.33048433048433, "grad_norm": 0.20539535582065582, "learning_rate": 9.285440387002964e-07, "loss": 0.7314, "step": 16094 }, { "epoch": 18.33162393162393, "grad_norm": 0.2035544216632843, "learning_rate": 9.272864432148282e-07, "loss": 0.8127, "step": 16095 }, { "epoch": 18.332763532763533, "grad_norm": 0.15647606551647186, "learning_rate": 9.260296838409038e-07, "loss": 0.7105, "step": 16096 }, { "epoch": 18.333903133903135, "grad_norm": 0.19675278663635254, "learning_rate": 9.24773760622169e-07, "loss": 0.604, "step": 16097 }, { "epoch": 18.335042735042734, "grad_norm": 0.2259322851896286, "learning_rate": 9.2351867360225e-07, "loss": 0.5047, "step": 16098 }, { "epoch": 18.336182336182336, "grad_norm": 0.17692503333091736, "learning_rate": 9.222644228247368e-07, "loss": 0.7321, "step": 16099 }, { "epoch": 18.33732193732194, "grad_norm": 0.18350818753242493, "learning_rate": 9.210110083331947e-07, "loss": 0.8457, "step": 16100 }, { "epoch": 18.338461538461537, "grad_norm": 0.19642199575901031, "learning_rate": 9.197584301711582e-07, "loss": 0.7522, "step": 16101 }, { "epoch": 18.33960113960114, "grad_norm": 0.22106897830963135, "learning_rate": 9.185066883821341e-07, "loss": 0.6094, "step": 16102 }, { "epoch": 18.340740740740742, "grad_norm": 0.1990758329629898, "learning_rate": 9.172557830095935e-07, "loss": 0.5999, "step": 16103 }, { "epoch": 18.34188034188034, "grad_norm": 0.17298246920108795, "learning_rate": 9.160057140969902e-07, "loss": 0.8371, "step": 16104 }, { "epoch": 18.343019943019943, "grad_norm": 0.17569628357887268, "learning_rate": 9.147564816877369e-07, "loss": 0.6274, "step": 16105 }, { "epoch": 18.344159544159545, "grad_norm": 0.17075328528881073, "learning_rate": 9.13508085825232e-07, "loss": 0.7926, "step": 16106 }, { "epoch": 18.345299145299144, "grad_norm": 0.19348692893981934, "learning_rate": 9.122605265528244e-07, "loss": 0.6492, "step": 16107 }, { "epoch": 18.346438746438746, "grad_norm": 0.17550738155841827, "learning_rate": 9.110138039138488e-07, "loss": 0.6892, "step": 16108 }, { "epoch": 18.34757834757835, "grad_norm": 0.1994296759366989, "learning_rate": 9.097679179516095e-07, "loss": 0.6749, "step": 16109 }, { "epoch": 18.348717948717947, "grad_norm": 0.25115880370140076, "learning_rate": 9.085228687093799e-07, "loss": 0.6444, "step": 16110 }, { "epoch": 18.34985754985755, "grad_norm": 0.2618429958820343, "learning_rate": 9.072786562304036e-07, "loss": 0.4291, "step": 16111 }, { "epoch": 18.350997150997152, "grad_norm": 0.2156478464603424, "learning_rate": 9.060352805578931e-07, "loss": 0.6608, "step": 16112 }, { "epoch": 18.35213675213675, "grad_norm": 0.18937750160694122, "learning_rate": 9.047927417350388e-07, "loss": 0.6339, "step": 16113 }, { "epoch": 18.353276353276353, "grad_norm": 0.18046444654464722, "learning_rate": 9.035510398049923e-07, "loss": 0.6544, "step": 16114 }, { "epoch": 18.354415954415956, "grad_norm": 0.18438094854354858, "learning_rate": 9.023101748108859e-07, "loss": 0.5341, "step": 16115 }, { "epoch": 18.355555555555554, "grad_norm": 0.2069292813539505, "learning_rate": 9.010701467958127e-07, "loss": 0.6938, "step": 16116 }, { "epoch": 18.356695156695157, "grad_norm": 0.1813022941350937, "learning_rate": 8.998309558028467e-07, "loss": 0.7141, "step": 16117 }, { "epoch": 18.35783475783476, "grad_norm": 0.186764195561409, "learning_rate": 8.985926018750312e-07, "loss": 0.7199, "step": 16118 }, { "epoch": 18.358974358974358, "grad_norm": 0.1632576286792755, "learning_rate": 8.973550850553709e-07, "loss": 0.6903, "step": 16119 }, { "epoch": 18.36011396011396, "grad_norm": 0.1646881252527237, "learning_rate": 8.961184053868449e-07, "loss": 0.6153, "step": 16120 }, { "epoch": 18.361253561253562, "grad_norm": 0.21068136394023895, "learning_rate": 8.948825629124219e-07, "loss": 0.7535, "step": 16121 }, { "epoch": 18.36239316239316, "grad_norm": 0.16106244921684265, "learning_rate": 8.936475576750119e-07, "loss": 0.6235, "step": 16122 }, { "epoch": 18.363532763532763, "grad_norm": 0.19227463006973267, "learning_rate": 8.924133897175168e-07, "loss": 0.7561, "step": 16123 }, { "epoch": 18.364672364672366, "grad_norm": 0.18547876179218292, "learning_rate": 8.911800590827996e-07, "loss": 0.6382, "step": 16124 }, { "epoch": 18.365811965811965, "grad_norm": 0.21618832647800446, "learning_rate": 8.899475658137007e-07, "loss": 0.7581, "step": 16125 }, { "epoch": 18.366951566951567, "grad_norm": 0.19110552966594696, "learning_rate": 8.887159099530251e-07, "loss": 0.6661, "step": 16126 }, { "epoch": 18.36809116809117, "grad_norm": 0.2559528648853302, "learning_rate": 8.874850915435495e-07, "loss": 0.7246, "step": 16127 }, { "epoch": 18.369230769230768, "grad_norm": 0.2422475963830948, "learning_rate": 8.862551106280287e-07, "loss": 0.361, "step": 16128 }, { "epoch": 18.37037037037037, "grad_norm": 0.19314159452915192, "learning_rate": 8.850259672491839e-07, "loss": 0.4311, "step": 16129 }, { "epoch": 18.371509971509973, "grad_norm": 0.19628053903579712, "learning_rate": 8.837976614496978e-07, "loss": 0.6354, "step": 16130 }, { "epoch": 18.37264957264957, "grad_norm": 0.19356375932693481, "learning_rate": 8.825701932722391e-07, "loss": 0.8193, "step": 16131 }, { "epoch": 18.373789173789174, "grad_norm": 0.19120517373085022, "learning_rate": 8.813435627594457e-07, "loss": 0.7391, "step": 16132 }, { "epoch": 18.374928774928776, "grad_norm": 0.20273134112358093, "learning_rate": 8.801177699539142e-07, "loss": 0.6134, "step": 16133 }, { "epoch": 18.376068376068375, "grad_norm": 0.24117746949195862, "learning_rate": 8.788928148982217e-07, "loss": 0.6725, "step": 16134 }, { "epoch": 18.377207977207977, "grad_norm": 0.22128960490226746, "learning_rate": 8.776686976349147e-07, "loss": 0.6186, "step": 16135 }, { "epoch": 18.37834757834758, "grad_norm": 0.2156917154788971, "learning_rate": 8.764454182065146e-07, "loss": 0.6418, "step": 16136 }, { "epoch": 18.379487179487178, "grad_norm": 0.224776491522789, "learning_rate": 8.752229766555015e-07, "loss": 0.6653, "step": 16137 }, { "epoch": 18.38062678062678, "grad_norm": 0.18596409261226654, "learning_rate": 8.740013730243357e-07, "loss": 0.6871, "step": 16138 }, { "epoch": 18.381766381766383, "grad_norm": 0.21921518445014954, "learning_rate": 8.727806073554528e-07, "loss": 0.5403, "step": 16139 }, { "epoch": 18.38290598290598, "grad_norm": 0.16866829991340637, "learning_rate": 8.715606796912495e-07, "loss": 0.7576, "step": 16140 }, { "epoch": 18.384045584045584, "grad_norm": 0.18714411556720734, "learning_rate": 8.703415900740974e-07, "loss": 0.9033, "step": 16141 }, { "epoch": 18.385185185185186, "grad_norm": 0.19236190617084503, "learning_rate": 8.691233385463321e-07, "loss": 0.6799, "step": 16142 }, { "epoch": 18.386324786324785, "grad_norm": 0.1837623119354248, "learning_rate": 8.679059251502835e-07, "loss": 0.5914, "step": 16143 }, { "epoch": 18.387464387464387, "grad_norm": 0.19275638461112976, "learning_rate": 8.66689349928218e-07, "loss": 0.544, "step": 16144 }, { "epoch": 18.38860398860399, "grad_norm": 0.1795569509267807, "learning_rate": 8.654736129224017e-07, "loss": 0.8339, "step": 16145 }, { "epoch": 18.38974358974359, "grad_norm": 0.18885968625545502, "learning_rate": 8.642587141750563e-07, "loss": 0.4927, "step": 16146 }, { "epoch": 18.39088319088319, "grad_norm": 0.20421624183654785, "learning_rate": 8.630446537283815e-07, "loss": 0.5268, "step": 16147 }, { "epoch": 18.392022792022793, "grad_norm": 0.17060433328151703, "learning_rate": 8.618314316245407e-07, "loss": 0.7207, "step": 16148 }, { "epoch": 18.39316239316239, "grad_norm": 0.25263509154319763, "learning_rate": 8.606190479056725e-07, "loss": 0.6565, "step": 16149 }, { "epoch": 18.394301994301994, "grad_norm": 0.17055436968803406, "learning_rate": 8.594075026138904e-07, "loss": 0.5902, "step": 16150 }, { "epoch": 18.395441595441596, "grad_norm": 0.21823427081108093, "learning_rate": 8.581967957912746e-07, "loss": 0.6674, "step": 16151 }, { "epoch": 18.396581196581195, "grad_norm": 0.20258358120918274, "learning_rate": 8.569869274798719e-07, "loss": 0.6395, "step": 16152 }, { "epoch": 18.397720797720797, "grad_norm": 0.1745457798242569, "learning_rate": 8.557778977217046e-07, "loss": 0.8352, "step": 16153 }, { "epoch": 18.3988603988604, "grad_norm": 0.1940583735704422, "learning_rate": 8.545697065587694e-07, "loss": 0.6205, "step": 16154 }, { "epoch": 18.4, "grad_norm": 0.20821359753608704, "learning_rate": 8.533623540330327e-07, "loss": 0.6508, "step": 16155 }, { "epoch": 18.4011396011396, "grad_norm": 0.2047506719827652, "learning_rate": 8.521558401864193e-07, "loss": 0.7315, "step": 16156 }, { "epoch": 18.402279202279203, "grad_norm": 0.16641731560230255, "learning_rate": 8.509501650608432e-07, "loss": 0.6271, "step": 16157 }, { "epoch": 18.403418803418802, "grad_norm": 0.23004211485385895, "learning_rate": 8.497453286981788e-07, "loss": 0.5899, "step": 16158 }, { "epoch": 18.404558404558404, "grad_norm": 0.1965126395225525, "learning_rate": 8.485413311402734e-07, "loss": 0.5774, "step": 16159 }, { "epoch": 18.405698005698007, "grad_norm": 0.2303241342306137, "learning_rate": 8.473381724289409e-07, "loss": 0.5625, "step": 16160 }, { "epoch": 18.406837606837605, "grad_norm": 0.19273196160793304, "learning_rate": 8.461358526059754e-07, "loss": 0.5605, "step": 16161 }, { "epoch": 18.407977207977208, "grad_norm": 0.2125997543334961, "learning_rate": 8.449343717131325e-07, "loss": 0.6399, "step": 16162 }, { "epoch": 18.40911680911681, "grad_norm": 0.1999417543411255, "learning_rate": 8.437337297921511e-07, "loss": 0.7604, "step": 16163 }, { "epoch": 18.41025641025641, "grad_norm": 0.16549277305603027, "learning_rate": 8.425339268847199e-07, "loss": 0.7765, "step": 16164 }, { "epoch": 18.41139601139601, "grad_norm": 0.19712743163108826, "learning_rate": 8.413349630325223e-07, "loss": 0.4729, "step": 16165 }, { "epoch": 18.412535612535613, "grad_norm": 0.19830380380153656, "learning_rate": 8.401368382772029e-07, "loss": 0.637, "step": 16166 }, { "epoch": 18.413675213675212, "grad_norm": 0.23345257341861725, "learning_rate": 8.389395526603644e-07, "loss": 0.4638, "step": 16167 }, { "epoch": 18.414814814814815, "grad_norm": 0.20206978917121887, "learning_rate": 8.377431062236013e-07, "loss": 0.7629, "step": 16168 }, { "epoch": 18.415954415954417, "grad_norm": 0.2112087458372116, "learning_rate": 8.365474990084638e-07, "loss": 0.5297, "step": 16169 }, { "epoch": 18.417094017094016, "grad_norm": 0.1696016639471054, "learning_rate": 8.353527310564879e-07, "loss": 0.6227, "step": 16170 }, { "epoch": 18.418233618233618, "grad_norm": 0.2516838014125824, "learning_rate": 8.341588024091602e-07, "loss": 0.6091, "step": 16171 }, { "epoch": 18.41937321937322, "grad_norm": 0.21054531633853912, "learning_rate": 8.329657131079527e-07, "loss": 0.6746, "step": 16172 }, { "epoch": 18.42051282051282, "grad_norm": 0.17801140248775482, "learning_rate": 8.317734631943047e-07, "loss": 0.6221, "step": 16173 }, { "epoch": 18.42165242165242, "grad_norm": 0.21043559908866882, "learning_rate": 8.30582052709633e-07, "loss": 0.5761, "step": 16174 }, { "epoch": 18.422792022792024, "grad_norm": 0.17518368363380432, "learning_rate": 8.293914816953046e-07, "loss": 0.828, "step": 16175 }, { "epoch": 18.423931623931622, "grad_norm": 0.18897578120231628, "learning_rate": 8.282017501926837e-07, "loss": 0.5563, "step": 16176 }, { "epoch": 18.425071225071225, "grad_norm": 0.19218704104423523, "learning_rate": 8.270128582430925e-07, "loss": 0.554, "step": 16177 }, { "epoch": 18.426210826210827, "grad_norm": 0.19123661518096924, "learning_rate": 8.258248058878148e-07, "loss": 0.8325, "step": 16178 }, { "epoch": 18.427350427350426, "grad_norm": 0.19908234477043152, "learning_rate": 8.246375931681232e-07, "loss": 0.5215, "step": 16179 }, { "epoch": 18.428490028490028, "grad_norm": 0.18487827479839325, "learning_rate": 8.234512201252487e-07, "loss": 0.6166, "step": 16180 }, { "epoch": 18.42962962962963, "grad_norm": 0.21101725101470947, "learning_rate": 8.222656868004053e-07, "loss": 0.6704, "step": 16181 }, { "epoch": 18.43076923076923, "grad_norm": 0.19184231758117676, "learning_rate": 8.210809932347575e-07, "loss": 0.6684, "step": 16182 }, { "epoch": 18.43190883190883, "grad_norm": 0.24409416317939758, "learning_rate": 8.19897139469461e-07, "loss": 0.4151, "step": 16183 }, { "epoch": 18.433048433048434, "grad_norm": 0.18449606001377106, "learning_rate": 8.187141255456304e-07, "loss": 0.5247, "step": 16184 }, { "epoch": 18.434188034188033, "grad_norm": 0.21148008108139038, "learning_rate": 8.175319515043606e-07, "loss": 0.7081, "step": 16185 }, { "epoch": 18.435327635327635, "grad_norm": 0.21608728170394897, "learning_rate": 8.163506173867047e-07, "loss": 0.8131, "step": 16186 }, { "epoch": 18.436467236467237, "grad_norm": 0.19518068432807922, "learning_rate": 8.151701232336967e-07, "loss": 0.7202, "step": 16187 }, { "epoch": 18.437606837606836, "grad_norm": 0.2155269980430603, "learning_rate": 8.139904690863426e-07, "loss": 0.7246, "step": 16188 }, { "epoch": 18.43874643874644, "grad_norm": 0.15008096396923065, "learning_rate": 8.128116549856097e-07, "loss": 0.593, "step": 16189 }, { "epoch": 18.43988603988604, "grad_norm": 0.2401498258113861, "learning_rate": 8.116336809724428e-07, "loss": 0.4578, "step": 16190 }, { "epoch": 18.44102564102564, "grad_norm": 0.1776338815689087, "learning_rate": 8.104565470877568e-07, "loss": 0.8418, "step": 16191 }, { "epoch": 18.442165242165242, "grad_norm": 0.2272671014070511, "learning_rate": 8.09280253372438e-07, "loss": 0.5513, "step": 16192 }, { "epoch": 18.443304843304844, "grad_norm": 0.19338665902614594, "learning_rate": 8.081047998673375e-07, "loss": 0.7093, "step": 16193 }, { "epoch": 18.444444444444443, "grad_norm": 0.17604541778564453, "learning_rate": 8.069301866132861e-07, "loss": 0.8846, "step": 16194 }, { "epoch": 18.445584045584045, "grad_norm": 0.21153387427330017, "learning_rate": 8.057564136510792e-07, "loss": 0.4936, "step": 16195 }, { "epoch": 18.446723646723648, "grad_norm": 0.1837536096572876, "learning_rate": 8.045834810214898e-07, "loss": 0.7118, "step": 16196 }, { "epoch": 18.447863247863246, "grad_norm": 0.2227238118648529, "learning_rate": 8.034113887652517e-07, "loss": 0.7348, "step": 16197 }, { "epoch": 18.44900284900285, "grad_norm": 0.20759250223636627, "learning_rate": 8.02240136923077e-07, "loss": 0.6929, "step": 16198 }, { "epoch": 18.45014245014245, "grad_norm": 0.18193592131137848, "learning_rate": 8.010697255356469e-07, "loss": 0.7964, "step": 16199 }, { "epoch": 18.45128205128205, "grad_norm": 0.1874343603849411, "learning_rate": 7.999001546436152e-07, "loss": 0.6776, "step": 16200 }, { "epoch": 18.452421652421652, "grad_norm": 0.20273059606552124, "learning_rate": 7.987314242875965e-07, "loss": 0.4911, "step": 16201 }, { "epoch": 18.453561253561254, "grad_norm": 0.18550534546375275, "learning_rate": 7.975635345081917e-07, "loss": 0.706, "step": 16202 }, { "epoch": 18.454700854700853, "grad_norm": 0.20157377421855927, "learning_rate": 7.963964853459626e-07, "loss": 0.7658, "step": 16203 }, { "epoch": 18.455840455840455, "grad_norm": 0.1587774008512497, "learning_rate": 7.952302768414466e-07, "loss": 0.6919, "step": 16204 }, { "epoch": 18.456980056980058, "grad_norm": 0.20730525255203247, "learning_rate": 7.940649090351415e-07, "loss": 0.6302, "step": 16205 }, { "epoch": 18.458119658119656, "grad_norm": 0.22193406522274017, "learning_rate": 7.929003819675291e-07, "loss": 0.6042, "step": 16206 }, { "epoch": 18.45925925925926, "grad_norm": 0.18707987666130066, "learning_rate": 7.917366956790573e-07, "loss": 0.6165, "step": 16207 }, { "epoch": 18.46039886039886, "grad_norm": 0.20526203513145447, "learning_rate": 7.90573850210144e-07, "loss": 0.4681, "step": 16208 }, { "epoch": 18.46153846153846, "grad_norm": 0.16481465101242065, "learning_rate": 7.894118456011762e-07, "loss": 0.6136, "step": 16209 }, { "epoch": 18.462678062678062, "grad_norm": 0.1793680191040039, "learning_rate": 7.882506818925134e-07, "loss": 0.7111, "step": 16210 }, { "epoch": 18.463817663817665, "grad_norm": 0.21176199615001678, "learning_rate": 7.870903591244899e-07, "loss": 0.5728, "step": 16211 }, { "epoch": 18.464957264957263, "grad_norm": 0.2248607575893402, "learning_rate": 7.859308773374013e-07, "loss": 0.4769, "step": 16212 }, { "epoch": 18.466096866096866, "grad_norm": 0.19010235369205475, "learning_rate": 7.847722365715238e-07, "loss": 0.8921, "step": 16213 }, { "epoch": 18.467236467236468, "grad_norm": 0.25821489095687866, "learning_rate": 7.836144368670972e-07, "loss": 0.4498, "step": 16214 }, { "epoch": 18.468376068376067, "grad_norm": 0.24605529010295868, "learning_rate": 7.824574782643395e-07, "loss": 0.5791, "step": 16215 }, { "epoch": 18.46951566951567, "grad_norm": 0.17802326381206512, "learning_rate": 7.813013608034297e-07, "loss": 0.7837, "step": 16216 }, { "epoch": 18.47065527065527, "grad_norm": 0.1973274201154709, "learning_rate": 7.801460845245273e-07, "loss": 0.7205, "step": 16217 }, { "epoch": 18.47179487179487, "grad_norm": 0.21337224543094635, "learning_rate": 7.789916494677529e-07, "loss": 0.4841, "step": 16218 }, { "epoch": 18.472934472934472, "grad_norm": 0.18827608227729797, "learning_rate": 7.778380556732079e-07, "loss": 0.75, "step": 16219 }, { "epoch": 18.474074074074075, "grad_norm": 0.19029515981674194, "learning_rate": 7.766853031809573e-07, "loss": 0.5274, "step": 16220 }, { "epoch": 18.475213675213674, "grad_norm": 0.1854611337184906, "learning_rate": 7.755333920310415e-07, "loss": 0.6066, "step": 16221 }, { "epoch": 18.476353276353276, "grad_norm": 0.20245634019374847, "learning_rate": 7.743823222634728e-07, "loss": 0.7071, "step": 16222 }, { "epoch": 18.477492877492878, "grad_norm": 0.23420220613479614, "learning_rate": 7.73232093918222e-07, "loss": 0.6315, "step": 16223 }, { "epoch": 18.478632478632477, "grad_norm": 0.17568355798721313, "learning_rate": 7.720827070352432e-07, "loss": 0.6573, "step": 16224 }, { "epoch": 18.47977207977208, "grad_norm": 0.21530553698539734, "learning_rate": 7.70934161654463e-07, "loss": 0.5061, "step": 16225 }, { "epoch": 18.48091168091168, "grad_norm": 0.21519404649734497, "learning_rate": 7.697864578157688e-07, "loss": 0.5939, "step": 16226 }, { "epoch": 18.48205128205128, "grad_norm": 0.22826364636421204, "learning_rate": 7.686395955590231e-07, "loss": 0.5632, "step": 16227 }, { "epoch": 18.483190883190883, "grad_norm": 0.19922730326652527, "learning_rate": 7.674935749240608e-07, "loss": 0.6566, "step": 16228 }, { "epoch": 18.484330484330485, "grad_norm": 0.216302752494812, "learning_rate": 7.663483959506861e-07, "loss": 0.6584, "step": 16229 }, { "epoch": 18.485470085470084, "grad_norm": 0.19709351658821106, "learning_rate": 7.65204058678673e-07, "loss": 0.6128, "step": 16230 }, { "epoch": 18.486609686609686, "grad_norm": 0.18579338490962982, "learning_rate": 7.640605631477699e-07, "loss": 0.9303, "step": 16231 }, { "epoch": 18.48774928774929, "grad_norm": 0.22104182839393616, "learning_rate": 7.629179093976923e-07, "loss": 0.6292, "step": 16232 }, { "epoch": 18.488888888888887, "grad_norm": 0.18530291318893433, "learning_rate": 7.617760974681282e-07, "loss": 0.4868, "step": 16233 }, { "epoch": 18.49002849002849, "grad_norm": 0.20386242866516113, "learning_rate": 7.6063512739874e-07, "loss": 0.5239, "step": 16234 }, { "epoch": 18.491168091168092, "grad_norm": 0.1997651904821396, "learning_rate": 7.594949992291489e-07, "loss": 0.7122, "step": 16235 }, { "epoch": 18.49230769230769, "grad_norm": 0.19039437174797058, "learning_rate": 7.583557129989565e-07, "loss": 0.6687, "step": 16236 }, { "epoch": 18.493447293447293, "grad_norm": 0.15081074833869934, "learning_rate": 7.572172687477341e-07, "loss": 0.5981, "step": 16237 }, { "epoch": 18.494586894586895, "grad_norm": 0.23857636749744415, "learning_rate": 7.560796665150305e-07, "loss": 0.5274, "step": 16238 }, { "epoch": 18.495726495726494, "grad_norm": 0.2259291708469391, "learning_rate": 7.549429063403446e-07, "loss": 0.5356, "step": 16239 }, { "epoch": 18.496866096866096, "grad_norm": 0.22006577253341675, "learning_rate": 7.538069882631671e-07, "loss": 0.6589, "step": 16240 }, { "epoch": 18.4980056980057, "grad_norm": 0.21424247324466705, "learning_rate": 7.526719123229526e-07, "loss": 0.7241, "step": 16241 }, { "epoch": 18.499145299145297, "grad_norm": 0.18898151814937592, "learning_rate": 7.515376785591194e-07, "loss": 0.5691, "step": 16242 }, { "epoch": 18.5002849002849, "grad_norm": 0.25201770663261414, "learning_rate": 7.504042870110667e-07, "loss": 0.4755, "step": 16243 }, { "epoch": 18.501424501424502, "grad_norm": 0.1744522601366043, "learning_rate": 7.492717377181602e-07, "loss": 0.5187, "step": 16244 }, { "epoch": 18.5025641025641, "grad_norm": 0.21565017104148865, "learning_rate": 7.481400307197405e-07, "loss": 0.4777, "step": 16245 }, { "epoch": 18.503703703703703, "grad_norm": 0.15349186956882477, "learning_rate": 7.47009166055107e-07, "loss": 0.8353, "step": 16246 }, { "epoch": 18.504843304843305, "grad_norm": 0.16424761712551117, "learning_rate": 7.458791437635393e-07, "loss": 0.8842, "step": 16247 }, { "epoch": 18.505982905982904, "grad_norm": 0.16871342062950134, "learning_rate": 7.447499638842892e-07, "loss": 0.6337, "step": 16248 }, { "epoch": 18.507122507122507, "grad_norm": 0.19683855772018433, "learning_rate": 7.436216264565781e-07, "loss": 0.7552, "step": 16249 }, { "epoch": 18.50826210826211, "grad_norm": 0.2150336056947708, "learning_rate": 7.424941315195888e-07, "loss": 0.6509, "step": 16250 }, { "epoch": 18.509401709401708, "grad_norm": 0.22778502106666565, "learning_rate": 7.413674791124897e-07, "loss": 0.5306, "step": 16251 }, { "epoch": 18.51054131054131, "grad_norm": 0.18186631798744202, "learning_rate": 7.40241669274408e-07, "loss": 0.6358, "step": 16252 }, { "epoch": 18.511680911680912, "grad_norm": 0.22945746779441833, "learning_rate": 7.391167020444483e-07, "loss": 0.632, "step": 16253 }, { "epoch": 18.51282051282051, "grad_norm": 0.20270879566669464, "learning_rate": 7.379925774616824e-07, "loss": 0.6971, "step": 16254 }, { "epoch": 18.513960113960113, "grad_norm": 0.17987211048603058, "learning_rate": 7.36869295565154e-07, "loss": 0.616, "step": 16255 }, { "epoch": 18.515099715099716, "grad_norm": 0.20830969512462616, "learning_rate": 7.357468563938819e-07, "loss": 0.4023, "step": 16256 }, { "epoch": 18.516239316239318, "grad_norm": 0.22534967958927155, "learning_rate": 7.346252599868486e-07, "loss": 0.6377, "step": 16257 }, { "epoch": 18.517378917378917, "grad_norm": 0.25643622875213623, "learning_rate": 7.335045063830065e-07, "loss": 0.5016, "step": 16258 }, { "epoch": 18.51851851851852, "grad_norm": 0.16182145476341248, "learning_rate": 7.323845956212883e-07, "loss": 0.6762, "step": 16259 }, { "epoch": 18.51965811965812, "grad_norm": 0.21061931550502777, "learning_rate": 7.312655277405905e-07, "loss": 0.6598, "step": 16260 }, { "epoch": 18.52079772079772, "grad_norm": 0.1625216007232666, "learning_rate": 7.301473027797794e-07, "loss": 0.7835, "step": 16261 }, { "epoch": 18.521937321937322, "grad_norm": 0.21150529384613037, "learning_rate": 7.290299207776935e-07, "loss": 0.5575, "step": 16262 }, { "epoch": 18.523076923076925, "grad_norm": 0.21286988258361816, "learning_rate": 7.279133817731432e-07, "loss": 0.4572, "step": 16263 }, { "epoch": 18.524216524216524, "grad_norm": 0.15413245558738708, "learning_rate": 7.267976858049114e-07, "loss": 0.7582, "step": 16264 }, { "epoch": 18.525356125356126, "grad_norm": 0.2521764636039734, "learning_rate": 7.256828329117449e-07, "loss": 0.5218, "step": 16265 }, { "epoch": 18.526495726495728, "grad_norm": 0.20870208740234375, "learning_rate": 7.24568823132371e-07, "loss": 0.6468, "step": 16266 }, { "epoch": 18.527635327635327, "grad_norm": 0.19469735026359558, "learning_rate": 7.234556565054812e-07, "loss": 0.6161, "step": 16267 }, { "epoch": 18.52877492877493, "grad_norm": 0.18879103660583496, "learning_rate": 7.22343333069736e-07, "loss": 0.7047, "step": 16268 }, { "epoch": 18.52991452991453, "grad_norm": 0.19390064477920532, "learning_rate": 7.212318528637685e-07, "loss": 0.7774, "step": 16269 }, { "epoch": 18.53105413105413, "grad_norm": 0.18350745737552643, "learning_rate": 7.201212159261867e-07, "loss": 0.8099, "step": 16270 }, { "epoch": 18.532193732193733, "grad_norm": 0.22411833703517914, "learning_rate": 7.190114222955652e-07, "loss": 0.625, "step": 16271 }, { "epoch": 18.533333333333335, "grad_norm": 0.19694851338863373, "learning_rate": 7.17902472010451e-07, "loss": 0.4554, "step": 16272 }, { "epoch": 18.534472934472934, "grad_norm": 0.22289873659610748, "learning_rate": 7.167943651093578e-07, "loss": 0.6104, "step": 16273 }, { "epoch": 18.535612535612536, "grad_norm": 0.21234916150569916, "learning_rate": 7.156871016307771e-07, "loss": 0.7507, "step": 16274 }, { "epoch": 18.53675213675214, "grad_norm": 0.18940860033035278, "learning_rate": 7.145806816131639e-07, "loss": 0.588, "step": 16275 }, { "epoch": 18.537891737891737, "grad_norm": 0.16590355336666107, "learning_rate": 7.134751050949489e-07, "loss": 0.8098, "step": 16276 }, { "epoch": 18.53903133903134, "grad_norm": 0.2010948657989502, "learning_rate": 7.123703721145319e-07, "loss": 0.673, "step": 16277 }, { "epoch": 18.540170940170942, "grad_norm": 0.1987306922674179, "learning_rate": 7.112664827102822e-07, "loss": 0.566, "step": 16278 }, { "epoch": 18.54131054131054, "grad_norm": 0.16266214847564697, "learning_rate": 7.101634369205467e-07, "loss": 0.8276, "step": 16279 }, { "epoch": 18.542450142450143, "grad_norm": 0.21019327640533447, "learning_rate": 7.090612347836284e-07, "loss": 0.5039, "step": 16280 }, { "epoch": 18.543589743589745, "grad_norm": 0.21681322157382965, "learning_rate": 7.079598763378131e-07, "loss": 0.7382, "step": 16281 }, { "epoch": 18.544729344729344, "grad_norm": 0.15988457202911377, "learning_rate": 7.068593616213565e-07, "loss": 0.5709, "step": 16282 }, { "epoch": 18.545868945868946, "grad_norm": 0.1858881264925003, "learning_rate": 7.057596906724806e-07, "loss": 0.6184, "step": 16283 }, { "epoch": 18.54700854700855, "grad_norm": 0.18496987223625183, "learning_rate": 7.046608635293799e-07, "loss": 0.6723, "step": 16284 }, { "epoch": 18.548148148148147, "grad_norm": 0.2218230962753296, "learning_rate": 7.035628802302185e-07, "loss": 0.673, "step": 16285 }, { "epoch": 18.54928774928775, "grad_norm": 0.17258048057556152, "learning_rate": 7.024657408131352e-07, "loss": 0.7943, "step": 16286 }, { "epoch": 18.550427350427352, "grad_norm": 0.19041316211223602, "learning_rate": 7.013694453162329e-07, "loss": 0.4222, "step": 16287 }, { "epoch": 18.55156695156695, "grad_norm": 0.17228615283966064, "learning_rate": 7.002739937775949e-07, "loss": 0.6787, "step": 16288 }, { "epoch": 18.552706552706553, "grad_norm": 0.17214156687259674, "learning_rate": 6.991793862352631e-07, "loss": 0.5802, "step": 16289 }, { "epoch": 18.553846153846155, "grad_norm": 0.19898304343223572, "learning_rate": 6.980856227272597e-07, "loss": 0.7196, "step": 16290 }, { "epoch": 18.554985754985754, "grad_norm": 0.2032332420349121, "learning_rate": 6.969927032915741e-07, "loss": 0.654, "step": 16291 }, { "epoch": 18.556125356125357, "grad_norm": 0.2081405520439148, "learning_rate": 6.959006279661617e-07, "loss": 0.752, "step": 16292 }, { "epoch": 18.55726495726496, "grad_norm": 0.18995144963264465, "learning_rate": 6.948093967889591e-07, "loss": 0.5449, "step": 16293 }, { "epoch": 18.558404558404558, "grad_norm": 0.1944435089826584, "learning_rate": 6.937190097978691e-07, "loss": 0.424, "step": 16294 }, { "epoch": 18.55954415954416, "grad_norm": 0.20616506040096283, "learning_rate": 6.92629467030756e-07, "loss": 0.7207, "step": 16295 }, { "epoch": 18.560683760683762, "grad_norm": 0.1970258355140686, "learning_rate": 6.915407685254698e-07, "loss": 0.7153, "step": 16296 }, { "epoch": 18.56182336182336, "grad_norm": 0.17759563028812408, "learning_rate": 6.904529143198196e-07, "loss": 0.7204, "step": 16297 }, { "epoch": 18.562962962962963, "grad_norm": 0.17698125541210175, "learning_rate": 6.893659044515887e-07, "loss": 0.5877, "step": 16298 }, { "epoch": 18.564102564102566, "grad_norm": 0.19148777425289154, "learning_rate": 6.882797389585388e-07, "loss": 0.7251, "step": 16299 }, { "epoch": 18.565242165242164, "grad_norm": 0.17550741136074066, "learning_rate": 6.871944178783896e-07, "loss": 0.5844, "step": 16300 }, { "epoch": 18.566381766381767, "grad_norm": 0.19892731308937073, "learning_rate": 6.861099412488386e-07, "loss": 0.4684, "step": 16301 }, { "epoch": 18.56752136752137, "grad_norm": 0.23341961205005646, "learning_rate": 6.850263091075532e-07, "loss": 0.6921, "step": 16302 }, { "epoch": 18.568660968660968, "grad_norm": 0.21087487041950226, "learning_rate": 6.839435214921697e-07, "loss": 0.6139, "step": 16303 }, { "epoch": 18.56980056980057, "grad_norm": 0.19287163019180298, "learning_rate": 6.82861578440297e-07, "loss": 0.7285, "step": 16304 }, { "epoch": 18.570940170940172, "grad_norm": 0.19143036007881165, "learning_rate": 6.817804799895161e-07, "loss": 0.5606, "step": 16305 }, { "epoch": 18.57207977207977, "grad_norm": 0.19633238017559052, "learning_rate": 6.807002261773721e-07, "loss": 0.6803, "step": 16306 }, { "epoch": 18.573219373219374, "grad_norm": 0.18908600509166718, "learning_rate": 6.796208170413903e-07, "loss": 0.7697, "step": 16307 }, { "epoch": 18.574358974358976, "grad_norm": 0.20927876234054565, "learning_rate": 6.785422526190521e-07, "loss": 0.6892, "step": 16308 }, { "epoch": 18.575498575498575, "grad_norm": 0.17534089088439941, "learning_rate": 6.77464532947833e-07, "loss": 0.7656, "step": 16309 }, { "epoch": 18.576638176638177, "grad_norm": 0.18784134089946747, "learning_rate": 6.763876580651557e-07, "loss": 0.7512, "step": 16310 }, { "epoch": 18.57777777777778, "grad_norm": 0.1713857352733612, "learning_rate": 6.753116280084237e-07, "loss": 0.8112, "step": 16311 }, { "epoch": 18.578917378917378, "grad_norm": 0.2013809233903885, "learning_rate": 6.742364428150128e-07, "loss": 0.4333, "step": 16312 }, { "epoch": 18.58005698005698, "grad_norm": 0.21230103075504303, "learning_rate": 6.731621025222706e-07, "loss": 0.8469, "step": 16313 }, { "epoch": 18.581196581196583, "grad_norm": 0.17354519665241241, "learning_rate": 6.720886071675037e-07, "loss": 0.6986, "step": 16314 }, { "epoch": 18.58233618233618, "grad_norm": 0.2070615440607071, "learning_rate": 6.710159567880014e-07, "loss": 0.4701, "step": 16315 }, { "epoch": 18.583475783475784, "grad_norm": 0.22802412509918213, "learning_rate": 6.699441514210175e-07, "loss": 0.8033, "step": 16316 }, { "epoch": 18.584615384615386, "grad_norm": 0.19977618753910065, "learning_rate": 6.68873191103786e-07, "loss": 0.5289, "step": 16317 }, { "epoch": 18.585754985754985, "grad_norm": 0.18995045125484467, "learning_rate": 6.678030758734994e-07, "loss": 0.8933, "step": 16318 }, { "epoch": 18.586894586894587, "grad_norm": 0.20440229773521423, "learning_rate": 6.667338057673194e-07, "loss": 0.497, "step": 16319 }, { "epoch": 18.58803418803419, "grad_norm": 0.16072645783424377, "learning_rate": 6.656653808223972e-07, "loss": 0.5715, "step": 16320 }, { "epoch": 18.58917378917379, "grad_norm": 0.16689789295196533, "learning_rate": 6.645978010758336e-07, "loss": 0.7835, "step": 16321 }, { "epoch": 18.59031339031339, "grad_norm": 0.21596330404281616, "learning_rate": 6.635310665647099e-07, "loss": 0.7861, "step": 16322 }, { "epoch": 18.591452991452993, "grad_norm": 0.2213994413614273, "learning_rate": 6.624651773260798e-07, "loss": 0.7217, "step": 16323 }, { "epoch": 18.59259259259259, "grad_norm": 0.19551752507686615, "learning_rate": 6.614001333969638e-07, "loss": 0.6508, "step": 16324 }, { "epoch": 18.593732193732194, "grad_norm": 0.17777617275714874, "learning_rate": 6.603359348143517e-07, "loss": 0.5114, "step": 16325 }, { "epoch": 18.594871794871796, "grad_norm": 0.1685660481452942, "learning_rate": 6.592725816152057e-07, "loss": 0.8462, "step": 16326 }, { "epoch": 18.596011396011395, "grad_norm": 0.1765321046113968, "learning_rate": 6.5821007383646e-07, "loss": 0.5678, "step": 16327 }, { "epoch": 18.597150997150997, "grad_norm": 0.20969374477863312, "learning_rate": 6.571484115150211e-07, "loss": 0.682, "step": 16328 }, { "epoch": 18.5982905982906, "grad_norm": 0.1690845638513565, "learning_rate": 6.560875946877598e-07, "loss": 0.7952, "step": 16329 }, { "epoch": 18.5994301994302, "grad_norm": 0.19686806201934814, "learning_rate": 6.550276233915187e-07, "loss": 0.7186, "step": 16330 }, { "epoch": 18.6005698005698, "grad_norm": 0.22469277679920197, "learning_rate": 6.539684976631211e-07, "loss": 0.5949, "step": 16331 }, { "epoch": 18.601709401709403, "grad_norm": 0.19334806501865387, "learning_rate": 6.529102175393487e-07, "loss": 0.7002, "step": 16332 }, { "epoch": 18.602849002849002, "grad_norm": 0.19611862301826477, "learning_rate": 6.518527830569582e-07, "loss": 0.712, "step": 16333 }, { "epoch": 18.603988603988604, "grad_norm": 0.2791908383369446, "learning_rate": 6.507961942526785e-07, "loss": 0.3825, "step": 16334 }, { "epoch": 18.605128205128207, "grad_norm": 0.15559428930282593, "learning_rate": 6.497404511632111e-07, "loss": 0.668, "step": 16335 }, { "epoch": 18.606267806267805, "grad_norm": 0.1852877140045166, "learning_rate": 6.486855538252179e-07, "loss": 0.6352, "step": 16336 }, { "epoch": 18.607407407407408, "grad_norm": 0.17558401823043823, "learning_rate": 6.476315022753421e-07, "loss": 0.9307, "step": 16337 }, { "epoch": 18.60854700854701, "grad_norm": 0.1833890676498413, "learning_rate": 6.465782965501932e-07, "loss": 0.56, "step": 16338 }, { "epoch": 18.60968660968661, "grad_norm": 0.22029294073581696, "learning_rate": 6.455259366863531e-07, "loss": 0.5293, "step": 16339 }, { "epoch": 18.61082621082621, "grad_norm": 0.17233218252658844, "learning_rate": 6.44474422720373e-07, "loss": 0.6149, "step": 16340 }, { "epoch": 18.611965811965813, "grad_norm": 0.1834934502840042, "learning_rate": 6.434237546887684e-07, "loss": 0.4983, "step": 16341 }, { "epoch": 18.613105413105412, "grad_norm": 0.1694076955318451, "learning_rate": 6.423739326280459e-07, "loss": 0.6432, "step": 16342 }, { "epoch": 18.614245014245014, "grad_norm": 0.20694801211357117, "learning_rate": 6.413249565746543e-07, "loss": 0.7677, "step": 16343 }, { "epoch": 18.615384615384617, "grad_norm": 0.2359916716814041, "learning_rate": 6.402768265650367e-07, "loss": 0.5534, "step": 16344 }, { "epoch": 18.616524216524216, "grad_norm": 0.19315852224826813, "learning_rate": 6.392295426355916e-07, "loss": 0.6173, "step": 16345 }, { "epoch": 18.617663817663818, "grad_norm": 0.24828165769577026, "learning_rate": 6.381831048227011e-07, "loss": 0.4476, "step": 16346 }, { "epoch": 18.61880341880342, "grad_norm": 0.18524330854415894, "learning_rate": 6.371375131627055e-07, "loss": 0.5514, "step": 16347 }, { "epoch": 18.61994301994302, "grad_norm": 0.23572108149528503, "learning_rate": 6.360927676919204e-07, "loss": 0.6882, "step": 16348 }, { "epoch": 18.62108262108262, "grad_norm": 0.17911289632320404, "learning_rate": 6.35048868446636e-07, "loss": 0.8192, "step": 16349 }, { "epoch": 18.622222222222224, "grad_norm": 0.18745560944080353, "learning_rate": 6.340058154631096e-07, "loss": 0.7429, "step": 16350 }, { "epoch": 18.623361823361822, "grad_norm": 0.171620711684227, "learning_rate": 6.329636087775647e-07, "loss": 0.7539, "step": 16351 }, { "epoch": 18.624501424501425, "grad_norm": 0.18377149105072021, "learning_rate": 6.319222484262005e-07, "loss": 0.8062, "step": 16352 }, { "epoch": 18.625641025641027, "grad_norm": 0.17271770536899567, "learning_rate": 6.308817344451934e-07, "loss": 0.709, "step": 16353 }, { "epoch": 18.626780626780626, "grad_norm": 0.1912023425102234, "learning_rate": 6.298420668706812e-07, "loss": 0.6841, "step": 16354 }, { "epoch": 18.627920227920228, "grad_norm": 0.20034343004226685, "learning_rate": 6.288032457387683e-07, "loss": 0.5396, "step": 16355 }, { "epoch": 18.62905982905983, "grad_norm": 0.17258824408054352, "learning_rate": 6.277652710855397e-07, "loss": 0.8374, "step": 16356 }, { "epoch": 18.63019943019943, "grad_norm": 0.16941462457180023, "learning_rate": 6.2672814294705e-07, "loss": 0.6927, "step": 16357 }, { "epoch": 18.63133903133903, "grad_norm": 0.18177206814289093, "learning_rate": 6.256918613593176e-07, "loss": 0.7024, "step": 16358 }, { "epoch": 18.632478632478634, "grad_norm": 0.215084969997406, "learning_rate": 6.24656426358336e-07, "loss": 0.6746, "step": 16359 }, { "epoch": 18.633618233618233, "grad_norm": 0.18582645058631897, "learning_rate": 6.236218379800707e-07, "loss": 0.5369, "step": 16360 }, { "epoch": 18.634757834757835, "grad_norm": 0.17084524035453796, "learning_rate": 6.225880962604513e-07, "loss": 0.5692, "step": 16361 }, { "epoch": 18.635897435897437, "grad_norm": 0.2446059286594391, "learning_rate": 6.215552012353882e-07, "loss": 0.4709, "step": 16362 }, { "epoch": 18.637037037037036, "grad_norm": 0.17531663179397583, "learning_rate": 6.205231529407496e-07, "loss": 0.5219, "step": 16363 }, { "epoch": 18.63817663817664, "grad_norm": 0.17034143209457397, "learning_rate": 6.194919514123904e-07, "loss": 0.7795, "step": 16364 }, { "epoch": 18.63931623931624, "grad_norm": 0.16582253575325012, "learning_rate": 6.184615966861207e-07, "loss": 0.6171, "step": 16365 }, { "epoch": 18.64045584045584, "grad_norm": 0.18334414064884186, "learning_rate": 6.174320887977286e-07, "loss": 0.7847, "step": 16366 }, { "epoch": 18.64159544159544, "grad_norm": 0.22042441368103027, "learning_rate": 6.164034277829745e-07, "loss": 0.4481, "step": 16367 }, { "epoch": 18.642735042735044, "grad_norm": 0.18286308646202087, "learning_rate": 6.153756136775823e-07, "loss": 0.477, "step": 16368 }, { "epoch": 18.643874643874643, "grad_norm": 0.19711682200431824, "learning_rate": 6.143486465172571e-07, "loss": 0.7364, "step": 16369 }, { "epoch": 18.645014245014245, "grad_norm": 0.16850189864635468, "learning_rate": 6.133225263376591e-07, "loss": 0.6956, "step": 16370 }, { "epoch": 18.646153846153847, "grad_norm": 0.17246362566947937, "learning_rate": 6.122972531744347e-07, "loss": 0.7879, "step": 16371 }, { "epoch": 18.647293447293446, "grad_norm": 0.19572462141513824, "learning_rate": 6.112728270631946e-07, "loss": 0.8546, "step": 16372 }, { "epoch": 18.64843304843305, "grad_norm": 0.26401737332344055, "learning_rate": 6.102492480395183e-07, "loss": 0.4992, "step": 16373 }, { "epoch": 18.64957264957265, "grad_norm": 0.2246359884738922, "learning_rate": 6.092265161389527e-07, "loss": 0.5464, "step": 16374 }, { "epoch": 18.65071225071225, "grad_norm": 0.1910015493631363, "learning_rate": 6.082046313970302e-07, "loss": 0.8257, "step": 16375 }, { "epoch": 18.651851851851852, "grad_norm": 0.24764880537986755, "learning_rate": 6.071835938492393e-07, "loss": 0.4395, "step": 16376 }, { "epoch": 18.652991452991454, "grad_norm": 0.2275485396385193, "learning_rate": 6.061634035310404e-07, "loss": 0.5034, "step": 16377 }, { "epoch": 18.654131054131053, "grad_norm": 0.203792005777359, "learning_rate": 6.051440604778718e-07, "loss": 0.7497, "step": 16378 }, { "epoch": 18.655270655270655, "grad_norm": 0.18049485981464386, "learning_rate": 6.041255647251332e-07, "loss": 0.7349, "step": 16379 }, { "epoch": 18.656410256410258, "grad_norm": 0.18229711055755615, "learning_rate": 6.03107916308207e-07, "loss": 0.4229, "step": 16380 }, { "epoch": 18.657549857549856, "grad_norm": 0.18593524396419525, "learning_rate": 6.020911152624292e-07, "loss": 0.6194, "step": 16381 }, { "epoch": 18.65868945868946, "grad_norm": 0.22720669209957123, "learning_rate": 6.010751616231242e-07, "loss": 0.6662, "step": 16382 }, { "epoch": 18.65982905982906, "grad_norm": 0.2030264288187027, "learning_rate": 6.000600554255775e-07, "loss": 0.8845, "step": 16383 }, { "epoch": 18.66096866096866, "grad_norm": 0.22050747275352478, "learning_rate": 5.990457967050445e-07, "loss": 0.6346, "step": 16384 }, { "epoch": 18.662108262108262, "grad_norm": 0.18795745074748993, "learning_rate": 5.980323854967496e-07, "loss": 0.6945, "step": 16385 }, { "epoch": 18.663247863247864, "grad_norm": 0.2565062940120697, "learning_rate": 5.97019821835898e-07, "loss": 0.4727, "step": 16386 }, { "epoch": 18.664387464387463, "grad_norm": 0.182623028755188, "learning_rate": 5.960081057576589e-07, "loss": 0.7858, "step": 16387 }, { "epoch": 18.665527065527066, "grad_norm": 0.1975908875465393, "learning_rate": 5.949972372971679e-07, "loss": 0.6938, "step": 16388 }, { "epoch": 18.666666666666668, "grad_norm": 0.2245030254125595, "learning_rate": 5.939872164895388e-07, "loss": 0.7397, "step": 16389 }, { "epoch": 18.667806267806267, "grad_norm": 0.17188383638858795, "learning_rate": 5.929780433698462e-07, "loss": 0.9977, "step": 16390 }, { "epoch": 18.66894586894587, "grad_norm": 0.17740632593631744, "learning_rate": 5.919697179731482e-07, "loss": 0.6673, "step": 16391 }, { "epoch": 18.67008547008547, "grad_norm": 0.17257684469223022, "learning_rate": 5.909622403344667e-07, "loss": 0.695, "step": 16392 }, { "epoch": 18.67122507122507, "grad_norm": 0.21933795511722565, "learning_rate": 5.899556104887904e-07, "loss": 0.5065, "step": 16393 }, { "epoch": 18.672364672364672, "grad_norm": 0.19246655702590942, "learning_rate": 5.889498284710803e-07, "loss": 0.7339, "step": 16394 }, { "epoch": 18.673504273504275, "grad_norm": 0.19669175148010254, "learning_rate": 5.879448943162752e-07, "loss": 0.605, "step": 16395 }, { "epoch": 18.674643874643873, "grad_norm": 0.20934002101421356, "learning_rate": 5.869408080592775e-07, "loss": 0.663, "step": 16396 }, { "epoch": 18.675783475783476, "grad_norm": 0.2102241963148117, "learning_rate": 5.859375697349623e-07, "loss": 0.4298, "step": 16397 }, { "epoch": 18.676923076923078, "grad_norm": 0.1985064297914505, "learning_rate": 5.849351793781738e-07, "loss": 0.7757, "step": 16398 }, { "epoch": 18.678062678062677, "grad_norm": 0.17709557712078094, "learning_rate": 5.839336370237286e-07, "loss": 0.5256, "step": 16399 }, { "epoch": 18.67920227920228, "grad_norm": 0.19953429698944092, "learning_rate": 5.829329427064129e-07, "loss": 0.8831, "step": 16400 }, { "epoch": 18.68034188034188, "grad_norm": 0.17611843347549438, "learning_rate": 5.81933096460982e-07, "loss": 0.4334, "step": 16401 }, { "epoch": 18.68148148148148, "grad_norm": 0.19796553254127502, "learning_rate": 5.809340983221639e-07, "loss": 0.6072, "step": 16402 }, { "epoch": 18.682621082621083, "grad_norm": 0.1590607613325119, "learning_rate": 5.799359483246614e-07, "loss": 0.6833, "step": 16403 }, { "epoch": 18.683760683760685, "grad_norm": 0.19662630558013916, "learning_rate": 5.789386465031354e-07, "loss": 0.5794, "step": 16404 }, { "epoch": 18.684900284900284, "grad_norm": 0.20472319424152374, "learning_rate": 5.779421928922279e-07, "loss": 0.6026, "step": 16405 }, { "epoch": 18.686039886039886, "grad_norm": 0.2505066990852356, "learning_rate": 5.7694658752655e-07, "loss": 0.6035, "step": 16406 }, { "epoch": 18.68717948717949, "grad_norm": 0.16703948378562927, "learning_rate": 5.759518304406797e-07, "loss": 0.7095, "step": 16407 }, { "epoch": 18.688319088319087, "grad_norm": 0.20619726181030273, "learning_rate": 5.749579216691697e-07, "loss": 0.653, "step": 16408 }, { "epoch": 18.68945868945869, "grad_norm": 0.21306900680065155, "learning_rate": 5.739648612465398e-07, "loss": 0.7377, "step": 16409 }, { "epoch": 18.69059829059829, "grad_norm": 0.228653684258461, "learning_rate": 5.729726492072846e-07, "loss": 0.6316, "step": 16410 }, { "epoch": 18.69173789173789, "grad_norm": 0.25274547934532166, "learning_rate": 5.719812855858625e-07, "loss": 0.5878, "step": 16411 }, { "epoch": 18.692877492877493, "grad_norm": 0.21942993998527527, "learning_rate": 5.709907704167073e-07, "loss": 0.478, "step": 16412 }, { "epoch": 18.694017094017095, "grad_norm": 0.19420869648456573, "learning_rate": 5.700011037342217e-07, "loss": 0.7059, "step": 16413 }, { "epoch": 18.695156695156694, "grad_norm": 0.20585720241069794, "learning_rate": 5.690122855727842e-07, "loss": 0.6092, "step": 16414 }, { "epoch": 18.696296296296296, "grad_norm": 0.1916133612394333, "learning_rate": 5.680243159667309e-07, "loss": 0.7625, "step": 16415 }, { "epoch": 18.6974358974359, "grad_norm": 0.19194625318050385, "learning_rate": 5.670371949503845e-07, "loss": 0.5937, "step": 16416 }, { "epoch": 18.698575498575497, "grad_norm": 0.21500788629055023, "learning_rate": 5.660509225580229e-07, "loss": 0.6049, "step": 16417 }, { "epoch": 18.6997150997151, "grad_norm": 0.19294331967830658, "learning_rate": 5.650654988239107e-07, "loss": 0.5393, "step": 16418 }, { "epoch": 18.700854700854702, "grad_norm": 0.20396853983402252, "learning_rate": 5.640809237822675e-07, "loss": 0.6981, "step": 16419 }, { "epoch": 18.7019943019943, "grad_norm": 0.15139256417751312, "learning_rate": 5.630971974672938e-07, "loss": 0.5209, "step": 16420 }, { "epoch": 18.703133903133903, "grad_norm": 0.17322292923927307, "learning_rate": 5.621143199131567e-07, "loss": 0.7695, "step": 16421 }, { "epoch": 18.704273504273505, "grad_norm": 0.21698355674743652, "learning_rate": 5.611322911539957e-07, "loss": 0.6869, "step": 16422 }, { "epoch": 18.705413105413104, "grad_norm": 0.1870664656162262, "learning_rate": 5.601511112239139e-07, "loss": 0.7331, "step": 16423 }, { "epoch": 18.706552706552706, "grad_norm": 0.16194890439510345, "learning_rate": 5.591707801569951e-07, "loss": 0.8158, "step": 16424 }, { "epoch": 18.70769230769231, "grad_norm": 0.1954636424779892, "learning_rate": 5.581912979872872e-07, "loss": 0.6078, "step": 16425 }, { "epoch": 18.708831908831907, "grad_norm": 0.20802627503871918, "learning_rate": 5.572126647488129e-07, "loss": 0.4431, "step": 16426 }, { "epoch": 18.70997150997151, "grad_norm": 0.19310083985328674, "learning_rate": 5.562348804755562e-07, "loss": 0.5396, "step": 16427 }, { "epoch": 18.711111111111112, "grad_norm": 0.19261355698108673, "learning_rate": 5.552579452014872e-07, "loss": 0.5911, "step": 16428 }, { "epoch": 18.71225071225071, "grad_norm": 0.1854696422815323, "learning_rate": 5.542818589605287e-07, "loss": 0.7231, "step": 16429 }, { "epoch": 18.713390313390313, "grad_norm": 0.23579032719135284, "learning_rate": 5.533066217865896e-07, "loss": 0.7758, "step": 16430 }, { "epoch": 18.714529914529916, "grad_norm": 0.16397906839847565, "learning_rate": 5.5233223371354e-07, "loss": 0.7157, "step": 16431 }, { "epoch": 18.715669515669514, "grad_norm": 0.19662268459796906, "learning_rate": 5.513586947752225e-07, "loss": 0.6233, "step": 16432 }, { "epoch": 18.716809116809117, "grad_norm": 0.22158661484718323, "learning_rate": 5.503860050054571e-07, "loss": 0.7254, "step": 16433 }, { "epoch": 18.71794871794872, "grad_norm": 0.17012442648410797, "learning_rate": 5.494141644380168e-07, "loss": 0.6851, "step": 16434 }, { "epoch": 18.719088319088318, "grad_norm": 0.19285432994365692, "learning_rate": 5.484431731066636e-07, "loss": 0.8449, "step": 16435 }, { "epoch": 18.72022792022792, "grad_norm": 0.20975373685359955, "learning_rate": 5.474730310451203e-07, "loss": 0.5786, "step": 16436 }, { "epoch": 18.721367521367522, "grad_norm": 0.18161320686340332, "learning_rate": 5.465037382870853e-07, "loss": 0.4212, "step": 16437 }, { "epoch": 18.72250712250712, "grad_norm": 0.20052970945835114, "learning_rate": 5.455352948662202e-07, "loss": 0.5507, "step": 16438 }, { "epoch": 18.723646723646723, "grad_norm": 0.30297166109085083, "learning_rate": 5.445677008161648e-07, "loss": 0.5945, "step": 16439 }, { "epoch": 18.724786324786326, "grad_norm": 0.15235814452171326, "learning_rate": 5.436009561705258e-07, "loss": 0.6678, "step": 16440 }, { "epoch": 18.725925925925925, "grad_norm": 0.15892931818962097, "learning_rate": 5.426350609628817e-07, "loss": 0.664, "step": 16441 }, { "epoch": 18.727065527065527, "grad_norm": 0.2232954055070877, "learning_rate": 5.416700152267778e-07, "loss": 0.6181, "step": 16442 }, { "epoch": 18.72820512820513, "grad_norm": 0.20584240555763245, "learning_rate": 5.407058189957376e-07, "loss": 0.4324, "step": 16443 }, { "epoch": 18.729344729344728, "grad_norm": 0.18428319692611694, "learning_rate": 5.39742472303248e-07, "loss": 0.5289, "step": 16444 }, { "epoch": 18.73048433048433, "grad_norm": 0.24277548491954803, "learning_rate": 5.387799751827682e-07, "loss": 0.544, "step": 16445 }, { "epoch": 18.731623931623933, "grad_norm": 0.2018672227859497, "learning_rate": 5.378183276677273e-07, "loss": 0.7585, "step": 16446 }, { "epoch": 18.73276353276353, "grad_norm": 0.18324849009513855, "learning_rate": 5.368575297915262e-07, "loss": 0.6266, "step": 16447 }, { "epoch": 18.733903133903134, "grad_norm": 0.17677345871925354, "learning_rate": 5.358975815875411e-07, "loss": 0.5818, "step": 16448 }, { "epoch": 18.735042735042736, "grad_norm": 0.1939893662929535, "learning_rate": 5.349384830891035e-07, "loss": 0.4949, "step": 16449 }, { "epoch": 18.736182336182335, "grad_norm": 0.18341238796710968, "learning_rate": 5.33980234329537e-07, "loss": 0.6179, "step": 16450 }, { "epoch": 18.737321937321937, "grad_norm": 0.22697743773460388, "learning_rate": 5.330228353421147e-07, "loss": 0.7024, "step": 16451 }, { "epoch": 18.73846153846154, "grad_norm": 0.18950581550598145, "learning_rate": 5.320662861600962e-07, "loss": 0.7092, "step": 16452 }, { "epoch": 18.739601139601138, "grad_norm": 0.1713227927684784, "learning_rate": 5.311105868166994e-07, "loss": 0.8411, "step": 16453 }, { "epoch": 18.74074074074074, "grad_norm": 0.15473496913909912, "learning_rate": 5.301557373451255e-07, "loss": 0.7369, "step": 16454 }, { "epoch": 18.741880341880343, "grad_norm": 0.2119859904050827, "learning_rate": 5.292017377785341e-07, "loss": 0.6594, "step": 16455 }, { "epoch": 18.74301994301994, "grad_norm": 0.18900814652442932, "learning_rate": 5.282485881500626e-07, "loss": 0.7711, "step": 16456 }, { "epoch": 18.744159544159544, "grad_norm": 0.2021235227584839, "learning_rate": 5.272962884928123e-07, "loss": 0.5514, "step": 16457 }, { "epoch": 18.745299145299146, "grad_norm": 0.1852232962846756, "learning_rate": 5.263448388398622e-07, "loss": 0.7873, "step": 16458 }, { "epoch": 18.746438746438745, "grad_norm": 0.20583923161029816, "learning_rate": 5.253942392242639e-07, "loss": 0.5979, "step": 16459 }, { "epoch": 18.747578347578347, "grad_norm": 0.21064208447933197, "learning_rate": 5.24444489679024e-07, "loss": 0.6498, "step": 16460 }, { "epoch": 18.74871794871795, "grad_norm": 0.227210134267807, "learning_rate": 5.234955902371358e-07, "loss": 0.5567, "step": 16461 }, { "epoch": 18.74985754985755, "grad_norm": 0.1994452029466629, "learning_rate": 5.225475409315561e-07, "loss": 0.4754, "step": 16462 }, { "epoch": 18.75099715099715, "grad_norm": 0.17867498099803925, "learning_rate": 5.216003417952142e-07, "loss": 0.92, "step": 16463 }, { "epoch": 18.752136752136753, "grad_norm": 0.19446507096290588, "learning_rate": 5.20653992861006e-07, "loss": 0.5061, "step": 16464 }, { "epoch": 18.753276353276352, "grad_norm": 0.2091256082057953, "learning_rate": 5.197084941618052e-07, "loss": 0.7883, "step": 16465 }, { "epoch": 18.754415954415954, "grad_norm": 0.18889965116977692, "learning_rate": 5.187638457304495e-07, "loss": 0.6264, "step": 16466 }, { "epoch": 18.755555555555556, "grad_norm": 0.17998528480529785, "learning_rate": 5.178200475997513e-07, "loss": 0.6503, "step": 16467 }, { "epoch": 18.756695156695155, "grad_norm": 0.21396511793136597, "learning_rate": 5.168770998024874e-07, "loss": 0.51, "step": 16468 }, { "epoch": 18.757834757834758, "grad_norm": 0.17627449333667755, "learning_rate": 5.159350023714094e-07, "loss": 0.6077, "step": 16469 }, { "epoch": 18.75897435897436, "grad_norm": 0.16725680232048035, "learning_rate": 5.149937553392409e-07, "loss": 0.6504, "step": 16470 }, { "epoch": 18.76011396011396, "grad_norm": 0.2420182079076767, "learning_rate": 5.140533587386753e-07, "loss": 0.8192, "step": 16471 }, { "epoch": 18.76125356125356, "grad_norm": 0.25417861342430115, "learning_rate": 5.131138126023699e-07, "loss": 0.62, "step": 16472 }, { "epoch": 18.762393162393163, "grad_norm": 0.2393779456615448, "learning_rate": 5.121751169629596e-07, "loss": 0.3951, "step": 16473 }, { "epoch": 18.763532763532762, "grad_norm": 0.17460033297538757, "learning_rate": 5.112372718530545e-07, "loss": 0.745, "step": 16474 }, { "epoch": 18.764672364672364, "grad_norm": 0.18978163599967957, "learning_rate": 5.103002773052201e-07, "loss": 0.7875, "step": 16475 }, { "epoch": 18.765811965811967, "grad_norm": 0.18757495284080505, "learning_rate": 5.093641333520055e-07, "loss": 0.7697, "step": 16476 }, { "epoch": 18.766951566951565, "grad_norm": 0.16876503825187683, "learning_rate": 5.084288400259235e-07, "loss": 0.9017, "step": 16477 }, { "epoch": 18.768091168091168, "grad_norm": 0.18147236108779907, "learning_rate": 5.074943973594621e-07, "loss": 0.9934, "step": 16478 }, { "epoch": 18.76923076923077, "grad_norm": 0.20209896564483643, "learning_rate": 5.065608053850701e-07, "loss": 0.7274, "step": 16479 }, { "epoch": 18.77037037037037, "grad_norm": 0.9826755523681641, "learning_rate": 5.056280641351829e-07, "loss": 0.6857, "step": 16480 }, { "epoch": 18.77150997150997, "grad_norm": 0.2170691341161728, "learning_rate": 5.046961736421885e-07, "loss": 0.6784, "step": 16481 }, { "epoch": 18.772649572649573, "grad_norm": 0.20924778282642365, "learning_rate": 5.037651339384636e-07, "loss": 0.6677, "step": 16482 }, { "epoch": 18.773789173789172, "grad_norm": 0.24490778148174286, "learning_rate": 5.02834945056338e-07, "loss": 0.6486, "step": 16483 }, { "epoch": 18.774928774928775, "grad_norm": 0.20174965262413025, "learning_rate": 5.019056070281192e-07, "loss": 0.4965, "step": 16484 }, { "epoch": 18.776068376068377, "grad_norm": 0.17393049597740173, "learning_rate": 5.009771198860925e-07, "loss": 0.8133, "step": 16485 }, { "epoch": 18.777207977207976, "grad_norm": 0.2102983295917511, "learning_rate": 5.000494836625013e-07, "loss": 0.6405, "step": 16486 }, { "epoch": 18.778347578347578, "grad_norm": 0.20062249898910522, "learning_rate": 4.991226983895675e-07, "loss": 0.7842, "step": 16487 }, { "epoch": 18.77948717948718, "grad_norm": 0.19834795594215393, "learning_rate": 4.981967640994789e-07, "loss": 0.7031, "step": 16488 }, { "epoch": 18.78062678062678, "grad_norm": 0.20465931296348572, "learning_rate": 4.972716808244016e-07, "loss": 0.5652, "step": 16489 }, { "epoch": 18.78176638176638, "grad_norm": 0.21190428733825684, "learning_rate": 4.96347448596457e-07, "loss": 0.5751, "step": 16490 }, { "epoch": 18.782905982905984, "grad_norm": 0.1887473165988922, "learning_rate": 4.954240674477501e-07, "loss": 0.8227, "step": 16491 }, { "epoch": 18.784045584045582, "grad_norm": 0.15295910835266113, "learning_rate": 4.945015374103551e-07, "loss": 0.7139, "step": 16492 }, { "epoch": 18.785185185185185, "grad_norm": 0.21601083874702454, "learning_rate": 4.935798585163132e-07, "loss": 0.5737, "step": 16493 }, { "epoch": 18.786324786324787, "grad_norm": 0.20243585109710693, "learning_rate": 4.926590307976347e-07, "loss": 0.5227, "step": 16494 }, { "epoch": 18.787464387464386, "grad_norm": 0.19312019646167755, "learning_rate": 4.917390542863026e-07, "loss": 0.5336, "step": 16495 }, { "epoch": 18.788603988603988, "grad_norm": 0.19280482828617096, "learning_rate": 4.908199290142718e-07, "loss": 0.6827, "step": 16496 }, { "epoch": 18.78974358974359, "grad_norm": 0.17543920874595642, "learning_rate": 4.899016550134638e-07, "loss": 0.8294, "step": 16497 }, { "epoch": 18.79088319088319, "grad_norm": 0.18286250531673431, "learning_rate": 4.889842323157757e-07, "loss": 0.763, "step": 16498 }, { "epoch": 18.79202279202279, "grad_norm": 0.18195152282714844, "learning_rate": 4.880676609530704e-07, "loss": 0.7088, "step": 16499 }, { "epoch": 18.793162393162394, "grad_norm": 0.1589564085006714, "learning_rate": 4.871519409571867e-07, "loss": 0.839, "step": 16500 }, { "epoch": 18.794301994301993, "grad_norm": 0.15771450102329254, "learning_rate": 4.862370723599214e-07, "loss": 0.7269, "step": 16501 }, { "epoch": 18.795441595441595, "grad_norm": 0.19341163337230682, "learning_rate": 4.853230551930599e-07, "loss": 0.8105, "step": 16502 }, { "epoch": 18.796581196581197, "grad_norm": 0.2219672054052353, "learning_rate": 4.844098894883409e-07, "loss": 0.3761, "step": 16503 }, { "epoch": 18.797720797720796, "grad_norm": 0.17847643792629242, "learning_rate": 4.834975752774889e-07, "loss": 0.4626, "step": 16504 }, { "epoch": 18.7988603988604, "grad_norm": 0.18879228830337524, "learning_rate": 4.825861125921816e-07, "loss": 0.6161, "step": 16505 }, { "epoch": 18.8, "grad_norm": 0.2137671262025833, "learning_rate": 4.816755014640851e-07, "loss": 0.6276, "step": 16506 }, { "epoch": 18.8011396011396, "grad_norm": 0.19195720553398132, "learning_rate": 4.807657419248212e-07, "loss": 0.786, "step": 16507 }, { "epoch": 18.802279202279202, "grad_norm": 0.19901008903980255, "learning_rate": 4.798568340059928e-07, "loss": 0.5904, "step": 16508 }, { "epoch": 18.803418803418804, "grad_norm": 0.17620739340782166, "learning_rate": 4.789487777391688e-07, "loss": 0.7698, "step": 16509 }, { "epoch": 18.804558404558403, "grad_norm": 0.23002579808235168, "learning_rate": 4.780415731558851e-07, "loss": 0.5707, "step": 16510 }, { "epoch": 18.805698005698005, "grad_norm": 0.1925451159477234, "learning_rate": 4.771352202876528e-07, "loss": 0.579, "step": 16511 }, { "epoch": 18.806837606837608, "grad_norm": 0.17723438143730164, "learning_rate": 4.7622971916595483e-07, "loss": 0.8273, "step": 16512 }, { "epoch": 18.807977207977206, "grad_norm": 0.21860025823116302, "learning_rate": 4.7532506982223835e-07, "loss": 0.7466, "step": 16513 }, { "epoch": 18.80911680911681, "grad_norm": 0.19728177785873413, "learning_rate": 4.7442127228792264e-07, "loss": 0.7042, "step": 16514 }, { "epoch": 18.81025641025641, "grad_norm": 0.17997390031814575, "learning_rate": 4.735183265944049e-07, "loss": 0.5867, "step": 16515 }, { "epoch": 18.81139601139601, "grad_norm": 0.22678206861019135, "learning_rate": 4.7261623277304324e-07, "loss": 0.6038, "step": 16516 }, { "epoch": 18.812535612535612, "grad_norm": 0.20823009312152863, "learning_rate": 4.71714990855171e-07, "loss": 0.7805, "step": 16517 }, { "epoch": 18.813675213675214, "grad_norm": 0.19987599551677704, "learning_rate": 4.708146008720854e-07, "loss": 0.5464, "step": 16518 }, { "epoch": 18.814814814814813, "grad_norm": 0.1835150420665741, "learning_rate": 4.699150628550697e-07, "loss": 0.7399, "step": 16519 }, { "epoch": 18.815954415954415, "grad_norm": 0.19724053144454956, "learning_rate": 4.6901637683536e-07, "loss": 0.5585, "step": 16520 }, { "epoch": 18.817094017094018, "grad_norm": 0.16801817715168, "learning_rate": 4.681185428441731e-07, "loss": 0.4347, "step": 16521 }, { "epoch": 18.81823361823362, "grad_norm": 0.18089032173156738, "learning_rate": 4.6722156091268956e-07, "loss": 0.8058, "step": 16522 }, { "epoch": 18.81937321937322, "grad_norm": 0.19650566577911377, "learning_rate": 4.663254310720705e-07, "loss": 0.6817, "step": 16523 }, { "epoch": 18.82051282051282, "grad_norm": 0.20789745450019836, "learning_rate": 4.6543015335343554e-07, "loss": 0.6766, "step": 16524 }, { "epoch": 18.821652421652423, "grad_norm": 0.21882633864879608, "learning_rate": 4.6453572778787925e-07, "loss": 0.8038, "step": 16525 }, { "epoch": 18.822792022792022, "grad_norm": 0.1772392988204956, "learning_rate": 4.6364215440647117e-07, "loss": 0.6001, "step": 16526 }, { "epoch": 18.823931623931625, "grad_norm": 0.26594915986061096, "learning_rate": 4.6274943324025046e-07, "loss": 0.5209, "step": 16527 }, { "epoch": 18.825071225071227, "grad_norm": 0.22183556854724884, "learning_rate": 4.6185756432021444e-07, "loss": 0.5135, "step": 16528 }, { "epoch": 18.826210826210826, "grad_norm": 0.17441076040267944, "learning_rate": 4.609665476773439e-07, "loss": 0.6903, "step": 16529 }, { "epoch": 18.827350427350428, "grad_norm": 0.16808576881885529, "learning_rate": 4.6007638334259193e-07, "loss": 0.5876, "step": 16530 }, { "epoch": 18.82849002849003, "grad_norm": 0.17738579213619232, "learning_rate": 4.591870713468727e-07, "loss": 0.6782, "step": 16531 }, { "epoch": 18.82962962962963, "grad_norm": 0.21790333092212677, "learning_rate": 4.582986117210697e-07, "loss": 0.5292, "step": 16532 }, { "epoch": 18.83076923076923, "grad_norm": 0.1882610023021698, "learning_rate": 4.5741100449605003e-07, "loss": 0.8497, "step": 16533 }, { "epoch": 18.831908831908834, "grad_norm": 0.24303381145000458, "learning_rate": 4.5652424970263895e-07, "loss": 0.5634, "step": 16534 }, { "epoch": 18.833048433048432, "grad_norm": 0.1699916273355484, "learning_rate": 4.5563834737163137e-07, "loss": 0.7074, "step": 16535 }, { "epoch": 18.834188034188035, "grad_norm": 0.20706459879875183, "learning_rate": 4.5475329753380256e-07, "loss": 0.6965, "step": 16536 }, { "epoch": 18.835327635327637, "grad_norm": 0.19681118428707123, "learning_rate": 4.538691002198919e-07, "loss": 0.6697, "step": 16537 }, { "epoch": 18.836467236467236, "grad_norm": 0.18500886857509613, "learning_rate": 4.529857554606137e-07, "loss": 0.726, "step": 16538 }, { "epoch": 18.837606837606838, "grad_norm": 0.20500552654266357, "learning_rate": 4.5210326328664067e-07, "loss": 0.6376, "step": 16539 }, { "epoch": 18.83874643874644, "grad_norm": 0.20892156660556793, "learning_rate": 4.51221623728626e-07, "loss": 0.7291, "step": 16540 }, { "epoch": 18.83988603988604, "grad_norm": 0.18950065970420837, "learning_rate": 4.5034083681719797e-07, "loss": 0.6232, "step": 16541 }, { "epoch": 18.84102564102564, "grad_norm": 0.16415338218212128, "learning_rate": 4.494609025829405e-07, "loss": 0.6148, "step": 16542 }, { "epoch": 18.842165242165244, "grad_norm": 0.24179856479167938, "learning_rate": 4.4858182105642356e-07, "loss": 0.7007, "step": 16543 }, { "epoch": 18.843304843304843, "grad_norm": 0.18492870032787323, "learning_rate": 4.477035922681755e-07, "loss": 0.5736, "step": 16544 }, { "epoch": 18.844444444444445, "grad_norm": 0.15735740959644318, "learning_rate": 4.468262162486997e-07, "loss": 0.7419, "step": 16545 }, { "epoch": 18.845584045584047, "grad_norm": 0.17746874690055847, "learning_rate": 4.4594969302847454e-07, "loss": 0.7512, "step": 16546 }, { "epoch": 18.846723646723646, "grad_norm": 0.2393515706062317, "learning_rate": 4.4507402263793686e-07, "loss": 0.7666, "step": 16547 }, { "epoch": 18.84786324786325, "grad_norm": 0.1816677302122116, "learning_rate": 4.441992051075039e-07, "loss": 0.8176, "step": 16548 }, { "epoch": 18.84900284900285, "grad_norm": 0.21298880875110626, "learning_rate": 4.4332524046756254e-07, "loss": 0.5793, "step": 16549 }, { "epoch": 18.85014245014245, "grad_norm": 0.1775524765253067, "learning_rate": 4.4245212874846895e-07, "loss": 0.7174, "step": 16550 }, { "epoch": 18.851282051282052, "grad_norm": 0.20027805864810944, "learning_rate": 4.4157986998054346e-07, "loss": 0.6773, "step": 16551 }, { "epoch": 18.852421652421654, "grad_norm": 0.18412970006465912, "learning_rate": 4.407084641940867e-07, "loss": 0.6431, "step": 16552 }, { "epoch": 18.853561253561253, "grad_norm": 0.19605478644371033, "learning_rate": 4.3983791141936346e-07, "loss": 0.4805, "step": 16553 }, { "epoch": 18.854700854700855, "grad_norm": 0.18446141481399536, "learning_rate": 4.3896821168660786e-07, "loss": 0.6969, "step": 16554 }, { "epoch": 18.855840455840458, "grad_norm": 0.23959897458553314, "learning_rate": 4.3809936502603177e-07, "loss": 0.6129, "step": 16555 }, { "epoch": 18.856980056980056, "grad_norm": 0.2176634520292282, "learning_rate": 4.3723137146780836e-07, "loss": 0.7465, "step": 16556 }, { "epoch": 18.85811965811966, "grad_norm": 0.25287431478500366, "learning_rate": 4.3636423104209126e-07, "loss": 0.517, "step": 16557 }, { "epoch": 18.85925925925926, "grad_norm": 0.2089501917362213, "learning_rate": 4.3549794377899244e-07, "loss": 0.6454, "step": 16558 }, { "epoch": 18.86039886039886, "grad_norm": 0.15604382753372192, "learning_rate": 4.346325097086018e-07, "loss": 0.7389, "step": 16559 }, { "epoch": 18.861538461538462, "grad_norm": 0.1590043604373932, "learning_rate": 4.337679288609786e-07, "loss": 0.5481, "step": 16560 }, { "epoch": 18.862678062678064, "grad_norm": 0.18959836661815643, "learning_rate": 4.329042012661544e-07, "loss": 0.823, "step": 16561 }, { "epoch": 18.863817663817663, "grad_norm": 0.19817541539669037, "learning_rate": 4.320413269541246e-07, "loss": 0.7384, "step": 16562 }, { "epoch": 18.864957264957265, "grad_norm": 0.21355056762695312, "learning_rate": 4.3117930595486246e-07, "loss": 0.441, "step": 16563 }, { "epoch": 18.866096866096868, "grad_norm": 0.25660574436187744, "learning_rate": 4.303181382983107e-07, "loss": 0.5913, "step": 16564 }, { "epoch": 18.867236467236467, "grad_norm": 0.19600361585617065, "learning_rate": 4.2945782401437317e-07, "loss": 0.6876, "step": 16565 }, { "epoch": 18.86837606837607, "grad_norm": 0.23927044868469238, "learning_rate": 4.285983631329371e-07, "loss": 0.5857, "step": 16566 }, { "epoch": 18.86951566951567, "grad_norm": 0.22985951602458954, "learning_rate": 4.2773975568384803e-07, "loss": 0.5603, "step": 16567 }, { "epoch": 18.87065527065527, "grad_norm": 0.15287210047245026, "learning_rate": 4.268820016969349e-07, "loss": 0.6285, "step": 16568 }, { "epoch": 18.871794871794872, "grad_norm": 0.19072280824184418, "learning_rate": 4.2602510120198504e-07, "loss": 0.6754, "step": 16569 }, { "epoch": 18.872934472934475, "grad_norm": 0.16646698117256165, "learning_rate": 4.2516905422876343e-07, "loss": 0.5203, "step": 16570 }, { "epoch": 18.874074074074073, "grad_norm": 0.20522910356521606, "learning_rate": 4.243138608069991e-07, "loss": 0.639, "step": 16571 }, { "epoch": 18.875213675213676, "grad_norm": 0.22436067461967468, "learning_rate": 4.2345952096639886e-07, "loss": 0.584, "step": 16572 }, { "epoch": 18.876353276353278, "grad_norm": 0.18598589301109314, "learning_rate": 4.226060347366334e-07, "loss": 0.4548, "step": 16573 }, { "epoch": 18.877492877492877, "grad_norm": 0.19741828739643097, "learning_rate": 4.2175340214735126e-07, "loss": 0.614, "step": 16574 }, { "epoch": 18.87863247863248, "grad_norm": 0.1856069564819336, "learning_rate": 4.2090162322816487e-07, "loss": 0.5637, "step": 16575 }, { "epoch": 18.87977207977208, "grad_norm": 0.16308023035526276, "learning_rate": 4.200506980086533e-07, "loss": 0.6801, "step": 16576 }, { "epoch": 18.88091168091168, "grad_norm": 0.16635264456272125, "learning_rate": 4.19200626518379e-07, "loss": 0.7788, "step": 16577 }, { "epoch": 18.882051282051282, "grad_norm": 0.22018620371818542, "learning_rate": 4.1835140878686286e-07, "loss": 0.5671, "step": 16578 }, { "epoch": 18.883190883190885, "grad_norm": 0.17615817487239838, "learning_rate": 4.175030448436062e-07, "loss": 0.6738, "step": 16579 }, { "epoch": 18.884330484330484, "grad_norm": 0.2155941277742386, "learning_rate": 4.1665553471806617e-07, "loss": 0.5148, "step": 16580 }, { "epoch": 18.885470085470086, "grad_norm": 0.2144429236650467, "learning_rate": 4.158088784396857e-07, "loss": 0.7315, "step": 16581 }, { "epoch": 18.886609686609688, "grad_norm": 0.2490888237953186, "learning_rate": 4.149630760378692e-07, "loss": 0.3359, "step": 16582 }, { "epoch": 18.887749287749287, "grad_norm": 0.20041395723819733, "learning_rate": 4.1411812754199864e-07, "loss": 0.6532, "step": 16583 }, { "epoch": 18.88888888888889, "grad_norm": 0.21015304327011108, "learning_rate": 4.1327403298140886e-07, "loss": 0.6025, "step": 16584 }, { "epoch": 18.89002849002849, "grad_norm": 0.2071191966533661, "learning_rate": 4.124307923854293e-07, "loss": 0.5838, "step": 16585 }, { "epoch": 18.89116809116809, "grad_norm": 0.20768992602825165, "learning_rate": 4.115884057833447e-07, "loss": 0.3889, "step": 16586 }, { "epoch": 18.892307692307693, "grad_norm": 0.17783516645431519, "learning_rate": 4.107468732044151e-07, "loss": 0.5076, "step": 16587 }, { "epoch": 18.893447293447295, "grad_norm": 0.20266379415988922, "learning_rate": 4.099061946778643e-07, "loss": 0.5666, "step": 16588 }, { "epoch": 18.894586894586894, "grad_norm": 0.1698126494884491, "learning_rate": 4.0906637023289675e-07, "loss": 0.8561, "step": 16589 }, { "epoch": 18.895726495726496, "grad_norm": 0.30874332785606384, "learning_rate": 4.0822739989867796e-07, "loss": 0.8681, "step": 16590 }, { "epoch": 18.8968660968661, "grad_norm": 0.21939298510551453, "learning_rate": 4.0738928370435135e-07, "loss": 0.5728, "step": 16591 }, { "epoch": 18.898005698005697, "grad_norm": 0.23421180248260498, "learning_rate": 4.065520216790214e-07, "loss": 0.5417, "step": 16592 }, { "epoch": 18.8991452991453, "grad_norm": 0.1682051122188568, "learning_rate": 4.0571561385177313e-07, "loss": 0.7409, "step": 16593 }, { "epoch": 18.900284900284902, "grad_norm": 0.2224481999874115, "learning_rate": 4.048800602516584e-07, "loss": 0.622, "step": 16594 }, { "epoch": 18.9014245014245, "grad_norm": 0.1950470209121704, "learning_rate": 4.0404536090769276e-07, "loss": 0.4211, "step": 16595 }, { "epoch": 18.902564102564103, "grad_norm": 0.25210875272750854, "learning_rate": 4.032115158488725e-07, "loss": 0.5964, "step": 16596 }, { "epoch": 18.903703703703705, "grad_norm": 0.20010331273078918, "learning_rate": 4.0237852510415787e-07, "loss": 0.8349, "step": 16597 }, { "epoch": 18.904843304843304, "grad_norm": 0.1533205658197403, "learning_rate": 4.015463887024812e-07, "loss": 0.5577, "step": 16598 }, { "epoch": 18.905982905982906, "grad_norm": 0.2169148027896881, "learning_rate": 4.0071510667274436e-07, "loss": 0.6079, "step": 16599 }, { "epoch": 18.90712250712251, "grad_norm": 0.16232413053512573, "learning_rate": 3.9988467904381875e-07, "loss": 0.7123, "step": 16600 }, { "epoch": 18.908262108262107, "grad_norm": 0.22531388700008392, "learning_rate": 3.990551058445507e-07, "loss": 0.49, "step": 16601 }, { "epoch": 18.90940170940171, "grad_norm": 0.19916465878486633, "learning_rate": 3.982263871037506e-07, "loss": 0.6874, "step": 16602 }, { "epoch": 18.910541310541312, "grad_norm": 0.21139632165431976, "learning_rate": 3.9739852285020364e-07, "loss": 0.7132, "step": 16603 }, { "epoch": 18.91168091168091, "grad_norm": 0.2011098563671112, "learning_rate": 3.9657151311266183e-07, "loss": 0.4991, "step": 16604 }, { "epoch": 18.912820512820513, "grad_norm": 0.18525491654872894, "learning_rate": 3.9574535791985224e-07, "loss": 0.8635, "step": 16605 }, { "epoch": 18.913960113960115, "grad_norm": 0.1907753348350525, "learning_rate": 3.9492005730046857e-07, "loss": 0.7534, "step": 16606 }, { "epoch": 18.915099715099714, "grad_norm": 0.20814749598503113, "learning_rate": 3.9409561128317394e-07, "loss": 0.5925, "step": 16607 }, { "epoch": 18.916239316239317, "grad_norm": 0.20650431513786316, "learning_rate": 3.9327201989660665e-07, "loss": 0.5924, "step": 16608 }, { "epoch": 18.91737891737892, "grad_norm": 0.1958417445421219, "learning_rate": 3.9244928316937434e-07, "loss": 0.8564, "step": 16609 }, { "epoch": 18.918518518518518, "grad_norm": 0.1731303185224533, "learning_rate": 3.91627401130043e-07, "loss": 0.4139, "step": 16610 }, { "epoch": 18.91965811965812, "grad_norm": 0.17427939176559448, "learning_rate": 3.9080637380717043e-07, "loss": 0.8021, "step": 16611 }, { "epoch": 18.920797720797722, "grad_norm": 0.15428948402404785, "learning_rate": 3.8998620122926434e-07, "loss": 0.6157, "step": 16612 }, { "epoch": 18.92193732193732, "grad_norm": 0.28682294487953186, "learning_rate": 3.8916688342481853e-07, "loss": 0.6556, "step": 16613 }, { "epoch": 18.923076923076923, "grad_norm": 0.22330935299396515, "learning_rate": 3.8834842042228536e-07, "loss": 0.6234, "step": 16614 }, { "epoch": 18.924216524216526, "grad_norm": 0.1999473124742508, "learning_rate": 3.8753081225009205e-07, "loss": 0.68, "step": 16615 }, { "epoch": 18.925356125356124, "grad_norm": 0.19231605529785156, "learning_rate": 3.8671405893664083e-07, "loss": 0.6057, "step": 16616 }, { "epoch": 18.926495726495727, "grad_norm": 0.19735603034496307, "learning_rate": 3.8589816051029514e-07, "loss": 0.7346, "step": 16617 }, { "epoch": 18.92763532763533, "grad_norm": 0.19661226868629456, "learning_rate": 3.8508311699939626e-07, "loss": 0.6048, "step": 16618 }, { "epoch": 18.928774928774928, "grad_norm": 0.19001834094524384, "learning_rate": 3.8426892843225205e-07, "loss": 0.6328, "step": 16619 }, { "epoch": 18.92991452991453, "grad_norm": 0.18638885021209717, "learning_rate": 3.8345559483714265e-07, "loss": 0.5536, "step": 16620 }, { "epoch": 18.931054131054132, "grad_norm": 0.19392959773540497, "learning_rate": 3.8264311624231497e-07, "loss": 0.4132, "step": 16621 }, { "epoch": 18.93219373219373, "grad_norm": 0.21309886872768402, "learning_rate": 3.8183149267599083e-07, "loss": 0.5396, "step": 16622 }, { "epoch": 18.933333333333334, "grad_norm": 0.19839166104793549, "learning_rate": 3.8102072416635604e-07, "loss": 0.7381, "step": 16623 }, { "epoch": 18.934472934472936, "grad_norm": 0.17323803901672363, "learning_rate": 3.8021081074157694e-07, "loss": 0.6108, "step": 16624 }, { "epoch": 18.935612535612535, "grad_norm": 0.22733454406261444, "learning_rate": 3.7940175242978105e-07, "loss": 0.6199, "step": 16625 }, { "epoch": 18.936752136752137, "grad_norm": 0.18306003510951996, "learning_rate": 3.785935492590681e-07, "loss": 0.7503, "step": 16626 }, { "epoch": 18.93789173789174, "grad_norm": 0.20709216594696045, "learning_rate": 3.7778620125751007e-07, "loss": 0.8921, "step": 16627 }, { "epoch": 18.939031339031338, "grad_norm": 0.19395828247070312, "learning_rate": 3.769797084531512e-07, "loss": 0.737, "step": 16628 }, { "epoch": 18.94017094017094, "grad_norm": 0.1846737116575241, "learning_rate": 3.761740708739969e-07, "loss": 0.551, "step": 16629 }, { "epoch": 18.941310541310543, "grad_norm": 0.20248864591121674, "learning_rate": 3.753692885480359e-07, "loss": 0.5887, "step": 16630 }, { "epoch": 18.94245014245014, "grad_norm": 0.1981780081987381, "learning_rate": 3.745653615032152e-07, "loss": 0.789, "step": 16631 }, { "epoch": 18.943589743589744, "grad_norm": 0.19893518090248108, "learning_rate": 3.7376228976746254e-07, "loss": 0.6667, "step": 16632 }, { "epoch": 18.944729344729346, "grad_norm": 0.20437784492969513, "learning_rate": 3.729600733686639e-07, "loss": 0.4562, "step": 16633 }, { "epoch": 18.945868945868945, "grad_norm": 0.1854739636182785, "learning_rate": 3.7215871233468866e-07, "loss": 0.7083, "step": 16634 }, { "epoch": 18.947008547008547, "grad_norm": 0.1786850392818451, "learning_rate": 3.713582066933702e-07, "loss": 0.5751, "step": 16635 }, { "epoch": 18.94814814814815, "grad_norm": 0.1907089501619339, "learning_rate": 3.705585564725084e-07, "loss": 0.7099, "step": 16636 }, { "epoch": 18.94928774928775, "grad_norm": 0.19957898557186127, "learning_rate": 3.6975976169987826e-07, "loss": 0.5763, "step": 16637 }, { "epoch": 18.95042735042735, "grad_norm": 0.1790960133075714, "learning_rate": 3.6896182240322705e-07, "loss": 0.6079, "step": 16638 }, { "epoch": 18.951566951566953, "grad_norm": 0.18549738824367523, "learning_rate": 3.6816473861026603e-07, "loss": 0.6331, "step": 16639 }, { "epoch": 18.95270655270655, "grad_norm": 0.2153673768043518, "learning_rate": 3.6736851034868125e-07, "loss": 0.5501, "step": 16640 }, { "epoch": 18.953846153846154, "grad_norm": 0.23373520374298096, "learning_rate": 3.6657313764612846e-07, "loss": 0.6644, "step": 16641 }, { "epoch": 18.954985754985756, "grad_norm": 0.19167746603488922, "learning_rate": 3.6577862053023273e-07, "loss": 0.7159, "step": 16642 }, { "epoch": 18.956125356125355, "grad_norm": 0.19353190064430237, "learning_rate": 3.649849590285914e-07, "loss": 0.7332, "step": 16643 }, { "epoch": 18.957264957264957, "grad_norm": 0.1788264811038971, "learning_rate": 3.6419215316876587e-07, "loss": 0.5823, "step": 16644 }, { "epoch": 18.95840455840456, "grad_norm": 0.16599953174591064, "learning_rate": 3.634002029782979e-07, "loss": 0.7679, "step": 16645 }, { "epoch": 18.95954415954416, "grad_norm": 0.20295533537864685, "learning_rate": 3.6260910848469055e-07, "loss": 0.7243, "step": 16646 }, { "epoch": 18.96068376068376, "grad_norm": 0.18700546026229858, "learning_rate": 3.6181886971542453e-07, "loss": 0.6274, "step": 16647 }, { "epoch": 18.961823361823363, "grad_norm": 0.1797284036874771, "learning_rate": 3.610294866979419e-07, "loss": 0.7285, "step": 16648 }, { "epoch": 18.962962962962962, "grad_norm": 0.20255152881145477, "learning_rate": 3.602409594596623e-07, "loss": 0.5016, "step": 16649 }, { "epoch": 18.964102564102564, "grad_norm": 0.1935027539730072, "learning_rate": 3.59453288027975e-07, "loss": 0.6789, "step": 16650 }, { "epoch": 18.965242165242167, "grad_norm": 0.17068713903427124, "learning_rate": 3.5866647243023576e-07, "loss": 0.7219, "step": 16651 }, { "epoch": 18.966381766381765, "grad_norm": 0.20726989209651947, "learning_rate": 3.5788051269377565e-07, "loss": 0.6189, "step": 16652 }, { "epoch": 18.967521367521368, "grad_norm": 0.17680394649505615, "learning_rate": 3.5709540884588946e-07, "loss": 0.6625, "step": 16653 }, { "epoch": 18.96866096866097, "grad_norm": 0.2064637690782547, "learning_rate": 3.5631116091384973e-07, "loss": 0.65, "step": 16654 }, { "epoch": 18.96980056980057, "grad_norm": 0.18331217765808105, "learning_rate": 3.5552776892489313e-07, "loss": 0.7459, "step": 16655 }, { "epoch": 18.97094017094017, "grad_norm": 0.1848146915435791, "learning_rate": 3.547452329062284e-07, "loss": 0.538, "step": 16656 }, { "epoch": 18.972079772079773, "grad_norm": 0.19916410744190216, "learning_rate": 3.5396355288503936e-07, "loss": 0.8061, "step": 16657 }, { "epoch": 18.973219373219372, "grad_norm": 0.23589324951171875, "learning_rate": 3.5318272888847095e-07, "loss": 0.6315, "step": 16658 }, { "epoch": 18.974358974358974, "grad_norm": 0.172958105802536, "learning_rate": 3.524027609436459e-07, "loss": 0.7962, "step": 16659 }, { "epoch": 18.975498575498577, "grad_norm": 0.2174021601676941, "learning_rate": 3.516236490776537e-07, "loss": 0.773, "step": 16660 }, { "epoch": 18.976638176638176, "grad_norm": 0.15929563343524933, "learning_rate": 3.508453933175532e-07, "loss": 0.78, "step": 16661 }, { "epoch": 18.977777777777778, "grad_norm": 0.20452843606472015, "learning_rate": 3.500679936903811e-07, "loss": 0.642, "step": 16662 }, { "epoch": 18.97891737891738, "grad_norm": 0.31728649139404297, "learning_rate": 3.492914502231326e-07, "loss": 0.4238, "step": 16663 }, { "epoch": 18.98005698005698, "grad_norm": 0.20840921998023987, "learning_rate": 3.485157629427832e-07, "loss": 0.8743, "step": 16664 }, { "epoch": 18.98119658119658, "grad_norm": 0.1814262568950653, "learning_rate": 3.4774093187627253e-07, "loss": 0.5862, "step": 16665 }, { "epoch": 18.982336182336184, "grad_norm": 0.2093941867351532, "learning_rate": 3.4696695705051796e-07, "loss": 0.774, "step": 16666 }, { "epoch": 18.983475783475782, "grad_norm": 0.19571354985237122, "learning_rate": 3.461938384923924e-07, "loss": 0.8105, "step": 16667 }, { "epoch": 18.984615384615385, "grad_norm": 0.1960640549659729, "learning_rate": 3.4542157622875213e-07, "loss": 0.7862, "step": 16668 }, { "epoch": 18.985754985754987, "grad_norm": 0.16605553030967712, "learning_rate": 3.44650170286423e-07, "loss": 0.843, "step": 16669 }, { "epoch": 18.986894586894586, "grad_norm": 0.18908195197582245, "learning_rate": 3.4387962069219737e-07, "loss": 0.676, "step": 16670 }, { "epoch": 18.988034188034188, "grad_norm": 0.20927131175994873, "learning_rate": 3.4310992747283444e-07, "loss": 0.6447, "step": 16671 }, { "epoch": 18.98917378917379, "grad_norm": 0.2044903039932251, "learning_rate": 3.4234109065506835e-07, "loss": 0.7814, "step": 16672 }, { "epoch": 18.99031339031339, "grad_norm": 0.19696706533432007, "learning_rate": 3.415731102656083e-07, "loss": 0.8497, "step": 16673 }, { "epoch": 18.99145299145299, "grad_norm": 0.2360168695449829, "learning_rate": 3.408059863311247e-07, "loss": 0.5818, "step": 16674 }, { "epoch": 18.992592592592594, "grad_norm": 0.2683320641517639, "learning_rate": 3.400397188782628e-07, "loss": 0.411, "step": 16675 }, { "epoch": 18.993732193732193, "grad_norm": 0.1961396485567093, "learning_rate": 3.392743079336347e-07, "loss": 0.832, "step": 16676 }, { "epoch": 18.994871794871795, "grad_norm": 0.1874992400407791, "learning_rate": 3.385097535238302e-07, "loss": 0.5999, "step": 16677 }, { "epoch": 18.996011396011397, "grad_norm": 0.1800270676612854, "learning_rate": 3.377460556754003e-07, "loss": 0.3464, "step": 16678 }, { "epoch": 18.997150997150996, "grad_norm": 0.22845672070980072, "learning_rate": 3.369832144148682e-07, "loss": 0.718, "step": 16679 }, { "epoch": 18.9982905982906, "grad_norm": 0.17252226173877716, "learning_rate": 3.3622122976873506e-07, "loss": 0.7316, "step": 16680 }, { "epoch": 18.9994301994302, "grad_norm": 0.20764583349227905, "learning_rate": 3.3546010176346564e-07, "loss": 0.467, "step": 16681 }, { "epoch": 19.0, "grad_norm": 0.37283486127853394, "learning_rate": 3.3469983042549167e-07, "loss": 0.7267, "step": 16682 }, { "epoch": 19.001139601139602, "grad_norm": 0.16425985097885132, "learning_rate": 3.3394041578122257e-07, "loss": 0.9197, "step": 16683 }, { "epoch": 19.0022792022792, "grad_norm": 0.1914873868227005, "learning_rate": 3.3318185785703724e-07, "loss": 0.7005, "step": 16684 }, { "epoch": 19.003418803418803, "grad_norm": 0.24253977835178375, "learning_rate": 3.324241566792785e-07, "loss": 0.3597, "step": 16685 }, { "epoch": 19.004558404558406, "grad_norm": 0.19261282682418823, "learning_rate": 3.3166731227426693e-07, "loss": 0.733, "step": 16686 }, { "epoch": 19.005698005698004, "grad_norm": 0.19126534461975098, "learning_rate": 3.3091132466828435e-07, "loss": 0.5217, "step": 16687 }, { "epoch": 19.006837606837607, "grad_norm": 0.20644229650497437, "learning_rate": 3.3015619388759576e-07, "loss": 0.7747, "step": 16688 }, { "epoch": 19.00797720797721, "grad_norm": 0.22872857749462128, "learning_rate": 3.2940191995842196e-07, "loss": 0.5749, "step": 16689 }, { "epoch": 19.009116809116808, "grad_norm": 0.19443026185035706, "learning_rate": 3.286485029069641e-07, "loss": 0.6413, "step": 16690 }, { "epoch": 19.01025641025641, "grad_norm": 0.20380333065986633, "learning_rate": 3.278959427593903e-07, "loss": 0.8625, "step": 16691 }, { "epoch": 19.011396011396013, "grad_norm": 0.22462737560272217, "learning_rate": 3.271442395418406e-07, "loss": 0.6351, "step": 16692 }, { "epoch": 19.01253561253561, "grad_norm": 0.1561097949743271, "learning_rate": 3.263933932804192e-07, "loss": 0.7111, "step": 16693 }, { "epoch": 19.013675213675214, "grad_norm": 0.2274744063615799, "learning_rate": 3.25643404001208e-07, "loss": 0.4553, "step": 16694 }, { "epoch": 19.014814814814816, "grad_norm": 0.18747691810131073, "learning_rate": 3.248942717302583e-07, "loss": 0.4617, "step": 16695 }, { "epoch": 19.015954415954415, "grad_norm": 0.16808752715587616, "learning_rate": 3.2414599649358837e-07, "loss": 0.5426, "step": 16696 }, { "epoch": 19.017094017094017, "grad_norm": 0.19207409024238586, "learning_rate": 3.233985783171828e-07, "loss": 0.6627, "step": 16697 }, { "epoch": 19.01823361823362, "grad_norm": 0.17813901603221893, "learning_rate": 3.2265201722700976e-07, "loss": 0.7791, "step": 16698 }, { "epoch": 19.019373219373218, "grad_norm": 0.17404155433177948, "learning_rate": 3.2190631324899303e-07, "loss": 0.5857, "step": 16699 }, { "epoch": 19.02051282051282, "grad_norm": 0.1948934644460678, "learning_rate": 3.21161466409034e-07, "loss": 0.7377, "step": 16700 }, { "epoch": 19.021652421652423, "grad_norm": 0.16898861527442932, "learning_rate": 3.204174767330065e-07, "loss": 0.5093, "step": 16701 }, { "epoch": 19.02279202279202, "grad_norm": 0.1974274218082428, "learning_rate": 3.1967434424674815e-07, "loss": 0.5356, "step": 16702 }, { "epoch": 19.023931623931624, "grad_norm": 0.17614935338497162, "learning_rate": 3.1893206897607164e-07, "loss": 0.5183, "step": 16703 }, { "epoch": 19.025071225071226, "grad_norm": 0.20292778313159943, "learning_rate": 3.1819065094675635e-07, "loss": 0.77, "step": 16704 }, { "epoch": 19.026210826210825, "grad_norm": 0.20143486559391022, "learning_rate": 3.1745009018455396e-07, "loss": 0.4547, "step": 16705 }, { "epoch": 19.027350427350427, "grad_norm": 0.15823499858379364, "learning_rate": 3.1671038671518825e-07, "loss": 0.7027, "step": 16706 }, { "epoch": 19.02849002849003, "grad_norm": 0.19536300003528595, "learning_rate": 3.159715405643526e-07, "loss": 0.7036, "step": 16707 }, { "epoch": 19.02962962962963, "grad_norm": 0.15541143715381622, "learning_rate": 3.152335517577043e-07, "loss": 0.8638, "step": 16708 }, { "epoch": 19.03076923076923, "grad_norm": 0.22746704518795013, "learning_rate": 3.144964203208783e-07, "loss": 0.6358, "step": 16709 }, { "epoch": 19.031908831908833, "grad_norm": 0.18629853427410126, "learning_rate": 3.137601462794765e-07, "loss": 0.7183, "step": 16710 }, { "epoch": 19.03304843304843, "grad_norm": 0.2248799353837967, "learning_rate": 3.1302472965907547e-07, "loss": 0.4789, "step": 16711 }, { "epoch": 19.034188034188034, "grad_norm": 0.2067008763551712, "learning_rate": 3.122901704852133e-07, "loss": 0.5447, "step": 16712 }, { "epoch": 19.035327635327636, "grad_norm": 0.24397136270999908, "learning_rate": 3.1155646878340274e-07, "loss": 0.716, "step": 16713 }, { "epoch": 19.036467236467235, "grad_norm": 0.1896265745162964, "learning_rate": 3.108236245791318e-07, "loss": 0.758, "step": 16714 }, { "epoch": 19.037606837606837, "grad_norm": 0.20399513840675354, "learning_rate": 3.1009163789785244e-07, "loss": 0.7326, "step": 16715 }, { "epoch": 19.03874643874644, "grad_norm": 0.20660154521465302, "learning_rate": 3.0936050876498314e-07, "loss": 0.6979, "step": 16716 }, { "epoch": 19.03988603988604, "grad_norm": 0.16227521002292633, "learning_rate": 3.0863023720592577e-07, "loss": 0.573, "step": 16717 }, { "epoch": 19.04102564102564, "grad_norm": 0.18760254979133606, "learning_rate": 3.079008232460462e-07, "loss": 0.7601, "step": 16718 }, { "epoch": 19.042165242165243, "grad_norm": 0.20014169812202454, "learning_rate": 3.071722669106686e-07, "loss": 0.8153, "step": 16719 }, { "epoch": 19.043304843304842, "grad_norm": 0.17412996292114258, "learning_rate": 3.0644456822510603e-07, "loss": 0.4703, "step": 16720 }, { "epoch": 19.044444444444444, "grad_norm": 0.2562756836414337, "learning_rate": 3.0571772721462997e-07, "loss": 0.8207, "step": 16721 }, { "epoch": 19.045584045584047, "grad_norm": 0.18322330713272095, "learning_rate": 3.049917439044897e-07, "loss": 0.6766, "step": 16722 }, { "epoch": 19.046723646723645, "grad_norm": 0.21178914606571198, "learning_rate": 3.0426661831989557e-07, "loss": 0.7312, "step": 16723 }, { "epoch": 19.047863247863248, "grad_norm": 0.18006394803524017, "learning_rate": 3.0354235048603574e-07, "loss": 0.6363, "step": 16724 }, { "epoch": 19.04900284900285, "grad_norm": 0.15349280834197998, "learning_rate": 3.028189404280651e-07, "loss": 0.6743, "step": 16725 }, { "epoch": 19.05014245014245, "grad_norm": 0.19188684225082397, "learning_rate": 3.0209638817111364e-07, "loss": 0.6271, "step": 16726 }, { "epoch": 19.05128205128205, "grad_norm": 0.22258351743221283, "learning_rate": 3.013746937402667e-07, "loss": 0.6821, "step": 16727 }, { "epoch": 19.052421652421653, "grad_norm": 0.16944876313209534, "learning_rate": 3.006538571606043e-07, "loss": 0.538, "step": 16728 }, { "epoch": 19.053561253561252, "grad_norm": 0.18892964720726013, "learning_rate": 2.999338784571565e-07, "loss": 0.5521, "step": 16729 }, { "epoch": 19.054700854700855, "grad_norm": 0.16914568841457367, "learning_rate": 2.9921475765492814e-07, "loss": 0.5762, "step": 16730 }, { "epoch": 19.055840455840457, "grad_norm": 0.2065064162015915, "learning_rate": 2.984964947788993e-07, "loss": 0.651, "step": 16731 }, { "epoch": 19.056980056980056, "grad_norm": 0.17748785018920898, "learning_rate": 2.977790898540167e-07, "loss": 0.7335, "step": 16732 }, { "epoch": 19.058119658119658, "grad_norm": 0.1709255576133728, "learning_rate": 2.970625429051993e-07, "loss": 0.6767, "step": 16733 }, { "epoch": 19.05925925925926, "grad_norm": 0.17027568817138672, "learning_rate": 2.9634685395733e-07, "loss": 0.8709, "step": 16734 }, { "epoch": 19.06039886039886, "grad_norm": 0.2431151568889618, "learning_rate": 2.9563202303527213e-07, "loss": 0.5081, "step": 16735 }, { "epoch": 19.06153846153846, "grad_norm": 0.1895310878753662, "learning_rate": 2.9491805016385044e-07, "loss": 0.7936, "step": 16736 }, { "epoch": 19.062678062678064, "grad_norm": 0.15086257457733154, "learning_rate": 2.9420493536786443e-07, "loss": 0.6619, "step": 16737 }, { "epoch": 19.063817663817662, "grad_norm": 0.18453842401504517, "learning_rate": 2.934926786720832e-07, "loss": 0.6104, "step": 16738 }, { "epoch": 19.064957264957265, "grad_norm": 0.15260860323905945, "learning_rate": 2.927812801012425e-07, "loss": 0.6298, "step": 16739 }, { "epoch": 19.066096866096867, "grad_norm": 0.22402848303318024, "learning_rate": 2.920707396800532e-07, "loss": 0.5562, "step": 16740 }, { "epoch": 19.067236467236466, "grad_norm": 0.21188044548034668, "learning_rate": 2.913610574331954e-07, "loss": 0.646, "step": 16741 }, { "epoch": 19.068376068376068, "grad_norm": 0.22330155968666077, "learning_rate": 2.906522333853162e-07, "loss": 0.5249, "step": 16742 }, { "epoch": 19.06951566951567, "grad_norm": 0.20336827635765076, "learning_rate": 2.8994426756103755e-07, "loss": 0.6334, "step": 16743 }, { "epoch": 19.07065527065527, "grad_norm": 0.1761748343706131, "learning_rate": 2.892371599849453e-07, "loss": 0.7732, "step": 16744 }, { "epoch": 19.07179487179487, "grad_norm": 0.16703934967517853, "learning_rate": 2.885309106816031e-07, "loss": 0.8338, "step": 16745 }, { "epoch": 19.072934472934474, "grad_norm": 0.18746022880077362, "learning_rate": 2.8782551967553863e-07, "loss": 0.8131, "step": 16746 }, { "epoch": 19.074074074074073, "grad_norm": 0.20968621969223022, "learning_rate": 2.8712098699125166e-07, "loss": 0.5618, "step": 16747 }, { "epoch": 19.075213675213675, "grad_norm": 0.25017520785331726, "learning_rate": 2.864173126532144e-07, "loss": 0.6002, "step": 16748 }, { "epoch": 19.076353276353277, "grad_norm": 0.18701662123203278, "learning_rate": 2.857144966858655e-07, "loss": 0.4895, "step": 16749 }, { "epoch": 19.077492877492876, "grad_norm": 0.20848360657691956, "learning_rate": 2.85012539113616e-07, "loss": 0.6475, "step": 16750 }, { "epoch": 19.07863247863248, "grad_norm": 0.2461550384759903, "learning_rate": 2.843114399608493e-07, "loss": 0.4977, "step": 16751 }, { "epoch": 19.07977207977208, "grad_norm": 0.18037942051887512, "learning_rate": 2.8361119925191527e-07, "loss": 0.7748, "step": 16752 }, { "epoch": 19.08091168091168, "grad_norm": 0.18490144610404968, "learning_rate": 2.8291181701113336e-07, "loss": 0.5969, "step": 16753 }, { "epoch": 19.08205128205128, "grad_norm": 0.1518007069826126, "learning_rate": 2.822132932627952e-07, "loss": 0.672, "step": 16754 }, { "epoch": 19.083190883190884, "grad_norm": 0.16539475321769714, "learning_rate": 2.8151562803116485e-07, "loss": 0.7403, "step": 16755 }, { "epoch": 19.084330484330483, "grad_norm": 0.19967080652713776, "learning_rate": 2.8081882134047554e-07, "loss": 0.7216, "step": 16756 }, { "epoch": 19.085470085470085, "grad_norm": 0.17736263573169708, "learning_rate": 2.801228732149247e-07, "loss": 0.5971, "step": 16757 }, { "epoch": 19.086609686609687, "grad_norm": 0.1553797423839569, "learning_rate": 2.7942778367868463e-07, "loss": 0.7746, "step": 16758 }, { "epoch": 19.087749287749286, "grad_norm": 0.2018582671880722, "learning_rate": 2.7873355275589985e-07, "loss": 0.7954, "step": 16759 }, { "epoch": 19.08888888888889, "grad_norm": 0.2148551046848297, "learning_rate": 2.780401804706845e-07, "loss": 0.7441, "step": 16760 }, { "epoch": 19.09002849002849, "grad_norm": 0.2104743868112564, "learning_rate": 2.773476668471164e-07, "loss": 0.6336, "step": 16761 }, { "epoch": 19.09116809116809, "grad_norm": 0.26149600744247437, "learning_rate": 2.766560119092515e-07, "loss": 0.3598, "step": 16762 }, { "epoch": 19.092307692307692, "grad_norm": 0.1936904639005661, "learning_rate": 2.7596521568111487e-07, "loss": 0.6437, "step": 16763 }, { "epoch": 19.093447293447294, "grad_norm": 0.18678182363510132, "learning_rate": 2.7527527818669854e-07, "loss": 0.753, "step": 16764 }, { "epoch": 19.094586894586893, "grad_norm": 0.196497842669487, "learning_rate": 2.7458619944996376e-07, "loss": 0.7477, "step": 16765 }, { "epoch": 19.095726495726495, "grad_norm": 0.16993390023708344, "learning_rate": 2.738979794948443e-07, "loss": 0.7557, "step": 16766 }, { "epoch": 19.096866096866098, "grad_norm": 0.1916753053665161, "learning_rate": 2.73210618345246e-07, "loss": 0.6982, "step": 16767 }, { "epoch": 19.098005698005696, "grad_norm": 0.18876004219055176, "learning_rate": 2.725241160250414e-07, "loss": 0.8045, "step": 16768 }, { "epoch": 19.0991452991453, "grad_norm": 0.1865483969449997, "learning_rate": 2.718384725580753e-07, "loss": 0.7067, "step": 16769 }, { "epoch": 19.1002849002849, "grad_norm": 0.15715231001377106, "learning_rate": 2.7115368796816196e-07, "loss": 0.6819, "step": 16770 }, { "epoch": 19.1014245014245, "grad_norm": 0.23827806115150452, "learning_rate": 2.7046976227908803e-07, "loss": 0.6992, "step": 16771 }, { "epoch": 19.102564102564102, "grad_norm": 0.1858184039592743, "learning_rate": 2.697866955146011e-07, "loss": 0.686, "step": 16772 }, { "epoch": 19.103703703703705, "grad_norm": 0.20208309590816498, "learning_rate": 2.691044876984322e-07, "loss": 0.3459, "step": 16773 }, { "epoch": 19.104843304843303, "grad_norm": 0.20424501597881317, "learning_rate": 2.6842313885427626e-07, "loss": 0.5818, "step": 16774 }, { "epoch": 19.105982905982906, "grad_norm": 0.20619229972362518, "learning_rate": 2.6774264900579494e-07, "loss": 0.5573, "step": 16775 }, { "epoch": 19.107122507122508, "grad_norm": 0.18667425215244293, "learning_rate": 2.670630181766276e-07, "loss": 0.7136, "step": 16776 }, { "epoch": 19.108262108262107, "grad_norm": 0.21457980573177338, "learning_rate": 2.6638424639037486e-07, "loss": 0.5283, "step": 16777 }, { "epoch": 19.10940170940171, "grad_norm": 0.21677808463573456, "learning_rate": 2.657063336706178e-07, "loss": 0.671, "step": 16778 }, { "epoch": 19.11054131054131, "grad_norm": 0.25177788734436035, "learning_rate": 2.65029280040896e-07, "loss": 0.5678, "step": 16779 }, { "epoch": 19.11168091168091, "grad_norm": 0.19061064720153809, "learning_rate": 2.643530855247323e-07, "loss": 0.5776, "step": 16780 }, { "epoch": 19.112820512820512, "grad_norm": 0.19353726506233215, "learning_rate": 2.6367775014560505e-07, "loss": 0.7864, "step": 16781 }, { "epoch": 19.113960113960115, "grad_norm": 0.21912068128585815, "learning_rate": 2.6300327392697886e-07, "loss": 0.4603, "step": 16782 }, { "epoch": 19.115099715099714, "grad_norm": 0.17616406083106995, "learning_rate": 2.6232965689227395e-07, "loss": 0.8158, "step": 16783 }, { "epoch": 19.116239316239316, "grad_norm": 0.21496127545833588, "learning_rate": 2.6165689906488823e-07, "loss": 0.6297, "step": 16784 }, { "epoch": 19.117378917378918, "grad_norm": 0.25769445300102234, "learning_rate": 2.609850004681891e-07, "loss": 0.4489, "step": 16785 }, { "epoch": 19.118518518518517, "grad_norm": 0.20533311367034912, "learning_rate": 2.603139611255162e-07, "loss": 0.51, "step": 16786 }, { "epoch": 19.11965811965812, "grad_norm": 0.1962464451789856, "learning_rate": 2.596437810601704e-07, "loss": 0.7582, "step": 16787 }, { "epoch": 19.12079772079772, "grad_norm": 0.16168759763240814, "learning_rate": 2.5897446029543305e-07, "loss": 0.7717, "step": 16788 }, { "epoch": 19.12193732193732, "grad_norm": 0.21514791250228882, "learning_rate": 2.583059988545522e-07, "loss": 0.5402, "step": 16789 }, { "epoch": 19.123076923076923, "grad_norm": 0.1739581674337387, "learning_rate": 2.576383967607454e-07, "loss": 0.9189, "step": 16790 }, { "epoch": 19.124216524216525, "grad_norm": 0.2398281991481781, "learning_rate": 2.5697165403719694e-07, "loss": 0.531, "step": 16791 }, { "epoch": 19.125356125356124, "grad_norm": 0.21551766991615295, "learning_rate": 2.5630577070706595e-07, "loss": 0.5487, "step": 16792 }, { "epoch": 19.126495726495726, "grad_norm": 0.19920246303081512, "learning_rate": 2.556407467934813e-07, "loss": 0.5966, "step": 16793 }, { "epoch": 19.12763532763533, "grad_norm": 0.1649751216173172, "learning_rate": 2.54976582319541e-07, "loss": 0.8289, "step": 16794 }, { "epoch": 19.128774928774927, "grad_norm": 0.23176871240139008, "learning_rate": 2.543132773083129e-07, "loss": 0.694, "step": 16795 }, { "epoch": 19.12991452991453, "grad_norm": 0.20490753650665283, "learning_rate": 2.53650831782834e-07, "loss": 0.6516, "step": 16796 }, { "epoch": 19.13105413105413, "grad_norm": 0.2096950113773346, "learning_rate": 2.529892457661165e-07, "loss": 0.6214, "step": 16797 }, { "epoch": 19.13219373219373, "grad_norm": 0.20809367299079895, "learning_rate": 2.5232851928113644e-07, "loss": 0.6129, "step": 16798 }, { "epoch": 19.133333333333333, "grad_norm": 0.15231585502624512, "learning_rate": 2.516686523508449e-07, "loss": 0.6113, "step": 16799 }, { "epoch": 19.134472934472935, "grad_norm": 0.2246239334344864, "learning_rate": 2.510096449981569e-07, "loss": 0.4266, "step": 16800 }, { "epoch": 19.135612535612534, "grad_norm": 0.18098628520965576, "learning_rate": 2.503514972459653e-07, "loss": 0.6132, "step": 16801 }, { "epoch": 19.136752136752136, "grad_norm": 0.18563774228096008, "learning_rate": 2.496942091171267e-07, "loss": 0.6306, "step": 16802 }, { "epoch": 19.13789173789174, "grad_norm": 0.23148155212402344, "learning_rate": 2.490377806344729e-07, "loss": 0.5569, "step": 16803 }, { "epoch": 19.139031339031337, "grad_norm": 0.1684509962797165, "learning_rate": 2.4838221182080233e-07, "loss": 0.7662, "step": 16804 }, { "epoch": 19.14017094017094, "grad_norm": 0.20798934996128082, "learning_rate": 2.477275026988829e-07, "loss": 0.7302, "step": 16805 }, { "epoch": 19.141310541310542, "grad_norm": 0.1976047158241272, "learning_rate": 2.470736532914575e-07, "loss": 0.5272, "step": 16806 }, { "epoch": 19.14245014245014, "grad_norm": 0.16313964128494263, "learning_rate": 2.4642066362123575e-07, "loss": 0.9377, "step": 16807 }, { "epoch": 19.143589743589743, "grad_norm": 0.1853342205286026, "learning_rate": 2.457685337108995e-07, "loss": 0.6412, "step": 16808 }, { "epoch": 19.144729344729345, "grad_norm": 0.2047988474369049, "learning_rate": 2.4511726358309175e-07, "loss": 0.5575, "step": 16809 }, { "epoch": 19.145868945868944, "grad_norm": 0.1794224977493286, "learning_rate": 2.4446685326043885e-07, "loss": 0.3465, "step": 16810 }, { "epoch": 19.147008547008546, "grad_norm": 0.201945498585701, "learning_rate": 2.4381730276553104e-07, "loss": 0.5762, "step": 16811 }, { "epoch": 19.14814814814815, "grad_norm": 0.21642912924289703, "learning_rate": 2.431686121209281e-07, "loss": 0.5324, "step": 16812 }, { "epoch": 19.149287749287748, "grad_norm": 0.19446443021297455, "learning_rate": 2.42520781349162e-07, "loss": 0.825, "step": 16813 }, { "epoch": 19.15042735042735, "grad_norm": 0.19116437435150146, "learning_rate": 2.418738104727286e-07, "loss": 0.4555, "step": 16814 }, { "epoch": 19.151566951566952, "grad_norm": 0.1666492372751236, "learning_rate": 2.412276995141072e-07, "loss": 0.8135, "step": 16815 }, { "epoch": 19.15270655270655, "grad_norm": 0.20728255808353424, "learning_rate": 2.4058244849573253e-07, "loss": 0.5597, "step": 16816 }, { "epoch": 19.153846153846153, "grad_norm": 0.15866397321224213, "learning_rate": 2.399380574400173e-07, "loss": 0.6154, "step": 16817 }, { "epoch": 19.154985754985756, "grad_norm": 0.19906434416770935, "learning_rate": 2.3929452636934356e-07, "loss": 0.6288, "step": 16818 }, { "epoch": 19.156125356125354, "grad_norm": 0.19023805856704712, "learning_rate": 2.3865185530606296e-07, "loss": 0.6966, "step": 16819 }, { "epoch": 19.157264957264957, "grad_norm": 0.19843755662441254, "learning_rate": 2.3801004427250196e-07, "loss": 0.7683, "step": 16820 }, { "epoch": 19.15840455840456, "grad_norm": 0.18582892417907715, "learning_rate": 2.373690932909428e-07, "loss": 0.4512, "step": 16821 }, { "epoch": 19.159544159544158, "grad_norm": 0.2121107578277588, "learning_rate": 2.3672900238365382e-07, "loss": 0.6215, "step": 16822 }, { "epoch": 19.16068376068376, "grad_norm": 0.19985079765319824, "learning_rate": 2.3608977157286716e-07, "loss": 0.7261, "step": 16823 }, { "epoch": 19.161823361823362, "grad_norm": 0.21276366710662842, "learning_rate": 2.3545140088078454e-07, "loss": 0.8637, "step": 16824 }, { "epoch": 19.162962962962965, "grad_norm": 0.22821033000946045, "learning_rate": 2.3481389032957435e-07, "loss": 0.6965, "step": 16825 }, { "epoch": 19.164102564102564, "grad_norm": 0.22390399873256683, "learning_rate": 2.3417723994138552e-07, "loss": 0.6692, "step": 16826 }, { "epoch": 19.165242165242166, "grad_norm": 0.17491723597049713, "learning_rate": 2.3354144973832537e-07, "loss": 0.684, "step": 16827 }, { "epoch": 19.166381766381768, "grad_norm": 0.21670633554458618, "learning_rate": 2.3290651974247902e-07, "loss": 0.4624, "step": 16828 }, { "epoch": 19.167521367521367, "grad_norm": 0.20747022330760956, "learning_rate": 2.3227244997589825e-07, "loss": 0.7757, "step": 16829 }, { "epoch": 19.16866096866097, "grad_norm": 0.16009530425071716, "learning_rate": 2.3163924046060714e-07, "loss": 0.8162, "step": 16830 }, { "epoch": 19.16980056980057, "grad_norm": 0.2176835536956787, "learning_rate": 2.310068912185992e-07, "loss": 0.4587, "step": 16831 }, { "epoch": 19.17094017094017, "grad_norm": 0.16527660191059113, "learning_rate": 2.303754022718374e-07, "loss": 0.9056, "step": 16832 }, { "epoch": 19.172079772079773, "grad_norm": 0.19874820113182068, "learning_rate": 2.2974477364225423e-07, "loss": 0.701, "step": 16833 }, { "epoch": 19.173219373219375, "grad_norm": 0.18765230476856232, "learning_rate": 2.2911500535175157e-07, "loss": 0.4855, "step": 16834 }, { "epoch": 19.174358974358974, "grad_norm": 0.21307937800884247, "learning_rate": 2.2848609742220916e-07, "loss": 0.5144, "step": 16835 }, { "epoch": 19.175498575498576, "grad_norm": 0.1848912090063095, "learning_rate": 2.2785804987546233e-07, "loss": 0.6323, "step": 16836 }, { "epoch": 19.17663817663818, "grad_norm": 0.19322006404399872, "learning_rate": 2.2723086273332971e-07, "loss": 0.6738, "step": 16837 }, { "epoch": 19.177777777777777, "grad_norm": 0.18617932498455048, "learning_rate": 2.2660453601759668e-07, "loss": 0.7185, "step": 16838 }, { "epoch": 19.17891737891738, "grad_norm": 0.21446798741817474, "learning_rate": 2.259790697500125e-07, "loss": 0.6248, "step": 16839 }, { "epoch": 19.180056980056982, "grad_norm": 0.1776348054409027, "learning_rate": 2.2535446395230697e-07, "loss": 0.8214, "step": 16840 }, { "epoch": 19.18119658119658, "grad_norm": 0.25483688712120056, "learning_rate": 2.2473071864616836e-07, "loss": 0.5343, "step": 16841 }, { "epoch": 19.182336182336183, "grad_norm": 0.18243196606636047, "learning_rate": 2.2410783385326816e-07, "loss": 0.8342, "step": 16842 }, { "epoch": 19.183475783475785, "grad_norm": 0.2042977213859558, "learning_rate": 2.2348580959523357e-07, "loss": 0.5783, "step": 16843 }, { "epoch": 19.184615384615384, "grad_norm": 0.19970044493675232, "learning_rate": 2.2286464589367507e-07, "loss": 0.6981, "step": 16844 }, { "epoch": 19.185754985754986, "grad_norm": 0.17871545255184174, "learning_rate": 2.222443427701615e-07, "loss": 0.625, "step": 16845 }, { "epoch": 19.18689458689459, "grad_norm": 0.2101234495639801, "learning_rate": 2.2162490024624506e-07, "loss": 0.6554, "step": 16846 }, { "epoch": 19.188034188034187, "grad_norm": 0.15810169279575348, "learning_rate": 2.2100631834343356e-07, "loss": 0.8116, "step": 16847 }, { "epoch": 19.18917378917379, "grad_norm": 0.24820293486118317, "learning_rate": 2.2038859708321535e-07, "loss": 0.4985, "step": 16848 }, { "epoch": 19.190313390313392, "grad_norm": 0.17996817827224731, "learning_rate": 2.1977173648704552e-07, "loss": 0.6283, "step": 16849 }, { "epoch": 19.19145299145299, "grad_norm": 0.18732857704162598, "learning_rate": 2.191557365763486e-07, "loss": 0.6905, "step": 16850 }, { "epoch": 19.192592592592593, "grad_norm": 0.16504479944705963, "learning_rate": 2.185405973725213e-07, "loss": 0.75, "step": 16851 }, { "epoch": 19.193732193732195, "grad_norm": 0.2545555830001831, "learning_rate": 2.1792631889692994e-07, "loss": 0.5674, "step": 16852 }, { "epoch": 19.194871794871794, "grad_norm": 0.18723373115062714, "learning_rate": 2.1731290117090742e-07, "loss": 0.7629, "step": 16853 }, { "epoch": 19.196011396011396, "grad_norm": 0.1995868682861328, "learning_rate": 2.167003442157589e-07, "loss": 0.5315, "step": 16854 }, { "epoch": 19.197150997151, "grad_norm": 0.18103499710559845, "learning_rate": 2.1608864805276464e-07, "loss": 0.5966, "step": 16855 }, { "epoch": 19.198290598290598, "grad_norm": 0.19257839024066925, "learning_rate": 2.154778127031659e-07, "loss": 0.6129, "step": 16856 }, { "epoch": 19.1994301994302, "grad_norm": 0.22365933656692505, "learning_rate": 2.148678381881819e-07, "loss": 0.6032, "step": 16857 }, { "epoch": 19.200569800569802, "grad_norm": 0.16736818850040436, "learning_rate": 2.1425872452899565e-07, "loss": 0.8442, "step": 16858 }, { "epoch": 19.2017094017094, "grad_norm": 0.1844698041677475, "learning_rate": 2.1365047174676524e-07, "loss": 0.6009, "step": 16859 }, { "epoch": 19.202849002849003, "grad_norm": 0.19384139776229858, "learning_rate": 2.1304307986261262e-07, "loss": 0.6999, "step": 16860 }, { "epoch": 19.203988603988606, "grad_norm": 0.1743377447128296, "learning_rate": 2.1243654889764596e-07, "loss": 0.8152, "step": 16861 }, { "epoch": 19.205128205128204, "grad_norm": 0.15852127969264984, "learning_rate": 2.1183087887291787e-07, "loss": 0.4292, "step": 16862 }, { "epoch": 19.206267806267807, "grad_norm": 0.2069716453552246, "learning_rate": 2.1122606980947535e-07, "loss": 0.7713, "step": 16863 }, { "epoch": 19.20740740740741, "grad_norm": 0.27197206020355225, "learning_rate": 2.106221217283183e-07, "loss": 0.4657, "step": 16864 }, { "epoch": 19.208547008547008, "grad_norm": 0.20752044022083282, "learning_rate": 2.1001903465042716e-07, "loss": 0.4611, "step": 16865 }, { "epoch": 19.20968660968661, "grad_norm": 0.18452784419059753, "learning_rate": 2.0941680859674627e-07, "loss": 0.59, "step": 16866 }, { "epoch": 19.210826210826212, "grad_norm": 0.20244835317134857, "learning_rate": 2.0881544358819506e-07, "loss": 0.6333, "step": 16867 }, { "epoch": 19.21196581196581, "grad_norm": 0.18190540373325348, "learning_rate": 2.0821493964565953e-07, "loss": 0.8249, "step": 16868 }, { "epoch": 19.213105413105414, "grad_norm": 0.1976470798254013, "learning_rate": 2.0761529678999802e-07, "loss": 0.4892, "step": 16869 }, { "epoch": 19.214245014245016, "grad_norm": 0.21269537508487701, "learning_rate": 2.0701651504203557e-07, "loss": 0.8091, "step": 16870 }, { "epoch": 19.215384615384615, "grad_norm": 0.1921021044254303, "learning_rate": 2.064185944225666e-07, "loss": 0.7302, "step": 16871 }, { "epoch": 19.216524216524217, "grad_norm": 0.22277995944023132, "learning_rate": 2.05821534952369e-07, "loss": 0.7242, "step": 16872 }, { "epoch": 19.21766381766382, "grad_norm": 0.23242759704589844, "learning_rate": 2.052253366521706e-07, "loss": 0.7776, "step": 16873 }, { "epoch": 19.218803418803418, "grad_norm": 0.20705784857273102, "learning_rate": 2.0462999954267982e-07, "loss": 0.6455, "step": 16874 }, { "epoch": 19.21994301994302, "grad_norm": 0.2073516994714737, "learning_rate": 2.0403552364458011e-07, "loss": 0.6994, "step": 16875 }, { "epoch": 19.221082621082623, "grad_norm": 0.1903194934129715, "learning_rate": 2.034419089785161e-07, "loss": 0.6894, "step": 16876 }, { "epoch": 19.22222222222222, "grad_norm": 0.18921184539794922, "learning_rate": 2.0284915556510465e-07, "loss": 0.5576, "step": 16877 }, { "epoch": 19.223361823361824, "grad_norm": 0.17695850133895874, "learning_rate": 2.02257263424932e-07, "loss": 0.6882, "step": 16878 }, { "epoch": 19.224501424501426, "grad_norm": 0.20833054184913635, "learning_rate": 2.0166623257856233e-07, "loss": 0.4894, "step": 16879 }, { "epoch": 19.225641025641025, "grad_norm": 0.2087348997592926, "learning_rate": 2.0107606304651805e-07, "loss": 0.5712, "step": 16880 }, { "epoch": 19.226780626780627, "grad_norm": 0.19440957903862, "learning_rate": 2.0048675484929947e-07, "loss": 0.606, "step": 16881 }, { "epoch": 19.22792022792023, "grad_norm": 0.19782736897468567, "learning_rate": 1.9989830800737351e-07, "loss": 0.6482, "step": 16882 }, { "epoch": 19.22905982905983, "grad_norm": 0.2040553241968155, "learning_rate": 1.9931072254118222e-07, "loss": 0.5844, "step": 16883 }, { "epoch": 19.23019943019943, "grad_norm": 0.18802884221076965, "learning_rate": 1.9872399847113144e-07, "loss": 0.6887, "step": 16884 }, { "epoch": 19.231339031339033, "grad_norm": 0.22762954235076904, "learning_rate": 1.9813813581759933e-07, "loss": 0.4754, "step": 16885 }, { "epoch": 19.23247863247863, "grad_norm": 0.21052877604961395, "learning_rate": 1.9755313460093626e-07, "loss": 0.8026, "step": 16886 }, { "epoch": 19.233618233618234, "grad_norm": 0.20105506479740143, "learning_rate": 1.9696899484145936e-07, "loss": 0.3103, "step": 16887 }, { "epoch": 19.234757834757836, "grad_norm": 0.22901511192321777, "learning_rate": 1.9638571655945793e-07, "loss": 0.5651, "step": 16888 }, { "epoch": 19.235897435897435, "grad_norm": 0.18789878487586975, "learning_rate": 1.9580329977518795e-07, "loss": 0.663, "step": 16889 }, { "epoch": 19.237037037037037, "grad_norm": 0.22932597994804382, "learning_rate": 1.952217445088833e-07, "loss": 0.682, "step": 16890 }, { "epoch": 19.23817663817664, "grad_norm": 0.19326689839363098, "learning_rate": 1.9464105078074169e-07, "loss": 0.5318, "step": 16891 }, { "epoch": 19.23931623931624, "grad_norm": 0.2046305388212204, "learning_rate": 1.940612186109303e-07, "loss": 0.7213, "step": 16892 }, { "epoch": 19.24045584045584, "grad_norm": 0.17293988168239594, "learning_rate": 1.9348224801958857e-07, "loss": 0.7, "step": 16893 }, { "epoch": 19.241595441595443, "grad_norm": 0.26575911045074463, "learning_rate": 1.9290413902682825e-07, "loss": 0.5475, "step": 16894 }, { "epoch": 19.242735042735042, "grad_norm": 0.22037792205810547, "learning_rate": 1.9232689165272767e-07, "loss": 0.6524, "step": 16895 }, { "epoch": 19.243874643874644, "grad_norm": 0.18646907806396484, "learning_rate": 1.9175050591733468e-07, "loss": 0.7682, "step": 16896 }, { "epoch": 19.245014245014247, "grad_norm": 0.21266423165798187, "learning_rate": 1.911749818406694e-07, "loss": 0.6346, "step": 16897 }, { "epoch": 19.246153846153845, "grad_norm": 0.1926053911447525, "learning_rate": 1.9060031944271862e-07, "loss": 0.7582, "step": 16898 }, { "epoch": 19.247293447293448, "grad_norm": 0.20515741407871246, "learning_rate": 1.9002651874344967e-07, "loss": 0.5199, "step": 16899 }, { "epoch": 19.24843304843305, "grad_norm": 0.1906573623418808, "learning_rate": 1.8945357976278555e-07, "loss": 0.8215, "step": 16900 }, { "epoch": 19.24957264957265, "grad_norm": 0.2508430480957031, "learning_rate": 1.8888150252062698e-07, "loss": 0.5091, "step": 16901 }, { "epoch": 19.25071225071225, "grad_norm": 0.19548311829566956, "learning_rate": 1.8831028703684417e-07, "loss": 0.7222, "step": 16902 }, { "epoch": 19.251851851851853, "grad_norm": 0.18210169672966003, "learning_rate": 1.877399333312796e-07, "loss": 0.6195, "step": 16903 }, { "epoch": 19.252991452991452, "grad_norm": 0.15801532566547394, "learning_rate": 1.8717044142373963e-07, "loss": 0.8909, "step": 16904 }, { "epoch": 19.254131054131054, "grad_norm": 0.1779741793870926, "learning_rate": 1.8660181133400568e-07, "loss": 0.756, "step": 16905 }, { "epoch": 19.255270655270657, "grad_norm": 0.1532481163740158, "learning_rate": 1.8603404308182858e-07, "loss": 0.792, "step": 16906 }, { "epoch": 19.256410256410255, "grad_norm": 0.20334388315677643, "learning_rate": 1.8546713668692873e-07, "loss": 0.478, "step": 16907 }, { "epoch": 19.257549857549858, "grad_norm": 0.18965454399585724, "learning_rate": 1.8490109216899587e-07, "loss": 0.5404, "step": 16908 }, { "epoch": 19.25868945868946, "grad_norm": 0.2065407931804657, "learning_rate": 1.8433590954768653e-07, "loss": 0.5383, "step": 16909 }, { "epoch": 19.25982905982906, "grad_norm": 0.19230175018310547, "learning_rate": 1.837715888426378e-07, "loss": 0.8586, "step": 16910 }, { "epoch": 19.26096866096866, "grad_norm": 0.15374523401260376, "learning_rate": 1.8320813007344507e-07, "loss": 0.7498, "step": 16911 }, { "epoch": 19.262108262108264, "grad_norm": 0.22920438647270203, "learning_rate": 1.826455332596816e-07, "loss": 0.6152, "step": 16912 }, { "epoch": 19.263247863247862, "grad_norm": 0.19025102257728577, "learning_rate": 1.8208379842088452e-07, "loss": 0.7298, "step": 16913 }, { "epoch": 19.264387464387465, "grad_norm": 0.1733146458864212, "learning_rate": 1.815229255765688e-07, "loss": 0.6172, "step": 16914 }, { "epoch": 19.265527065527067, "grad_norm": 0.20255063474178314, "learning_rate": 1.8096291474621052e-07, "loss": 0.3158, "step": 16915 }, { "epoch": 19.266666666666666, "grad_norm": 0.21574810147285461, "learning_rate": 1.8040376594926355e-07, "loss": 0.2767, "step": 16916 }, { "epoch": 19.267806267806268, "grad_norm": 0.1896638572216034, "learning_rate": 1.7984547920515127e-07, "loss": 0.7195, "step": 16917 }, { "epoch": 19.26894586894587, "grad_norm": 0.18451404571533203, "learning_rate": 1.7928805453325814e-07, "loss": 0.5955, "step": 16918 }, { "epoch": 19.27008547008547, "grad_norm": 0.22833938896656036, "learning_rate": 1.7873149195294647e-07, "loss": 0.4586, "step": 16919 }, { "epoch": 19.27122507122507, "grad_norm": 0.277601957321167, "learning_rate": 1.7817579148355078e-07, "loss": 0.624, "step": 16920 }, { "epoch": 19.272364672364674, "grad_norm": 0.21973587572574615, "learning_rate": 1.7762095314437233e-07, "loss": 0.5225, "step": 16921 }, { "epoch": 19.273504273504273, "grad_norm": 0.26785463094711304, "learning_rate": 1.77066976954679e-07, "loss": 0.5603, "step": 16922 }, { "epoch": 19.274643874643875, "grad_norm": 0.17875657975673676, "learning_rate": 1.7651386293371098e-07, "loss": 0.7083, "step": 16923 }, { "epoch": 19.275783475783477, "grad_norm": 0.191822811961174, "learning_rate": 1.7596161110068065e-07, "loss": 0.4877, "step": 16924 }, { "epoch": 19.276923076923076, "grad_norm": 0.18875642120838165, "learning_rate": 1.754102214747727e-07, "loss": 0.512, "step": 16925 }, { "epoch": 19.27806267806268, "grad_norm": 0.21474966406822205, "learning_rate": 1.7485969407513293e-07, "loss": 0.6143, "step": 16926 }, { "epoch": 19.27920227920228, "grad_norm": 0.19494162499904633, "learning_rate": 1.7431002892088766e-07, "loss": 0.5833, "step": 16927 }, { "epoch": 19.28034188034188, "grad_norm": 0.162825345993042, "learning_rate": 1.737612260311272e-07, "loss": 0.6457, "step": 16928 }, { "epoch": 19.28148148148148, "grad_norm": 0.19001635909080505, "learning_rate": 1.7321328542490855e-07, "loss": 0.7186, "step": 16929 }, { "epoch": 19.282621082621084, "grad_norm": 0.21253564953804016, "learning_rate": 1.7266620712126645e-07, "loss": 0.6209, "step": 16930 }, { "epoch": 19.283760683760683, "grad_norm": 0.2037236988544464, "learning_rate": 1.7211999113920518e-07, "loss": 0.5061, "step": 16931 }, { "epoch": 19.284900284900285, "grad_norm": 0.1651899516582489, "learning_rate": 1.7157463749769288e-07, "loss": 0.5641, "step": 16932 }, { "epoch": 19.286039886039887, "grad_norm": 0.19152629375457764, "learning_rate": 1.7103014621566993e-07, "loss": 0.5588, "step": 16933 }, { "epoch": 19.287179487179486, "grad_norm": 0.20283615589141846, "learning_rate": 1.7048651731205179e-07, "loss": 0.6209, "step": 16934 }, { "epoch": 19.28831908831909, "grad_norm": 0.20604069530963898, "learning_rate": 1.6994375080571777e-07, "loss": 0.57, "step": 16935 }, { "epoch": 19.28945868945869, "grad_norm": 0.18033136427402496, "learning_rate": 1.694018467155195e-07, "loss": 0.8617, "step": 16936 }, { "epoch": 19.29059829059829, "grad_norm": 0.18603329360485077, "learning_rate": 1.6886080506027801e-07, "loss": 0.491, "step": 16937 }, { "epoch": 19.291737891737892, "grad_norm": 0.16204974055290222, "learning_rate": 1.6832062585878937e-07, "loss": 0.4305, "step": 16938 }, { "epoch": 19.292877492877494, "grad_norm": 0.20586009323596954, "learning_rate": 1.6778130912981084e-07, "loss": 0.6284, "step": 16939 }, { "epoch": 19.294017094017093, "grad_norm": 0.16173145174980164, "learning_rate": 1.6724285489208013e-07, "loss": 0.6411, "step": 16940 }, { "epoch": 19.295156695156695, "grad_norm": 0.16894316673278809, "learning_rate": 1.6670526316429346e-07, "loss": 0.5615, "step": 16941 }, { "epoch": 19.296296296296298, "grad_norm": 0.18487438559532166, "learning_rate": 1.66168533965122e-07, "loss": 0.5633, "step": 16942 }, { "epoch": 19.297435897435896, "grad_norm": 0.19690082967281342, "learning_rate": 1.6563266731321192e-07, "loss": 0.9356, "step": 16943 }, { "epoch": 19.2985754985755, "grad_norm": 0.1948634684085846, "learning_rate": 1.6509766322717612e-07, "loss": 0.8312, "step": 16944 }, { "epoch": 19.2997150997151, "grad_norm": 0.2944418787956238, "learning_rate": 1.645635217255942e-07, "loss": 0.5003, "step": 16945 }, { "epoch": 19.3008547008547, "grad_norm": 0.23983515799045563, "learning_rate": 1.6403024282701795e-07, "loss": 0.5651, "step": 16946 }, { "epoch": 19.301994301994302, "grad_norm": 0.2157278060913086, "learning_rate": 1.6349782654997148e-07, "loss": 0.654, "step": 16947 }, { "epoch": 19.303133903133904, "grad_norm": 0.2278202325105667, "learning_rate": 1.6296627291294552e-07, "loss": 0.5393, "step": 16948 }, { "epoch": 19.304273504273503, "grad_norm": 0.1552683413028717, "learning_rate": 1.6243558193440313e-07, "loss": 0.8572, "step": 16949 }, { "epoch": 19.305413105413106, "grad_norm": 0.2274259477853775, "learning_rate": 1.6190575363277395e-07, "loss": 0.6418, "step": 16950 }, { "epoch": 19.306552706552708, "grad_norm": 0.21096357703208923, "learning_rate": 1.6137678802646827e-07, "loss": 0.5175, "step": 16951 }, { "epoch": 19.307692307692307, "grad_norm": 0.17654189467430115, "learning_rate": 1.6084868513384921e-07, "loss": 0.696, "step": 16952 }, { "epoch": 19.30883190883191, "grad_norm": 0.22242681682109833, "learning_rate": 1.6032144497326594e-07, "loss": 0.5559, "step": 16953 }, { "epoch": 19.30997150997151, "grad_norm": 0.22529473900794983, "learning_rate": 1.597950675630261e-07, "loss": 0.7854, "step": 16954 }, { "epoch": 19.31111111111111, "grad_norm": 0.23013631999492645, "learning_rate": 1.59269552921415e-07, "loss": 0.3146, "step": 16955 }, { "epoch": 19.312250712250712, "grad_norm": 0.1959642767906189, "learning_rate": 1.587449010666847e-07, "loss": 0.6456, "step": 16956 }, { "epoch": 19.313390313390315, "grad_norm": 0.2010372281074524, "learning_rate": 1.5822111201705682e-07, "loss": 0.6799, "step": 16957 }, { "epoch": 19.314529914529913, "grad_norm": 0.21289879083633423, "learning_rate": 1.5769818579072505e-07, "loss": 0.7041, "step": 16958 }, { "epoch": 19.315669515669516, "grad_norm": 0.2144283801317215, "learning_rate": 1.5717612240585266e-07, "loss": 0.5488, "step": 16959 }, { "epoch": 19.316809116809118, "grad_norm": 0.21097925305366516, "learning_rate": 1.566549218805724e-07, "loss": 0.593, "step": 16960 }, { "epoch": 19.317948717948717, "grad_norm": 0.2372552901506424, "learning_rate": 1.5613458423298365e-07, "loss": 0.6971, "step": 16961 }, { "epoch": 19.31908831908832, "grad_norm": 0.20000500977039337, "learning_rate": 1.5561510948116642e-07, "loss": 0.7047, "step": 16962 }, { "epoch": 19.32022792022792, "grad_norm": 0.17207293212413788, "learning_rate": 1.5509649764315347e-07, "loss": 0.6819, "step": 16963 }, { "epoch": 19.32136752136752, "grad_norm": 0.18278390169143677, "learning_rate": 1.5457874873696653e-07, "loss": 0.544, "step": 16964 }, { "epoch": 19.322507122507123, "grad_norm": 0.20752927660942078, "learning_rate": 1.5406186278058286e-07, "loss": 0.519, "step": 16965 }, { "epoch": 19.323646723646725, "grad_norm": 0.16611790657043457, "learning_rate": 1.5354583979195758e-07, "loss": 0.5877, "step": 16966 }, { "epoch": 19.324786324786324, "grad_norm": 0.16754280030727386, "learning_rate": 1.5303067978901242e-07, "loss": 0.7034, "step": 16967 }, { "epoch": 19.325925925925926, "grad_norm": 0.17728129029273987, "learning_rate": 1.5251638278964419e-07, "loss": 0.7641, "step": 16968 }, { "epoch": 19.32706552706553, "grad_norm": 0.16807030141353607, "learning_rate": 1.5200294881171084e-07, "loss": 0.8213, "step": 16969 }, { "epoch": 19.328205128205127, "grad_norm": 0.19782152771949768, "learning_rate": 1.514903778730481e-07, "loss": 0.5519, "step": 16970 }, { "epoch": 19.32934472934473, "grad_norm": 0.19398966431617737, "learning_rate": 1.509786699914556e-07, "loss": 0.7432, "step": 16971 }, { "epoch": 19.33048433048433, "grad_norm": 0.1973167508840561, "learning_rate": 1.504678251847108e-07, "loss": 0.8417, "step": 16972 }, { "epoch": 19.33162393162393, "grad_norm": 0.1974257081747055, "learning_rate": 1.4995784347055508e-07, "loss": 0.7497, "step": 16973 }, { "epoch": 19.332763532763533, "grad_norm": 0.15442010760307312, "learning_rate": 1.4944872486670204e-07, "loss": 0.7071, "step": 16974 }, { "epoch": 19.333903133903135, "grad_norm": 0.22179816663265228, "learning_rate": 1.4894046939083195e-07, "loss": 0.6395, "step": 16975 }, { "epoch": 19.335042735042734, "grad_norm": 0.17436963319778442, "learning_rate": 1.4843307706060016e-07, "loss": 0.7704, "step": 16976 }, { "epoch": 19.336182336182336, "grad_norm": 0.1819789707660675, "learning_rate": 1.4792654789363148e-07, "loss": 0.8175, "step": 16977 }, { "epoch": 19.33732193732194, "grad_norm": 0.20858220756053925, "learning_rate": 1.4742088190751735e-07, "loss": 0.3258, "step": 16978 }, { "epoch": 19.338461538461537, "grad_norm": 0.17292723059654236, "learning_rate": 1.4691607911981875e-07, "loss": 0.6172, "step": 16979 }, { "epoch": 19.33960113960114, "grad_norm": 0.20629368722438812, "learning_rate": 1.4641213954807442e-07, "loss": 0.7085, "step": 16980 }, { "epoch": 19.340740740740742, "grad_norm": 0.25998514890670776, "learning_rate": 1.4590906320978148e-07, "loss": 0.594, "step": 16981 }, { "epoch": 19.34188034188034, "grad_norm": 0.23073017597198486, "learning_rate": 1.4540685012241483e-07, "loss": 0.6886, "step": 16982 }, { "epoch": 19.343019943019943, "grad_norm": 0.23051905632019043, "learning_rate": 1.4490550030342165e-07, "loss": 0.4629, "step": 16983 }, { "epoch": 19.344159544159545, "grad_norm": 0.1976567953824997, "learning_rate": 1.444050137702102e-07, "loss": 0.8199, "step": 16984 }, { "epoch": 19.345299145299144, "grad_norm": 0.2493298351764679, "learning_rate": 1.439053905401666e-07, "loss": 0.5049, "step": 16985 }, { "epoch": 19.346438746438746, "grad_norm": 0.2229515016078949, "learning_rate": 1.4340663063064363e-07, "loss": 0.5099, "step": 16986 }, { "epoch": 19.34757834757835, "grad_norm": 0.19988198578357697, "learning_rate": 1.4290873405896354e-07, "loss": 0.7719, "step": 16987 }, { "epoch": 19.348717948717947, "grad_norm": 0.17936088144779205, "learning_rate": 1.4241170084241806e-07, "loss": 0.6148, "step": 16988 }, { "epoch": 19.34985754985755, "grad_norm": 0.16914109885692596, "learning_rate": 1.419155309982767e-07, "loss": 0.4429, "step": 16989 }, { "epoch": 19.350997150997152, "grad_norm": 0.17672418057918549, "learning_rate": 1.4142022454376736e-07, "loss": 0.5318, "step": 16990 }, { "epoch": 19.35213675213675, "grad_norm": 0.1927495002746582, "learning_rate": 1.409257814960957e-07, "loss": 0.6414, "step": 16991 }, { "epoch": 19.353276353276353, "grad_norm": 0.18565373122692108, "learning_rate": 1.404322018724341e-07, "loss": 0.7684, "step": 16992 }, { "epoch": 19.354415954415956, "grad_norm": 0.20426031947135925, "learning_rate": 1.3993948568992443e-07, "loss": 0.4539, "step": 16993 }, { "epoch": 19.355555555555554, "grad_norm": 0.20206066966056824, "learning_rate": 1.3944763296568353e-07, "loss": 0.6285, "step": 16994 }, { "epoch": 19.356695156695157, "grad_norm": 0.2475404143333435, "learning_rate": 1.3895664371679218e-07, "loss": 0.8911, "step": 16995 }, { "epoch": 19.35783475783476, "grad_norm": 0.1861857771873474, "learning_rate": 1.3846651796030617e-07, "loss": 0.7475, "step": 16996 }, { "epoch": 19.358974358974358, "grad_norm": 0.21384894847869873, "learning_rate": 1.3797725571324805e-07, "loss": 0.4894, "step": 16997 }, { "epoch": 19.36011396011396, "grad_norm": 0.16749976575374603, "learning_rate": 1.3748885699260972e-07, "loss": 0.7155, "step": 16998 }, { "epoch": 19.361253561253562, "grad_norm": 0.20118291676044464, "learning_rate": 1.3700132181535264e-07, "loss": 0.7377, "step": 16999 }, { "epoch": 19.36239316239316, "grad_norm": 0.20364977419376373, "learning_rate": 1.365146501984188e-07, "loss": 0.7462, "step": 17000 }, { "epoch": 19.363532763532763, "grad_norm": 0.24547569453716278, "learning_rate": 1.3602884215870305e-07, "loss": 0.6276, "step": 17001 }, { "epoch": 19.364672364672366, "grad_norm": 0.25913381576538086, "learning_rate": 1.3554389771308073e-07, "loss": 0.6307, "step": 17002 }, { "epoch": 19.365811965811965, "grad_norm": 0.19459164142608643, "learning_rate": 1.3505981687839674e-07, "loss": 0.4771, "step": 17003 }, { "epoch": 19.366951566951567, "grad_norm": 0.14924933016300201, "learning_rate": 1.345765996714654e-07, "loss": 0.8247, "step": 17004 }, { "epoch": 19.36809116809117, "grad_norm": 0.2035389095544815, "learning_rate": 1.3409424610906772e-07, "loss": 0.5336, "step": 17005 }, { "epoch": 19.369230769230768, "grad_norm": 0.17139534652233124, "learning_rate": 1.33612756207957e-07, "loss": 0.7279, "step": 17006 }, { "epoch": 19.37037037037037, "grad_norm": 0.22130882740020752, "learning_rate": 1.3313212998486146e-07, "loss": 0.7468, "step": 17007 }, { "epoch": 19.371509971509973, "grad_norm": 0.20196962356567383, "learning_rate": 1.3265236745647058e-07, "loss": 0.7199, "step": 17008 }, { "epoch": 19.37264957264957, "grad_norm": 0.18781882524490356, "learning_rate": 1.32173468639446e-07, "loss": 0.5938, "step": 17009 }, { "epoch": 19.373789173789174, "grad_norm": 0.18114197254180908, "learning_rate": 1.3169543355042723e-07, "loss": 0.7466, "step": 17010 }, { "epoch": 19.374928774928776, "grad_norm": 0.19962655007839203, "learning_rate": 1.3121826220601207e-07, "loss": 0.5502, "step": 17011 }, { "epoch": 19.376068376068375, "grad_norm": 0.18108335137367249, "learning_rate": 1.3074195462277617e-07, "loss": 0.7512, "step": 17012 }, { "epoch": 19.377207977207977, "grad_norm": 0.20177017152309418, "learning_rate": 1.302665108172618e-07, "loss": 0.7553, "step": 17013 }, { "epoch": 19.37834757834758, "grad_norm": 0.2146926075220108, "learning_rate": 1.2979193080598363e-07, "loss": 0.3559, "step": 17014 }, { "epoch": 19.379487179487178, "grad_norm": 0.22491852939128876, "learning_rate": 1.2931821460542837e-07, "loss": 0.7937, "step": 17015 }, { "epoch": 19.38062678062678, "grad_norm": 0.18990732729434967, "learning_rate": 1.2884536223204124e-07, "loss": 0.6904, "step": 17016 }, { "epoch": 19.381766381766383, "grad_norm": 0.1621163934469223, "learning_rate": 1.2837337370225356e-07, "loss": 0.3372, "step": 17017 }, { "epoch": 19.38290598290598, "grad_norm": 0.1749613732099533, "learning_rate": 1.2790224903245495e-07, "loss": 0.5611, "step": 17018 }, { "epoch": 19.384045584045584, "grad_norm": 0.19548732042312622, "learning_rate": 1.2743198823901293e-07, "loss": 0.7064, "step": 17019 }, { "epoch": 19.385185185185186, "grad_norm": 0.20567107200622559, "learning_rate": 1.2696259133825329e-07, "loss": 0.6203, "step": 17020 }, { "epoch": 19.386324786324785, "grad_norm": 0.20791058242321014, "learning_rate": 1.2649405834648798e-07, "loss": 0.8267, "step": 17021 }, { "epoch": 19.387464387464387, "grad_norm": 0.20129592716693878, "learning_rate": 1.260263892799818e-07, "loss": 0.874, "step": 17022 }, { "epoch": 19.38860398860399, "grad_norm": 0.25185975432395935, "learning_rate": 1.2555958415498836e-07, "loss": 0.4448, "step": 17023 }, { "epoch": 19.38974358974359, "grad_norm": 0.20412062108516693, "learning_rate": 1.2509364298771143e-07, "loss": 0.5128, "step": 17024 }, { "epoch": 19.39088319088319, "grad_norm": 0.24313119053840637, "learning_rate": 1.2462856579434078e-07, "loss": 0.6976, "step": 17025 }, { "epoch": 19.392022792022793, "grad_norm": 0.190816268324852, "learning_rate": 1.2416435259102466e-07, "loss": 0.6577, "step": 17026 }, { "epoch": 19.39316239316239, "grad_norm": 0.17809267342090607, "learning_rate": 1.237010033938918e-07, "loss": 0.6978, "step": 17027 }, { "epoch": 19.394301994301994, "grad_norm": 0.17608125507831573, "learning_rate": 1.232385182190321e-07, "loss": 0.7697, "step": 17028 }, { "epoch": 19.395441595441596, "grad_norm": 0.1857132464647293, "learning_rate": 1.227768970825105e-07, "loss": 0.7473, "step": 17029 }, { "epoch": 19.396581196581195, "grad_norm": 0.18189045786857605, "learning_rate": 1.223161400003614e-07, "loss": 0.7103, "step": 17030 }, { "epoch": 19.397720797720797, "grad_norm": 0.1787533015012741, "learning_rate": 1.218562469885831e-07, "loss": 0.7709, "step": 17031 }, { "epoch": 19.3988603988604, "grad_norm": 0.16988062858581543, "learning_rate": 1.2139721806315452e-07, "loss": 0.7394, "step": 17032 }, { "epoch": 19.4, "grad_norm": 0.1955726146697998, "learning_rate": 1.209390532400184e-07, "loss": 0.7282, "step": 17033 }, { "epoch": 19.4011396011396, "grad_norm": 0.17550189793109894, "learning_rate": 1.2048175253508433e-07, "loss": 0.6783, "step": 17034 }, { "epoch": 19.402279202279203, "grad_norm": 0.17031417787075043, "learning_rate": 1.2002531596423949e-07, "loss": 0.5903, "step": 17035 }, { "epoch": 19.403418803418802, "grad_norm": 0.18011051416397095, "learning_rate": 1.1956974354333516e-07, "loss": 0.8027, "step": 17036 }, { "epoch": 19.404558404558404, "grad_norm": 0.20305760204792023, "learning_rate": 1.191150352881948e-07, "loss": 0.7499, "step": 17037 }, { "epoch": 19.405698005698007, "grad_norm": 0.1608572006225586, "learning_rate": 1.1866119121461128e-07, "loss": 0.6855, "step": 17038 }, { "epoch": 19.406837606837605, "grad_norm": 0.19151221215724945, "learning_rate": 1.182082113383498e-07, "loss": 0.5605, "step": 17039 }, { "epoch": 19.407977207977208, "grad_norm": 0.2052474319934845, "learning_rate": 1.1775609567513946e-07, "loss": 0.7317, "step": 17040 }, { "epoch": 19.40911680911681, "grad_norm": 0.21271418035030365, "learning_rate": 1.1730484424068989e-07, "loss": 0.6543, "step": 17041 }, { "epoch": 19.41025641025641, "grad_norm": 0.20730236172676086, "learning_rate": 1.1685445705066911e-07, "loss": 0.5903, "step": 17042 }, { "epoch": 19.41139601139601, "grad_norm": 0.3019479513168335, "learning_rate": 1.1640493412072295e-07, "loss": 0.5455, "step": 17043 }, { "epoch": 19.412535612535613, "grad_norm": 0.2187756597995758, "learning_rate": 1.1595627546646393e-07, "loss": 0.6563, "step": 17044 }, { "epoch": 19.413675213675212, "grad_norm": 0.21233493089675903, "learning_rate": 1.1550848110347401e-07, "loss": 0.6528, "step": 17045 }, { "epoch": 19.414814814814815, "grad_norm": 0.2932858467102051, "learning_rate": 1.1506155104730743e-07, "loss": 0.4308, "step": 17046 }, { "epoch": 19.415954415954417, "grad_norm": 0.21783111989498138, "learning_rate": 1.1461548531348509e-07, "loss": 0.4531, "step": 17047 }, { "epoch": 19.417094017094016, "grad_norm": 0.17653638124465942, "learning_rate": 1.1417028391750296e-07, "loss": 0.8428, "step": 17048 }, { "epoch": 19.418233618233618, "grad_norm": 0.19558821618556976, "learning_rate": 1.1372594687482363e-07, "loss": 0.5278, "step": 17049 }, { "epoch": 19.41937321937322, "grad_norm": 0.18002977967262268, "learning_rate": 1.1328247420088201e-07, "loss": 0.6476, "step": 17050 }, { "epoch": 19.42051282051282, "grad_norm": 0.2039240598678589, "learning_rate": 1.128398659110741e-07, "loss": 0.6512, "step": 17051 }, { "epoch": 19.42165242165242, "grad_norm": 0.21624480187892914, "learning_rate": 1.1239812202078204e-07, "loss": 0.5828, "step": 17052 }, { "epoch": 19.422792022792024, "grad_norm": 0.22936004400253296, "learning_rate": 1.1195724254534356e-07, "loss": 0.4879, "step": 17053 }, { "epoch": 19.423931623931622, "grad_norm": 0.16383817791938782, "learning_rate": 1.1151722750007143e-07, "loss": 0.7886, "step": 17054 }, { "epoch": 19.425071225071225, "grad_norm": 0.22113212943077087, "learning_rate": 1.1107807690024785e-07, "loss": 0.6697, "step": 17055 }, { "epoch": 19.426210826210827, "grad_norm": 0.18560701608657837, "learning_rate": 1.1063979076113006e-07, "loss": 0.6258, "step": 17056 }, { "epoch": 19.427350427350426, "grad_norm": 0.2129720002412796, "learning_rate": 1.1020236909793646e-07, "loss": 0.644, "step": 17057 }, { "epoch": 19.428490028490028, "grad_norm": 0.21148735284805298, "learning_rate": 1.0976581192586321e-07, "loss": 0.6784, "step": 17058 }, { "epoch": 19.42962962962963, "grad_norm": 0.17602939903736115, "learning_rate": 1.0933011926007042e-07, "loss": 0.9103, "step": 17059 }, { "epoch": 19.43076923076923, "grad_norm": 0.2385375201702118, "learning_rate": 1.0889529111569319e-07, "loss": 0.5282, "step": 17060 }, { "epoch": 19.43190883190883, "grad_norm": 0.16205713152885437, "learning_rate": 1.0846132750783334e-07, "loss": 0.6132, "step": 17061 }, { "epoch": 19.433048433048434, "grad_norm": 0.1841961294412613, "learning_rate": 1.0802822845156213e-07, "loss": 0.6327, "step": 17062 }, { "epoch": 19.434188034188033, "grad_norm": 0.21208113431930542, "learning_rate": 1.0759599396192588e-07, "loss": 0.3572, "step": 17063 }, { "epoch": 19.435327635327635, "grad_norm": 0.18539439141750336, "learning_rate": 1.0716462405393479e-07, "loss": 0.6487, "step": 17064 }, { "epoch": 19.436467236467237, "grad_norm": 0.18167999386787415, "learning_rate": 1.0673411874257134e-07, "loss": 0.5201, "step": 17065 }, { "epoch": 19.437606837606836, "grad_norm": 0.23086227476596832, "learning_rate": 1.0630447804278743e-07, "loss": 0.6721, "step": 17066 }, { "epoch": 19.43874643874644, "grad_norm": 0.24957765638828278, "learning_rate": 1.0587570196951002e-07, "loss": 0.4445, "step": 17067 }, { "epoch": 19.43988603988604, "grad_norm": 0.23669400811195374, "learning_rate": 1.0544779053762722e-07, "loss": 0.5032, "step": 17068 }, { "epoch": 19.44102564102564, "grad_norm": 0.2455901950597763, "learning_rate": 1.0502074376200211e-07, "loss": 0.648, "step": 17069 }, { "epoch": 19.442165242165242, "grad_norm": 0.16316574811935425, "learning_rate": 1.0459456165746728e-07, "loss": 0.7501, "step": 17070 }, { "epoch": 19.443304843304844, "grad_norm": 0.22059044241905212, "learning_rate": 1.0416924423882757e-07, "loss": 0.6406, "step": 17071 }, { "epoch": 19.444444444444443, "grad_norm": 0.19195523858070374, "learning_rate": 1.0374479152085447e-07, "loss": 0.5575, "step": 17072 }, { "epoch": 19.445584045584045, "grad_norm": 0.1592654287815094, "learning_rate": 1.0332120351828622e-07, "loss": 0.9012, "step": 17073 }, { "epoch": 19.446723646723648, "grad_norm": 0.18094578385353088, "learning_rate": 1.0289848024584159e-07, "loss": 0.5166, "step": 17074 }, { "epoch": 19.447863247863246, "grad_norm": 0.21522584557533264, "learning_rate": 1.0247662171820049e-07, "loss": 0.6894, "step": 17075 }, { "epoch": 19.44900284900285, "grad_norm": 0.16932497918605804, "learning_rate": 1.0205562795001234e-07, "loss": 0.636, "step": 17076 }, { "epoch": 19.45014245014245, "grad_norm": 0.1994141936302185, "learning_rate": 1.0163549895590429e-07, "loss": 0.5816, "step": 17077 }, { "epoch": 19.45128205128205, "grad_norm": 0.1876506209373474, "learning_rate": 1.012162347504647e-07, "loss": 0.7251, "step": 17078 }, { "epoch": 19.452421652421652, "grad_norm": 0.17867793142795563, "learning_rate": 1.0079783534825971e-07, "loss": 0.7104, "step": 17079 }, { "epoch": 19.453561253561254, "grad_norm": 0.2122046947479248, "learning_rate": 1.0038030076381655e-07, "loss": 0.6158, "step": 17080 }, { "epoch": 19.454700854700853, "grad_norm": 0.1705094575881958, "learning_rate": 9.996363101163752e-08, "loss": 0.6513, "step": 17081 }, { "epoch": 19.455840455840455, "grad_norm": 0.1612125188112259, "learning_rate": 9.954782610619995e-08, "loss": 0.6573, "step": 17082 }, { "epoch": 19.456980056980058, "grad_norm": 0.1886008083820343, "learning_rate": 9.913288606194226e-08, "loss": 0.5259, "step": 17083 }, { "epoch": 19.458119658119656, "grad_norm": 0.19780634343624115, "learning_rate": 9.871881089327795e-08, "loss": 0.5057, "step": 17084 }, { "epoch": 19.45925925925926, "grad_norm": 0.24414218962192535, "learning_rate": 9.830560061458716e-08, "loss": 0.5972, "step": 17085 }, { "epoch": 19.46039886039886, "grad_norm": 0.2200600504875183, "learning_rate": 9.78932552402223e-08, "loss": 0.6678, "step": 17086 }, { "epoch": 19.46153846153846, "grad_norm": 0.18297740817070007, "learning_rate": 9.748177478450805e-08, "loss": 0.5821, "step": 17087 }, { "epoch": 19.462678062678062, "grad_norm": 0.1705702394247055, "learning_rate": 9.707115926173293e-08, "loss": 0.6605, "step": 17088 }, { "epoch": 19.463817663817665, "grad_norm": 0.20623593032360077, "learning_rate": 9.666140868615781e-08, "loss": 0.5991, "step": 17089 }, { "epoch": 19.464957264957263, "grad_norm": 0.22248166799545288, "learning_rate": 9.625252307201849e-08, "loss": 0.6856, "step": 17090 }, { "epoch": 19.466096866096866, "grad_norm": 0.17112231254577637, "learning_rate": 9.584450243351472e-08, "loss": 0.4216, "step": 17091 }, { "epoch": 19.467236467236468, "grad_norm": 0.192648783326149, "learning_rate": 9.543734678481575e-08, "loss": 0.4736, "step": 17092 }, { "epoch": 19.468376068376067, "grad_norm": 0.1831110119819641, "learning_rate": 9.50310561400658e-08, "loss": 0.6742, "step": 17093 }, { "epoch": 19.46951566951567, "grad_norm": 0.17312107980251312, "learning_rate": 9.462563051337858e-08, "loss": 0.5684, "step": 17094 }, { "epoch": 19.47065527065527, "grad_norm": 0.2026345431804657, "learning_rate": 9.422106991883173e-08, "loss": 0.7492, "step": 17095 }, { "epoch": 19.47179487179487, "grad_norm": 0.21089421212673187, "learning_rate": 9.381737437047788e-08, "loss": 0.6088, "step": 17096 }, { "epoch": 19.472934472934472, "grad_norm": 0.20423763990402222, "learning_rate": 9.341454388234194e-08, "loss": 0.7286, "step": 17097 }, { "epoch": 19.474074074074075, "grad_norm": 0.17863865196704865, "learning_rate": 9.301257846840994e-08, "loss": 0.7831, "step": 17098 }, { "epoch": 19.475213675213674, "grad_norm": 0.17470353841781616, "learning_rate": 9.26114781426457e-08, "loss": 0.6797, "step": 17099 }, { "epoch": 19.476353276353276, "grad_norm": 0.24701328575611115, "learning_rate": 9.221124291897976e-08, "loss": 0.3647, "step": 17100 }, { "epoch": 19.477492877492878, "grad_norm": 0.22222790122032166, "learning_rate": 9.181187281131487e-08, "loss": 0.6411, "step": 17101 }, { "epoch": 19.478632478632477, "grad_norm": 0.2127547413110733, "learning_rate": 9.14133678335205e-08, "loss": 0.6041, "step": 17102 }, { "epoch": 19.47977207977208, "grad_norm": 0.22702544927597046, "learning_rate": 9.101572799943836e-08, "loss": 0.5715, "step": 17103 }, { "epoch": 19.48091168091168, "grad_norm": 0.18768079578876495, "learning_rate": 9.06189533228824e-08, "loss": 0.7962, "step": 17104 }, { "epoch": 19.48205128205128, "grad_norm": 0.17594477534294128, "learning_rate": 9.022304381763047e-08, "loss": 0.5345, "step": 17105 }, { "epoch": 19.483190883190883, "grad_norm": 0.18248769640922546, "learning_rate": 8.98279994974327e-08, "loss": 0.693, "step": 17106 }, { "epoch": 19.484330484330485, "grad_norm": 0.20692914724349976, "learning_rate": 8.943382037601422e-08, "loss": 0.7601, "step": 17107 }, { "epoch": 19.485470085470084, "grad_norm": 0.16740009188652039, "learning_rate": 8.90405064670613e-08, "loss": 0.7128, "step": 17108 }, { "epoch": 19.486609686609686, "grad_norm": 0.24666811525821686, "learning_rate": 8.864805778423801e-08, "loss": 0.4575, "step": 17109 }, { "epoch": 19.48774928774929, "grad_norm": 0.17780767381191254, "learning_rate": 8.825647434117512e-08, "loss": 0.7061, "step": 17110 }, { "epoch": 19.488888888888887, "grad_norm": 0.23126186430454254, "learning_rate": 8.786575615147007e-08, "loss": 0.6311, "step": 17111 }, { "epoch": 19.49002849002849, "grad_norm": 0.1918278932571411, "learning_rate": 8.747590322869814e-08, "loss": 0.7168, "step": 17112 }, { "epoch": 19.491168091168092, "grad_norm": 0.23038645088672638, "learning_rate": 8.70869155863957e-08, "loss": 0.5251, "step": 17113 }, { "epoch": 19.49230769230769, "grad_norm": 0.20258736610412598, "learning_rate": 8.669879323807694e-08, "loss": 0.745, "step": 17114 }, { "epoch": 19.493447293447293, "grad_norm": 0.20020519196987152, "learning_rate": 8.631153619721998e-08, "loss": 0.7075, "step": 17115 }, { "epoch": 19.494586894586895, "grad_norm": 0.17957910895347595, "learning_rate": 8.592514447727795e-08, "loss": 0.7416, "step": 17116 }, { "epoch": 19.495726495726494, "grad_norm": 0.21466033160686493, "learning_rate": 8.553961809166788e-08, "loss": 0.5896, "step": 17117 }, { "epoch": 19.496866096866096, "grad_norm": 0.1736799031496048, "learning_rate": 8.515495705378185e-08, "loss": 0.6359, "step": 17118 }, { "epoch": 19.4980056980057, "grad_norm": 0.16858024895191193, "learning_rate": 8.477116137698138e-08, "loss": 0.518, "step": 17119 }, { "epoch": 19.499145299145297, "grad_norm": 0.17975902557373047, "learning_rate": 8.43882310745947e-08, "loss": 0.7806, "step": 17120 }, { "epoch": 19.5002849002849, "grad_norm": 0.20028391480445862, "learning_rate": 8.400616615992507e-08, "loss": 0.5858, "step": 17121 }, { "epoch": 19.501424501424502, "grad_norm": 0.19237558543682098, "learning_rate": 8.362496664623687e-08, "loss": 0.6286, "step": 17122 }, { "epoch": 19.5025641025641, "grad_norm": 0.17876315116882324, "learning_rate": 8.324463254677784e-08, "loss": 0.5383, "step": 17123 }, { "epoch": 19.503703703703703, "grad_norm": 0.17792995274066925, "learning_rate": 8.28651638747513e-08, "loss": 0.753, "step": 17124 }, { "epoch": 19.504843304843305, "grad_norm": 0.20652182400226593, "learning_rate": 8.248656064334115e-08, "loss": 0.5331, "step": 17125 }, { "epoch": 19.505982905982904, "grad_norm": 0.18937747180461884, "learning_rate": 8.210882286569521e-08, "loss": 0.7188, "step": 17126 }, { "epoch": 19.507122507122507, "grad_norm": 0.1956784576177597, "learning_rate": 8.173195055493632e-08, "loss": 0.7413, "step": 17127 }, { "epoch": 19.50826210826211, "grad_norm": 0.19371221959590912, "learning_rate": 8.135594372415123e-08, "loss": 0.4869, "step": 17128 }, { "epoch": 19.509401709401708, "grad_norm": 0.2019006609916687, "learning_rate": 8.098080238639893e-08, "loss": 0.5579, "step": 17129 }, { "epoch": 19.51054131054131, "grad_norm": 0.24422931671142578, "learning_rate": 8.060652655471346e-08, "loss": 0.4343, "step": 17130 }, { "epoch": 19.511680911680912, "grad_norm": 0.24223747849464417, "learning_rate": 8.023311624208996e-08, "loss": 0.5611, "step": 17131 }, { "epoch": 19.51282051282051, "grad_norm": 0.18167677521705627, "learning_rate": 7.986057146150138e-08, "loss": 0.634, "step": 17132 }, { "epoch": 19.513960113960113, "grad_norm": 0.173972949385643, "learning_rate": 7.948889222588463e-08, "loss": 0.6816, "step": 17133 }, { "epoch": 19.515099715099716, "grad_norm": 0.16874933242797852, "learning_rate": 7.911807854815156e-08, "loss": 0.6713, "step": 17134 }, { "epoch": 19.516239316239318, "grad_norm": 0.22373968362808228, "learning_rate": 7.874813044117802e-08, "loss": 0.5097, "step": 17135 }, { "epoch": 19.517378917378917, "grad_norm": 0.17092277109622955, "learning_rate": 7.837904791781759e-08, "loss": 0.695, "step": 17136 }, { "epoch": 19.51851851851852, "grad_norm": 0.2304261028766632, "learning_rate": 7.80108309908878e-08, "loss": 0.7551, "step": 17137 }, { "epoch": 19.51965811965812, "grad_norm": 0.19783733785152435, "learning_rate": 7.764347967317842e-08, "loss": 0.6007, "step": 17138 }, { "epoch": 19.52079772079772, "grad_norm": 0.1704883575439453, "learning_rate": 7.727699397744592e-08, "loss": 0.7003, "step": 17139 }, { "epoch": 19.521937321937322, "grad_norm": 0.24890558421611786, "learning_rate": 7.691137391642178e-08, "loss": 0.449, "step": 17140 }, { "epoch": 19.523076923076925, "grad_norm": 0.21356940269470215, "learning_rate": 7.654661950280417e-08, "loss": 0.5135, "step": 17141 }, { "epoch": 19.524216524216524, "grad_norm": 0.2279697060585022, "learning_rate": 7.618273074926074e-08, "loss": 0.6671, "step": 17142 }, { "epoch": 19.525356125356126, "grad_norm": 0.20446214079856873, "learning_rate": 7.581970766843415e-08, "loss": 0.6255, "step": 17143 }, { "epoch": 19.526495726495728, "grad_norm": 0.15221214294433594, "learning_rate": 7.5457550272931e-08, "loss": 0.5565, "step": 17144 }, { "epoch": 19.527635327635327, "grad_norm": 0.18313929438591003, "learning_rate": 7.509625857533009e-08, "loss": 0.8275, "step": 17145 }, { "epoch": 19.52877492877493, "grad_norm": 0.19369709491729736, "learning_rate": 7.473583258817695e-08, "loss": 0.6028, "step": 17146 }, { "epoch": 19.52991452991453, "grad_norm": 0.22477491199970245, "learning_rate": 7.437627232399492e-08, "loss": 0.6689, "step": 17147 }, { "epoch": 19.53105413105413, "grad_norm": 0.177805557847023, "learning_rate": 7.401757779527118e-08, "loss": 0.6733, "step": 17148 }, { "epoch": 19.532193732193733, "grad_norm": 0.1790897399187088, "learning_rate": 7.365974901446248e-08, "loss": 0.5415, "step": 17149 }, { "epoch": 19.533333333333335, "grad_norm": 0.22771689295768738, "learning_rate": 7.330278599400053e-08, "loss": 0.5673, "step": 17150 }, { "epoch": 19.534472934472934, "grad_norm": 0.21820858120918274, "learning_rate": 7.294668874628096e-08, "loss": 0.4605, "step": 17151 }, { "epoch": 19.535612535612536, "grad_norm": 0.16659726202487946, "learning_rate": 7.259145728367167e-08, "loss": 0.6647, "step": 17152 }, { "epoch": 19.53675213675214, "grad_norm": 0.1653488725423813, "learning_rate": 7.223709161851e-08, "loss": 0.6669, "step": 17153 }, { "epoch": 19.537891737891737, "grad_norm": 0.19739170372486115, "learning_rate": 7.188359176310832e-08, "loss": 0.6552, "step": 17154 }, { "epoch": 19.53903133903134, "grad_norm": 0.23167668282985687, "learning_rate": 7.153095772974016e-08, "loss": 0.6213, "step": 17155 }, { "epoch": 19.540170940170942, "grad_norm": 0.15837553143501282, "learning_rate": 7.117918953065683e-08, "loss": 0.8195, "step": 17156 }, { "epoch": 19.54131054131054, "grad_norm": 0.2038544863462448, "learning_rate": 7.082828717807633e-08, "loss": 0.7856, "step": 17157 }, { "epoch": 19.542450142450143, "grad_norm": 0.21026979386806488, "learning_rate": 7.047825068418056e-08, "loss": 0.495, "step": 17158 }, { "epoch": 19.543589743589745, "grad_norm": 0.19288213551044464, "learning_rate": 7.012908006113483e-08, "loss": 0.427, "step": 17159 }, { "epoch": 19.544729344729344, "grad_norm": 0.15973199903964996, "learning_rate": 6.978077532106276e-08, "loss": 0.6121, "step": 17160 }, { "epoch": 19.545868945868946, "grad_norm": 0.19514554738998413, "learning_rate": 6.943333647606298e-08, "loss": 0.5693, "step": 17161 }, { "epoch": 19.54700854700855, "grad_norm": 0.16935944557189941, "learning_rate": 6.908676353820365e-08, "loss": 0.6195, "step": 17162 }, { "epoch": 19.548148148148147, "grad_norm": 0.1701992154121399, "learning_rate": 6.874105651951957e-08, "loss": 0.574, "step": 17163 }, { "epoch": 19.54928774928775, "grad_norm": 0.18068106472492218, "learning_rate": 6.839621543202058e-08, "loss": 0.686, "step": 17164 }, { "epoch": 19.550427350427352, "grad_norm": 0.17694298923015594, "learning_rate": 6.80522402876832e-08, "loss": 0.4524, "step": 17165 }, { "epoch": 19.55156695156695, "grad_norm": 0.2574649751186371, "learning_rate": 6.770913109845345e-08, "loss": 0.4532, "step": 17166 }, { "epoch": 19.552706552706553, "grad_norm": 0.1775989681482315, "learning_rate": 6.736688787625234e-08, "loss": 0.7867, "step": 17167 }, { "epoch": 19.553846153846155, "grad_norm": 0.17278486490249634, "learning_rate": 6.702551063296204e-08, "loss": 0.6284, "step": 17168 }, { "epoch": 19.554985754985754, "grad_norm": 0.1985652595758438, "learning_rate": 6.668499938044248e-08, "loss": 0.7282, "step": 17169 }, { "epoch": 19.556125356125357, "grad_norm": 0.22517012059688568, "learning_rate": 6.634535413051756e-08, "loss": 0.5876, "step": 17170 }, { "epoch": 19.55726495726496, "grad_norm": 0.22253260016441345, "learning_rate": 6.600657489498895e-08, "loss": 0.4699, "step": 17171 }, { "epoch": 19.558404558404558, "grad_norm": 0.17032331228256226, "learning_rate": 6.566866168561947e-08, "loss": 0.939, "step": 17172 }, { "epoch": 19.55954415954416, "grad_norm": 0.2109702080488205, "learning_rate": 6.533161451414693e-08, "loss": 0.8453, "step": 17173 }, { "epoch": 19.560683760683762, "grad_norm": 0.16890640556812286, "learning_rate": 6.499543339227864e-08, "loss": 0.7582, "step": 17174 }, { "epoch": 19.56182336182336, "grad_norm": 0.2271934449672699, "learning_rate": 6.46601183316914e-08, "loss": 0.5252, "step": 17175 }, { "epoch": 19.562962962962963, "grad_norm": 0.222871795296669, "learning_rate": 6.432566934402862e-08, "loss": 0.7076, "step": 17176 }, { "epoch": 19.564102564102566, "grad_norm": 0.19857577979564667, "learning_rate": 6.39920864409116e-08, "loss": 0.5309, "step": 17177 }, { "epoch": 19.565242165242164, "grad_norm": 0.1586875021457672, "learning_rate": 6.365936963392272e-08, "loss": 0.7746, "step": 17178 }, { "epoch": 19.566381766381767, "grad_norm": 0.1662566214799881, "learning_rate": 6.332751893461664e-08, "loss": 0.7652, "step": 17179 }, { "epoch": 19.56752136752137, "grad_norm": 0.2910987436771393, "learning_rate": 6.29965343545258e-08, "loss": 0.7277, "step": 17180 }, { "epoch": 19.568660968660968, "grad_norm": 0.18514537811279297, "learning_rate": 6.266641590513823e-08, "loss": 0.5475, "step": 17181 }, { "epoch": 19.56980056980057, "grad_norm": 0.25863945484161377, "learning_rate": 6.233716359792807e-08, "loss": 0.4553, "step": 17182 }, { "epoch": 19.570940170940172, "grad_norm": 0.20041605830192566, "learning_rate": 6.200877744432232e-08, "loss": 0.6071, "step": 17183 }, { "epoch": 19.57207977207977, "grad_norm": 0.1779608428478241, "learning_rate": 6.168125745573683e-08, "loss": 0.4706, "step": 17184 }, { "epoch": 19.573219373219374, "grad_norm": 0.21291576325893402, "learning_rate": 6.135460364353752e-08, "loss": 0.4194, "step": 17185 }, { "epoch": 19.574358974358976, "grad_norm": 0.15239788591861725, "learning_rate": 6.102881601907639e-08, "loss": 0.6367, "step": 17186 }, { "epoch": 19.575498575498575, "grad_norm": 0.1992502212524414, "learning_rate": 6.070389459366665e-08, "loss": 0.7274, "step": 17187 }, { "epoch": 19.576638176638177, "grad_norm": 0.21773108839988708, "learning_rate": 6.037983937859371e-08, "loss": 0.4291, "step": 17188 }, { "epoch": 19.57777777777778, "grad_norm": 0.18458621203899384, "learning_rate": 6.005665038511521e-08, "loss": 0.7298, "step": 17189 }, { "epoch": 19.578917378917378, "grad_norm": 0.17930319905281067, "learning_rate": 5.973432762445275e-08, "loss": 0.7223, "step": 17190 }, { "epoch": 19.58005698005698, "grad_norm": 0.21082007884979248, "learning_rate": 5.9412871107800135e-08, "loss": 0.8155, "step": 17191 }, { "epoch": 19.581196581196583, "grad_norm": 0.1849924772977829, "learning_rate": 5.9092280846329004e-08, "loss": 0.696, "step": 17192 }, { "epoch": 19.58233618233618, "grad_norm": 0.19565510749816895, "learning_rate": 5.877255685116934e-08, "loss": 0.7884, "step": 17193 }, { "epoch": 19.583475783475784, "grad_norm": 0.2000020444393158, "learning_rate": 5.8453699133428907e-08, "loss": 0.7215, "step": 17194 }, { "epoch": 19.584615384615386, "grad_norm": 0.1640736609697342, "learning_rate": 5.813570770417942e-08, "loss": 0.5945, "step": 17195 }, { "epoch": 19.585754985754985, "grad_norm": 0.18161728978157043, "learning_rate": 5.7818582574467594e-08, "loss": 0.5542, "step": 17196 }, { "epoch": 19.586894586894587, "grad_norm": 0.17179909348487854, "learning_rate": 5.750232375530962e-08, "loss": 0.8178, "step": 17197 }, { "epoch": 19.58803418803419, "grad_norm": 0.2215074598789215, "learning_rate": 5.7186931257685596e-08, "loss": 0.6192, "step": 17198 }, { "epoch": 19.58917378917379, "grad_norm": 0.20067641139030457, "learning_rate": 5.6872405092553425e-08, "loss": 0.6097, "step": 17199 }, { "epoch": 19.59031339031339, "grad_norm": 0.20531664788722992, "learning_rate": 5.655874527083493e-08, "loss": 0.8093, "step": 17200 }, { "epoch": 19.591452991452993, "grad_norm": 0.18867632746696472, "learning_rate": 5.624595180342696e-08, "loss": 0.7636, "step": 17201 }, { "epoch": 19.59259259259259, "grad_norm": 0.2290651500225067, "learning_rate": 5.593402470119302e-08, "loss": 0.6684, "step": 17202 }, { "epoch": 19.593732193732194, "grad_norm": 0.22096139192581177, "learning_rate": 5.562296397496891e-08, "loss": 0.4033, "step": 17203 }, { "epoch": 19.594871794871796, "grad_norm": 0.172435462474823, "learning_rate": 5.531276963555432e-08, "loss": 0.7666, "step": 17204 }, { "epoch": 19.596011396011395, "grad_norm": 0.16623039543628693, "learning_rate": 5.500344169372396e-08, "loss": 0.8804, "step": 17205 }, { "epoch": 19.597150997150997, "grad_norm": 0.20657214522361755, "learning_rate": 5.469498016022479e-08, "loss": 0.8464, "step": 17206 }, { "epoch": 19.5982905982906, "grad_norm": 0.17143048346042633, "learning_rate": 5.4387385045770454e-08, "loss": 0.6488, "step": 17207 }, { "epoch": 19.5994301994302, "grad_norm": 0.18172615766525269, "learning_rate": 5.408065636104132e-08, "loss": 0.6355, "step": 17208 }, { "epoch": 19.6005698005698, "grad_norm": 0.24612468481063843, "learning_rate": 5.377479411668995e-08, "loss": 0.7174, "step": 17209 }, { "epoch": 19.601709401709403, "grad_norm": 0.18231748044490814, "learning_rate": 5.346979832334675e-08, "loss": 0.5194, "step": 17210 }, { "epoch": 19.602849002849002, "grad_norm": 0.17460936307907104, "learning_rate": 5.3165668991597696e-08, "loss": 0.7452, "step": 17211 }, { "epoch": 19.603988603988604, "grad_norm": 0.16702604293823242, "learning_rate": 5.286240613200932e-08, "loss": 0.4361, "step": 17212 }, { "epoch": 19.605128205128207, "grad_norm": 0.16236288845539093, "learning_rate": 5.256000975511488e-08, "loss": 0.748, "step": 17213 }, { "epoch": 19.606267806267805, "grad_norm": 0.18586201965808868, "learning_rate": 5.2258479871417075e-08, "loss": 0.673, "step": 17214 }, { "epoch": 19.607407407407408, "grad_norm": 0.22434577345848083, "learning_rate": 5.195781649139087e-08, "loss": 0.6881, "step": 17215 }, { "epoch": 19.60854700854701, "grad_norm": 0.1948535442352295, "learning_rate": 5.1658019625475116e-08, "loss": 0.6597, "step": 17216 }, { "epoch": 19.60968660968661, "grad_norm": 0.1969522088766098, "learning_rate": 5.135908928408373e-08, "loss": 0.6556, "step": 17217 }, { "epoch": 19.61082621082621, "grad_norm": 0.21910102665424347, "learning_rate": 5.106102547760283e-08, "loss": 0.5069, "step": 17218 }, { "epoch": 19.611965811965813, "grad_norm": 0.17936351895332336, "learning_rate": 5.0763828216379703e-08, "loss": 0.7133, "step": 17219 }, { "epoch": 19.613105413105412, "grad_norm": 0.19450253248214722, "learning_rate": 5.046749751073943e-08, "loss": 0.611, "step": 17220 }, { "epoch": 19.614245014245014, "grad_norm": 0.19936460256576538, "learning_rate": 5.017203337097654e-08, "loss": 0.5414, "step": 17221 }, { "epoch": 19.615384615384617, "grad_norm": 0.17847365140914917, "learning_rate": 4.9877435807349494e-08, "loss": 0.6396, "step": 17222 }, { "epoch": 19.616524216524216, "grad_norm": 0.18983618915081024, "learning_rate": 4.958370483009456e-08, "loss": 0.814, "step": 17223 }, { "epoch": 19.617663817663818, "grad_norm": 0.20069336891174316, "learning_rate": 4.929084044940913e-08, "loss": 0.5583, "step": 17224 }, { "epoch": 19.61880341880342, "grad_norm": 0.2257624864578247, "learning_rate": 4.899884267546839e-08, "loss": 0.7554, "step": 17225 }, { "epoch": 19.61994301994302, "grad_norm": 0.20969153940677643, "learning_rate": 4.870771151841147e-08, "loss": 0.6199, "step": 17226 }, { "epoch": 19.62108262108262, "grad_norm": 0.21343040466308594, "learning_rate": 4.8417446988355265e-08, "loss": 0.8209, "step": 17227 }, { "epoch": 19.622222222222224, "grad_norm": 0.18037155270576477, "learning_rate": 4.812804909537505e-08, "loss": 0.6658, "step": 17228 }, { "epoch": 19.623361823361822, "grad_norm": 0.17783091962337494, "learning_rate": 4.783951784952945e-08, "loss": 0.7338, "step": 17229 }, { "epoch": 19.624501424501425, "grad_norm": 0.21028144657611847, "learning_rate": 4.755185326083267e-08, "loss": 0.8786, "step": 17230 }, { "epoch": 19.625641025641027, "grad_norm": 0.20974908769130707, "learning_rate": 4.726505533928227e-08, "loss": 0.6961, "step": 17231 }, { "epoch": 19.626780626780626, "grad_norm": 0.1904968023300171, "learning_rate": 4.697912409483418e-08, "loss": 0.7653, "step": 17232 }, { "epoch": 19.627920227920228, "grad_norm": 0.2758485972881317, "learning_rate": 4.669405953742489e-08, "loss": 0.5149, "step": 17233 }, { "epoch": 19.62905982905983, "grad_norm": 0.18627852201461792, "learning_rate": 4.6409861676949276e-08, "loss": 0.7498, "step": 17234 }, { "epoch": 19.63019943019943, "grad_norm": 0.20226365327835083, "learning_rate": 4.612653052328553e-08, "loss": 0.7716, "step": 17235 }, { "epoch": 19.63133903133903, "grad_norm": 0.2074815034866333, "learning_rate": 4.584406608626746e-08, "loss": 0.6938, "step": 17236 }, { "epoch": 19.632478632478634, "grad_norm": 0.1934041976928711, "learning_rate": 4.556246837571221e-08, "loss": 0.8236, "step": 17237 }, { "epoch": 19.633618233618233, "grad_norm": 0.19022026658058167, "learning_rate": 4.5281737401395295e-08, "loss": 0.4179, "step": 17238 }, { "epoch": 19.634757834757835, "grad_norm": 0.18007512390613556, "learning_rate": 4.500187317307003e-08, "loss": 0.5701, "step": 17239 }, { "epoch": 19.635897435897437, "grad_norm": 0.20612673461437225, "learning_rate": 4.472287570045641e-08, "loss": 0.6698, "step": 17240 }, { "epoch": 19.637037037037036, "grad_norm": 0.20322008430957794, "learning_rate": 4.444474499324391e-08, "loss": 0.3804, "step": 17241 }, { "epoch": 19.63817663817664, "grad_norm": 0.17203862965106964, "learning_rate": 4.416748106109425e-08, "loss": 0.6458, "step": 17242 }, { "epoch": 19.63931623931624, "grad_norm": 0.23662760853767395, "learning_rate": 4.389108391363583e-08, "loss": 0.6583, "step": 17243 }, { "epoch": 19.64045584045584, "grad_norm": 0.19999438524246216, "learning_rate": 4.361555356046931e-08, "loss": 0.5216, "step": 17244 }, { "epoch": 19.64159544159544, "grad_norm": 0.23440754413604736, "learning_rate": 4.334089001116759e-08, "loss": 0.55, "step": 17245 }, { "epoch": 19.642735042735044, "grad_norm": 0.1712341159582138, "learning_rate": 4.30670932752647e-08, "loss": 0.7136, "step": 17246 }, { "epoch": 19.643874643874643, "grad_norm": 0.18940702080726624, "learning_rate": 4.2794163362275265e-08, "loss": 0.6789, "step": 17247 }, { "epoch": 19.645014245014245, "grad_norm": 0.17617487907409668, "learning_rate": 4.25221002816778e-08, "loss": 0.4361, "step": 17248 }, { "epoch": 19.646153846153847, "grad_norm": 0.19227629899978638, "learning_rate": 4.225090404291754e-08, "loss": 0.7244, "step": 17249 }, { "epoch": 19.647293447293446, "grad_norm": 0.2293432205915451, "learning_rate": 4.198057465542027e-08, "loss": 0.7112, "step": 17250 }, { "epoch": 19.64843304843305, "grad_norm": 0.1894661784172058, "learning_rate": 4.171111212857293e-08, "loss": 0.6733, "step": 17251 }, { "epoch": 19.64957264957265, "grad_norm": 0.20832861959934235, "learning_rate": 4.144251647173469e-08, "loss": 0.6157, "step": 17252 }, { "epoch": 19.65071225071225, "grad_norm": 0.1731022298336029, "learning_rate": 4.1174787694231444e-08, "loss": 0.6309, "step": 17253 }, { "epoch": 19.651851851851852, "grad_norm": 0.20629489421844482, "learning_rate": 4.0907925805366845e-08, "loss": 0.7796, "step": 17254 }, { "epoch": 19.652991452991454, "grad_norm": 0.1739402562379837, "learning_rate": 4.064193081440848e-08, "loss": 0.8284, "step": 17255 }, { "epoch": 19.654131054131053, "grad_norm": 0.20788271725177765, "learning_rate": 4.0376802730593414e-08, "loss": 0.3069, "step": 17256 }, { "epoch": 19.655270655270655, "grad_norm": 0.1854596734046936, "learning_rate": 4.0112541563130935e-08, "loss": 0.7617, "step": 17257 }, { "epoch": 19.656410256410258, "grad_norm": 0.2370794266462326, "learning_rate": 3.984914732119704e-08, "loss": 0.47, "step": 17258 }, { "epoch": 19.657549857549856, "grad_norm": 0.20367947220802307, "learning_rate": 3.9586620013948304e-08, "loss": 0.5533, "step": 17259 }, { "epoch": 19.65868945868946, "grad_norm": 0.192902609705925, "learning_rate": 3.93249596504941e-08, "loss": 0.7089, "step": 17260 }, { "epoch": 19.65982905982906, "grad_norm": 0.20828105509281158, "learning_rate": 3.906416623992715e-08, "loss": 0.5944, "step": 17261 }, { "epoch": 19.66096866096866, "grad_norm": 0.20111463963985443, "learning_rate": 3.8804239791304097e-08, "loss": 0.4979, "step": 17262 }, { "epoch": 19.662108262108262, "grad_norm": 0.18175876140594482, "learning_rate": 3.854518031365384e-08, "loss": 0.6765, "step": 17263 }, { "epoch": 19.663247863247864, "grad_norm": 0.19266632199287415, "learning_rate": 3.828698781597473e-08, "loss": 0.4712, "step": 17264 }, { "epoch": 19.664387464387463, "grad_norm": 0.1943490207195282, "learning_rate": 3.8029662307231815e-08, "loss": 0.6413, "step": 17265 }, { "epoch": 19.665527065527066, "grad_norm": 0.18787044286727905, "learning_rate": 3.7773203796365175e-08, "loss": 0.6085, "step": 17266 }, { "epoch": 19.666666666666668, "grad_norm": 0.1959238350391388, "learning_rate": 3.751761229228157e-08, "loss": 0.3295, "step": 17267 }, { "epoch": 19.667806267806267, "grad_norm": 0.1961667388677597, "learning_rate": 3.7262887803857224e-08, "loss": 0.824, "step": 17268 }, { "epoch": 19.66894586894587, "grad_norm": 0.1957489252090454, "learning_rate": 3.700903033994063e-08, "loss": 0.8551, "step": 17269 }, { "epoch": 19.67008547008547, "grad_norm": 0.20766261219978333, "learning_rate": 3.67560399093525e-08, "loss": 0.8463, "step": 17270 }, { "epoch": 19.67122507122507, "grad_norm": 0.20428958535194397, "learning_rate": 3.6503916520871926e-08, "loss": 0.5918, "step": 17271 }, { "epoch": 19.672364672364672, "grad_norm": 0.21748413145542145, "learning_rate": 3.6252660183261346e-08, "loss": 0.5408, "step": 17272 }, { "epoch": 19.673504273504275, "grad_norm": 0.1851986199617386, "learning_rate": 3.600227090524711e-08, "loss": 0.6178, "step": 17273 }, { "epoch": 19.674643874643873, "grad_norm": 0.19075873494148254, "learning_rate": 3.5752748695527805e-08, "loss": 0.8041, "step": 17274 }, { "epoch": 19.675783475783476, "grad_norm": 0.2254505455493927, "learning_rate": 3.550409356276318e-08, "loss": 0.7691, "step": 17275 }, { "epoch": 19.676923076923078, "grad_norm": 0.22736750543117523, "learning_rate": 3.525630551559633e-08, "loss": 0.4992, "step": 17276 }, { "epoch": 19.678062678062677, "grad_norm": 0.22003592550754547, "learning_rate": 3.5009384562631473e-08, "loss": 0.7605, "step": 17277 }, { "epoch": 19.67920227920228, "grad_norm": 0.21777081489562988, "learning_rate": 3.476333071244231e-08, "loss": 0.6189, "step": 17278 }, { "epoch": 19.68034188034188, "grad_norm": 0.1750691831111908, "learning_rate": 3.451814397358033e-08, "loss": 0.7692, "step": 17279 }, { "epoch": 19.68148148148148, "grad_norm": 0.24190931022167206, "learning_rate": 3.4273824354555395e-08, "loss": 0.423, "step": 17280 }, { "epoch": 19.682621082621083, "grad_norm": 0.20943762362003326, "learning_rate": 3.403037186386071e-08, "loss": 0.6394, "step": 17281 }, { "epoch": 19.683760683760685, "grad_norm": 0.22648000717163086, "learning_rate": 3.378778650994507e-08, "loss": 0.5333, "step": 17282 }, { "epoch": 19.684900284900284, "grad_norm": 0.16168929636478424, "learning_rate": 3.354606830123508e-08, "loss": 0.788, "step": 17283 }, { "epoch": 19.686039886039886, "grad_norm": 0.1891418695449829, "learning_rate": 3.3305217246132335e-08, "loss": 0.6606, "step": 17284 }, { "epoch": 19.68717948717949, "grad_norm": 0.17276088893413544, "learning_rate": 3.306523335299683e-08, "loss": 0.7186, "step": 17285 }, { "epoch": 19.688319088319087, "grad_norm": 0.23356008529663086, "learning_rate": 3.282611663016355e-08, "loss": 0.5289, "step": 17286 }, { "epoch": 19.68945868945869, "grad_norm": 0.20581302046775818, "learning_rate": 3.258786708593975e-08, "loss": 0.4179, "step": 17287 }, { "epoch": 19.69059829059829, "grad_norm": 0.1834385246038437, "learning_rate": 3.235048472859936e-08, "loss": 0.6018, "step": 17288 }, { "epoch": 19.69173789173789, "grad_norm": 0.1842707395553589, "learning_rate": 3.211396956639134e-08, "loss": 0.701, "step": 17289 }, { "epoch": 19.692877492877493, "grad_norm": 0.21426624059677124, "learning_rate": 3.187832160752302e-08, "loss": 0.3809, "step": 17290 }, { "epoch": 19.694017094017095, "grad_norm": 0.17906080186367035, "learning_rate": 3.164354086018506e-08, "loss": 0.8176, "step": 17291 }, { "epoch": 19.695156695156694, "grad_norm": 0.26043346524238586, "learning_rate": 3.140962733252928e-08, "loss": 0.607, "step": 17292 }, { "epoch": 19.696296296296296, "grad_norm": 0.22749273478984833, "learning_rate": 3.1176581032682506e-08, "loss": 0.7603, "step": 17293 }, { "epoch": 19.6974358974359, "grad_norm": 0.19497859477996826, "learning_rate": 3.094440196873827e-08, "loss": 0.7188, "step": 17294 }, { "epoch": 19.698575498575497, "grad_norm": 0.1934613287448883, "learning_rate": 3.071309014875679e-08, "loss": 0.7491, "step": 17295 }, { "epoch": 19.6997150997151, "grad_norm": 0.1895592212677002, "learning_rate": 3.0482645580778846e-08, "loss": 0.699, "step": 17296 }, { "epoch": 19.700854700854702, "grad_norm": 0.18725530803203583, "learning_rate": 3.025306827280361e-08, "loss": 0.7173, "step": 17297 }, { "epoch": 19.7019943019943, "grad_norm": 0.19018876552581787, "learning_rate": 3.002435823280525e-08, "loss": 0.7807, "step": 17298 }, { "epoch": 19.703133903133903, "grad_norm": 0.18515364825725555, "learning_rate": 2.9796515468730192e-08, "loss": 0.8516, "step": 17299 }, { "epoch": 19.704273504273505, "grad_norm": 0.2370092123746872, "learning_rate": 2.956953998848877e-08, "loss": 0.5101, "step": 17300 }, { "epoch": 19.705413105413104, "grad_norm": 0.21618060767650604, "learning_rate": 2.9343431799969122e-08, "loss": 0.5587, "step": 17301 }, { "epoch": 19.706552706552706, "grad_norm": 0.15011045336723328, "learning_rate": 2.9118190911020527e-08, "loss": 0.6464, "step": 17302 }, { "epoch": 19.70769230769231, "grad_norm": 0.2167392075061798, "learning_rate": 2.88938173294645e-08, "loss": 0.6599, "step": 17303 }, { "epoch": 19.708831908831907, "grad_norm": 0.2287842035293579, "learning_rate": 2.8670311063100362e-08, "loss": 0.543, "step": 17304 }, { "epoch": 19.70997150997151, "grad_norm": 0.17364954948425293, "learning_rate": 2.8447672119685797e-08, "loss": 0.6019, "step": 17305 }, { "epoch": 19.711111111111112, "grad_norm": 0.19438974559307098, "learning_rate": 2.8225900506956282e-08, "loss": 0.6255, "step": 17306 }, { "epoch": 19.71225071225071, "grad_norm": 0.22095739841461182, "learning_rate": 2.8004996232613988e-08, "loss": 0.6274, "step": 17307 }, { "epoch": 19.713390313390313, "grad_norm": 0.18750642240047455, "learning_rate": 2.7784959304333337e-08, "loss": 0.8426, "step": 17308 }, { "epoch": 19.714529914529916, "grad_norm": 0.21393738687038422, "learning_rate": 2.7565789729752655e-08, "loss": 0.7651, "step": 17309 }, { "epoch": 19.715669515669514, "grad_norm": 0.17756567895412445, "learning_rate": 2.734748751648808e-08, "loss": 0.6271, "step": 17310 }, { "epoch": 19.716809116809117, "grad_norm": 0.20901112258434296, "learning_rate": 2.7130052672119653e-08, "loss": 0.4133, "step": 17311 }, { "epoch": 19.71794871794872, "grad_norm": 0.18372060358524323, "learning_rate": 2.6913485204199672e-08, "loss": 0.7573, "step": 17312 }, { "epoch": 19.719088319088318, "grad_norm": 0.22114580869674683, "learning_rate": 2.6697785120249895e-08, "loss": 0.529, "step": 17313 }, { "epoch": 19.72022792022792, "grad_norm": 0.17835091054439545, "learning_rate": 2.6482952427764328e-08, "loss": 0.6569, "step": 17314 }, { "epoch": 19.721367521367522, "grad_norm": 0.20545682311058044, "learning_rate": 2.6268987134200897e-08, "loss": 0.5758, "step": 17315 }, { "epoch": 19.72250712250712, "grad_norm": 0.2764587104320526, "learning_rate": 2.6055889246995313e-08, "loss": 0.5116, "step": 17316 }, { "epoch": 19.723646723646723, "grad_norm": 0.1845266968011856, "learning_rate": 2.584365877354722e-08, "loss": 0.4803, "step": 17317 }, { "epoch": 19.724786324786326, "grad_norm": 0.20697034895420074, "learning_rate": 2.5632295721228493e-08, "loss": 0.7492, "step": 17318 }, { "epoch": 19.725925925925925, "grad_norm": 0.2017263025045395, "learning_rate": 2.542180009738049e-08, "loss": 0.7248, "step": 17319 }, { "epoch": 19.727065527065527, "grad_norm": 0.1986721009016037, "learning_rate": 2.5212171909311243e-08, "loss": 0.7983, "step": 17320 }, { "epoch": 19.72820512820513, "grad_norm": 0.23680278658866882, "learning_rate": 2.5003411164306602e-08, "loss": 0.6465, "step": 17321 }, { "epoch": 19.729344729344728, "grad_norm": 0.1814713180065155, "learning_rate": 2.479551786961354e-08, "loss": 0.7606, "step": 17322 }, { "epoch": 19.73048433048433, "grad_norm": 0.20839419960975647, "learning_rate": 2.4588492032456833e-08, "loss": 0.6567, "step": 17323 }, { "epoch": 19.731623931623933, "grad_norm": 0.21391022205352783, "learning_rate": 2.438233366001963e-08, "loss": 0.6914, "step": 17324 }, { "epoch": 19.73276353276353, "grad_norm": 0.22437989711761475, "learning_rate": 2.417704275947119e-08, "loss": 0.5848, "step": 17325 }, { "epoch": 19.733903133903134, "grad_norm": 0.15253695845603943, "learning_rate": 2.3972619337936374e-08, "loss": 0.727, "step": 17326 }, { "epoch": 19.735042735042736, "grad_norm": 0.16123710572719574, "learning_rate": 2.376906340251783e-08, "loss": 0.6649, "step": 17327 }, { "epoch": 19.736182336182335, "grad_norm": 0.20879414677619934, "learning_rate": 2.356637496028491e-08, "loss": 0.6138, "step": 17328 }, { "epoch": 19.737321937321937, "grad_norm": 0.16920596361160278, "learning_rate": 2.3364554018273642e-08, "loss": 0.7812, "step": 17329 }, { "epoch": 19.73846153846154, "grad_norm": 0.19896115362644196, "learning_rate": 2.3163600583500645e-08, "loss": 0.6453, "step": 17330 }, { "epoch": 19.739601139601138, "grad_norm": 0.1775745004415512, "learning_rate": 2.2963514662943664e-08, "loss": 0.814, "step": 17331 }, { "epoch": 19.74074074074074, "grad_norm": 0.20894837379455566, "learning_rate": 2.2764296263547148e-08, "loss": 0.5946, "step": 17332 }, { "epoch": 19.741880341880343, "grad_norm": 0.2276475578546524, "learning_rate": 2.256594539223611e-08, "loss": 0.7004, "step": 17333 }, { "epoch": 19.74301994301994, "grad_norm": 0.17170199751853943, "learning_rate": 2.2368462055899487e-08, "loss": 0.687, "step": 17334 }, { "epoch": 19.744159544159544, "grad_norm": 0.18173982203006744, "learning_rate": 2.2171846261392904e-08, "loss": 0.8004, "step": 17335 }, { "epoch": 19.745299145299146, "grad_norm": 0.19223652780056, "learning_rate": 2.1976098015547008e-08, "loss": 0.7668, "step": 17336 }, { "epoch": 19.746438746438745, "grad_norm": 0.2115246057510376, "learning_rate": 2.1781217325161917e-08, "loss": 0.6602, "step": 17337 }, { "epoch": 19.747578347578347, "grad_norm": 0.1955891102552414, "learning_rate": 2.1587204197007216e-08, "loss": 0.8293, "step": 17338 }, { "epoch": 19.74871794871795, "grad_norm": 0.15871645510196686, "learning_rate": 2.1394058637816406e-08, "loss": 1.0231, "step": 17339 }, { "epoch": 19.74985754985755, "grad_norm": 0.18715499341487885, "learning_rate": 2.120178065430356e-08, "loss": 0.7425, "step": 17340 }, { "epoch": 19.75099715099715, "grad_norm": 0.15829820930957794, "learning_rate": 2.1010370253143896e-08, "loss": 0.6157, "step": 17341 }, { "epoch": 19.752136752136753, "grad_norm": 0.20122310519218445, "learning_rate": 2.0819827440987648e-08, "loss": 0.5028, "step": 17342 }, { "epoch": 19.753276353276352, "grad_norm": 0.1904160976409912, "learning_rate": 2.063015222445175e-08, "loss": 0.5394, "step": 17343 }, { "epoch": 19.754415954415954, "grad_norm": 0.17193228006362915, "learning_rate": 2.044134461012259e-08, "loss": 0.6024, "step": 17344 }, { "epoch": 19.755555555555556, "grad_norm": 0.14988501369953156, "learning_rate": 2.02534046045616e-08, "loss": 0.7363, "step": 17345 }, { "epoch": 19.756695156695155, "grad_norm": 0.21250396966934204, "learning_rate": 2.0066332214294104e-08, "loss": 0.5097, "step": 17346 }, { "epoch": 19.757834757834758, "grad_norm": 0.17534324526786804, "learning_rate": 1.9880127445817687e-08, "loss": 0.4889, "step": 17347 }, { "epoch": 19.75897435897436, "grad_norm": 0.18992437422275543, "learning_rate": 1.9694790305599397e-08, "loss": 0.759, "step": 17348 }, { "epoch": 19.76011396011396, "grad_norm": 0.16042746603488922, "learning_rate": 1.9510320800075754e-08, "loss": 0.7753, "step": 17349 }, { "epoch": 19.76125356125356, "grad_norm": 0.16928979754447937, "learning_rate": 1.9326718935658293e-08, "loss": 0.63, "step": 17350 }, { "epoch": 19.762393162393163, "grad_norm": 0.1815546452999115, "learning_rate": 1.91439847187197e-08, "loss": 0.655, "step": 17351 }, { "epoch": 19.763532763532762, "grad_norm": 0.17108528316020966, "learning_rate": 1.8962118155607668e-08, "loss": 0.7128, "step": 17352 }, { "epoch": 19.764672364672364, "grad_norm": 0.15614262223243713, "learning_rate": 1.8781119252639368e-08, "loss": 1.002, "step": 17353 }, { "epoch": 19.765811965811967, "grad_norm": 0.20092107355594635, "learning_rate": 1.860098801610144e-08, "loss": 0.6723, "step": 17354 }, { "epoch": 19.766951566951565, "grad_norm": 0.2087731659412384, "learning_rate": 1.8421724452252764e-08, "loss": 0.5298, "step": 17355 }, { "epoch": 19.768091168091168, "grad_norm": 0.18618348240852356, "learning_rate": 1.8243328567313368e-08, "loss": 0.7854, "step": 17356 }, { "epoch": 19.76923076923077, "grad_norm": 0.1749459058046341, "learning_rate": 1.8065800367486617e-08, "loss": 0.8249, "step": 17357 }, { "epoch": 19.77037037037037, "grad_norm": 0.20197616517543793, "learning_rate": 1.7889139858934257e-08, "loss": 0.6954, "step": 17358 }, { "epoch": 19.77150997150997, "grad_norm": 0.18469204008579254, "learning_rate": 1.7713347047790263e-08, "loss": 0.8121, "step": 17359 }, { "epoch": 19.772649572649573, "grad_norm": 0.15979142487049103, "learning_rate": 1.7538421940166417e-08, "loss": 0.6758, "step": 17360 }, { "epoch": 19.773789173789172, "grad_norm": 0.19149626791477203, "learning_rate": 1.7364364542135635e-08, "loss": 0.6704, "step": 17361 }, { "epoch": 19.774928774928775, "grad_norm": 0.17909017205238342, "learning_rate": 1.719117485974031e-08, "loss": 0.6038, "step": 17362 }, { "epoch": 19.776068376068377, "grad_norm": 0.18704593181610107, "learning_rate": 1.7018852899000626e-08, "loss": 0.744, "step": 17363 }, { "epoch": 19.777207977207976, "grad_norm": 0.18533900380134583, "learning_rate": 1.6847398665897906e-08, "loss": 0.6598, "step": 17364 }, { "epoch": 19.778347578347578, "grad_norm": 0.1875920295715332, "learning_rate": 1.6676812166391275e-08, "loss": 0.6565, "step": 17365 }, { "epoch": 19.77948717948718, "grad_norm": 0.172038272023201, "learning_rate": 1.6507093406401e-08, "loss": 0.651, "step": 17366 }, { "epoch": 19.78062678062678, "grad_norm": 0.23636727035045624, "learning_rate": 1.633824239182513e-08, "loss": 0.6692, "step": 17367 }, { "epoch": 19.78176638176638, "grad_norm": 0.1715584248304367, "learning_rate": 1.617025912852843e-08, "loss": 0.6428, "step": 17368 }, { "epoch": 19.782905982905984, "grad_norm": 0.16638945043087006, "learning_rate": 1.6003143622345117e-08, "loss": 0.6822, "step": 17369 }, { "epoch": 19.784045584045582, "grad_norm": 0.19432315230369568, "learning_rate": 1.583689587907611e-08, "loss": 0.8053, "step": 17370 }, { "epoch": 19.785185185185185, "grad_norm": 0.17379184067249298, "learning_rate": 1.567151590450011e-08, "loss": 0.7163, "step": 17371 }, { "epoch": 19.786324786324787, "grad_norm": 0.21618704497814178, "learning_rate": 1.550700370435976e-08, "loss": 0.7115, "step": 17372 }, { "epoch": 19.787464387464386, "grad_norm": 0.20042377710342407, "learning_rate": 1.534335928436714e-08, "loss": 0.6158, "step": 17373 }, { "epoch": 19.788603988603988, "grad_norm": 0.19638532400131226, "learning_rate": 1.5180582650209385e-08, "loss": 0.835, "step": 17374 }, { "epoch": 19.78974358974359, "grad_norm": 0.16307075321674347, "learning_rate": 1.501867380753752e-08, "loss": 0.776, "step": 17375 }, { "epoch": 19.79088319088319, "grad_norm": 0.21321846544742584, "learning_rate": 1.4857632761977603e-08, "loss": 0.6376, "step": 17376 }, { "epoch": 19.79202279202279, "grad_norm": 0.16780985891819, "learning_rate": 1.4697459519119605e-08, "loss": 0.7827, "step": 17377 }, { "epoch": 19.793162393162394, "grad_norm": 0.16664044559001923, "learning_rate": 1.4538154084528522e-08, "loss": 0.7853, "step": 17378 }, { "epoch": 19.794301994301993, "grad_norm": 0.1965169906616211, "learning_rate": 1.4379716463738813e-08, "loss": 0.5155, "step": 17379 }, { "epoch": 19.795441595441595, "grad_norm": 0.17581987380981445, "learning_rate": 1.4222146662251635e-08, "loss": 0.7154, "step": 17380 }, { "epoch": 19.796581196581197, "grad_norm": 0.1875821352005005, "learning_rate": 1.4065444685540385e-08, "loss": 0.9377, "step": 17381 }, { "epoch": 19.797720797720796, "grad_norm": 0.23596879839897156, "learning_rate": 1.3909610539047934e-08, "loss": 0.519, "step": 17382 }, { "epoch": 19.7988603988604, "grad_norm": 0.1874542236328125, "learning_rate": 1.3754644228189394e-08, "loss": 0.5031, "step": 17383 }, { "epoch": 19.8, "grad_norm": 0.186298206448555, "learning_rate": 1.3600545758341022e-08, "loss": 0.7241, "step": 17384 }, { "epoch": 19.8011396011396, "grad_norm": 0.222769096493721, "learning_rate": 1.344731513485964e-08, "loss": 0.4098, "step": 17385 }, { "epoch": 19.802279202279202, "grad_norm": 0.18376067280769348, "learning_rate": 1.3294952363065993e-08, "loss": 0.6354, "step": 17386 }, { "epoch": 19.803418803418804, "grad_norm": 0.1565205603837967, "learning_rate": 1.314345744825307e-08, "loss": 0.7067, "step": 17387 }, { "epoch": 19.804558404558403, "grad_norm": 0.2034037858247757, "learning_rate": 1.2992830395680555e-08, "loss": 0.4996, "step": 17388 }, { "epoch": 19.805698005698005, "grad_norm": 0.2041102647781372, "learning_rate": 1.2843071210583146e-08, "loss": 0.5664, "step": 17389 }, { "epoch": 19.806837606837608, "grad_norm": 0.19491428136825562, "learning_rate": 1.2694179898159464e-08, "loss": 0.7311, "step": 17390 }, { "epoch": 19.807977207977206, "grad_norm": 0.21402095258235931, "learning_rate": 1.2546156463583147e-08, "loss": 0.7012, "step": 17391 }, { "epoch": 19.80911680911681, "grad_norm": 0.17485588788986206, "learning_rate": 1.239900091199453e-08, "loss": 0.7125, "step": 17392 }, { "epoch": 19.81025641025641, "grad_norm": 0.23262129724025726, "learning_rate": 1.2252713248506187e-08, "loss": 0.6616, "step": 17393 }, { "epoch": 19.81139601139601, "grad_norm": 0.19495739042758942, "learning_rate": 1.210729347819739e-08, "loss": 0.6964, "step": 17394 }, { "epoch": 19.812535612535612, "grad_norm": 0.17964985966682434, "learning_rate": 1.1962741606116878e-08, "loss": 0.6539, "step": 17395 }, { "epoch": 19.813675213675214, "grad_norm": 0.18871615827083588, "learning_rate": 1.181905763728841e-08, "loss": 0.5455, "step": 17396 }, { "epoch": 19.814814814814813, "grad_norm": 0.21508648991584778, "learning_rate": 1.167624157670244e-08, "loss": 0.75, "step": 17397 }, { "epoch": 19.815954415954415, "grad_norm": 0.21118800342082977, "learning_rate": 1.1534293429318888e-08, "loss": 0.6959, "step": 17398 }, { "epoch": 19.817094017094018, "grad_norm": 0.18347077071666718, "learning_rate": 1.1393213200067144e-08, "loss": 0.8279, "step": 17399 }, { "epoch": 19.81823361823362, "grad_norm": 0.2921779751777649, "learning_rate": 1.1253000893848842e-08, "loss": 0.5825, "step": 17400 }, { "epoch": 19.81937321937322, "grad_norm": 0.20175950229167938, "learning_rate": 1.1113656515535086e-08, "loss": 0.7478, "step": 17401 }, { "epoch": 19.82051282051282, "grad_norm": 0.18214459717273712, "learning_rate": 1.09751800699609e-08, "loss": 0.5814, "step": 17402 }, { "epoch": 19.821652421652423, "grad_norm": 0.181244358420372, "learning_rate": 1.0837571561939097e-08, "loss": 0.8337, "step": 17403 }, { "epoch": 19.822792022792022, "grad_norm": 0.23323874175548553, "learning_rate": 1.070083099624919e-08, "loss": 0.4065, "step": 17404 }, { "epoch": 19.823931623931625, "grad_norm": 0.21152248978614807, "learning_rate": 1.0564958377640156e-08, "loss": 0.7171, "step": 17405 }, { "epoch": 19.825071225071227, "grad_norm": 0.23705808818340302, "learning_rate": 1.0429953710830443e-08, "loss": 0.8134, "step": 17406 }, { "epoch": 19.826210826210826, "grad_norm": 0.24272708594799042, "learning_rate": 1.0295817000513518e-08, "loss": 0.5927, "step": 17407 }, { "epoch": 19.827350427350428, "grad_norm": 0.19271287322044373, "learning_rate": 1.016254825134122e-08, "loss": 0.48, "step": 17408 }, { "epoch": 19.82849002849003, "grad_norm": 0.18881921470165253, "learning_rate": 1.0030147467945949e-08, "loss": 0.5836, "step": 17409 }, { "epoch": 19.82962962962963, "grad_norm": 0.23984551429748535, "learning_rate": 9.898614654929583e-09, "loss": 0.518, "step": 17410 }, { "epoch": 19.83076923076923, "grad_norm": 0.2106814980506897, "learning_rate": 9.767949816855137e-09, "loss": 0.44, "step": 17411 }, { "epoch": 19.831908831908834, "grad_norm": 0.18543528020381927, "learning_rate": 9.638152958263425e-09, "loss": 0.7746, "step": 17412 }, { "epoch": 19.833048433048432, "grad_norm": 0.18596503138542175, "learning_rate": 9.50922408366195e-09, "loss": 0.6435, "step": 17413 }, { "epoch": 19.834188034188035, "grad_norm": 0.22651363909244537, "learning_rate": 9.381163197527687e-09, "loss": 0.492, "step": 17414 }, { "epoch": 19.835327635327637, "grad_norm": 0.2185947149991989, "learning_rate": 9.253970304312632e-09, "loss": 0.615, "step": 17415 }, { "epoch": 19.836467236467236, "grad_norm": 0.19200536608695984, "learning_rate": 9.127645408432695e-09, "loss": 0.9006, "step": 17416 }, { "epoch": 19.837606837606838, "grad_norm": 0.19673481583595276, "learning_rate": 9.002188514273257e-09, "loss": 0.4871, "step": 17417 }, { "epoch": 19.83874643874644, "grad_norm": 0.1683470904827118, "learning_rate": 8.877599626194722e-09, "loss": 0.7764, "step": 17418 }, { "epoch": 19.83988603988604, "grad_norm": 0.2062290906906128, "learning_rate": 8.753878748521405e-09, "loss": 0.6479, "step": 17419 }, { "epoch": 19.84102564102564, "grad_norm": 0.16176415979862213, "learning_rate": 8.631025885552647e-09, "loss": 0.7682, "step": 17420 }, { "epoch": 19.842165242165244, "grad_norm": 0.3066081404685974, "learning_rate": 8.509041041554477e-09, "loss": 0.3119, "step": 17421 }, { "epoch": 19.843304843304843, "grad_norm": 0.20348377525806427, "learning_rate": 8.387924220765176e-09, "loss": 0.7985, "step": 17422 }, { "epoch": 19.844444444444445, "grad_norm": 0.18342828750610352, "learning_rate": 8.267675427386933e-09, "loss": 0.6546, "step": 17423 }, { "epoch": 19.845584045584047, "grad_norm": 0.22837497293949127, "learning_rate": 8.148294665605293e-09, "loss": 0.8583, "step": 17424 }, { "epoch": 19.846723646723646, "grad_norm": 0.19176903367042542, "learning_rate": 8.029781939558612e-09, "loss": 0.6793, "step": 17425 }, { "epoch": 19.84786324786325, "grad_norm": 0.20270290970802307, "learning_rate": 7.912137253365814e-09, "loss": 0.7321, "step": 17426 }, { "epoch": 19.84900284900285, "grad_norm": 0.21365559101104736, "learning_rate": 7.795360611112523e-09, "loss": 0.7893, "step": 17427 }, { "epoch": 19.85014245014245, "grad_norm": 0.19575366377830505, "learning_rate": 7.679452016853827e-09, "loss": 0.6769, "step": 17428 }, { "epoch": 19.851282051282052, "grad_norm": 0.16900449991226196, "learning_rate": 7.564411474619837e-09, "loss": 0.7527, "step": 17429 }, { "epoch": 19.852421652421654, "grad_norm": 0.20102092623710632, "learning_rate": 7.4502389884018035e-09, "loss": 0.6776, "step": 17430 }, { "epoch": 19.853561253561253, "grad_norm": 0.17000263929367065, "learning_rate": 7.3369345621687735e-09, "loss": 0.7118, "step": 17431 }, { "epoch": 19.854700854700855, "grad_norm": 0.20500367879867554, "learning_rate": 7.224498199850938e-09, "loss": 0.5854, "step": 17432 }, { "epoch": 19.855840455840458, "grad_norm": 0.26341789960861206, "learning_rate": 7.1129299053590556e-09, "loss": 0.4035, "step": 17433 }, { "epoch": 19.856980056980056, "grad_norm": 0.22894629836082458, "learning_rate": 7.002229682565031e-09, "loss": 0.425, "step": 17434 }, { "epoch": 19.85811965811966, "grad_norm": 0.15743249654769897, "learning_rate": 6.892397535313011e-09, "loss": 0.5116, "step": 17435 }, { "epoch": 19.85925925925926, "grad_norm": 0.18057456612586975, "learning_rate": 6.783433467422162e-09, "loss": 0.7988, "step": 17436 }, { "epoch": 19.86039886039886, "grad_norm": 0.18068578839302063, "learning_rate": 6.675337482672794e-09, "loss": 0.7424, "step": 17437 }, { "epoch": 19.861538461538462, "grad_norm": 0.17379270493984222, "learning_rate": 6.568109584820236e-09, "loss": 0.5549, "step": 17438 }, { "epoch": 19.862678062678064, "grad_norm": 0.2070305347442627, "learning_rate": 6.461749777592063e-09, "loss": 0.5018, "step": 17439 }, { "epoch": 19.863817663817663, "grad_norm": 0.18210367858409882, "learning_rate": 6.35625806467699e-09, "loss": 0.8099, "step": 17440 }, { "epoch": 19.864957264957265, "grad_norm": 0.17534932494163513, "learning_rate": 6.251634449741528e-09, "loss": 0.5797, "step": 17441 }, { "epoch": 19.866096866096868, "grad_norm": 0.21269935369491577, "learning_rate": 6.147878936421658e-09, "loss": 0.5527, "step": 17442 }, { "epoch": 19.867236467236467, "grad_norm": 0.20287007093429565, "learning_rate": 6.044991528320054e-09, "loss": 0.5098, "step": 17443 }, { "epoch": 19.86837606837607, "grad_norm": 0.1705034375190735, "learning_rate": 5.9429722290088586e-09, "loss": 0.7357, "step": 17444 }, { "epoch": 19.86951566951567, "grad_norm": 0.1602213978767395, "learning_rate": 5.8418210420296825e-09, "loss": 0.9977, "step": 17445 }, { "epoch": 19.87065527065527, "grad_norm": 0.16496537625789642, "learning_rate": 5.741537970901934e-09, "loss": 0.7655, "step": 17446 }, { "epoch": 19.871794871794872, "grad_norm": 0.16658329963684082, "learning_rate": 5.642123019103384e-09, "loss": 0.744, "step": 17447 }, { "epoch": 19.872934472934475, "grad_norm": 0.2549525201320648, "learning_rate": 5.54357619008683e-09, "loss": 0.5534, "step": 17448 }, { "epoch": 19.874074074074073, "grad_norm": 0.19434592127799988, "learning_rate": 5.445897487280083e-09, "loss": 0.7252, "step": 17449 }, { "epoch": 19.875213675213676, "grad_norm": 0.18083354830741882, "learning_rate": 5.349086914069323e-09, "loss": 0.7156, "step": 17450 }, { "epoch": 19.876353276353278, "grad_norm": 0.1755906641483307, "learning_rate": 5.2531444738240785e-09, "loss": 0.7394, "step": 17451 }, { "epoch": 19.877492877492877, "grad_norm": 0.24183988571166992, "learning_rate": 5.158070169869467e-09, "loss": 0.543, "step": 17452 }, { "epoch": 19.87863247863248, "grad_norm": 0.18133480846881866, "learning_rate": 5.063864005513952e-09, "loss": 0.5583, "step": 17453 }, { "epoch": 19.87977207977208, "grad_norm": 0.1737232208251953, "learning_rate": 4.970525984024365e-09, "loss": 0.7472, "step": 17454 }, { "epoch": 19.88091168091168, "grad_norm": 0.230348140001297, "learning_rate": 4.87805610864811e-09, "loss": 0.6935, "step": 17455 }, { "epoch": 19.882051282051282, "grad_norm": 0.19094283878803253, "learning_rate": 4.786454382590955e-09, "loss": 0.6521, "step": 17456 }, { "epoch": 19.883190883190885, "grad_norm": 0.18163184821605682, "learning_rate": 4.695720809039239e-09, "loss": 0.654, "step": 17457 }, { "epoch": 19.884330484330484, "grad_norm": 0.23837114870548248, "learning_rate": 4.605855391140446e-09, "loss": 0.7393, "step": 17458 }, { "epoch": 19.885470085470086, "grad_norm": 0.19984039664268494, "learning_rate": 4.5168581320198524e-09, "loss": 0.8007, "step": 17459 }, { "epoch": 19.886609686609688, "grad_norm": 0.24067912995815277, "learning_rate": 4.428729034763879e-09, "loss": 0.604, "step": 17460 }, { "epoch": 19.887749287749287, "grad_norm": 0.17550112307071686, "learning_rate": 4.341468102439516e-09, "loss": 0.5195, "step": 17461 }, { "epoch": 19.88888888888889, "grad_norm": 0.21476757526397705, "learning_rate": 4.255075338072123e-09, "loss": 0.5572, "step": 17462 }, { "epoch": 19.89002849002849, "grad_norm": 0.22562627494335175, "learning_rate": 4.1695507446648515e-09, "loss": 0.5565, "step": 17463 }, { "epoch": 19.89116809116809, "grad_norm": 0.18826919794082642, "learning_rate": 4.084894325190325e-09, "loss": 0.6722, "step": 17464 }, { "epoch": 19.892307692307693, "grad_norm": 0.1579591929912567, "learning_rate": 4.0011060825823065e-09, "loss": 0.7509, "step": 17465 }, { "epoch": 19.893447293447295, "grad_norm": 0.2192116379737854, "learning_rate": 3.9181860197579075e-09, "loss": 0.7129, "step": 17466 }, { "epoch": 19.894586894586894, "grad_norm": 0.20730279386043549, "learning_rate": 3.8361341395953824e-09, "loss": 0.592, "step": 17467 }, { "epoch": 19.895726495726496, "grad_norm": 0.19416391849517822, "learning_rate": 3.754950444942451e-09, "loss": 0.6956, "step": 17468 }, { "epoch": 19.8968660968661, "grad_norm": 0.19905336201190948, "learning_rate": 3.6746349386190814e-09, "loss": 0.6212, "step": 17469 }, { "epoch": 19.898005698005697, "grad_norm": 0.18987423181533813, "learning_rate": 3.5951876234147087e-09, "loss": 0.6066, "step": 17470 }, { "epoch": 19.8991452991453, "grad_norm": 0.17264628410339355, "learning_rate": 3.516608502093788e-09, "loss": 0.7292, "step": 17471 }, { "epoch": 19.900284900284902, "grad_norm": 0.2468525469303131, "learning_rate": 3.438897577379141e-09, "loss": 0.5559, "step": 17472 }, { "epoch": 19.9014245014245, "grad_norm": 0.19226570427417755, "learning_rate": 3.3620548519713855e-09, "loss": 0.6404, "step": 17473 }, { "epoch": 19.902564102564103, "grad_norm": 0.17536082863807678, "learning_rate": 3.2860803285406085e-09, "loss": 0.5615, "step": 17474 }, { "epoch": 19.903703703703705, "grad_norm": 0.19645725190639496, "learning_rate": 3.2109740097291397e-09, "loss": 0.5813, "step": 17475 }, { "epoch": 19.904843304843304, "grad_norm": 0.1624937653541565, "learning_rate": 3.1367358981376773e-09, "loss": 0.8953, "step": 17476 }, { "epoch": 19.905982905982906, "grad_norm": 0.27368196845054626, "learning_rate": 3.063365996350265e-09, "loss": 0.5482, "step": 17477 }, { "epoch": 19.90712250712251, "grad_norm": 0.16922980546951294, "learning_rate": 2.9908643069148646e-09, "loss": 0.6721, "step": 17478 }, { "epoch": 19.908262108262107, "grad_norm": 0.22800640761852264, "learning_rate": 2.919230832348907e-09, "loss": 0.4294, "step": 17479 }, { "epoch": 19.90940170940171, "grad_norm": 0.1852252036333084, "learning_rate": 2.848465575139292e-09, "loss": 0.867, "step": 17480 }, { "epoch": 19.910541310541312, "grad_norm": 0.21090421080589294, "learning_rate": 2.778568537745163e-09, "loss": 0.6482, "step": 17481 }, { "epoch": 19.91168091168091, "grad_norm": 0.16362862288951874, "learning_rate": 2.7095397225951337e-09, "loss": 0.7249, "step": 17482 }, { "epoch": 19.912820512820513, "grad_norm": 0.1504623293876648, "learning_rate": 2.6413791320845095e-09, "loss": 0.6008, "step": 17483 }, { "epoch": 19.913960113960115, "grad_norm": 0.20504003763198853, "learning_rate": 2.574086768580841e-09, "loss": 0.6468, "step": 17484 }, { "epoch": 19.915099715099714, "grad_norm": 0.17852190136909485, "learning_rate": 2.507662634423924e-09, "loss": 0.6648, "step": 17485 }, { "epoch": 19.916239316239317, "grad_norm": 0.23240981996059418, "learning_rate": 2.442106731920246e-09, "loss": 0.6454, "step": 17486 }, { "epoch": 19.91737891737892, "grad_norm": 0.17049475014209747, "learning_rate": 2.3774190633429894e-09, "loss": 0.6768, "step": 17487 }, { "epoch": 19.918518518518518, "grad_norm": 0.2072330266237259, "learning_rate": 2.313599630943131e-09, "loss": 0.7361, "step": 17488 }, { "epoch": 19.91965811965812, "grad_norm": 0.1602887511253357, "learning_rate": 2.250648436935565e-09, "loss": 0.566, "step": 17489 }, { "epoch": 19.920797720797722, "grad_norm": 0.1839447319507599, "learning_rate": 2.188565483507432e-09, "loss": 0.487, "step": 17490 }, { "epoch": 19.92193732193732, "grad_norm": 0.19115093350410461, "learning_rate": 2.12735077281534e-09, "loss": 0.6366, "step": 17491 }, { "epoch": 19.923076923076923, "grad_norm": 0.21729104220867157, "learning_rate": 2.0670043069825894e-09, "loss": 0.4544, "step": 17492 }, { "epoch": 19.924216524216526, "grad_norm": 0.20541562139987946, "learning_rate": 2.0075260881102787e-09, "loss": 0.6521, "step": 17493 }, { "epoch": 19.925356125356124, "grad_norm": 0.21780884265899658, "learning_rate": 1.9489161182578708e-09, "loss": 0.7322, "step": 17494 }, { "epoch": 19.926495726495727, "grad_norm": 0.20379436016082764, "learning_rate": 1.891174399468176e-09, "loss": 0.5706, "step": 17495 }, { "epoch": 19.92763532763533, "grad_norm": 0.14642983675003052, "learning_rate": 1.8343009337395967e-09, "loss": 0.4325, "step": 17496 }, { "epoch": 19.928774928774928, "grad_norm": 0.18292292952537537, "learning_rate": 1.77829572305388e-09, "loss": 0.5872, "step": 17497 }, { "epoch": 19.92991452991453, "grad_norm": 0.17598862946033478, "learning_rate": 1.7231587693511409e-09, "loss": 0.582, "step": 17498 }, { "epoch": 19.931054131054132, "grad_norm": 0.1596013307571411, "learning_rate": 1.6688900745492898e-09, "loss": 0.8117, "step": 17499 }, { "epoch": 19.93219373219373, "grad_norm": 0.2092096358537674, "learning_rate": 1.6154896405329302e-09, "loss": 0.7535, "step": 17500 }, { "epoch": 19.933333333333334, "grad_norm": 0.2274015098810196, "learning_rate": 1.5629574691561344e-09, "loss": 0.6412, "step": 17501 }, { "epoch": 19.934472934472936, "grad_norm": 0.19499576091766357, "learning_rate": 1.5112935622452196e-09, "loss": 0.6974, "step": 17502 }, { "epoch": 19.935612535612535, "grad_norm": 0.19396720826625824, "learning_rate": 1.4604979215904203e-09, "loss": 0.7163, "step": 17503 }, { "epoch": 19.936752136752137, "grad_norm": 0.16685441136360168, "learning_rate": 1.4105705489597664e-09, "loss": 0.7161, "step": 17504 }, { "epoch": 19.93789173789174, "grad_norm": 0.1716783493757248, "learning_rate": 1.3615114460879819e-09, "loss": 0.533, "step": 17505 }, { "epoch": 19.939031339031338, "grad_norm": 0.21904678642749786, "learning_rate": 1.3133206146764832e-09, "loss": 0.5761, "step": 17506 }, { "epoch": 19.94017094017094, "grad_norm": 0.1928315907716751, "learning_rate": 1.2659980563989315e-09, "loss": 0.6561, "step": 17507 }, { "epoch": 19.941310541310543, "grad_norm": 0.21238145232200623, "learning_rate": 1.2195437729012328e-09, "loss": 0.6436, "step": 17508 }, { "epoch": 19.94245014245014, "grad_norm": 0.16020573675632477, "learning_rate": 1.1739577657959856e-09, "loss": 0.679, "step": 17509 }, { "epoch": 19.943589743589744, "grad_norm": 0.17376503348350525, "learning_rate": 1.1292400366652579e-09, "loss": 0.4443, "step": 17510 }, { "epoch": 19.944729344729346, "grad_norm": 0.19276556372642517, "learning_rate": 1.0853905870633618e-09, "loss": 0.6591, "step": 17511 }, { "epoch": 19.945868945868945, "grad_norm": 0.2291955053806305, "learning_rate": 1.0424094185140786e-09, "loss": 0.4883, "step": 17512 }, { "epoch": 19.947008547008547, "grad_norm": 0.18416431546211243, "learning_rate": 1.0002965325078828e-09, "loss": 0.8636, "step": 17513 }, { "epoch": 19.94814814814815, "grad_norm": 0.1676609218120575, "learning_rate": 9.590519305102685e-10, "loss": 0.789, "step": 17514 }, { "epoch": 19.94928774928775, "grad_norm": 0.15865011513233185, "learning_rate": 9.186756139534236e-10, "loss": 0.7431, "step": 17515 }, { "epoch": 19.95042735042735, "grad_norm": 0.16312918066978455, "learning_rate": 8.791675842362291e-10, "loss": 0.4105, "step": 17516 }, { "epoch": 19.951566951566953, "grad_norm": 0.1837480366230011, "learning_rate": 8.405278427325858e-10, "loss": 0.7236, "step": 17517 }, { "epoch": 19.95270655270655, "grad_norm": 0.19747784733772278, "learning_rate": 8.027563907886392e-10, "loss": 0.6793, "step": 17518 }, { "epoch": 19.953846153846154, "grad_norm": 0.1995292603969574, "learning_rate": 7.658532297116772e-10, "loss": 0.6804, "step": 17519 }, { "epoch": 19.954985754985756, "grad_norm": 0.20492805540561676, "learning_rate": 7.29818360784007e-10, "loss": 0.6711, "step": 17520 }, { "epoch": 19.956125356125355, "grad_norm": 0.20226095616817474, "learning_rate": 6.946517852574052e-10, "loss": 0.638, "step": 17521 }, { "epoch": 19.957264957264957, "grad_norm": 0.1664465069770813, "learning_rate": 6.603535043558929e-10, "loss": 0.7497, "step": 17522 }, { "epoch": 19.95840455840456, "grad_norm": 0.20206011831760406, "learning_rate": 6.269235192674083e-10, "loss": 0.6539, "step": 17523 }, { "epoch": 19.95954415954416, "grad_norm": 0.20060062408447266, "learning_rate": 5.943618311549104e-10, "loss": 0.5205, "step": 17524 }, { "epoch": 19.96068376068376, "grad_norm": 0.22517715394496918, "learning_rate": 5.626684411508265e-10, "loss": 0.6764, "step": 17525 }, { "epoch": 19.961823361823363, "grad_norm": 0.16309784352779388, "learning_rate": 5.31843350351502e-10, "loss": 0.7961, "step": 17526 }, { "epoch": 19.962962962962962, "grad_norm": 0.20457012951374054, "learning_rate": 5.018865598310773e-10, "loss": 0.5823, "step": 17527 }, { "epoch": 19.964102564102564, "grad_norm": 0.24622264504432678, "learning_rate": 4.727980706276113e-10, "loss": 0.5505, "step": 17528 }, { "epoch": 19.965242165242167, "grad_norm": 0.17372025549411774, "learning_rate": 4.445778837541825e-10, "loss": 0.6343, "step": 17529 }, { "epoch": 19.966381766381765, "grad_norm": 0.1885814517736435, "learning_rate": 4.17226000187787e-10, "loss": 0.8166, "step": 17530 }, { "epoch": 19.967521367521368, "grad_norm": 0.22691234946250916, "learning_rate": 3.907424208832167e-10, "loss": 0.4473, "step": 17531 }, { "epoch": 19.96866096866097, "grad_norm": 0.1989099383354187, "learning_rate": 3.651271467536299e-10, "loss": 0.4989, "step": 17532 }, { "epoch": 19.96980056980057, "grad_norm": 0.2686764597892761, "learning_rate": 3.403801786955318e-10, "loss": 0.5714, "step": 17533 }, { "epoch": 19.97094017094017, "grad_norm": 0.22707980871200562, "learning_rate": 3.165015175637942e-10, "loss": 0.5101, "step": 17534 }, { "epoch": 19.972079772079773, "grad_norm": 0.2014019638299942, "learning_rate": 2.9349116419108427e-10, "loss": 0.4804, "step": 17535 }, { "epoch": 19.973219373219372, "grad_norm": 0.2190103381872177, "learning_rate": 2.71349119373987e-10, "loss": 0.4915, "step": 17536 }, { "epoch": 19.974358974358974, "grad_norm": 0.17524485290050507, "learning_rate": 2.500753838813319e-10, "loss": 0.6858, "step": 17537 }, { "epoch": 19.975498575498577, "grad_norm": 0.2067994326353073, "learning_rate": 2.296699584541928e-10, "loss": 0.8085, "step": 17538 }, { "epoch": 19.976638176638176, "grad_norm": 0.2091679573059082, "learning_rate": 2.1013284380033694e-10, "loss": 0.5121, "step": 17539 }, { "epoch": 19.977777777777778, "grad_norm": 0.23023058474063873, "learning_rate": 1.914640405970003e-10, "loss": 0.6296, "step": 17540 }, { "epoch": 19.97891737891738, "grad_norm": 0.17481495440006256, "learning_rate": 1.7366354949643893e-10, "loss": 0.6735, "step": 17541 }, { "epoch": 19.98005698005698, "grad_norm": 0.1665341705083847, "learning_rate": 1.5673137111205106e-10, "loss": 0.7221, "step": 17542 }, { "epoch": 19.98119658119658, "grad_norm": 0.17197179794311523, "learning_rate": 1.4066750603503044e-10, "loss": 0.6417, "step": 17543 }, { "epoch": 19.982336182336184, "grad_norm": 0.18099331855773926, "learning_rate": 1.2547195482048858e-10, "loss": 0.7751, "step": 17544 }, { "epoch": 19.983475783475782, "grad_norm": 0.18419486284255981, "learning_rate": 1.1114471800133252e-10, "loss": 0.62, "step": 17545 }, { "epoch": 19.984615384615385, "grad_norm": 0.20028352737426758, "learning_rate": 9.768579607161155e-11, "loss": 0.5974, "step": 17546 }, { "epoch": 19.985754985754987, "grad_norm": 0.20690953731536865, "learning_rate": 8.50951894976193e-11, "loss": 0.6736, "step": 17547 }, { "epoch": 19.986894586894586, "grad_norm": 0.20689022541046143, "learning_rate": 7.337289871789388e-11, "loss": 0.6134, "step": 17548 }, { "epoch": 19.988034188034188, "grad_norm": 0.20695361495018005, "learning_rate": 6.25189241432178e-11, "loss": 0.5089, "step": 17549 }, { "epoch": 19.98917378917379, "grad_norm": 0.17822995781898499, "learning_rate": 5.2533266145515794e-11, "loss": 0.7472, "step": 17550 }, { "epoch": 19.99031339031339, "grad_norm": 0.22333787381649017, "learning_rate": 4.341592507173253e-11, "loss": 0.7331, "step": 17551 }, { "epoch": 19.99145299145299, "grad_norm": 0.18312227725982666, "learning_rate": 3.516690124383271e-11, "loss": 0.7284, "step": 17552 }, { "epoch": 19.992592592592594, "grad_norm": 0.18905913829803467, "learning_rate": 2.7786194942147624e-11, "loss": 0.4803, "step": 17553 }, { "epoch": 19.993732193732193, "grad_norm": 0.26452741026878357, "learning_rate": 2.1273806424804143e-11, "loss": 0.5171, "step": 17554 }, { "epoch": 19.994871794871795, "grad_norm": 0.19459344446659088, "learning_rate": 1.5629735922173537e-11, "loss": 0.6539, "step": 17555 }, { "epoch": 19.996011396011397, "grad_norm": 0.20268167555332184, "learning_rate": 1.0853983622993725e-11, "loss": 0.7208, "step": 17556 }, { "epoch": 19.997150997150996, "grad_norm": 0.17209883034229279, "learning_rate": 6.946549699349269e-12, "loss": 0.499, "step": 17557 }, { "epoch": 19.9982905982906, "grad_norm": 0.21036337316036224, "learning_rate": 3.907434284466937e-12, "loss": 0.4244, "step": 17558 }, { "epoch": 19.9994301994302, "grad_norm": 0.1915605366230011, "learning_rate": 1.7366374865934732e-12, "loss": 0.7527, "step": 17559 }, { "epoch": 20.0, "grad_norm": 0.3738774061203003, "learning_rate": 4.3415937511781526e-13, "loss": 0.706, "step": 17560 } ], "logging_steps": 1, "max_steps": 17560, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.817931115291156e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }