{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 5864, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017053206002728513, "grad_norm": 7.650136947631836, "learning_rate": 0.0, "loss": 0.8342, "step": 1 }, { "epoch": 0.00034106412005457026, "grad_norm": 1.9354814291000366, "learning_rate": 1.1363636363636366e-08, "loss": 0.7446, "step": 2 }, { "epoch": 0.0005115961800818554, "grad_norm": 2.511084794998169, "learning_rate": 2.2727272727272732e-08, "loss": 0.7271, "step": 3 }, { "epoch": 0.0006821282401091405, "grad_norm": 1.2728736400604248, "learning_rate": 3.409090909090909e-08, "loss": 0.717, "step": 4 }, { "epoch": 0.0008526603001364257, "grad_norm": 1.1760114431381226, "learning_rate": 4.5454545454545464e-08, "loss": 0.7129, "step": 5 }, { "epoch": 0.0010231923601637107, "grad_norm": 1.9061273336410522, "learning_rate": 5.681818181818183e-08, "loss": 0.7003, "step": 6 }, { "epoch": 0.0011937244201909959, "grad_norm": 0.9682210087776184, "learning_rate": 6.818181818181819e-08, "loss": 0.6932, "step": 7 }, { "epoch": 0.001364256480218281, "grad_norm": 0.7187206149101257, "learning_rate": 7.954545454545456e-08, "loss": 0.6776, "step": 8 }, { "epoch": 0.0015347885402455662, "grad_norm": 0.595079243183136, "learning_rate": 9.090909090909093e-08, "loss": 0.6984, "step": 9 }, { "epoch": 0.0017053206002728514, "grad_norm": 0.6756548285484314, "learning_rate": 1.0227272727272729e-07, "loss": 0.6788, "step": 10 }, { "epoch": 0.0018758526603001365, "grad_norm": 0.5831514000892639, "learning_rate": 1.1363636363636366e-07, "loss": 0.6979, "step": 11 }, { "epoch": 0.0020463847203274215, "grad_norm": 0.4670233428478241, "learning_rate": 1.2500000000000002e-07, "loss": 0.6816, "step": 12 }, { "epoch": 0.002216916780354707, "grad_norm": 0.5446857213973999, "learning_rate": 1.3636363636363637e-07, "loss": 0.6633, "step": 13 }, { "epoch": 0.0023874488403819918, "grad_norm": 0.513152539730072, "learning_rate": 1.4772727272727275e-07, "loss": 0.6866, "step": 14 }, { "epoch": 0.002557980900409277, "grad_norm": 0.4256693720817566, "learning_rate": 1.5909090909090913e-07, "loss": 0.6829, "step": 15 }, { "epoch": 0.002728512960436562, "grad_norm": 0.5119574069976807, "learning_rate": 1.7045454545454548e-07, "loss": 0.6785, "step": 16 }, { "epoch": 0.002899045020463847, "grad_norm": 0.4757070541381836, "learning_rate": 1.8181818181818186e-07, "loss": 0.6841, "step": 17 }, { "epoch": 0.0030695770804911324, "grad_norm": 0.44998493790626526, "learning_rate": 1.931818181818182e-07, "loss": 0.6794, "step": 18 }, { "epoch": 0.0032401091405184173, "grad_norm": 0.49852901697158813, "learning_rate": 2.0454545454545458e-07, "loss": 0.6769, "step": 19 }, { "epoch": 0.0034106412005457027, "grad_norm": 0.4366150498390198, "learning_rate": 2.1590909090909094e-07, "loss": 0.6676, "step": 20 }, { "epoch": 0.0035811732605729877, "grad_norm": 0.4075460731983185, "learning_rate": 2.272727272727273e-07, "loss": 0.6667, "step": 21 }, { "epoch": 0.003751705320600273, "grad_norm": 0.43057799339294434, "learning_rate": 2.386363636363637e-07, "loss": 0.6619, "step": 22 }, { "epoch": 0.003922237380627558, "grad_norm": 0.45519155263900757, "learning_rate": 2.5000000000000004e-07, "loss": 0.6564, "step": 23 }, { "epoch": 0.004092769440654843, "grad_norm": 0.3911649286746979, "learning_rate": 2.613636363636364e-07, "loss": 0.6528, "step": 24 }, { "epoch": 0.004263301500682128, "grad_norm": 0.44951483607292175, "learning_rate": 2.7272727272727274e-07, "loss": 0.6502, "step": 25 }, { "epoch": 0.004433833560709414, "grad_norm": 0.3782355487346649, "learning_rate": 2.8409090909090915e-07, "loss": 0.6738, "step": 26 }, { "epoch": 0.004604365620736698, "grad_norm": 0.4313599765300751, "learning_rate": 2.954545454545455e-07, "loss": 0.6676, "step": 27 }, { "epoch": 0.0047748976807639835, "grad_norm": 0.4656371474266052, "learning_rate": 3.0681818181818185e-07, "loss": 0.6689, "step": 28 }, { "epoch": 0.004945429740791269, "grad_norm": 0.4009706676006317, "learning_rate": 3.1818181818181825e-07, "loss": 0.6708, "step": 29 }, { "epoch": 0.005115961800818554, "grad_norm": 0.45048245787620544, "learning_rate": 3.295454545454546e-07, "loss": 0.6633, "step": 30 }, { "epoch": 0.005286493860845839, "grad_norm": 0.4189516305923462, "learning_rate": 3.4090909090909096e-07, "loss": 0.6435, "step": 31 }, { "epoch": 0.005457025920873124, "grad_norm": 0.4668561518192291, "learning_rate": 3.5227272727272736e-07, "loss": 0.6507, "step": 32 }, { "epoch": 0.0056275579809004096, "grad_norm": 0.4583468735218048, "learning_rate": 3.636363636363637e-07, "loss": 0.642, "step": 33 }, { "epoch": 0.005798090040927694, "grad_norm": 0.540885329246521, "learning_rate": 3.7500000000000006e-07, "loss": 0.6798, "step": 34 }, { "epoch": 0.0059686221009549794, "grad_norm": 0.49574148654937744, "learning_rate": 3.863636363636364e-07, "loss": 0.6524, "step": 35 }, { "epoch": 0.006139154160982265, "grad_norm": 0.4109654426574707, "learning_rate": 3.9772727272727276e-07, "loss": 0.6608, "step": 36 }, { "epoch": 0.00630968622100955, "grad_norm": 0.3942180573940277, "learning_rate": 4.0909090909090917e-07, "loss": 0.6657, "step": 37 }, { "epoch": 0.006480218281036835, "grad_norm": 0.401726096868515, "learning_rate": 4.204545454545455e-07, "loss": 0.6473, "step": 38 }, { "epoch": 0.00665075034106412, "grad_norm": 0.448159396648407, "learning_rate": 4.3181818181818187e-07, "loss": 0.6499, "step": 39 }, { "epoch": 0.0068212824010914054, "grad_norm": 0.41607365012168884, "learning_rate": 4.431818181818183e-07, "loss": 0.6484, "step": 40 }, { "epoch": 0.00699181446111869, "grad_norm": 0.40875402092933655, "learning_rate": 4.545454545454546e-07, "loss": 0.6613, "step": 41 }, { "epoch": 0.007162346521145975, "grad_norm": 0.46847590804100037, "learning_rate": 4.65909090909091e-07, "loss": 0.6506, "step": 42 }, { "epoch": 0.007332878581173261, "grad_norm": 0.473928838968277, "learning_rate": 4.772727272727274e-07, "loss": 0.6264, "step": 43 }, { "epoch": 0.007503410641200546, "grad_norm": 0.6333487033843994, "learning_rate": 4.886363636363637e-07, "loss": 0.6591, "step": 44 }, { "epoch": 0.007673942701227831, "grad_norm": 0.9283931255340576, "learning_rate": 5.000000000000001e-07, "loss": 0.6611, "step": 45 }, { "epoch": 0.007844474761255117, "grad_norm": 0.8683100938796997, "learning_rate": 5.113636363636364e-07, "loss": 0.6401, "step": 46 }, { "epoch": 0.008015006821282401, "grad_norm": 0.4764223098754883, "learning_rate": 5.227272727272728e-07, "loss": 0.6398, "step": 47 }, { "epoch": 0.008185538881309686, "grad_norm": 0.692018985748291, "learning_rate": 5.340909090909091e-07, "loss": 0.6331, "step": 48 }, { "epoch": 0.008356070941336972, "grad_norm": 0.760298490524292, "learning_rate": 5.454545454545455e-07, "loss": 0.6503, "step": 49 }, { "epoch": 0.008526603001364257, "grad_norm": 0.48740214109420776, "learning_rate": 5.568181818181818e-07, "loss": 0.6273, "step": 50 }, { "epoch": 0.008697135061391541, "grad_norm": 0.623894989490509, "learning_rate": 5.681818181818183e-07, "loss": 0.6529, "step": 51 }, { "epoch": 0.008867667121418827, "grad_norm": 0.45810043811798096, "learning_rate": 5.795454545454546e-07, "loss": 0.6339, "step": 52 }, { "epoch": 0.009038199181446112, "grad_norm": 0.5587273240089417, "learning_rate": 5.90909090909091e-07, "loss": 0.6411, "step": 53 }, { "epoch": 0.009208731241473396, "grad_norm": 0.6215199828147888, "learning_rate": 6.022727272727273e-07, "loss": 0.6568, "step": 54 }, { "epoch": 0.009379263301500683, "grad_norm": 0.5236825346946716, "learning_rate": 6.136363636363637e-07, "loss": 0.6385, "step": 55 }, { "epoch": 0.009549795361527967, "grad_norm": 0.5010786652565002, "learning_rate": 6.250000000000002e-07, "loss": 0.6522, "step": 56 }, { "epoch": 0.009720327421555252, "grad_norm": 0.5132748484611511, "learning_rate": 6.363636363636365e-07, "loss": 0.619, "step": 57 }, { "epoch": 0.009890859481582538, "grad_norm": 0.5371212959289551, "learning_rate": 6.477272727272729e-07, "loss": 0.6224, "step": 58 }, { "epoch": 0.010061391541609822, "grad_norm": 0.5243486166000366, "learning_rate": 6.590909090909092e-07, "loss": 0.6524, "step": 59 }, { "epoch": 0.010231923601637109, "grad_norm": 0.5441023707389832, "learning_rate": 6.704545454545456e-07, "loss": 0.6522, "step": 60 }, { "epoch": 0.010402455661664393, "grad_norm": 0.5234872698783875, "learning_rate": 6.818181818181819e-07, "loss": 0.6329, "step": 61 }, { "epoch": 0.010572987721691678, "grad_norm": 0.5267979502677917, "learning_rate": 6.931818181818184e-07, "loss": 0.6364, "step": 62 }, { "epoch": 0.010743519781718964, "grad_norm": 0.5362216234207153, "learning_rate": 7.045454545454547e-07, "loss": 0.6296, "step": 63 }, { "epoch": 0.010914051841746248, "grad_norm": 0.5315608978271484, "learning_rate": 7.159090909090911e-07, "loss": 0.6616, "step": 64 }, { "epoch": 0.011084583901773533, "grad_norm": 0.4777587652206421, "learning_rate": 7.272727272727274e-07, "loss": 0.6318, "step": 65 }, { "epoch": 0.011255115961800819, "grad_norm": 0.5749709606170654, "learning_rate": 7.386363636363638e-07, "loss": 0.6301, "step": 66 }, { "epoch": 0.011425648021828104, "grad_norm": 0.4934489130973816, "learning_rate": 7.500000000000001e-07, "loss": 0.629, "step": 67 }, { "epoch": 0.011596180081855388, "grad_norm": 0.4862441420555115, "learning_rate": 7.613636363636365e-07, "loss": 0.6349, "step": 68 }, { "epoch": 0.011766712141882674, "grad_norm": 0.5512414574623108, "learning_rate": 7.727272727272728e-07, "loss": 0.6253, "step": 69 }, { "epoch": 0.011937244201909959, "grad_norm": 0.4864642322063446, "learning_rate": 7.840909090909092e-07, "loss": 0.6349, "step": 70 }, { "epoch": 0.012107776261937243, "grad_norm": 0.5771092176437378, "learning_rate": 7.954545454545455e-07, "loss": 0.6605, "step": 71 }, { "epoch": 0.01227830832196453, "grad_norm": 0.6331750750541687, "learning_rate": 8.068181818181819e-07, "loss": 0.6405, "step": 72 }, { "epoch": 0.012448840381991814, "grad_norm": 0.5899202823638916, "learning_rate": 8.181818181818183e-07, "loss": 0.6187, "step": 73 }, { "epoch": 0.0126193724420191, "grad_norm": 0.7244130969047546, "learning_rate": 8.295454545454547e-07, "loss": 0.6297, "step": 74 }, { "epoch": 0.012789904502046385, "grad_norm": 0.5164639949798584, "learning_rate": 8.40909090909091e-07, "loss": 0.6563, "step": 75 }, { "epoch": 0.01296043656207367, "grad_norm": 0.6244091391563416, "learning_rate": 8.522727272727274e-07, "loss": 0.6456, "step": 76 }, { "epoch": 0.013130968622100956, "grad_norm": 0.8265087008476257, "learning_rate": 8.636363636363637e-07, "loss": 0.6299, "step": 77 }, { "epoch": 0.01330150068212824, "grad_norm": 0.8179631233215332, "learning_rate": 8.750000000000001e-07, "loss": 0.6127, "step": 78 }, { "epoch": 0.013472032742155525, "grad_norm": 0.6222761869430542, "learning_rate": 8.863636363636365e-07, "loss": 0.628, "step": 79 }, { "epoch": 0.013642564802182811, "grad_norm": 0.5206494331359863, "learning_rate": 8.977272727272729e-07, "loss": 0.6299, "step": 80 }, { "epoch": 0.013813096862210095, "grad_norm": 0.5181920528411865, "learning_rate": 9.090909090909093e-07, "loss": 0.6319, "step": 81 }, { "epoch": 0.01398362892223738, "grad_norm": 0.6216228604316711, "learning_rate": 9.204545454545456e-07, "loss": 0.6336, "step": 82 }, { "epoch": 0.014154160982264666, "grad_norm": 0.6102015972137451, "learning_rate": 9.31818181818182e-07, "loss": 0.6238, "step": 83 }, { "epoch": 0.01432469304229195, "grad_norm": 0.5613488554954529, "learning_rate": 9.431818181818184e-07, "loss": 0.6319, "step": 84 }, { "epoch": 0.014495225102319235, "grad_norm": 0.5151708722114563, "learning_rate": 9.545454545454548e-07, "loss": 0.6096, "step": 85 }, { "epoch": 0.014665757162346521, "grad_norm": 0.5965888500213623, "learning_rate": 9.659090909090911e-07, "loss": 0.6321, "step": 86 }, { "epoch": 0.014836289222373806, "grad_norm": 0.6112313270568848, "learning_rate": 9.772727272727275e-07, "loss": 0.646, "step": 87 }, { "epoch": 0.015006821282401092, "grad_norm": 0.6595168709754944, "learning_rate": 9.886363636363638e-07, "loss": 0.6235, "step": 88 }, { "epoch": 0.015177353342428377, "grad_norm": 0.5075546503067017, "learning_rate": 1.0000000000000002e-06, "loss": 0.6289, "step": 89 }, { "epoch": 0.015347885402455661, "grad_norm": 0.6066994667053223, "learning_rate": 1.0113636363636367e-06, "loss": 0.6259, "step": 90 }, { "epoch": 0.015518417462482947, "grad_norm": 0.7509892582893372, "learning_rate": 1.0227272727272729e-06, "loss": 0.6194, "step": 91 }, { "epoch": 0.015688949522510234, "grad_norm": 0.7574506998062134, "learning_rate": 1.0340909090909094e-06, "loss": 0.6168, "step": 92 }, { "epoch": 0.015859481582537516, "grad_norm": 0.449613094329834, "learning_rate": 1.0454545454545456e-06, "loss": 0.6037, "step": 93 }, { "epoch": 0.016030013642564803, "grad_norm": 0.6445989608764648, "learning_rate": 1.0568181818181821e-06, "loss": 0.6481, "step": 94 }, { "epoch": 0.01620054570259209, "grad_norm": 0.5870500802993774, "learning_rate": 1.0681818181818183e-06, "loss": 0.6253, "step": 95 }, { "epoch": 0.01637107776261937, "grad_norm": 0.4731763005256653, "learning_rate": 1.0795454545454548e-06, "loss": 0.6132, "step": 96 }, { "epoch": 0.016541609822646658, "grad_norm": 0.5874089598655701, "learning_rate": 1.090909090909091e-06, "loss": 0.6051, "step": 97 }, { "epoch": 0.016712141882673944, "grad_norm": 0.5979234576225281, "learning_rate": 1.1022727272727275e-06, "loss": 0.6119, "step": 98 }, { "epoch": 0.016882673942701227, "grad_norm": 0.5582343935966492, "learning_rate": 1.1136363636363637e-06, "loss": 0.6189, "step": 99 }, { "epoch": 0.017053206002728513, "grad_norm": 0.44007357954978943, "learning_rate": 1.1250000000000002e-06, "loss": 0.6143, "step": 100 }, { "epoch": 0.0172237380627558, "grad_norm": 0.48567453026771545, "learning_rate": 1.1363636363636366e-06, "loss": 0.6127, "step": 101 }, { "epoch": 0.017394270122783082, "grad_norm": 0.5077864527702332, "learning_rate": 1.147727272727273e-06, "loss": 0.611, "step": 102 }, { "epoch": 0.01756480218281037, "grad_norm": 0.467916876077652, "learning_rate": 1.1590909090909093e-06, "loss": 0.6212, "step": 103 }, { "epoch": 0.017735334242837655, "grad_norm": 0.46231311559677124, "learning_rate": 1.1704545454545456e-06, "loss": 0.6244, "step": 104 }, { "epoch": 0.017905866302864937, "grad_norm": 0.48694437742233276, "learning_rate": 1.181818181818182e-06, "loss": 0.6145, "step": 105 }, { "epoch": 0.018076398362892224, "grad_norm": 0.6872959733009338, "learning_rate": 1.1931818181818183e-06, "loss": 0.627, "step": 106 }, { "epoch": 0.01824693042291951, "grad_norm": 0.43043017387390137, "learning_rate": 1.2045454545454547e-06, "loss": 0.6096, "step": 107 }, { "epoch": 0.018417462482946793, "grad_norm": 0.43259918689727783, "learning_rate": 1.215909090909091e-06, "loss": 0.6111, "step": 108 }, { "epoch": 0.01858799454297408, "grad_norm": 0.4670185446739197, "learning_rate": 1.2272727272727274e-06, "loss": 0.6122, "step": 109 }, { "epoch": 0.018758526603001365, "grad_norm": 0.4173937737941742, "learning_rate": 1.2386363636363638e-06, "loss": 0.6081, "step": 110 }, { "epoch": 0.018929058663028648, "grad_norm": 0.4710495173931122, "learning_rate": 1.2500000000000003e-06, "loss": 0.6028, "step": 111 }, { "epoch": 0.019099590723055934, "grad_norm": 0.5226242542266846, "learning_rate": 1.2613636363636367e-06, "loss": 0.6045, "step": 112 }, { "epoch": 0.01927012278308322, "grad_norm": 0.6086929440498352, "learning_rate": 1.272727272727273e-06, "loss": 0.6127, "step": 113 }, { "epoch": 0.019440654843110503, "grad_norm": 0.5822572112083435, "learning_rate": 1.2840909090909094e-06, "loss": 0.6039, "step": 114 }, { "epoch": 0.01961118690313779, "grad_norm": 0.49154001474380493, "learning_rate": 1.2954545454545457e-06, "loss": 0.621, "step": 115 }, { "epoch": 0.019781718963165076, "grad_norm": 0.4529736638069153, "learning_rate": 1.306818181818182e-06, "loss": 0.6174, "step": 116 }, { "epoch": 0.01995225102319236, "grad_norm": 0.5126652717590332, "learning_rate": 1.3181818181818184e-06, "loss": 0.5937, "step": 117 }, { "epoch": 0.020122783083219645, "grad_norm": 0.6636311411857605, "learning_rate": 1.3295454545454548e-06, "loss": 0.6058, "step": 118 }, { "epoch": 0.02029331514324693, "grad_norm": 0.7904946804046631, "learning_rate": 1.3409090909090911e-06, "loss": 0.6257, "step": 119 }, { "epoch": 0.020463847203274217, "grad_norm": 0.708732008934021, "learning_rate": 1.3522727272727275e-06, "loss": 0.6168, "step": 120 }, { "epoch": 0.0206343792633015, "grad_norm": 0.5224078297615051, "learning_rate": 1.3636363636363638e-06, "loss": 0.6344, "step": 121 }, { "epoch": 0.020804911323328786, "grad_norm": 0.4924236536026001, "learning_rate": 1.3750000000000002e-06, "loss": 0.5914, "step": 122 }, { "epoch": 0.020975443383356072, "grad_norm": 0.6102842688560486, "learning_rate": 1.3863636363636367e-06, "loss": 0.6104, "step": 123 }, { "epoch": 0.021145975443383355, "grad_norm": 0.45268720388412476, "learning_rate": 1.3977272727272729e-06, "loss": 0.6122, "step": 124 }, { "epoch": 0.02131650750341064, "grad_norm": 0.5215243697166443, "learning_rate": 1.4090909090909094e-06, "loss": 0.6402, "step": 125 }, { "epoch": 0.021487039563437928, "grad_norm": 0.4833904206752777, "learning_rate": 1.4204545454545456e-06, "loss": 0.6016, "step": 126 }, { "epoch": 0.02165757162346521, "grad_norm": 0.5406088829040527, "learning_rate": 1.4318181818181821e-06, "loss": 0.6133, "step": 127 }, { "epoch": 0.021828103683492497, "grad_norm": 0.5165327787399292, "learning_rate": 1.4431818181818183e-06, "loss": 0.6082, "step": 128 }, { "epoch": 0.021998635743519783, "grad_norm": 0.40911534428596497, "learning_rate": 1.4545454545454548e-06, "loss": 0.6027, "step": 129 }, { "epoch": 0.022169167803547066, "grad_norm": 0.47644490003585815, "learning_rate": 1.465909090909091e-06, "loss": 0.6196, "step": 130 }, { "epoch": 0.022339699863574352, "grad_norm": 0.4921034276485443, "learning_rate": 1.4772727272727275e-06, "loss": 0.6125, "step": 131 }, { "epoch": 0.022510231923601638, "grad_norm": 0.6039801239967346, "learning_rate": 1.4886363636363637e-06, "loss": 0.6069, "step": 132 }, { "epoch": 0.02268076398362892, "grad_norm": 0.4514678418636322, "learning_rate": 1.5000000000000002e-06, "loss": 0.6017, "step": 133 }, { "epoch": 0.022851296043656207, "grad_norm": 0.4566296637058258, "learning_rate": 1.5113636363636368e-06, "loss": 0.5971, "step": 134 }, { "epoch": 0.023021828103683493, "grad_norm": 0.5192421674728394, "learning_rate": 1.522727272727273e-06, "loss": 0.6104, "step": 135 }, { "epoch": 0.023192360163710776, "grad_norm": 0.5545705556869507, "learning_rate": 1.5340909090909095e-06, "loss": 0.6007, "step": 136 }, { "epoch": 0.023362892223738062, "grad_norm": 0.43825459480285645, "learning_rate": 1.5454545454545457e-06, "loss": 0.607, "step": 137 }, { "epoch": 0.02353342428376535, "grad_norm": 0.4573563039302826, "learning_rate": 1.5568181818181822e-06, "loss": 0.6144, "step": 138 }, { "epoch": 0.02370395634379263, "grad_norm": 0.5552775263786316, "learning_rate": 1.5681818181818184e-06, "loss": 0.6113, "step": 139 }, { "epoch": 0.023874488403819918, "grad_norm": 0.42399027943611145, "learning_rate": 1.579545454545455e-06, "loss": 0.6072, "step": 140 }, { "epoch": 0.024045020463847204, "grad_norm": 0.42997267842292786, "learning_rate": 1.590909090909091e-06, "loss": 0.6223, "step": 141 }, { "epoch": 0.024215552523874487, "grad_norm": 0.4232839047908783, "learning_rate": 1.6022727272727276e-06, "loss": 0.6205, "step": 142 }, { "epoch": 0.024386084583901773, "grad_norm": 0.43472787737846375, "learning_rate": 1.6136363636363638e-06, "loss": 0.608, "step": 143 }, { "epoch": 0.02455661664392906, "grad_norm": 0.4148360788822174, "learning_rate": 1.6250000000000003e-06, "loss": 0.6064, "step": 144 }, { "epoch": 0.024727148703956345, "grad_norm": 0.4622041583061218, "learning_rate": 1.6363636363636367e-06, "loss": 0.6148, "step": 145 }, { "epoch": 0.024897680763983628, "grad_norm": 0.4319433271884918, "learning_rate": 1.647727272727273e-06, "loss": 0.599, "step": 146 }, { "epoch": 0.025068212824010914, "grad_norm": 0.5556517243385315, "learning_rate": 1.6590909090909094e-06, "loss": 0.611, "step": 147 }, { "epoch": 0.0252387448840382, "grad_norm": 0.675335168838501, "learning_rate": 1.6704545454545457e-06, "loss": 0.6072, "step": 148 }, { "epoch": 0.025409276944065484, "grad_norm": 0.7137088775634766, "learning_rate": 1.681818181818182e-06, "loss": 0.5873, "step": 149 }, { "epoch": 0.02557980900409277, "grad_norm": 0.6885473728179932, "learning_rate": 1.6931818181818184e-06, "loss": 0.6084, "step": 150 }, { "epoch": 0.025750341064120056, "grad_norm": 0.430575966835022, "learning_rate": 1.7045454545454548e-06, "loss": 0.6157, "step": 151 }, { "epoch": 0.02592087312414734, "grad_norm": 0.539156973361969, "learning_rate": 1.7159090909090911e-06, "loss": 0.6024, "step": 152 }, { "epoch": 0.026091405184174625, "grad_norm": 0.7821847796440125, "learning_rate": 1.7272727272727275e-06, "loss": 0.6004, "step": 153 }, { "epoch": 0.02626193724420191, "grad_norm": 0.6745071411132812, "learning_rate": 1.7386363636363638e-06, "loss": 0.6099, "step": 154 }, { "epoch": 0.026432469304229194, "grad_norm": 0.44672396779060364, "learning_rate": 1.7500000000000002e-06, "loss": 0.615, "step": 155 }, { "epoch": 0.02660300136425648, "grad_norm": 0.5009127259254456, "learning_rate": 1.7613636363636367e-06, "loss": 0.6085, "step": 156 }, { "epoch": 0.026773533424283767, "grad_norm": 0.6595852375030518, "learning_rate": 1.772727272727273e-06, "loss": 0.5926, "step": 157 }, { "epoch": 0.02694406548431105, "grad_norm": 0.6197225451469421, "learning_rate": 1.7840909090909095e-06, "loss": 0.6137, "step": 158 }, { "epoch": 0.027114597544338336, "grad_norm": 0.39933961629867554, "learning_rate": 1.7954545454545458e-06, "loss": 0.6051, "step": 159 }, { "epoch": 0.027285129604365622, "grad_norm": 0.5831077098846436, "learning_rate": 1.8068181818181822e-06, "loss": 0.6239, "step": 160 }, { "epoch": 0.027455661664392905, "grad_norm": 0.4417240023612976, "learning_rate": 1.8181818181818185e-06, "loss": 0.5922, "step": 161 }, { "epoch": 0.02762619372442019, "grad_norm": 0.4070188105106354, "learning_rate": 1.8295454545454549e-06, "loss": 0.602, "step": 162 }, { "epoch": 0.027796725784447477, "grad_norm": 0.5691607594490051, "learning_rate": 1.8409090909090912e-06, "loss": 0.6004, "step": 163 }, { "epoch": 0.02796725784447476, "grad_norm": 0.6013835668563843, "learning_rate": 1.8522727272727276e-06, "loss": 0.598, "step": 164 }, { "epoch": 0.028137789904502046, "grad_norm": 0.6091790199279785, "learning_rate": 1.863636363636364e-06, "loss": 0.594, "step": 165 }, { "epoch": 0.028308321964529332, "grad_norm": 0.3511148691177368, "learning_rate": 1.8750000000000003e-06, "loss": 0.5843, "step": 166 }, { "epoch": 0.028478854024556615, "grad_norm": 0.4900568127632141, "learning_rate": 1.8863636363636368e-06, "loss": 0.591, "step": 167 }, { "epoch": 0.0286493860845839, "grad_norm": 0.49398183822631836, "learning_rate": 1.897727272727273e-06, "loss": 0.5938, "step": 168 }, { "epoch": 0.028819918144611188, "grad_norm": 0.46477246284484863, "learning_rate": 1.9090909090909095e-06, "loss": 0.5878, "step": 169 }, { "epoch": 0.02899045020463847, "grad_norm": 0.4071483910083771, "learning_rate": 1.9204545454545457e-06, "loss": 0.6173, "step": 170 }, { "epoch": 0.029160982264665757, "grad_norm": 0.44497352838516235, "learning_rate": 1.9318181818181822e-06, "loss": 0.6006, "step": 171 }, { "epoch": 0.029331514324693043, "grad_norm": 0.5880485773086548, "learning_rate": 1.9431818181818184e-06, "loss": 0.5992, "step": 172 }, { "epoch": 0.02950204638472033, "grad_norm": 0.4656350314617157, "learning_rate": 1.954545454545455e-06, "loss": 0.6085, "step": 173 }, { "epoch": 0.029672578444747612, "grad_norm": 0.4730178117752075, "learning_rate": 1.965909090909091e-06, "loss": 0.5865, "step": 174 }, { "epoch": 0.029843110504774898, "grad_norm": 0.5038389563560486, "learning_rate": 1.9772727272727276e-06, "loss": 0.5919, "step": 175 }, { "epoch": 0.030013642564802184, "grad_norm": 0.43242865800857544, "learning_rate": 1.9886363636363638e-06, "loss": 0.6107, "step": 176 }, { "epoch": 0.030184174624829467, "grad_norm": 0.6300122737884521, "learning_rate": 2.0000000000000003e-06, "loss": 0.6194, "step": 177 }, { "epoch": 0.030354706684856753, "grad_norm": 0.8253108263015747, "learning_rate": 1.9999998474716537e-06, "loss": 0.6009, "step": 178 }, { "epoch": 0.03052523874488404, "grad_norm": 0.7405084371566772, "learning_rate": 1.9999993898866594e-06, "loss": 0.6022, "step": 179 }, { "epoch": 0.030695770804911322, "grad_norm": 0.478266179561615, "learning_rate": 1.999998627245158e-06, "loss": 0.5899, "step": 180 }, { "epoch": 0.03086630286493861, "grad_norm": 0.3928435742855072, "learning_rate": 1.9999975595473815e-06, "loss": 0.594, "step": 181 }, { "epoch": 0.031036834924965895, "grad_norm": 0.5914251804351807, "learning_rate": 1.9999961867936557e-06, "loss": 0.6086, "step": 182 }, { "epoch": 0.031207366984993178, "grad_norm": 0.4950089454650879, "learning_rate": 1.9999945089843996e-06, "loss": 0.5991, "step": 183 }, { "epoch": 0.03137789904502047, "grad_norm": 0.40972012281417847, "learning_rate": 1.999992526120125e-06, "loss": 0.6021, "step": 184 }, { "epoch": 0.03154843110504775, "grad_norm": 0.7472282648086548, "learning_rate": 1.9999902382014366e-06, "loss": 0.5961, "step": 185 }, { "epoch": 0.03171896316507503, "grad_norm": 0.55669766664505, "learning_rate": 1.999987645229032e-06, "loss": 0.5975, "step": 186 }, { "epoch": 0.03188949522510232, "grad_norm": 0.3858289122581482, "learning_rate": 1.999984747203703e-06, "loss": 0.5918, "step": 187 }, { "epoch": 0.032060027285129605, "grad_norm": 0.5737813115119934, "learning_rate": 1.9999815441263336e-06, "loss": 0.5905, "step": 188 }, { "epoch": 0.03223055934515689, "grad_norm": 0.5033409595489502, "learning_rate": 1.9999780359979e-06, "loss": 0.6084, "step": 189 }, { "epoch": 0.03240109140518418, "grad_norm": 0.40473321080207825, "learning_rate": 1.9999742228194737e-06, "loss": 0.5977, "step": 190 }, { "epoch": 0.03257162346521146, "grad_norm": 0.4098617434501648, "learning_rate": 1.9999701045922165e-06, "loss": 0.6047, "step": 191 }, { "epoch": 0.03274215552523874, "grad_norm": 0.47473642230033875, "learning_rate": 1.999965681317386e-06, "loss": 0.5994, "step": 192 }, { "epoch": 0.03291268758526603, "grad_norm": 0.4402756094932556, "learning_rate": 1.9999609529963303e-06, "loss": 0.6056, "step": 193 }, { "epoch": 0.033083219645293316, "grad_norm": 0.3741694688796997, "learning_rate": 1.999955919630493e-06, "loss": 0.5947, "step": 194 }, { "epoch": 0.0332537517053206, "grad_norm": 0.3938404619693756, "learning_rate": 1.9999505812214086e-06, "loss": 0.6096, "step": 195 }, { "epoch": 0.03342428376534789, "grad_norm": 0.3473811149597168, "learning_rate": 1.9999449377707063e-06, "loss": 0.5899, "step": 196 }, { "epoch": 0.03359481582537517, "grad_norm": 0.3678300678730011, "learning_rate": 1.999938989280108e-06, "loss": 0.6118, "step": 197 }, { "epoch": 0.033765347885402454, "grad_norm": 0.34986555576324463, "learning_rate": 1.999932735751427e-06, "loss": 0.6361, "step": 198 }, { "epoch": 0.033935879945429744, "grad_norm": 0.39926373958587646, "learning_rate": 1.999926177186572e-06, "loss": 0.5901, "step": 199 }, { "epoch": 0.034106412005457026, "grad_norm": 0.3418705463409424, "learning_rate": 1.9999193135875433e-06, "loss": 0.5948, "step": 200 }, { "epoch": 0.03427694406548431, "grad_norm": 0.42050284147262573, "learning_rate": 1.999912144956435e-06, "loss": 0.5944, "step": 201 }, { "epoch": 0.0344474761255116, "grad_norm": 0.4259498417377472, "learning_rate": 1.9999046712954335e-06, "loss": 0.5853, "step": 202 }, { "epoch": 0.03461800818553888, "grad_norm": 0.4480075538158417, "learning_rate": 1.9998968926068193e-06, "loss": 0.6023, "step": 203 }, { "epoch": 0.034788540245566164, "grad_norm": 0.5295131802558899, "learning_rate": 1.9998888088929648e-06, "loss": 0.597, "step": 204 }, { "epoch": 0.034959072305593454, "grad_norm": 0.5217076539993286, "learning_rate": 1.9998804201563363e-06, "loss": 0.5916, "step": 205 }, { "epoch": 0.03512960436562074, "grad_norm": 0.42749714851379395, "learning_rate": 1.9998717263994928e-06, "loss": 0.582, "step": 206 }, { "epoch": 0.03530013642564802, "grad_norm": 0.48688915371894836, "learning_rate": 1.9998627276250863e-06, "loss": 0.618, "step": 207 }, { "epoch": 0.03547066848567531, "grad_norm": 0.7113652229309082, "learning_rate": 1.999853423835862e-06, "loss": 0.595, "step": 208 }, { "epoch": 0.03564120054570259, "grad_norm": 0.8222281336784363, "learning_rate": 1.9998438150346577e-06, "loss": 0.5974, "step": 209 }, { "epoch": 0.035811732605729875, "grad_norm": 0.5764082074165344, "learning_rate": 1.9998339012244052e-06, "loss": 0.5945, "step": 210 }, { "epoch": 0.035982264665757165, "grad_norm": 0.40010660886764526, "learning_rate": 1.9998236824081293e-06, "loss": 0.5976, "step": 211 }, { "epoch": 0.03615279672578445, "grad_norm": 0.6307498812675476, "learning_rate": 1.999813158588946e-06, "loss": 0.5949, "step": 212 }, { "epoch": 0.03632332878581173, "grad_norm": 0.5890904664993286, "learning_rate": 1.999802329770066e-06, "loss": 0.6107, "step": 213 }, { "epoch": 0.03649386084583902, "grad_norm": 0.45250949263572693, "learning_rate": 1.999791195954793e-06, "loss": 0.5806, "step": 214 }, { "epoch": 0.0366643929058663, "grad_norm": 0.45594438910484314, "learning_rate": 1.9997797571465236e-06, "loss": 0.5946, "step": 215 }, { "epoch": 0.036834924965893585, "grad_norm": 0.567471981048584, "learning_rate": 1.999768013348747e-06, "loss": 0.6035, "step": 216 }, { "epoch": 0.037005457025920875, "grad_norm": 0.4891415238380432, "learning_rate": 1.9997559645650458e-06, "loss": 0.602, "step": 217 }, { "epoch": 0.03717598908594816, "grad_norm": 0.4505879878997803, "learning_rate": 1.9997436107990957e-06, "loss": 0.5841, "step": 218 }, { "epoch": 0.03734652114597544, "grad_norm": 0.3941557705402374, "learning_rate": 1.9997309520546647e-06, "loss": 0.6117, "step": 219 }, { "epoch": 0.03751705320600273, "grad_norm": 0.421119749546051, "learning_rate": 1.9997179883356153e-06, "loss": 0.5985, "step": 220 }, { "epoch": 0.03768758526603001, "grad_norm": 0.4365604519844055, "learning_rate": 1.999704719645902e-06, "loss": 0.5842, "step": 221 }, { "epoch": 0.037858117326057296, "grad_norm": 0.4222920536994934, "learning_rate": 1.9996911459895717e-06, "loss": 0.5883, "step": 222 }, { "epoch": 0.038028649386084586, "grad_norm": 0.39965325593948364, "learning_rate": 1.999677267370766e-06, "loss": 0.6064, "step": 223 }, { "epoch": 0.03819918144611187, "grad_norm": 0.42957979440689087, "learning_rate": 1.9996630837937187e-06, "loss": 0.594, "step": 224 }, { "epoch": 0.03836971350613915, "grad_norm": 0.5226802825927734, "learning_rate": 1.9996485952627556e-06, "loss": 0.5942, "step": 225 }, { "epoch": 0.03854024556616644, "grad_norm": 0.42182719707489014, "learning_rate": 1.9996338017822976e-06, "loss": 0.5774, "step": 226 }, { "epoch": 0.038710777626193724, "grad_norm": 0.39415428042411804, "learning_rate": 1.9996187033568567e-06, "loss": 0.5861, "step": 227 }, { "epoch": 0.038881309686221006, "grad_norm": 0.37894678115844727, "learning_rate": 1.9996032999910397e-06, "loss": 0.5761, "step": 228 }, { "epoch": 0.039051841746248296, "grad_norm": 0.4413073658943176, "learning_rate": 1.999587591689545e-06, "loss": 0.5966, "step": 229 }, { "epoch": 0.03922237380627558, "grad_norm": 0.47882312536239624, "learning_rate": 1.9995715784571643e-06, "loss": 0.61, "step": 230 }, { "epoch": 0.03939290586630286, "grad_norm": 0.49211686849594116, "learning_rate": 1.999555260298783e-06, "loss": 0.5948, "step": 231 }, { "epoch": 0.03956343792633015, "grad_norm": 0.4007163941860199, "learning_rate": 1.999538637219378e-06, "loss": 0.5921, "step": 232 }, { "epoch": 0.039733969986357434, "grad_norm": 0.628340482711792, "learning_rate": 1.9995217092240224e-06, "loss": 0.593, "step": 233 }, { "epoch": 0.03990450204638472, "grad_norm": 0.5330571532249451, "learning_rate": 1.999504476317878e-06, "loss": 0.5964, "step": 234 }, { "epoch": 0.04007503410641201, "grad_norm": 0.6231827735900879, "learning_rate": 1.9994869385062033e-06, "loss": 0.5889, "step": 235 }, { "epoch": 0.04024556616643929, "grad_norm": 0.5359860062599182, "learning_rate": 1.9994690957943474e-06, "loss": 0.6076, "step": 236 }, { "epoch": 0.04041609822646658, "grad_norm": 0.3939756751060486, "learning_rate": 1.9994509481877538e-06, "loss": 0.5759, "step": 237 }, { "epoch": 0.04058663028649386, "grad_norm": 0.43472591042518616, "learning_rate": 1.9994324956919587e-06, "loss": 0.5948, "step": 238 }, { "epoch": 0.040757162346521145, "grad_norm": 0.5427122116088867, "learning_rate": 1.9994137383125902e-06, "loss": 0.5963, "step": 239 }, { "epoch": 0.040927694406548434, "grad_norm": 0.3906486928462982, "learning_rate": 1.999394676055372e-06, "loss": 0.5919, "step": 240 }, { "epoch": 0.04109822646657572, "grad_norm": 0.45824775099754333, "learning_rate": 1.9993753089261175e-06, "loss": 0.5932, "step": 241 }, { "epoch": 0.041268758526603, "grad_norm": 0.4277664124965668, "learning_rate": 1.9993556369307358e-06, "loss": 0.6218, "step": 242 }, { "epoch": 0.04143929058663029, "grad_norm": 0.3937847912311554, "learning_rate": 1.999335660075228e-06, "loss": 0.5984, "step": 243 }, { "epoch": 0.04160982264665757, "grad_norm": 0.5465782880783081, "learning_rate": 1.999315378365687e-06, "loss": 0.5884, "step": 244 }, { "epoch": 0.041780354706684855, "grad_norm": 0.4681834876537323, "learning_rate": 1.9992947918083014e-06, "loss": 0.5912, "step": 245 }, { "epoch": 0.041950886766712145, "grad_norm": 0.5907049179077148, "learning_rate": 1.9992739004093505e-06, "loss": 0.5876, "step": 246 }, { "epoch": 0.04212141882673943, "grad_norm": 0.5100119113922119, "learning_rate": 1.9992527041752075e-06, "loss": 0.5964, "step": 247 }, { "epoch": 0.04229195088676671, "grad_norm": 0.3652644157409668, "learning_rate": 1.9992312031123382e-06, "loss": 0.5868, "step": 248 }, { "epoch": 0.042462482946794, "grad_norm": 0.4750256836414337, "learning_rate": 1.999209397227302e-06, "loss": 0.5878, "step": 249 }, { "epoch": 0.04263301500682128, "grad_norm": 0.7315317392349243, "learning_rate": 1.999187286526751e-06, "loss": 0.5781, "step": 250 }, { "epoch": 0.042803547066848566, "grad_norm": 0.5293675065040588, "learning_rate": 1.99916487101743e-06, "loss": 0.5927, "step": 251 }, { "epoch": 0.042974079126875855, "grad_norm": 0.3959099054336548, "learning_rate": 1.9991421507061763e-06, "loss": 0.5798, "step": 252 }, { "epoch": 0.04314461118690314, "grad_norm": 0.6561183929443359, "learning_rate": 1.9991191255999224e-06, "loss": 0.5831, "step": 253 }, { "epoch": 0.04331514324693042, "grad_norm": 0.6775041818618774, "learning_rate": 1.9990957957056913e-06, "loss": 0.6045, "step": 254 }, { "epoch": 0.04348567530695771, "grad_norm": 0.47806668281555176, "learning_rate": 1.9990721610305997e-06, "loss": 0.5921, "step": 255 }, { "epoch": 0.04365620736698499, "grad_norm": 0.38560113310813904, "learning_rate": 1.9990482215818584e-06, "loss": 0.5793, "step": 256 }, { "epoch": 0.043826739427012276, "grad_norm": 0.49209868907928467, "learning_rate": 1.9990239773667694e-06, "loss": 0.5913, "step": 257 }, { "epoch": 0.043997271487039566, "grad_norm": 0.4512802064418793, "learning_rate": 1.998999428392729e-06, "loss": 0.5981, "step": 258 }, { "epoch": 0.04416780354706685, "grad_norm": 0.3730643391609192, "learning_rate": 1.9989745746672258e-06, "loss": 0.5803, "step": 259 }, { "epoch": 0.04433833560709413, "grad_norm": 0.40619736909866333, "learning_rate": 1.998949416197842e-06, "loss": 0.5955, "step": 260 }, { "epoch": 0.04450886766712142, "grad_norm": 0.5041584372520447, "learning_rate": 1.998923952992252e-06, "loss": 0.589, "step": 261 }, { "epoch": 0.044679399727148704, "grad_norm": 0.43416324257850647, "learning_rate": 1.9988981850582243e-06, "loss": 0.5942, "step": 262 }, { "epoch": 0.04484993178717599, "grad_norm": 0.39469459652900696, "learning_rate": 1.9988721124036185e-06, "loss": 0.5744, "step": 263 }, { "epoch": 0.045020463847203276, "grad_norm": 0.5740176439285278, "learning_rate": 1.9988457350363883e-06, "loss": 0.5682, "step": 264 }, { "epoch": 0.04519099590723056, "grad_norm": 0.5018396973609924, "learning_rate": 1.998819052964581e-06, "loss": 0.59, "step": 265 }, { "epoch": 0.04536152796725784, "grad_norm": 0.5312762260437012, "learning_rate": 1.9987920661963364e-06, "loss": 0.5769, "step": 266 }, { "epoch": 0.04553206002728513, "grad_norm": 0.43061333894729614, "learning_rate": 1.9987647747398855e-06, "loss": 0.5836, "step": 267 }, { "epoch": 0.045702592087312414, "grad_norm": 0.3743135333061218, "learning_rate": 1.998737178603555e-06, "loss": 0.5939, "step": 268 }, { "epoch": 0.0458731241473397, "grad_norm": 0.5071547627449036, "learning_rate": 1.998709277795763e-06, "loss": 0.5825, "step": 269 }, { "epoch": 0.04604365620736699, "grad_norm": 0.3874017000198364, "learning_rate": 1.998681072325021e-06, "loss": 0.5973, "step": 270 }, { "epoch": 0.04621418826739427, "grad_norm": 0.46630197763442993, "learning_rate": 1.9986525621999325e-06, "loss": 0.5684, "step": 271 }, { "epoch": 0.04638472032742155, "grad_norm": 0.36790570616722107, "learning_rate": 1.9986237474291955e-06, "loss": 0.574, "step": 272 }, { "epoch": 0.04655525238744884, "grad_norm": 0.5044417977333069, "learning_rate": 1.9985946280215996e-06, "loss": 0.6082, "step": 273 }, { "epoch": 0.046725784447476125, "grad_norm": 0.5328083634376526, "learning_rate": 1.9985652039860284e-06, "loss": 0.5826, "step": 274 }, { "epoch": 0.04689631650750341, "grad_norm": 0.4631241261959076, "learning_rate": 1.998535475331458e-06, "loss": 0.5792, "step": 275 }, { "epoch": 0.0470668485675307, "grad_norm": 0.4844154417514801, "learning_rate": 1.9985054420669563e-06, "loss": 0.5739, "step": 276 }, { "epoch": 0.04723738062755798, "grad_norm": 0.3497856855392456, "learning_rate": 1.998475104201686e-06, "loss": 0.6017, "step": 277 }, { "epoch": 0.04740791268758526, "grad_norm": 0.42583030462265015, "learning_rate": 1.9984444617449024e-06, "loss": 0.5789, "step": 278 }, { "epoch": 0.04757844474761255, "grad_norm": 0.44119739532470703, "learning_rate": 1.998413514705952e-06, "loss": 0.5819, "step": 279 }, { "epoch": 0.047748976807639835, "grad_norm": 0.45854702591896057, "learning_rate": 1.9983822630942755e-06, "loss": 0.5996, "step": 280 }, { "epoch": 0.04791950886766712, "grad_norm": 0.5364755392074585, "learning_rate": 1.998350706919407e-06, "loss": 0.5845, "step": 281 }, { "epoch": 0.04809004092769441, "grad_norm": 0.4300476312637329, "learning_rate": 1.998318846190973e-06, "loss": 0.5721, "step": 282 }, { "epoch": 0.04826057298772169, "grad_norm": 0.42480289936065674, "learning_rate": 1.9982866809186923e-06, "loss": 0.583, "step": 283 }, { "epoch": 0.048431105047748974, "grad_norm": 0.4817947447299957, "learning_rate": 1.9982542111123776e-06, "loss": 0.583, "step": 284 }, { "epoch": 0.04860163710777626, "grad_norm": 0.4101852774620056, "learning_rate": 1.9982214367819334e-06, "loss": 0.6063, "step": 285 }, { "epoch": 0.048772169167803546, "grad_norm": 0.48095643520355225, "learning_rate": 1.998188357937358e-06, "loss": 0.5855, "step": 286 }, { "epoch": 0.04894270122783083, "grad_norm": 0.41014596819877625, "learning_rate": 1.998154974588743e-06, "loss": 0.6027, "step": 287 }, { "epoch": 0.04911323328785812, "grad_norm": 0.44107893109321594, "learning_rate": 1.9981212867462715e-06, "loss": 0.5899, "step": 288 }, { "epoch": 0.0492837653478854, "grad_norm": 0.3553464710712433, "learning_rate": 1.9980872944202205e-06, "loss": 0.5827, "step": 289 }, { "epoch": 0.04945429740791269, "grad_norm": 0.4601629376411438, "learning_rate": 1.998052997620959e-06, "loss": 0.6081, "step": 290 }, { "epoch": 0.049624829467939974, "grad_norm": 0.4139860272407532, "learning_rate": 1.9980183963589505e-06, "loss": 0.5803, "step": 291 }, { "epoch": 0.049795361527967257, "grad_norm": 0.36177587509155273, "learning_rate": 1.9979834906447494e-06, "loss": 0.5828, "step": 292 }, { "epoch": 0.049965893587994546, "grad_norm": 0.8259939551353455, "learning_rate": 1.9979482804890046e-06, "loss": 0.5832, "step": 293 }, { "epoch": 0.05013642564802183, "grad_norm": 0.43055734038352966, "learning_rate": 1.9979127659024564e-06, "loss": 0.572, "step": 294 }, { "epoch": 0.05030695770804911, "grad_norm": 0.4655931890010834, "learning_rate": 1.9978769468959393e-06, "loss": 0.572, "step": 295 }, { "epoch": 0.0504774897680764, "grad_norm": 0.5628933906555176, "learning_rate": 1.997840823480381e-06, "loss": 0.589, "step": 296 }, { "epoch": 0.050648021828103684, "grad_norm": 0.4914795756340027, "learning_rate": 1.9978043956667993e-06, "loss": 0.5885, "step": 297 }, { "epoch": 0.05081855388813097, "grad_norm": 0.43875017762184143, "learning_rate": 1.997767663466308e-06, "loss": 0.5857, "step": 298 }, { "epoch": 0.05098908594815826, "grad_norm": 0.3887084126472473, "learning_rate": 1.997730626890112e-06, "loss": 0.5991, "step": 299 }, { "epoch": 0.05115961800818554, "grad_norm": 0.679341197013855, "learning_rate": 1.99769328594951e-06, "loss": 0.5751, "step": 300 }, { "epoch": 0.05133015006821282, "grad_norm": 0.4228670597076416, "learning_rate": 1.997655640655893e-06, "loss": 0.5882, "step": 301 }, { "epoch": 0.05150068212824011, "grad_norm": 0.4219878613948822, "learning_rate": 1.9976176910207447e-06, "loss": 0.5812, "step": 302 }, { "epoch": 0.051671214188267395, "grad_norm": 0.3721599876880646, "learning_rate": 1.997579437055642e-06, "loss": 0.5886, "step": 303 }, { "epoch": 0.05184174624829468, "grad_norm": 0.5150498747825623, "learning_rate": 1.9975408787722545e-06, "loss": 0.5813, "step": 304 }, { "epoch": 0.05201227830832197, "grad_norm": 0.4999360740184784, "learning_rate": 1.9975020161823446e-06, "loss": 0.5814, "step": 305 }, { "epoch": 0.05218281036834925, "grad_norm": 0.6294407844543457, "learning_rate": 1.9974628492977682e-06, "loss": 0.5735, "step": 306 }, { "epoch": 0.05235334242837653, "grad_norm": 0.7571012377738953, "learning_rate": 1.9974233781304727e-06, "loss": 0.5746, "step": 307 }, { "epoch": 0.05252387448840382, "grad_norm": 0.5309390425682068, "learning_rate": 1.997383602692499e-06, "loss": 0.5875, "step": 308 }, { "epoch": 0.052694406548431105, "grad_norm": 0.3978341221809387, "learning_rate": 1.997343522995981e-06, "loss": 0.5964, "step": 309 }, { "epoch": 0.05286493860845839, "grad_norm": 0.46522951126098633, "learning_rate": 1.9973031390531458e-06, "loss": 0.5808, "step": 310 }, { "epoch": 0.05303547066848568, "grad_norm": 0.41105544567108154, "learning_rate": 1.9972624508763126e-06, "loss": 0.5744, "step": 311 }, { "epoch": 0.05320600272851296, "grad_norm": 0.4005431532859802, "learning_rate": 1.997221458477893e-06, "loss": 0.5859, "step": 312 }, { "epoch": 0.05337653478854024, "grad_norm": 0.510310709476471, "learning_rate": 1.9971801618703925e-06, "loss": 0.5831, "step": 313 }, { "epoch": 0.05354706684856753, "grad_norm": 0.5165649056434631, "learning_rate": 1.997138561066409e-06, "loss": 0.5892, "step": 314 }, { "epoch": 0.053717598908594816, "grad_norm": 0.46517428755760193, "learning_rate": 1.9970966560786327e-06, "loss": 0.5689, "step": 315 }, { "epoch": 0.0538881309686221, "grad_norm": 0.47656869888305664, "learning_rate": 1.9970544469198473e-06, "loss": 0.584, "step": 316 }, { "epoch": 0.05405866302864939, "grad_norm": 0.5681532621383667, "learning_rate": 1.9970119336029287e-06, "loss": 0.5822, "step": 317 }, { "epoch": 0.05422919508867667, "grad_norm": 0.6467899084091187, "learning_rate": 1.9969691161408462e-06, "loss": 0.5704, "step": 318 }, { "epoch": 0.054399727148703954, "grad_norm": 0.4735259711742401, "learning_rate": 1.9969259945466615e-06, "loss": 0.5892, "step": 319 }, { "epoch": 0.054570259208731244, "grad_norm": 0.4352891743183136, "learning_rate": 1.9968825688335293e-06, "loss": 0.58, "step": 320 }, { "epoch": 0.054740791268758526, "grad_norm": 0.5583917498588562, "learning_rate": 1.996838839014696e-06, "loss": 0.5783, "step": 321 }, { "epoch": 0.05491132332878581, "grad_norm": 0.3809167742729187, "learning_rate": 1.996794805103503e-06, "loss": 0.5912, "step": 322 }, { "epoch": 0.0550818553888131, "grad_norm": 0.5830413699150085, "learning_rate": 1.9967504671133822e-06, "loss": 0.5998, "step": 323 }, { "epoch": 0.05525238744884038, "grad_norm": 0.6099497675895691, "learning_rate": 1.9967058250578593e-06, "loss": 0.5764, "step": 324 }, { "epoch": 0.055422919508867664, "grad_norm": 0.4644138514995575, "learning_rate": 1.996660878950553e-06, "loss": 0.5787, "step": 325 }, { "epoch": 0.055593451568894954, "grad_norm": 0.5825083255767822, "learning_rate": 1.9966156288051743e-06, "loss": 0.5764, "step": 326 }, { "epoch": 0.05576398362892224, "grad_norm": 0.6491715908050537, "learning_rate": 1.9965700746355273e-06, "loss": 0.572, "step": 327 }, { "epoch": 0.05593451568894952, "grad_norm": 0.5463743209838867, "learning_rate": 1.9965242164555077e-06, "loss": 0.5911, "step": 328 }, { "epoch": 0.05610504774897681, "grad_norm": 0.4275131821632385, "learning_rate": 1.996478054279106e-06, "loss": 0.571, "step": 329 }, { "epoch": 0.05627557980900409, "grad_norm": 0.4297434687614441, "learning_rate": 1.996431588120403e-06, "loss": 0.5778, "step": 330 }, { "epoch": 0.056446111869031375, "grad_norm": 0.5487849712371826, "learning_rate": 1.996384817993575e-06, "loss": 0.5717, "step": 331 }, { "epoch": 0.056616643929058665, "grad_norm": 0.4654536545276642, "learning_rate": 1.9963377439128886e-06, "loss": 0.5827, "step": 332 }, { "epoch": 0.05678717598908595, "grad_norm": 0.43091699481010437, "learning_rate": 1.996290365892704e-06, "loss": 0.5822, "step": 333 }, { "epoch": 0.05695770804911323, "grad_norm": 0.4967304468154907, "learning_rate": 1.9962426839474745e-06, "loss": 0.5858, "step": 334 }, { "epoch": 0.05712824010914052, "grad_norm": 0.39026960730552673, "learning_rate": 1.996194698091746e-06, "loss": 0.5835, "step": 335 }, { "epoch": 0.0572987721691678, "grad_norm": 0.4674072861671448, "learning_rate": 1.996146408340156e-06, "loss": 0.5748, "step": 336 }, { "epoch": 0.057469304229195085, "grad_norm": 0.38436296582221985, "learning_rate": 1.996097814707437e-06, "loss": 0.5794, "step": 337 }, { "epoch": 0.057639836289222375, "grad_norm": 0.33945173025131226, "learning_rate": 1.996048917208412e-06, "loss": 0.5787, "step": 338 }, { "epoch": 0.05781036834924966, "grad_norm": 0.45001330971717834, "learning_rate": 1.995999715857997e-06, "loss": 0.5771, "step": 339 }, { "epoch": 0.05798090040927694, "grad_norm": 0.37709715962409973, "learning_rate": 1.995950210671202e-06, "loss": 0.5987, "step": 340 }, { "epoch": 0.05815143246930423, "grad_norm": 0.48131653666496277, "learning_rate": 1.9959004016631293e-06, "loss": 0.5858, "step": 341 }, { "epoch": 0.05832196452933151, "grad_norm": 0.4368843734264374, "learning_rate": 1.995850288848972e-06, "loss": 0.5897, "step": 342 }, { "epoch": 0.0584924965893588, "grad_norm": 0.4104810655117035, "learning_rate": 1.9957998722440187e-06, "loss": 0.5781, "step": 343 }, { "epoch": 0.058663028649386086, "grad_norm": 0.5733034014701843, "learning_rate": 1.9957491518636482e-06, "loss": 0.5789, "step": 344 }, { "epoch": 0.05883356070941337, "grad_norm": 0.37760886549949646, "learning_rate": 1.9956981277233343e-06, "loss": 0.5799, "step": 345 }, { "epoch": 0.05900409276944066, "grad_norm": 0.4927324056625366, "learning_rate": 1.9956467998386414e-06, "loss": 0.5748, "step": 346 }, { "epoch": 0.05917462482946794, "grad_norm": 0.5612856149673462, "learning_rate": 1.9955951682252276e-06, "loss": 0.5835, "step": 347 }, { "epoch": 0.059345156889495224, "grad_norm": 0.7066859006881714, "learning_rate": 1.9955432328988437e-06, "loss": 0.5854, "step": 348 }, { "epoch": 0.05951568894952251, "grad_norm": 0.6565737128257751, "learning_rate": 1.9954909938753328e-06, "loss": 0.5813, "step": 349 }, { "epoch": 0.059686221009549796, "grad_norm": 0.41528597474098206, "learning_rate": 1.9954384511706304e-06, "loss": 0.5807, "step": 350 }, { "epoch": 0.05985675306957708, "grad_norm": 0.5107154250144958, "learning_rate": 1.9953856048007657e-06, "loss": 0.6013, "step": 351 }, { "epoch": 0.06002728512960437, "grad_norm": 0.5143330097198486, "learning_rate": 1.9953324547818592e-06, "loss": 0.5769, "step": 352 }, { "epoch": 0.06019781718963165, "grad_norm": 0.47564592957496643, "learning_rate": 1.995279001130125e-06, "loss": 0.5918, "step": 353 }, { "epoch": 0.060368349249658934, "grad_norm": 0.44760820269584656, "learning_rate": 1.9952252438618693e-06, "loss": 0.5785, "step": 354 }, { "epoch": 0.060538881309686224, "grad_norm": 0.37445268034935, "learning_rate": 1.9951711829934916e-06, "loss": 0.5714, "step": 355 }, { "epoch": 0.06070941336971351, "grad_norm": 0.7027251720428467, "learning_rate": 1.9951168185414827e-06, "loss": 0.5701, "step": 356 }, { "epoch": 0.06087994542974079, "grad_norm": 0.8148019909858704, "learning_rate": 1.9950621505224278e-06, "loss": 0.5913, "step": 357 }, { "epoch": 0.06105047748976808, "grad_norm": 0.47353798151016235, "learning_rate": 1.995007178953003e-06, "loss": 0.5964, "step": 358 }, { "epoch": 0.06122100954979536, "grad_norm": 0.5995876789093018, "learning_rate": 1.994951903849978e-06, "loss": 0.5922, "step": 359 }, { "epoch": 0.061391541609822645, "grad_norm": 0.8017838597297668, "learning_rate": 1.994896325230215e-06, "loss": 0.5747, "step": 360 }, { "epoch": 0.061562073669849934, "grad_norm": 0.5751956105232239, "learning_rate": 1.994840443110668e-06, "loss": 0.5922, "step": 361 }, { "epoch": 0.06173260572987722, "grad_norm": 0.4752778112888336, "learning_rate": 1.9947842575083853e-06, "loss": 0.6171, "step": 362 }, { "epoch": 0.0619031377899045, "grad_norm": 0.684117317199707, "learning_rate": 1.9947277684405055e-06, "loss": 0.5765, "step": 363 }, { "epoch": 0.06207366984993179, "grad_norm": 0.6569706797599792, "learning_rate": 1.994670975924262e-06, "loss": 0.575, "step": 364 }, { "epoch": 0.06224420190995907, "grad_norm": 0.47546330094337463, "learning_rate": 1.9946138799769792e-06, "loss": 0.5873, "step": 365 }, { "epoch": 0.062414733969986355, "grad_norm": 0.6601859331130981, "learning_rate": 1.9945564806160746e-06, "loss": 0.571, "step": 366 }, { "epoch": 0.06258526603001364, "grad_norm": 0.48071083426475525, "learning_rate": 1.994498777859058e-06, "loss": 0.5712, "step": 367 }, { "epoch": 0.06275579809004093, "grad_norm": 0.5320371985435486, "learning_rate": 1.994440771723533e-06, "loss": 0.6049, "step": 368 }, { "epoch": 0.06292633015006821, "grad_norm": 0.5746079087257385, "learning_rate": 1.9943824622271938e-06, "loss": 0.5853, "step": 369 }, { "epoch": 0.0630968622100955, "grad_norm": 0.42983976006507874, "learning_rate": 1.9943238493878286e-06, "loss": 0.5753, "step": 370 }, { "epoch": 0.06326739427012279, "grad_norm": 0.4530344307422638, "learning_rate": 1.994264933223317e-06, "loss": 0.5919, "step": 371 }, { "epoch": 0.06343792633015007, "grad_norm": 0.5168627500534058, "learning_rate": 1.9942057137516327e-06, "loss": 0.579, "step": 372 }, { "epoch": 0.06360845839017736, "grad_norm": 0.4011041820049286, "learning_rate": 1.9941461909908402e-06, "loss": 0.5559, "step": 373 }, { "epoch": 0.06377899045020465, "grad_norm": 0.44738543033599854, "learning_rate": 1.9940863649590977e-06, "loss": 0.5696, "step": 374 }, { "epoch": 0.06394952251023192, "grad_norm": 0.4560238718986511, "learning_rate": 1.9940262356746557e-06, "loss": 0.5892, "step": 375 }, { "epoch": 0.06412005457025921, "grad_norm": 0.4311150908470154, "learning_rate": 1.9939658031558566e-06, "loss": 0.6006, "step": 376 }, { "epoch": 0.0642905866302865, "grad_norm": 0.5050395131111145, "learning_rate": 1.993905067421136e-06, "loss": 0.5677, "step": 377 }, { "epoch": 0.06446111869031378, "grad_norm": 0.44102242588996887, "learning_rate": 1.9938440284890217e-06, "loss": 0.5881, "step": 378 }, { "epoch": 0.06463165075034107, "grad_norm": 0.3900633156299591, "learning_rate": 1.993782686378134e-06, "loss": 0.5681, "step": 379 }, { "epoch": 0.06480218281036836, "grad_norm": 0.4846239686012268, "learning_rate": 1.9937210411071857e-06, "loss": 0.5726, "step": 380 }, { "epoch": 0.06497271487039563, "grad_norm": 0.4322761297225952, "learning_rate": 1.9936590926949824e-06, "loss": 0.575, "step": 381 }, { "epoch": 0.06514324693042292, "grad_norm": 0.43640026450157166, "learning_rate": 1.9935968411604214e-06, "loss": 0.5758, "step": 382 }, { "epoch": 0.06531377899045021, "grad_norm": 0.44950467348098755, "learning_rate": 1.9935342865224937e-06, "loss": 0.5948, "step": 383 }, { "epoch": 0.06548431105047749, "grad_norm": 0.6357623934745789, "learning_rate": 1.993471428800281e-06, "loss": 0.5728, "step": 384 }, { "epoch": 0.06565484311050478, "grad_norm": 0.6120256781578064, "learning_rate": 1.993408268012959e-06, "loss": 0.5658, "step": 385 }, { "epoch": 0.06582537517053207, "grad_norm": 0.3851131796836853, "learning_rate": 1.993344804179795e-06, "loss": 0.5744, "step": 386 }, { "epoch": 0.06599590723055934, "grad_norm": 0.495846688747406, "learning_rate": 1.99328103732015e-06, "loss": 0.5777, "step": 387 }, { "epoch": 0.06616643929058663, "grad_norm": 0.824712872505188, "learning_rate": 1.9932169674534752e-06, "loss": 0.5611, "step": 388 }, { "epoch": 0.06633697135061392, "grad_norm": 0.8101972937583923, "learning_rate": 1.9931525945993165e-06, "loss": 0.5836, "step": 389 }, { "epoch": 0.0665075034106412, "grad_norm": 0.4071904420852661, "learning_rate": 1.993087918777311e-06, "loss": 0.575, "step": 390 }, { "epoch": 0.06667803547066849, "grad_norm": 0.5939446687698364, "learning_rate": 1.9930229400071888e-06, "loss": 0.6035, "step": 391 }, { "epoch": 0.06684856753069578, "grad_norm": 0.657426118850708, "learning_rate": 1.9929576583087716e-06, "loss": 0.579, "step": 392 }, { "epoch": 0.06701909959072305, "grad_norm": 0.4552478492259979, "learning_rate": 1.9928920737019734e-06, "loss": 0.5747, "step": 393 }, { "epoch": 0.06718963165075034, "grad_norm": 0.42880189418792725, "learning_rate": 1.992826186206803e-06, "loss": 0.5828, "step": 394 }, { "epoch": 0.06736016371077763, "grad_norm": 0.4603235423564911, "learning_rate": 1.9927599958433586e-06, "loss": 0.5822, "step": 395 }, { "epoch": 0.06753069577080491, "grad_norm": 0.5346198081970215, "learning_rate": 1.9926935026318323e-06, "loss": 0.5906, "step": 396 }, { "epoch": 0.0677012278308322, "grad_norm": 0.4686392843723297, "learning_rate": 1.992626706592508e-06, "loss": 0.5619, "step": 397 }, { "epoch": 0.06787175989085949, "grad_norm": 0.4141104817390442, "learning_rate": 1.9925596077457624e-06, "loss": 0.5676, "step": 398 }, { "epoch": 0.06804229195088676, "grad_norm": 0.42539089918136597, "learning_rate": 1.9924922061120644e-06, "loss": 0.571, "step": 399 }, { "epoch": 0.06821282401091405, "grad_norm": 0.44370517134666443, "learning_rate": 1.992424501711976e-06, "loss": 0.5882, "step": 400 }, { "epoch": 0.06838335607094134, "grad_norm": 0.43443042039871216, "learning_rate": 1.9923564945661506e-06, "loss": 0.5742, "step": 401 }, { "epoch": 0.06855388813096862, "grad_norm": 0.4402327835559845, "learning_rate": 1.9922881846953335e-06, "loss": 0.5759, "step": 402 }, { "epoch": 0.06872442019099591, "grad_norm": 0.35972359776496887, "learning_rate": 1.992219572120364e-06, "loss": 0.5733, "step": 403 }, { "epoch": 0.0688949522510232, "grad_norm": 0.4337236285209656, "learning_rate": 1.992150656862172e-06, "loss": 0.572, "step": 404 }, { "epoch": 0.06906548431105047, "grad_norm": 0.45187443494796753, "learning_rate": 1.9920814389417814e-06, "loss": 0.563, "step": 405 }, { "epoch": 0.06923601637107776, "grad_norm": 0.4660555422306061, "learning_rate": 1.9920119183803068e-06, "loss": 0.608, "step": 406 }, { "epoch": 0.06940654843110505, "grad_norm": 0.41995012760162354, "learning_rate": 1.9919420951989565e-06, "loss": 0.578, "step": 407 }, { "epoch": 0.06957708049113233, "grad_norm": 0.5486136078834534, "learning_rate": 1.99187196941903e-06, "loss": 0.5737, "step": 408 }, { "epoch": 0.06974761255115962, "grad_norm": 0.6215372085571289, "learning_rate": 1.9918015410619203e-06, "loss": 0.575, "step": 409 }, { "epoch": 0.06991814461118691, "grad_norm": 0.5288968086242676, "learning_rate": 1.9917308101491118e-06, "loss": 0.5749, "step": 410 }, { "epoch": 0.07008867667121418, "grad_norm": 0.5099157094955444, "learning_rate": 1.991659776702181e-06, "loss": 0.5823, "step": 411 }, { "epoch": 0.07025920873124147, "grad_norm": 0.46361568570137024, "learning_rate": 1.991588440742797e-06, "loss": 0.5711, "step": 412 }, { "epoch": 0.07042974079126876, "grad_norm": 0.47007569670677185, "learning_rate": 1.991516802292723e-06, "loss": 0.5746, "step": 413 }, { "epoch": 0.07060027285129604, "grad_norm": 0.5757049322128296, "learning_rate": 1.991444861373811e-06, "loss": 0.5821, "step": 414 }, { "epoch": 0.07077080491132333, "grad_norm": 0.4328621029853821, "learning_rate": 1.991372618008007e-06, "loss": 0.5736, "step": 415 }, { "epoch": 0.07094133697135062, "grad_norm": 0.501839816570282, "learning_rate": 1.991300072217351e-06, "loss": 0.5821, "step": 416 }, { "epoch": 0.0711118690313779, "grad_norm": 0.36504441499710083, "learning_rate": 1.9912272240239716e-06, "loss": 0.5882, "step": 417 }, { "epoch": 0.07128240109140518, "grad_norm": 0.4125383794307709, "learning_rate": 1.9911540734500932e-06, "loss": 0.5628, "step": 418 }, { "epoch": 0.07145293315143247, "grad_norm": 0.40303322672843933, "learning_rate": 1.99108062051803e-06, "loss": 0.6005, "step": 419 }, { "epoch": 0.07162346521145975, "grad_norm": 0.4227195978164673, "learning_rate": 1.9910068652501893e-06, "loss": 0.5529, "step": 420 }, { "epoch": 0.07179399727148704, "grad_norm": 0.37034130096435547, "learning_rate": 1.9909328076690713e-06, "loss": 0.5755, "step": 421 }, { "epoch": 0.07196452933151433, "grad_norm": 0.38170015811920166, "learning_rate": 1.990858447797267e-06, "loss": 0.5717, "step": 422 }, { "epoch": 0.0721350613915416, "grad_norm": 0.4700663387775421, "learning_rate": 1.990783785657461e-06, "loss": 0.582, "step": 423 }, { "epoch": 0.0723055934515689, "grad_norm": 0.42466485500335693, "learning_rate": 1.990708821272429e-06, "loss": 0.5798, "step": 424 }, { "epoch": 0.07247612551159618, "grad_norm": 0.44316986203193665, "learning_rate": 1.9906335546650395e-06, "loss": 0.555, "step": 425 }, { "epoch": 0.07264665757162346, "grad_norm": 0.36553263664245605, "learning_rate": 1.990557985858253e-06, "loss": 0.5596, "step": 426 }, { "epoch": 0.07281718963165075, "grad_norm": 0.4100362956523895, "learning_rate": 1.990482114875123e-06, "loss": 0.5537, "step": 427 }, { "epoch": 0.07298772169167804, "grad_norm": 0.4145418703556061, "learning_rate": 1.9904059417387935e-06, "loss": 0.5602, "step": 428 }, { "epoch": 0.07315825375170532, "grad_norm": 0.42742791771888733, "learning_rate": 1.9903294664725023e-06, "loss": 0.5833, "step": 429 }, { "epoch": 0.0733287858117326, "grad_norm": 0.33802473545074463, "learning_rate": 1.990252689099578e-06, "loss": 0.5578, "step": 430 }, { "epoch": 0.0734993178717599, "grad_norm": 0.4198910593986511, "learning_rate": 1.9901756096434426e-06, "loss": 0.5842, "step": 431 }, { "epoch": 0.07366984993178717, "grad_norm": 0.36407625675201416, "learning_rate": 1.99009822812761e-06, "loss": 0.5658, "step": 432 }, { "epoch": 0.07384038199181446, "grad_norm": 0.36379313468933105, "learning_rate": 1.9900205445756847e-06, "loss": 0.57, "step": 433 }, { "epoch": 0.07401091405184175, "grad_norm": 0.4023984372615814, "learning_rate": 1.989942559011366e-06, "loss": 0.577, "step": 434 }, { "epoch": 0.07418144611186903, "grad_norm": 0.3459740877151489, "learning_rate": 1.989864271458443e-06, "loss": 0.5719, "step": 435 }, { "epoch": 0.07435197817189632, "grad_norm": 0.4210052788257599, "learning_rate": 1.9897856819407983e-06, "loss": 0.5766, "step": 436 }, { "epoch": 0.0745225102319236, "grad_norm": 0.3438369333744049, "learning_rate": 1.989706790482406e-06, "loss": 0.5681, "step": 437 }, { "epoch": 0.07469304229195088, "grad_norm": 0.3840697407722473, "learning_rate": 1.9896275971073327e-06, "loss": 0.5962, "step": 438 }, { "epoch": 0.07486357435197817, "grad_norm": 0.3564974069595337, "learning_rate": 1.9895481018397363e-06, "loss": 0.5657, "step": 439 }, { "epoch": 0.07503410641200546, "grad_norm": 0.3731328547000885, "learning_rate": 1.989468304703868e-06, "loss": 0.569, "step": 440 }, { "epoch": 0.07520463847203274, "grad_norm": 0.42112016677856445, "learning_rate": 1.98938820572407e-06, "loss": 0.5673, "step": 441 }, { "epoch": 0.07537517053206003, "grad_norm": 0.429995596408844, "learning_rate": 1.989307804924777e-06, "loss": 0.5469, "step": 442 }, { "epoch": 0.07554570259208732, "grad_norm": 0.46121692657470703, "learning_rate": 1.9892271023305163e-06, "loss": 0.5784, "step": 443 }, { "epoch": 0.07571623465211459, "grad_norm": 0.5717109441757202, "learning_rate": 1.9891460979659064e-06, "loss": 0.5791, "step": 444 }, { "epoch": 0.07588676671214188, "grad_norm": 0.5568169355392456, "learning_rate": 1.9890647918556583e-06, "loss": 0.5677, "step": 445 }, { "epoch": 0.07605729877216917, "grad_norm": 0.39502644538879395, "learning_rate": 1.988983184024575e-06, "loss": 0.5864, "step": 446 }, { "epoch": 0.07622783083219645, "grad_norm": 0.4009121060371399, "learning_rate": 1.988901274497551e-06, "loss": 0.569, "step": 447 }, { "epoch": 0.07639836289222374, "grad_norm": 0.5456749796867371, "learning_rate": 1.988819063299574e-06, "loss": 0.5593, "step": 448 }, { "epoch": 0.07656889495225103, "grad_norm": 0.5741431713104248, "learning_rate": 1.9887365504557236e-06, "loss": 0.5749, "step": 449 }, { "epoch": 0.0767394270122783, "grad_norm": 0.530440628528595, "learning_rate": 1.9886537359911693e-06, "loss": 0.589, "step": 450 }, { "epoch": 0.07690995907230559, "grad_norm": 0.362716406583786, "learning_rate": 1.9885706199311757e-06, "loss": 0.5791, "step": 451 }, { "epoch": 0.07708049113233288, "grad_norm": 0.5184724926948547, "learning_rate": 1.9884872023010974e-06, "loss": 0.5861, "step": 452 }, { "epoch": 0.07725102319236016, "grad_norm": 0.4775357246398926, "learning_rate": 1.988403483126381e-06, "loss": 0.57, "step": 453 }, { "epoch": 0.07742155525238745, "grad_norm": 0.46023473143577576, "learning_rate": 1.988319462432566e-06, "loss": 0.5847, "step": 454 }, { "epoch": 0.07759208731241474, "grad_norm": 0.6719965934753418, "learning_rate": 1.9882351402452837e-06, "loss": 0.5629, "step": 455 }, { "epoch": 0.07776261937244201, "grad_norm": 0.7485231161117554, "learning_rate": 1.988150516590257e-06, "loss": 0.5688, "step": 456 }, { "epoch": 0.0779331514324693, "grad_norm": 0.5788260102272034, "learning_rate": 1.988065591493301e-06, "loss": 0.5696, "step": 457 }, { "epoch": 0.07810368349249659, "grad_norm": 0.5395895838737488, "learning_rate": 1.987980364980322e-06, "loss": 0.5785, "step": 458 }, { "epoch": 0.07827421555252387, "grad_norm": 0.43305692076683044, "learning_rate": 1.9878948370773196e-06, "loss": 0.5633, "step": 459 }, { "epoch": 0.07844474761255116, "grad_norm": 0.569288432598114, "learning_rate": 1.987809007810385e-06, "loss": 0.5796, "step": 460 }, { "epoch": 0.07861527967257845, "grad_norm": 0.49687209725379944, "learning_rate": 1.9877228772056993e-06, "loss": 0.5695, "step": 461 }, { "epoch": 0.07878581173260572, "grad_norm": 0.3611188530921936, "learning_rate": 1.9876364452895394e-06, "loss": 0.5752, "step": 462 }, { "epoch": 0.07895634379263301, "grad_norm": 0.5431200265884399, "learning_rate": 1.9875497120882705e-06, "loss": 0.5878, "step": 463 }, { "epoch": 0.0791268758526603, "grad_norm": 0.3528672158718109, "learning_rate": 1.987462677628352e-06, "loss": 0.5591, "step": 464 }, { "epoch": 0.07929740791268758, "grad_norm": 0.5321730971336365, "learning_rate": 1.9873753419363336e-06, "loss": 0.5747, "step": 465 }, { "epoch": 0.07946793997271487, "grad_norm": 0.4939395487308502, "learning_rate": 1.9872877050388583e-06, "loss": 0.5767, "step": 466 }, { "epoch": 0.07963847203274216, "grad_norm": 0.4068276882171631, "learning_rate": 1.9871997669626597e-06, "loss": 0.5797, "step": 467 }, { "epoch": 0.07980900409276943, "grad_norm": 0.4314722418785095, "learning_rate": 1.9871115277345643e-06, "loss": 0.5579, "step": 468 }, { "epoch": 0.07997953615279672, "grad_norm": 0.4068446457386017, "learning_rate": 1.98702298738149e-06, "loss": 0.5835, "step": 469 }, { "epoch": 0.08015006821282401, "grad_norm": 0.3883475661277771, "learning_rate": 1.9869341459304463e-06, "loss": 0.5777, "step": 470 }, { "epoch": 0.08032060027285129, "grad_norm": 0.37347447872161865, "learning_rate": 1.9868450034085355e-06, "loss": 0.5686, "step": 471 }, { "epoch": 0.08049113233287858, "grad_norm": 0.40475261211395264, "learning_rate": 1.9867555598429507e-06, "loss": 0.5955, "step": 472 }, { "epoch": 0.08066166439290587, "grad_norm": 0.44075220823287964, "learning_rate": 1.9866658152609774e-06, "loss": 0.5733, "step": 473 }, { "epoch": 0.08083219645293316, "grad_norm": 0.4294121563434601, "learning_rate": 1.9865757696899925e-06, "loss": 0.5787, "step": 474 }, { "epoch": 0.08100272851296043, "grad_norm": 0.4231039583683014, "learning_rate": 1.9864854231574652e-06, "loss": 0.5594, "step": 475 }, { "epoch": 0.08117326057298772, "grad_norm": 0.3832063674926758, "learning_rate": 1.9863947756909565e-06, "loss": 0.5884, "step": 476 }, { "epoch": 0.08134379263301501, "grad_norm": 0.4569307267665863, "learning_rate": 1.986303827318119e-06, "loss": 0.5579, "step": 477 }, { "epoch": 0.08151432469304229, "grad_norm": 0.5801775455474854, "learning_rate": 1.986212578066697e-06, "loss": 0.5844, "step": 478 }, { "epoch": 0.08168485675306958, "grad_norm": 0.4718495309352875, "learning_rate": 1.9861210279645262e-06, "loss": 0.5863, "step": 479 }, { "epoch": 0.08185538881309687, "grad_norm": 0.42160969972610474, "learning_rate": 1.9860291770395352e-06, "loss": 0.5624, "step": 480 }, { "epoch": 0.08202592087312414, "grad_norm": 0.430442214012146, "learning_rate": 1.9859370253197438e-06, "loss": 0.597, "step": 481 }, { "epoch": 0.08219645293315143, "grad_norm": 0.40239301323890686, "learning_rate": 1.985844572833263e-06, "loss": 0.564, "step": 482 }, { "epoch": 0.08236698499317872, "grad_norm": 0.4097753167152405, "learning_rate": 1.9857518196082966e-06, "loss": 0.556, "step": 483 }, { "epoch": 0.082537517053206, "grad_norm": 0.417348712682724, "learning_rate": 1.9856587656731393e-06, "loss": 0.5774, "step": 484 }, { "epoch": 0.08270804911323329, "grad_norm": 0.41074228286743164, "learning_rate": 1.985565411056178e-06, "loss": 0.5638, "step": 485 }, { "epoch": 0.08287858117326058, "grad_norm": 0.40855127573013306, "learning_rate": 1.9854717557858907e-06, "loss": 0.5903, "step": 486 }, { "epoch": 0.08304911323328786, "grad_norm": 0.4109269380569458, "learning_rate": 1.985377799890848e-06, "loss": 0.5752, "step": 487 }, { "epoch": 0.08321964529331514, "grad_norm": 0.5294615030288696, "learning_rate": 1.9852835433997114e-06, "loss": 0.5646, "step": 488 }, { "epoch": 0.08339017735334243, "grad_norm": 0.3962743282318115, "learning_rate": 1.9851889863412346e-06, "loss": 0.5778, "step": 489 }, { "epoch": 0.08356070941336971, "grad_norm": 0.39832717180252075, "learning_rate": 1.985094128744264e-06, "loss": 0.5866, "step": 490 }, { "epoch": 0.083731241473397, "grad_norm": 0.42551106214523315, "learning_rate": 1.9849989706377344e-06, "loss": 0.5749, "step": 491 }, { "epoch": 0.08390177353342429, "grad_norm": 0.3772289454936981, "learning_rate": 1.9849035120506757e-06, "loss": 0.575, "step": 492 }, { "epoch": 0.08407230559345157, "grad_norm": 0.3782070279121399, "learning_rate": 1.9848077530122084e-06, "loss": 0.5703, "step": 493 }, { "epoch": 0.08424283765347886, "grad_norm": 0.38931572437286377, "learning_rate": 1.984711693551544e-06, "loss": 0.5645, "step": 494 }, { "epoch": 0.08441336971350615, "grad_norm": 0.3773365020751953, "learning_rate": 1.9846153336979857e-06, "loss": 0.5983, "step": 495 }, { "epoch": 0.08458390177353342, "grad_norm": 0.47619354724884033, "learning_rate": 1.9845186734809297e-06, "loss": 0.5838, "step": 496 }, { "epoch": 0.08475443383356071, "grad_norm": 0.5386983752250671, "learning_rate": 1.9844217129298618e-06, "loss": 0.5664, "step": 497 }, { "epoch": 0.084924965893588, "grad_norm": 0.49994444847106934, "learning_rate": 1.9843244520743614e-06, "loss": 0.5888, "step": 498 }, { "epoch": 0.08509549795361528, "grad_norm": 0.4456343650817871, "learning_rate": 1.984226890944098e-06, "loss": 0.5736, "step": 499 }, { "epoch": 0.08526603001364257, "grad_norm": 0.4084942042827606, "learning_rate": 1.9841290295688334e-06, "loss": 0.5647, "step": 500 }, { "epoch": 0.08543656207366986, "grad_norm": 0.43860793113708496, "learning_rate": 1.984030867978421e-06, "loss": 0.5679, "step": 501 }, { "epoch": 0.08560709413369713, "grad_norm": 0.4625251293182373, "learning_rate": 1.9839324062028053e-06, "loss": 0.5576, "step": 502 }, { "epoch": 0.08577762619372442, "grad_norm": 0.4842986464500427, "learning_rate": 1.983833644272023e-06, "loss": 0.5773, "step": 503 }, { "epoch": 0.08594815825375171, "grad_norm": 0.4727341830730438, "learning_rate": 1.9837345822162022e-06, "loss": 0.576, "step": 504 }, { "epoch": 0.08611869031377899, "grad_norm": 0.40692001581192017, "learning_rate": 1.9836352200655623e-06, "loss": 0.5696, "step": 505 }, { "epoch": 0.08628922237380628, "grad_norm": 0.3653680086135864, "learning_rate": 1.9835355578504146e-06, "loss": 0.5759, "step": 506 }, { "epoch": 0.08645975443383357, "grad_norm": 0.4303702414035797, "learning_rate": 1.983435595601161e-06, "loss": 0.5641, "step": 507 }, { "epoch": 0.08663028649386084, "grad_norm": 0.5330203175544739, "learning_rate": 1.9833353333482964e-06, "loss": 0.5667, "step": 508 }, { "epoch": 0.08680081855388813, "grad_norm": 0.4900553226470947, "learning_rate": 1.983234771122406e-06, "loss": 0.5819, "step": 509 }, { "epoch": 0.08697135061391542, "grad_norm": 0.3540118932723999, "learning_rate": 1.9831339089541672e-06, "loss": 0.5652, "step": 510 }, { "epoch": 0.0871418826739427, "grad_norm": 0.484788715839386, "learning_rate": 1.9830327468743492e-06, "loss": 0.572, "step": 511 }, { "epoch": 0.08731241473396999, "grad_norm": 0.38806799054145813, "learning_rate": 1.9829312849138114e-06, "loss": 0.5646, "step": 512 }, { "epoch": 0.08748294679399728, "grad_norm": 0.4288354814052582, "learning_rate": 1.9828295231035057e-06, "loss": 0.5733, "step": 513 }, { "epoch": 0.08765347885402455, "grad_norm": 0.4415360391139984, "learning_rate": 1.982727461474475e-06, "loss": 0.5645, "step": 514 }, { "epoch": 0.08782401091405184, "grad_norm": 0.4523155093193054, "learning_rate": 1.9826251000578543e-06, "loss": 0.567, "step": 515 }, { "epoch": 0.08799454297407913, "grad_norm": 0.6626890897750854, "learning_rate": 1.9825224388848694e-06, "loss": 0.5566, "step": 516 }, { "epoch": 0.08816507503410641, "grad_norm": 0.7616189122200012, "learning_rate": 1.9824194779868375e-06, "loss": 0.5727, "step": 517 }, { "epoch": 0.0883356070941337, "grad_norm": 0.7304683327674866, "learning_rate": 1.9823162173951683e-06, "loss": 0.5767, "step": 518 }, { "epoch": 0.08850613915416099, "grad_norm": 0.5385451316833496, "learning_rate": 1.9822126571413617e-06, "loss": 0.5743, "step": 519 }, { "epoch": 0.08867667121418826, "grad_norm": 0.49225184321403503, "learning_rate": 1.9821087972570094e-06, "loss": 0.5807, "step": 520 }, { "epoch": 0.08884720327421555, "grad_norm": 0.4323744773864746, "learning_rate": 1.9820046377737943e-06, "loss": 0.5709, "step": 521 }, { "epoch": 0.08901773533424284, "grad_norm": 0.6151190996170044, "learning_rate": 1.9819001787234915e-06, "loss": 0.5605, "step": 522 }, { "epoch": 0.08918826739427012, "grad_norm": 0.6538416147232056, "learning_rate": 1.9817954201379666e-06, "loss": 0.5868, "step": 523 }, { "epoch": 0.08935879945429741, "grad_norm": 0.44516924023628235, "learning_rate": 1.9816903620491766e-06, "loss": 0.5631, "step": 524 }, { "epoch": 0.0895293315143247, "grad_norm": 0.40045231580734253, "learning_rate": 1.981585004489171e-06, "loss": 0.5787, "step": 525 }, { "epoch": 0.08969986357435197, "grad_norm": 0.4614306390285492, "learning_rate": 1.981479347490089e-06, "loss": 0.5786, "step": 526 }, { "epoch": 0.08987039563437926, "grad_norm": 0.3792482912540436, "learning_rate": 1.981373391084163e-06, "loss": 0.5671, "step": 527 }, { "epoch": 0.09004092769440655, "grad_norm": 0.41413459181785583, "learning_rate": 1.981267135303714e-06, "loss": 0.5785, "step": 528 }, { "epoch": 0.09021145975443383, "grad_norm": 0.35130906105041504, "learning_rate": 1.981160580181158e-06, "loss": 0.5761, "step": 529 }, { "epoch": 0.09038199181446112, "grad_norm": 0.442913293838501, "learning_rate": 1.9810537257489988e-06, "loss": 0.5761, "step": 530 }, { "epoch": 0.09055252387448841, "grad_norm": 0.39329108595848083, "learning_rate": 1.980946572039834e-06, "loss": 0.5773, "step": 531 }, { "epoch": 0.09072305593451568, "grad_norm": 0.37966036796569824, "learning_rate": 1.9808391190863515e-06, "loss": 0.5695, "step": 532 }, { "epoch": 0.09089358799454297, "grad_norm": 0.4225713610649109, "learning_rate": 1.98073136692133e-06, "loss": 0.5786, "step": 533 }, { "epoch": 0.09106412005457026, "grad_norm": 0.4584408700466156, "learning_rate": 1.98062331557764e-06, "loss": 0.5699, "step": 534 }, { "epoch": 0.09123465211459754, "grad_norm": 0.41836604475975037, "learning_rate": 1.9805149650882442e-06, "loss": 0.5715, "step": 535 }, { "epoch": 0.09140518417462483, "grad_norm": 0.4590608477592468, "learning_rate": 1.980406315486195e-06, "loss": 0.5785, "step": 536 }, { "epoch": 0.09157571623465212, "grad_norm": 0.49678942561149597, "learning_rate": 1.9802973668046367e-06, "loss": 0.561, "step": 537 }, { "epoch": 0.0917462482946794, "grad_norm": 0.3588040769100189, "learning_rate": 1.980188119076804e-06, "loss": 0.5658, "step": 538 }, { "epoch": 0.09191678035470668, "grad_norm": 0.4952399432659149, "learning_rate": 1.9800785723360257e-06, "loss": 0.5821, "step": 539 }, { "epoch": 0.09208731241473397, "grad_norm": 0.4507637023925781, "learning_rate": 1.979968726615718e-06, "loss": 0.5812, "step": 540 }, { "epoch": 0.09225784447476125, "grad_norm": 0.388642281293869, "learning_rate": 1.979858581949391e-06, "loss": 0.5736, "step": 541 }, { "epoch": 0.09242837653478854, "grad_norm": 0.47354656457901, "learning_rate": 1.979748138370644e-06, "loss": 0.5744, "step": 542 }, { "epoch": 0.09259890859481583, "grad_norm": 0.350930780172348, "learning_rate": 1.97963739591317e-06, "loss": 0.571, "step": 543 }, { "epoch": 0.0927694406548431, "grad_norm": 0.428281307220459, "learning_rate": 1.979526354610751e-06, "loss": 0.5591, "step": 544 }, { "epoch": 0.0929399727148704, "grad_norm": 0.4820069670677185, "learning_rate": 1.97941501449726e-06, "loss": 0.5862, "step": 545 }, { "epoch": 0.09311050477489768, "grad_norm": 0.5184879302978516, "learning_rate": 1.9793033756066635e-06, "loss": 0.5524, "step": 546 }, { "epoch": 0.09328103683492496, "grad_norm": 0.40888434648513794, "learning_rate": 1.9791914379730175e-06, "loss": 0.56, "step": 547 }, { "epoch": 0.09345156889495225, "grad_norm": 0.35360148549079895, "learning_rate": 1.9790792016304687e-06, "loss": 0.577, "step": 548 }, { "epoch": 0.09362210095497954, "grad_norm": 0.4426764249801636, "learning_rate": 1.9789666666132557e-06, "loss": 0.5913, "step": 549 }, { "epoch": 0.09379263301500682, "grad_norm": 0.38881126046180725, "learning_rate": 1.978853832955708e-06, "loss": 0.576, "step": 550 }, { "epoch": 0.0939631650750341, "grad_norm": 0.40196335315704346, "learning_rate": 1.9787407006922466e-06, "loss": 0.5899, "step": 551 }, { "epoch": 0.0941336971350614, "grad_norm": 0.37533479928970337, "learning_rate": 1.978627269857383e-06, "loss": 0.5804, "step": 552 }, { "epoch": 0.09430422919508867, "grad_norm": 0.34749701619148254, "learning_rate": 1.9785135404857202e-06, "loss": 0.5586, "step": 553 }, { "epoch": 0.09447476125511596, "grad_norm": 0.40509840846061707, "learning_rate": 1.978399512611952e-06, "loss": 0.5604, "step": 554 }, { "epoch": 0.09464529331514325, "grad_norm": 0.43824177980422974, "learning_rate": 1.9782851862708634e-06, "loss": 0.5616, "step": 555 }, { "epoch": 0.09481582537517053, "grad_norm": 0.5045179724693298, "learning_rate": 1.97817056149733e-06, "loss": 0.5608, "step": 556 }, { "epoch": 0.09498635743519782, "grad_norm": 0.4929228723049164, "learning_rate": 1.97805563832632e-06, "loss": 0.5768, "step": 557 }, { "epoch": 0.0951568894952251, "grad_norm": 0.3765542507171631, "learning_rate": 1.9779404167928904e-06, "loss": 0.5512, "step": 558 }, { "epoch": 0.09532742155525238, "grad_norm": 0.3653373420238495, "learning_rate": 1.9778248969321907e-06, "loss": 0.5653, "step": 559 }, { "epoch": 0.09549795361527967, "grad_norm": 0.4602164328098297, "learning_rate": 1.9777090787794607e-06, "loss": 0.5974, "step": 560 }, { "epoch": 0.09566848567530696, "grad_norm": 0.5155764818191528, "learning_rate": 1.977592962370032e-06, "loss": 0.5544, "step": 561 }, { "epoch": 0.09583901773533424, "grad_norm": 0.6241593360900879, "learning_rate": 1.9774765477393262e-06, "loss": 0.5837, "step": 562 }, { "epoch": 0.09600954979536153, "grad_norm": 0.6194249391555786, "learning_rate": 1.977359834922857e-06, "loss": 0.5662, "step": 563 }, { "epoch": 0.09618008185538882, "grad_norm": 0.5258529782295227, "learning_rate": 1.9772428239562277e-06, "loss": 0.5525, "step": 564 }, { "epoch": 0.09635061391541609, "grad_norm": 0.4712018370628357, "learning_rate": 1.9771255148751334e-06, "loss": 0.5716, "step": 565 }, { "epoch": 0.09652114597544338, "grad_norm": 0.38999003171920776, "learning_rate": 1.9770079077153607e-06, "loss": 0.5593, "step": 566 }, { "epoch": 0.09669167803547067, "grad_norm": 0.5134232640266418, "learning_rate": 1.9768900025127853e-06, "loss": 0.5628, "step": 567 }, { "epoch": 0.09686221009549795, "grad_norm": 0.4734049439430237, "learning_rate": 1.976771799303376e-06, "loss": 0.5643, "step": 568 }, { "epoch": 0.09703274215552524, "grad_norm": 0.42365288734436035, "learning_rate": 1.9766532981231915e-06, "loss": 0.568, "step": 569 }, { "epoch": 0.09720327421555253, "grad_norm": 0.7270985841751099, "learning_rate": 1.976534499008381e-06, "loss": 0.5636, "step": 570 }, { "epoch": 0.0973738062755798, "grad_norm": 0.8700385093688965, "learning_rate": 1.976415401995184e-06, "loss": 0.5673, "step": 571 }, { "epoch": 0.09754433833560709, "grad_norm": 0.6275728940963745, "learning_rate": 1.9762960071199336e-06, "loss": 0.5772, "step": 572 }, { "epoch": 0.09771487039563438, "grad_norm": 0.3480617105960846, "learning_rate": 1.9761763144190514e-06, "loss": 0.5675, "step": 573 }, { "epoch": 0.09788540245566166, "grad_norm": 0.6419890522956848, "learning_rate": 1.9760563239290495e-06, "loss": 0.5673, "step": 574 }, { "epoch": 0.09805593451568895, "grad_norm": 0.5626255869865417, "learning_rate": 1.9759360356865333e-06, "loss": 0.5794, "step": 575 }, { "epoch": 0.09822646657571624, "grad_norm": 0.3339516520500183, "learning_rate": 1.9758154497281966e-06, "loss": 0.5617, "step": 576 }, { "epoch": 0.09839699863574353, "grad_norm": 0.5528090000152588, "learning_rate": 1.975694566090825e-06, "loss": 0.5675, "step": 577 }, { "epoch": 0.0985675306957708, "grad_norm": 0.4296857714653015, "learning_rate": 1.975573384811295e-06, "loss": 0.5794, "step": 578 }, { "epoch": 0.09873806275579809, "grad_norm": 0.43378129601478577, "learning_rate": 1.975451905926574e-06, "loss": 0.5643, "step": 579 }, { "epoch": 0.09890859481582538, "grad_norm": 0.6495099663734436, "learning_rate": 1.9753301294737196e-06, "loss": 0.5582, "step": 580 }, { "epoch": 0.09907912687585266, "grad_norm": 0.42808154225349426, "learning_rate": 1.975208055489881e-06, "loss": 0.5682, "step": 581 }, { "epoch": 0.09924965893587995, "grad_norm": 0.4758428633213043, "learning_rate": 1.975085684012297e-06, "loss": 0.5626, "step": 582 }, { "epoch": 0.09942019099590724, "grad_norm": 0.6097862720489502, "learning_rate": 1.974963015078298e-06, "loss": 0.5737, "step": 583 }, { "epoch": 0.09959072305593451, "grad_norm": 0.3477315306663513, "learning_rate": 1.9748400487253056e-06, "loss": 0.5623, "step": 584 }, { "epoch": 0.0997612551159618, "grad_norm": 0.6643428206443787, "learning_rate": 1.9747167849908306e-06, "loss": 0.5662, "step": 585 }, { "epoch": 0.09993178717598909, "grad_norm": 0.5639585256576538, "learning_rate": 1.974593223912476e-06, "loss": 0.5592, "step": 586 }, { "epoch": 0.10010231923601637, "grad_norm": 0.3957652151584625, "learning_rate": 1.974469365527935e-06, "loss": 0.5758, "step": 587 }, { "epoch": 0.10027285129604366, "grad_norm": 0.7016974091529846, "learning_rate": 1.9743452098749913e-06, "loss": 0.5672, "step": 588 }, { "epoch": 0.10044338335607095, "grad_norm": 0.5436629056930542, "learning_rate": 1.9742207569915194e-06, "loss": 0.5701, "step": 589 }, { "epoch": 0.10061391541609822, "grad_norm": 0.4033794105052948, "learning_rate": 1.9740960069154842e-06, "loss": 0.5663, "step": 590 }, { "epoch": 0.10078444747612551, "grad_norm": 0.5005258321762085, "learning_rate": 1.973970959684942e-06, "loss": 0.5587, "step": 591 }, { "epoch": 0.1009549795361528, "grad_norm": 0.5139448642730713, "learning_rate": 1.973845615338039e-06, "loss": 0.5812, "step": 592 }, { "epoch": 0.10112551159618008, "grad_norm": 0.3810591995716095, "learning_rate": 1.9737199739130123e-06, "loss": 0.5699, "step": 593 }, { "epoch": 0.10129604365620737, "grad_norm": 0.4457865357398987, "learning_rate": 1.9735940354481902e-06, "loss": 0.5712, "step": 594 }, { "epoch": 0.10146657571623466, "grad_norm": 0.4659377336502075, "learning_rate": 1.9734677999819903e-06, "loss": 0.5545, "step": 595 }, { "epoch": 0.10163710777626193, "grad_norm": 0.3576979339122772, "learning_rate": 1.973341267552922e-06, "loss": 0.5772, "step": 596 }, { "epoch": 0.10180763983628922, "grad_norm": 0.4095207154750824, "learning_rate": 1.9732144381995848e-06, "loss": 0.5595, "step": 597 }, { "epoch": 0.10197817189631651, "grad_norm": 0.42436665296554565, "learning_rate": 1.973087311960669e-06, "loss": 0.5691, "step": 598 }, { "epoch": 0.10214870395634379, "grad_norm": 0.3475062847137451, "learning_rate": 1.972959888874955e-06, "loss": 0.5619, "step": 599 }, { "epoch": 0.10231923601637108, "grad_norm": 0.48956984281539917, "learning_rate": 1.972832168981314e-06, "loss": 0.5551, "step": 600 }, { "epoch": 0.10248976807639837, "grad_norm": 0.452088862657547, "learning_rate": 1.9727041523187084e-06, "loss": 0.5848, "step": 601 }, { "epoch": 0.10266030013642564, "grad_norm": 0.36038264632225037, "learning_rate": 1.97257583892619e-06, "loss": 0.5731, "step": 602 }, { "epoch": 0.10283083219645293, "grad_norm": 0.36114734411239624, "learning_rate": 1.972447228842902e-06, "loss": 0.5808, "step": 603 }, { "epoch": 0.10300136425648022, "grad_norm": 0.38365817070007324, "learning_rate": 1.9723183221080775e-06, "loss": 0.5543, "step": 604 }, { "epoch": 0.1031718963165075, "grad_norm": 0.34079596400260925, "learning_rate": 1.97218911876104e-06, "loss": 0.5809, "step": 605 }, { "epoch": 0.10334242837653479, "grad_norm": 0.36070719361305237, "learning_rate": 1.972059618841205e-06, "loss": 0.5574, "step": 606 }, { "epoch": 0.10351296043656208, "grad_norm": 0.3969060480594635, "learning_rate": 1.9719298223880763e-06, "loss": 0.5544, "step": 607 }, { "epoch": 0.10368349249658936, "grad_norm": 0.38917815685272217, "learning_rate": 1.971799729441249e-06, "loss": 0.5511, "step": 608 }, { "epoch": 0.10385402455661664, "grad_norm": 0.4314303696155548, "learning_rate": 1.97166934004041e-06, "loss": 0.5594, "step": 609 }, { "epoch": 0.10402455661664393, "grad_norm": 0.3728209137916565, "learning_rate": 1.971538654225335e-06, "loss": 0.5679, "step": 610 }, { "epoch": 0.10419508867667121, "grad_norm": 0.3441550135612488, "learning_rate": 1.97140767203589e-06, "loss": 0.5536, "step": 611 }, { "epoch": 0.1043656207366985, "grad_norm": 0.39468538761138916, "learning_rate": 1.971276393512032e-06, "loss": 0.5636, "step": 612 }, { "epoch": 0.10453615279672579, "grad_norm": 0.37954941391944885, "learning_rate": 1.971144818693809e-06, "loss": 0.5664, "step": 613 }, { "epoch": 0.10470668485675307, "grad_norm": 0.39011162519454956, "learning_rate": 1.9710129476213587e-06, "loss": 0.5618, "step": 614 }, { "epoch": 0.10487721691678036, "grad_norm": 0.3826182782649994, "learning_rate": 1.970880780334909e-06, "loss": 0.5677, "step": 615 }, { "epoch": 0.10504774897680765, "grad_norm": 0.48914775252342224, "learning_rate": 1.9707483168747783e-06, "loss": 0.5678, "step": 616 }, { "epoch": 0.10521828103683492, "grad_norm": 0.4400955140590668, "learning_rate": 1.970615557281376e-06, "loss": 0.5677, "step": 617 }, { "epoch": 0.10538881309686221, "grad_norm": 0.4150509238243103, "learning_rate": 1.9704825015952007e-06, "loss": 0.562, "step": 618 }, { "epoch": 0.1055593451568895, "grad_norm": 0.50996994972229, "learning_rate": 1.9703491498568423e-06, "loss": 0.5587, "step": 619 }, { "epoch": 0.10572987721691678, "grad_norm": 0.4692332148551941, "learning_rate": 1.97021550210698e-06, "loss": 0.5753, "step": 620 }, { "epoch": 0.10590040927694407, "grad_norm": 0.4658254384994507, "learning_rate": 1.9700815583863853e-06, "loss": 0.5565, "step": 621 }, { "epoch": 0.10607094133697136, "grad_norm": 0.4767058789730072, "learning_rate": 1.9699473187359174e-06, "loss": 0.57, "step": 622 }, { "epoch": 0.10624147339699863, "grad_norm": 0.34791260957717896, "learning_rate": 1.969812783196528e-06, "loss": 0.5713, "step": 623 }, { "epoch": 0.10641200545702592, "grad_norm": 0.43950381875038147, "learning_rate": 1.9696779518092567e-06, "loss": 0.5585, "step": 624 }, { "epoch": 0.10658253751705321, "grad_norm": 0.5135130882263184, "learning_rate": 1.969542824615235e-06, "loss": 0.5726, "step": 625 }, { "epoch": 0.10675306957708049, "grad_norm": 0.4924056828022003, "learning_rate": 1.969407401655686e-06, "loss": 0.5541, "step": 626 }, { "epoch": 0.10692360163710778, "grad_norm": 0.4262337386608124, "learning_rate": 1.96927168297192e-06, "loss": 0.6055, "step": 627 }, { "epoch": 0.10709413369713507, "grad_norm": 0.4578431248664856, "learning_rate": 1.9691356686053386e-06, "loss": 0.5653, "step": 628 }, { "epoch": 0.10726466575716234, "grad_norm": 0.6619389653205872, "learning_rate": 1.9689993585974345e-06, "loss": 0.5747, "step": 629 }, { "epoch": 0.10743519781718963, "grad_norm": 0.5923303365707397, "learning_rate": 1.96886275298979e-06, "loss": 0.5562, "step": 630 }, { "epoch": 0.10760572987721692, "grad_norm": 0.44426047801971436, "learning_rate": 1.9687258518240777e-06, "loss": 0.5355, "step": 631 }, { "epoch": 0.1077762619372442, "grad_norm": 0.5126156210899353, "learning_rate": 1.9685886551420594e-06, "loss": 0.5499, "step": 632 }, { "epoch": 0.10794679399727149, "grad_norm": 0.6300952434539795, "learning_rate": 1.9684511629855893e-06, "loss": 0.5624, "step": 633 }, { "epoch": 0.10811732605729878, "grad_norm": 0.5002975463867188, "learning_rate": 1.9683133753966086e-06, "loss": 0.5744, "step": 634 }, { "epoch": 0.10828785811732605, "grad_norm": 0.39933618903160095, "learning_rate": 1.9681752924171517e-06, "loss": 0.5805, "step": 635 }, { "epoch": 0.10845839017735334, "grad_norm": 0.6121488809585571, "learning_rate": 1.9680369140893407e-06, "loss": 0.5578, "step": 636 }, { "epoch": 0.10862892223738063, "grad_norm": 0.6016712188720703, "learning_rate": 1.9678982404553897e-06, "loss": 0.5535, "step": 637 }, { "epoch": 0.10879945429740791, "grad_norm": 0.4365065395832062, "learning_rate": 1.9677592715576016e-06, "loss": 0.5817, "step": 638 }, { "epoch": 0.1089699863574352, "grad_norm": 0.5382389426231384, "learning_rate": 1.9676200074383695e-06, "loss": 0.5595, "step": 639 }, { "epoch": 0.10914051841746249, "grad_norm": 0.552986204624176, "learning_rate": 1.9674804481401776e-06, "loss": 0.5728, "step": 640 }, { "epoch": 0.10931105047748976, "grad_norm": 0.36936983466148376, "learning_rate": 1.9673405937055987e-06, "loss": 0.5591, "step": 641 }, { "epoch": 0.10948158253751705, "grad_norm": 0.4702188968658447, "learning_rate": 1.9672004441772966e-06, "loss": 0.5633, "step": 642 }, { "epoch": 0.10965211459754434, "grad_norm": 0.5598739981651306, "learning_rate": 1.967059999598025e-06, "loss": 0.5663, "step": 643 }, { "epoch": 0.10982264665757162, "grad_norm": 0.4790910482406616, "learning_rate": 1.9669192600106275e-06, "loss": 0.5574, "step": 644 }, { "epoch": 0.10999317871759891, "grad_norm": 0.35663726925849915, "learning_rate": 1.9667782254580378e-06, "loss": 0.5501, "step": 645 }, { "epoch": 0.1101637107776262, "grad_norm": 0.43719905614852905, "learning_rate": 1.9666368959832782e-06, "loss": 0.5702, "step": 646 }, { "epoch": 0.11033424283765347, "grad_norm": 0.5694594383239746, "learning_rate": 1.9664952716294637e-06, "loss": 0.5461, "step": 647 }, { "epoch": 0.11050477489768076, "grad_norm": 0.5919437408447266, "learning_rate": 1.966353352439797e-06, "loss": 0.5492, "step": 648 }, { "epoch": 0.11067530695770805, "grad_norm": 0.5850440263748169, "learning_rate": 1.9662111384575717e-06, "loss": 0.543, "step": 649 }, { "epoch": 0.11084583901773533, "grad_norm": 0.41119086742401123, "learning_rate": 1.9660686297261712e-06, "loss": 0.5627, "step": 650 }, { "epoch": 0.11101637107776262, "grad_norm": 0.3939213156700134, "learning_rate": 1.9659258262890686e-06, "loss": 0.552, "step": 651 }, { "epoch": 0.11118690313778991, "grad_norm": 0.5054365396499634, "learning_rate": 1.965782728189827e-06, "loss": 0.564, "step": 652 }, { "epoch": 0.11135743519781718, "grad_norm": 0.5284506678581238, "learning_rate": 1.9656393354720997e-06, "loss": 0.5487, "step": 653 }, { "epoch": 0.11152796725784447, "grad_norm": 0.3425239324569702, "learning_rate": 1.9654956481796294e-06, "loss": 0.5675, "step": 654 }, { "epoch": 0.11169849931787176, "grad_norm": 0.46462586522102356, "learning_rate": 1.965351666356249e-06, "loss": 0.5544, "step": 655 }, { "epoch": 0.11186903137789904, "grad_norm": 0.4571056067943573, "learning_rate": 1.9652073900458806e-06, "loss": 0.5683, "step": 656 }, { "epoch": 0.11203956343792633, "grad_norm": 0.39550215005874634, "learning_rate": 1.965062819292537e-06, "loss": 0.5567, "step": 657 }, { "epoch": 0.11221009549795362, "grad_norm": 0.407055139541626, "learning_rate": 1.9649179541403213e-06, "loss": 0.5532, "step": 658 }, { "epoch": 0.1123806275579809, "grad_norm": 0.3835711181163788, "learning_rate": 1.9647727946334244e-06, "loss": 0.5417, "step": 659 }, { "epoch": 0.11255115961800818, "grad_norm": 0.38728925585746765, "learning_rate": 1.964627340816129e-06, "loss": 0.5688, "step": 660 }, { "epoch": 0.11272169167803547, "grad_norm": 0.33165115118026733, "learning_rate": 1.9644815927328056e-06, "loss": 0.5679, "step": 661 }, { "epoch": 0.11289222373806275, "grad_norm": 0.42033204436302185, "learning_rate": 1.9643355504279168e-06, "loss": 0.5632, "step": 662 }, { "epoch": 0.11306275579809004, "grad_norm": 0.3787094056606293, "learning_rate": 1.9641892139460133e-06, "loss": 0.5561, "step": 663 }, { "epoch": 0.11323328785811733, "grad_norm": 0.4706180989742279, "learning_rate": 1.9640425833317364e-06, "loss": 0.5674, "step": 664 }, { "epoch": 0.1134038199181446, "grad_norm": 0.4153285622596741, "learning_rate": 1.963895658629816e-06, "loss": 0.5623, "step": 665 }, { "epoch": 0.1135743519781719, "grad_norm": 0.4143642783164978, "learning_rate": 1.9637484398850736e-06, "loss": 0.5578, "step": 666 }, { "epoch": 0.11374488403819918, "grad_norm": 0.4237208962440491, "learning_rate": 1.963600927142418e-06, "loss": 0.5547, "step": 667 }, { "epoch": 0.11391541609822646, "grad_norm": 0.4191353917121887, "learning_rate": 1.96345312044685e-06, "loss": 0.5664, "step": 668 }, { "epoch": 0.11408594815825375, "grad_norm": 0.4121340811252594, "learning_rate": 1.963305019843458e-06, "loss": 0.5471, "step": 669 }, { "epoch": 0.11425648021828104, "grad_norm": 0.48723238706588745, "learning_rate": 1.963156625377422e-06, "loss": 0.5619, "step": 670 }, { "epoch": 0.11442701227830832, "grad_norm": 0.4804185628890991, "learning_rate": 1.96300793709401e-06, "loss": 0.563, "step": 671 }, { "epoch": 0.1145975443383356, "grad_norm": 0.413357138633728, "learning_rate": 1.9628589550385812e-06, "loss": 0.5604, "step": 672 }, { "epoch": 0.1147680763983629, "grad_norm": 0.7102183699607849, "learning_rate": 1.9627096792565827e-06, "loss": 0.5671, "step": 673 }, { "epoch": 0.11493860845839017, "grad_norm": 0.7849295139312744, "learning_rate": 1.9625601097935527e-06, "loss": 0.5438, "step": 674 }, { "epoch": 0.11510914051841746, "grad_norm": 0.49737444519996643, "learning_rate": 1.962410246695118e-06, "loss": 0.5545, "step": 675 }, { "epoch": 0.11527967257844475, "grad_norm": 0.46542850136756897, "learning_rate": 1.9622600900069958e-06, "loss": 0.5758, "step": 676 }, { "epoch": 0.11545020463847203, "grad_norm": 0.6955744624137878, "learning_rate": 1.9621096397749917e-06, "loss": 0.5381, "step": 677 }, { "epoch": 0.11562073669849932, "grad_norm": 0.5565853714942932, "learning_rate": 1.961958896045002e-06, "loss": 0.5796, "step": 678 }, { "epoch": 0.1157912687585266, "grad_norm": 0.39091721177101135, "learning_rate": 1.9618078588630126e-06, "loss": 0.5541, "step": 679 }, { "epoch": 0.11596180081855388, "grad_norm": 0.675932765007019, "learning_rate": 1.961656528275097e-06, "loss": 0.5747, "step": 680 }, { "epoch": 0.11613233287858117, "grad_norm": 0.7067851424217224, "learning_rate": 1.9615049043274208e-06, "loss": 0.5641, "step": 681 }, { "epoch": 0.11630286493860846, "grad_norm": 0.39402681589126587, "learning_rate": 1.9613529870662373e-06, "loss": 0.569, "step": 682 }, { "epoch": 0.11647339699863575, "grad_norm": 0.6251634359359741, "learning_rate": 1.9612007765378907e-06, "loss": 0.5594, "step": 683 }, { "epoch": 0.11664392905866303, "grad_norm": 0.6259270906448364, "learning_rate": 1.9610482727888127e-06, "loss": 0.5607, "step": 684 }, { "epoch": 0.11681446111869032, "grad_norm": 0.3737480342388153, "learning_rate": 1.960895475865526e-06, "loss": 0.5602, "step": 685 }, { "epoch": 0.1169849931787176, "grad_norm": 0.5192775130271912, "learning_rate": 1.960742385814643e-06, "loss": 0.5535, "step": 686 }, { "epoch": 0.11715552523874488, "grad_norm": 0.4264221489429474, "learning_rate": 1.9605890026828636e-06, "loss": 0.5531, "step": 687 }, { "epoch": 0.11732605729877217, "grad_norm": 0.46946707367897034, "learning_rate": 1.9604353265169797e-06, "loss": 0.56, "step": 688 }, { "epoch": 0.11749658935879946, "grad_norm": 0.46235525608062744, "learning_rate": 1.9602813573638707e-06, "loss": 0.5665, "step": 689 }, { "epoch": 0.11766712141882674, "grad_norm": 0.39178624749183655, "learning_rate": 1.9601270952705053e-06, "loss": 0.568, "step": 690 }, { "epoch": 0.11783765347885403, "grad_norm": 0.43903523683547974, "learning_rate": 1.959972540283943e-06, "loss": 0.5393, "step": 691 }, { "epoch": 0.11800818553888132, "grad_norm": 0.48046907782554626, "learning_rate": 1.959817692451331e-06, "loss": 0.5628, "step": 692 }, { "epoch": 0.11817871759890859, "grad_norm": 0.4377974569797516, "learning_rate": 1.959662551819908e-06, "loss": 0.56, "step": 693 }, { "epoch": 0.11834924965893588, "grad_norm": 0.37869778275489807, "learning_rate": 1.9595071184369994e-06, "loss": 0.5617, "step": 694 }, { "epoch": 0.11851978171896317, "grad_norm": 0.5731699466705322, "learning_rate": 1.959351392350022e-06, "loss": 0.573, "step": 695 }, { "epoch": 0.11869031377899045, "grad_norm": 0.3743929862976074, "learning_rate": 1.9591953736064805e-06, "loss": 0.5589, "step": 696 }, { "epoch": 0.11886084583901774, "grad_norm": 0.4147673547267914, "learning_rate": 1.9590390622539695e-06, "loss": 0.5485, "step": 697 }, { "epoch": 0.11903137789904503, "grad_norm": 0.4705844521522522, "learning_rate": 1.9588824583401736e-06, "loss": 0.5599, "step": 698 }, { "epoch": 0.1192019099590723, "grad_norm": 0.39228910207748413, "learning_rate": 1.958725561912865e-06, "loss": 0.5785, "step": 699 }, { "epoch": 0.11937244201909959, "grad_norm": 0.39754730463027954, "learning_rate": 1.9585683730199067e-06, "loss": 0.557, "step": 700 }, { "epoch": 0.11954297407912688, "grad_norm": 0.46609994769096375, "learning_rate": 1.9584108917092495e-06, "loss": 0.56, "step": 701 }, { "epoch": 0.11971350613915416, "grad_norm": 0.3678927421569824, "learning_rate": 1.9582531180289346e-06, "loss": 0.5619, "step": 702 }, { "epoch": 0.11988403819918145, "grad_norm": 0.42708367109298706, "learning_rate": 1.9580950520270917e-06, "loss": 0.5481, "step": 703 }, { "epoch": 0.12005457025920874, "grad_norm": 0.5537395477294922, "learning_rate": 1.9579366937519403e-06, "loss": 0.5645, "step": 704 }, { "epoch": 0.12022510231923601, "grad_norm": 0.47820043563842773, "learning_rate": 1.957778043251788e-06, "loss": 0.5698, "step": 705 }, { "epoch": 0.1203956343792633, "grad_norm": 0.43340158462524414, "learning_rate": 1.9576191005750324e-06, "loss": 0.5571, "step": 706 }, { "epoch": 0.12056616643929059, "grad_norm": 0.48589396476745605, "learning_rate": 1.9574598657701608e-06, "loss": 0.5693, "step": 707 }, { "epoch": 0.12073669849931787, "grad_norm": 0.3963133990764618, "learning_rate": 1.957300338885748e-06, "loss": 0.5527, "step": 708 }, { "epoch": 0.12090723055934516, "grad_norm": 0.5045079588890076, "learning_rate": 1.9571405199704583e-06, "loss": 0.5576, "step": 709 }, { "epoch": 0.12107776261937245, "grad_norm": 0.4563431441783905, "learning_rate": 1.956980409073047e-06, "loss": 0.55, "step": 710 }, { "epoch": 0.12124829467939972, "grad_norm": 0.44609710574150085, "learning_rate": 1.956820006242356e-06, "loss": 0.5525, "step": 711 }, { "epoch": 0.12141882673942701, "grad_norm": 0.4390665888786316, "learning_rate": 1.956659311527317e-06, "loss": 0.559, "step": 712 }, { "epoch": 0.1215893587994543, "grad_norm": 0.3507465422153473, "learning_rate": 1.956498324976952e-06, "loss": 0.5407, "step": 713 }, { "epoch": 0.12175989085948158, "grad_norm": 0.40906068682670593, "learning_rate": 1.95633704664037e-06, "loss": 0.5526, "step": 714 }, { "epoch": 0.12193042291950887, "grad_norm": 0.3908653259277344, "learning_rate": 1.956175476566771e-06, "loss": 0.5483, "step": 715 }, { "epoch": 0.12210095497953616, "grad_norm": 0.3964634835720062, "learning_rate": 1.9560136148054424e-06, "loss": 0.555, "step": 716 }, { "epoch": 0.12227148703956343, "grad_norm": 0.4320867359638214, "learning_rate": 1.955851461405761e-06, "loss": 0.5557, "step": 717 }, { "epoch": 0.12244201909959072, "grad_norm": 0.4480936825275421, "learning_rate": 1.9556890164171935e-06, "loss": 0.5432, "step": 718 }, { "epoch": 0.12261255115961801, "grad_norm": 0.3845686912536621, "learning_rate": 1.9555262798892943e-06, "loss": 0.5479, "step": 719 }, { "epoch": 0.12278308321964529, "grad_norm": 0.5671671032905579, "learning_rate": 1.9553632518717076e-06, "loss": 0.5558, "step": 720 }, { "epoch": 0.12295361527967258, "grad_norm": 0.7879170179367065, "learning_rate": 1.9551999324141658e-06, "loss": 0.5562, "step": 721 }, { "epoch": 0.12312414733969987, "grad_norm": 0.7197359204292297, "learning_rate": 1.955036321566491e-06, "loss": 0.5574, "step": 722 }, { "epoch": 0.12329467939972714, "grad_norm": 0.3807520270347595, "learning_rate": 1.9548724193785934e-06, "loss": 0.5548, "step": 723 }, { "epoch": 0.12346521145975443, "grad_norm": 0.5508958697319031, "learning_rate": 1.954708225900473e-06, "loss": 0.5645, "step": 724 }, { "epoch": 0.12363574351978172, "grad_norm": 0.7994507551193237, "learning_rate": 1.9545437411822174e-06, "loss": 0.5529, "step": 725 }, { "epoch": 0.123806275579809, "grad_norm": 0.6162607669830322, "learning_rate": 1.9543789652740043e-06, "loss": 0.5558, "step": 726 }, { "epoch": 0.12397680763983629, "grad_norm": 0.3924804925918579, "learning_rate": 1.9542138982260994e-06, "loss": 0.5507, "step": 727 }, { "epoch": 0.12414733969986358, "grad_norm": 0.712841272354126, "learning_rate": 1.9540485400888577e-06, "loss": 0.5667, "step": 728 }, { "epoch": 0.12431787175989086, "grad_norm": 0.5594262480735779, "learning_rate": 1.9538828909127233e-06, "loss": 0.5487, "step": 729 }, { "epoch": 0.12448840381991814, "grad_norm": 0.4557380974292755, "learning_rate": 1.9537169507482273e-06, "loss": 0.5477, "step": 730 }, { "epoch": 0.12465893587994543, "grad_norm": 0.5845224857330322, "learning_rate": 1.953550719645992e-06, "loss": 0.5531, "step": 731 }, { "epoch": 0.12482946793997271, "grad_norm": 0.5004369020462036, "learning_rate": 1.9533841976567266e-06, "loss": 0.546, "step": 732 }, { "epoch": 0.125, "grad_norm": 0.4830988347530365, "learning_rate": 1.9532173848312304e-06, "loss": 0.5651, "step": 733 }, { "epoch": 0.1251705320600273, "grad_norm": 0.4703359305858612, "learning_rate": 1.9530502812203903e-06, "loss": 0.5527, "step": 734 }, { "epoch": 0.12534106412005458, "grad_norm": 0.44919008016586304, "learning_rate": 1.952882886875182e-06, "loss": 0.565, "step": 735 }, { "epoch": 0.12551159618008187, "grad_norm": 0.6079019904136658, "learning_rate": 1.952715201846671e-06, "loss": 0.5582, "step": 736 }, { "epoch": 0.12568212824010913, "grad_norm": 0.4172450602054596, "learning_rate": 1.9525472261860113e-06, "loss": 0.5489, "step": 737 }, { "epoch": 0.12585266030013642, "grad_norm": 0.6292310357093811, "learning_rate": 1.952378959944443e-06, "loss": 0.5562, "step": 738 }, { "epoch": 0.1260231923601637, "grad_norm": 0.5903599262237549, "learning_rate": 1.952210403173299e-06, "loss": 0.5781, "step": 739 }, { "epoch": 0.126193724420191, "grad_norm": 0.3425973653793335, "learning_rate": 1.952041555923997e-06, "loss": 0.5507, "step": 740 }, { "epoch": 0.1263642564802183, "grad_norm": 0.6512162089347839, "learning_rate": 1.9518724182480464e-06, "loss": 0.578, "step": 741 }, { "epoch": 0.12653478854024558, "grad_norm": 0.51100754737854, "learning_rate": 1.9517029901970427e-06, "loss": 0.5726, "step": 742 }, { "epoch": 0.12670532060027284, "grad_norm": 0.41974154114723206, "learning_rate": 1.9515332718226714e-06, "loss": 0.5413, "step": 743 }, { "epoch": 0.12687585266030013, "grad_norm": 0.5773981213569641, "learning_rate": 1.9513632631767066e-06, "loss": 0.5574, "step": 744 }, { "epoch": 0.12704638472032742, "grad_norm": 0.534631073474884, "learning_rate": 1.9511929643110097e-06, "loss": 0.5651, "step": 745 }, { "epoch": 0.1272169167803547, "grad_norm": 0.42577818036079407, "learning_rate": 1.9510223752775325e-06, "loss": 0.5523, "step": 746 }, { "epoch": 0.127387448840382, "grad_norm": 0.5253404974937439, "learning_rate": 1.950851496128314e-06, "loss": 0.5495, "step": 747 }, { "epoch": 0.1275579809004093, "grad_norm": 0.43779006600379944, "learning_rate": 1.9506803269154815e-06, "loss": 0.5716, "step": 748 }, { "epoch": 0.12772851296043655, "grad_norm": 0.5063499808311462, "learning_rate": 1.9505088676912516e-06, "loss": 0.5485, "step": 749 }, { "epoch": 0.12789904502046384, "grad_norm": 0.5007257461547852, "learning_rate": 1.9503371185079296e-06, "loss": 0.5498, "step": 750 }, { "epoch": 0.12806957708049113, "grad_norm": 0.47316974401474, "learning_rate": 1.9501650794179083e-06, "loss": 0.553, "step": 751 }, { "epoch": 0.12824010914051842, "grad_norm": 0.4601980149745941, "learning_rate": 1.9499927504736694e-06, "loss": 0.5443, "step": 752 }, { "epoch": 0.1284106412005457, "grad_norm": 0.5112723112106323, "learning_rate": 1.9498201317277832e-06, "loss": 0.5439, "step": 753 }, { "epoch": 0.128581173260573, "grad_norm": 0.37005728483200073, "learning_rate": 1.9496472232329076e-06, "loss": 0.5589, "step": 754 }, { "epoch": 0.12875170532060026, "grad_norm": 0.4853660762310028, "learning_rate": 1.9494740250417903e-06, "loss": 0.5443, "step": 755 }, { "epoch": 0.12892223738062755, "grad_norm": 0.4041537642478943, "learning_rate": 1.949300537207266e-06, "loss": 0.5486, "step": 756 }, { "epoch": 0.12909276944065484, "grad_norm": 0.40221962332725525, "learning_rate": 1.949126759782258e-06, "loss": 0.5317, "step": 757 }, { "epoch": 0.12926330150068213, "grad_norm": 0.46186521649360657, "learning_rate": 1.9489526928197795e-06, "loss": 0.5705, "step": 758 }, { "epoch": 0.12943383356070942, "grad_norm": 0.47338148951530457, "learning_rate": 1.9487783363729296e-06, "loss": 0.552, "step": 759 }, { "epoch": 0.1296043656207367, "grad_norm": 0.44101738929748535, "learning_rate": 1.948603690494898e-06, "loss": 0.5483, "step": 760 }, { "epoch": 0.12977489768076397, "grad_norm": 0.41624701023101807, "learning_rate": 1.9484287552389604e-06, "loss": 0.5553, "step": 761 }, { "epoch": 0.12994542974079126, "grad_norm": 0.3483124077320099, "learning_rate": 1.948253530658483e-06, "loss": 0.5583, "step": 762 }, { "epoch": 0.13011596180081855, "grad_norm": 0.4810324013233185, "learning_rate": 1.9480780168069182e-06, "loss": 0.5474, "step": 763 }, { "epoch": 0.13028649386084584, "grad_norm": 0.4634459912776947, "learning_rate": 1.947902213737809e-06, "loss": 0.5442, "step": 764 }, { "epoch": 0.13045702592087313, "grad_norm": 0.33125039935112, "learning_rate": 1.9477261215047836e-06, "loss": 0.5611, "step": 765 }, { "epoch": 0.13062755798090042, "grad_norm": 0.4639657735824585, "learning_rate": 1.947549740161562e-06, "loss": 0.5572, "step": 766 }, { "epoch": 0.13079809004092768, "grad_norm": 0.52504962682724, "learning_rate": 1.9473730697619487e-06, "loss": 0.5623, "step": 767 }, { "epoch": 0.13096862210095497, "grad_norm": 0.41981661319732666, "learning_rate": 1.9471961103598393e-06, "loss": 0.5655, "step": 768 }, { "epoch": 0.13113915416098226, "grad_norm": 0.4140869081020355, "learning_rate": 1.9470188620092163e-06, "loss": 0.5506, "step": 769 }, { "epoch": 0.13130968622100955, "grad_norm": 0.5477293729782104, "learning_rate": 1.9468413247641505e-06, "loss": 0.5592, "step": 770 }, { "epoch": 0.13148021828103684, "grad_norm": 0.4434651732444763, "learning_rate": 1.9466634986788006e-06, "loss": 0.5561, "step": 771 }, { "epoch": 0.13165075034106413, "grad_norm": 0.5599554181098938, "learning_rate": 1.9464853838074138e-06, "loss": 0.5509, "step": 772 }, { "epoch": 0.1318212824010914, "grad_norm": 0.417349249124527, "learning_rate": 1.9463069802043253e-06, "loss": 0.5643, "step": 773 }, { "epoch": 0.13199181446111868, "grad_norm": 0.4504680633544922, "learning_rate": 1.946128287923958e-06, "loss": 0.5596, "step": 774 }, { "epoch": 0.13216234652114597, "grad_norm": 0.4446078836917877, "learning_rate": 1.945949307020824e-06, "loss": 0.5599, "step": 775 }, { "epoch": 0.13233287858117326, "grad_norm": 0.3625372648239136, "learning_rate": 1.9457700375495212e-06, "loss": 0.5671, "step": 776 }, { "epoch": 0.13250341064120055, "grad_norm": 0.48231276869773865, "learning_rate": 1.945590479564738e-06, "loss": 0.5482, "step": 777 }, { "epoch": 0.13267394270122784, "grad_norm": 0.4121912717819214, "learning_rate": 1.9454106331212498e-06, "loss": 0.5527, "step": 778 }, { "epoch": 0.1328444747612551, "grad_norm": 0.4654589891433716, "learning_rate": 1.9452304982739198e-06, "loss": 0.5591, "step": 779 }, { "epoch": 0.1330150068212824, "grad_norm": 0.5038689374923706, "learning_rate": 1.945050075077699e-06, "loss": 0.5535, "step": 780 }, { "epoch": 0.13318553888130968, "grad_norm": 0.39660993218421936, "learning_rate": 1.9448693635876267e-06, "loss": 0.56, "step": 781 }, { "epoch": 0.13335607094133697, "grad_norm": 0.47360673546791077, "learning_rate": 1.9446883638588304e-06, "loss": 0.5518, "step": 782 }, { "epoch": 0.13352660300136426, "grad_norm": 0.3952374756336212, "learning_rate": 1.9445070759465256e-06, "loss": 0.5724, "step": 783 }, { "epoch": 0.13369713506139155, "grad_norm": 0.5022841691970825, "learning_rate": 1.944325499906015e-06, "loss": 0.5549, "step": 784 }, { "epoch": 0.13386766712141882, "grad_norm": 0.5010923743247986, "learning_rate": 1.9441436357926896e-06, "loss": 0.5597, "step": 785 }, { "epoch": 0.1340381991814461, "grad_norm": 0.4214611053466797, "learning_rate": 1.943961483662028e-06, "loss": 0.5529, "step": 786 }, { "epoch": 0.1342087312414734, "grad_norm": 0.47380971908569336, "learning_rate": 1.9437790435695973e-06, "loss": 0.5471, "step": 787 }, { "epoch": 0.13437926330150068, "grad_norm": 0.515921950340271, "learning_rate": 1.943596315571052e-06, "loss": 0.5633, "step": 788 }, { "epoch": 0.13454979536152797, "grad_norm": 0.5466718077659607, "learning_rate": 1.943413299722135e-06, "loss": 0.5501, "step": 789 }, { "epoch": 0.13472032742155526, "grad_norm": 0.4565638303756714, "learning_rate": 1.9432299960786757e-06, "loss": 0.5543, "step": 790 }, { "epoch": 0.13489085948158253, "grad_norm": 0.3882017433643341, "learning_rate": 1.9430464046965923e-06, "loss": 0.5615, "step": 791 }, { "epoch": 0.13506139154160982, "grad_norm": 0.5496183633804321, "learning_rate": 1.9428625256318908e-06, "loss": 0.5627, "step": 792 }, { "epoch": 0.1352319236016371, "grad_norm": 0.4625122845172882, "learning_rate": 1.9426783589406645e-06, "loss": 0.5486, "step": 793 }, { "epoch": 0.1354024556616644, "grad_norm": 0.41997969150543213, "learning_rate": 1.942493904679095e-06, "loss": 0.5452, "step": 794 }, { "epoch": 0.13557298772169168, "grad_norm": 0.4320921003818512, "learning_rate": 1.942309162903451e-06, "loss": 0.5618, "step": 795 }, { "epoch": 0.13574351978171897, "grad_norm": 0.43192803859710693, "learning_rate": 1.9421241336700892e-06, "loss": 0.5618, "step": 796 }, { "epoch": 0.13591405184174624, "grad_norm": 0.3801654577255249, "learning_rate": 1.9419388170354548e-06, "loss": 0.5574, "step": 797 }, { "epoch": 0.13608458390177353, "grad_norm": 0.4831063449382782, "learning_rate": 1.9417532130560784e-06, "loss": 0.548, "step": 798 }, { "epoch": 0.13625511596180082, "grad_norm": 0.38888534903526306, "learning_rate": 1.941567321788581e-06, "loss": 0.5708, "step": 799 }, { "epoch": 0.1364256480218281, "grad_norm": 0.4589107632637024, "learning_rate": 1.9413811432896697e-06, "loss": 0.5742, "step": 800 }, { "epoch": 0.1365961800818554, "grad_norm": 0.4890725314617157, "learning_rate": 1.9411946776161388e-06, "loss": 0.5634, "step": 801 }, { "epoch": 0.13676671214188268, "grad_norm": 0.39768195152282715, "learning_rate": 1.9410079248248717e-06, "loss": 0.5542, "step": 802 }, { "epoch": 0.13693724420190995, "grad_norm": 0.48835986852645874, "learning_rate": 1.9408208849728386e-06, "loss": 0.5556, "step": 803 }, { "epoch": 0.13710777626193724, "grad_norm": 0.436010479927063, "learning_rate": 1.9406335581170965e-06, "loss": 0.5677, "step": 804 }, { "epoch": 0.13727830832196453, "grad_norm": 0.344992458820343, "learning_rate": 1.9404459443147916e-06, "loss": 0.5693, "step": 805 }, { "epoch": 0.13744884038199182, "grad_norm": 0.3935587406158447, "learning_rate": 1.940258043623156e-06, "loss": 0.5492, "step": 806 }, { "epoch": 0.1376193724420191, "grad_norm": 0.45187273621559143, "learning_rate": 1.9400698560995105e-06, "loss": 0.5611, "step": 807 }, { "epoch": 0.1377899045020464, "grad_norm": 0.3705230951309204, "learning_rate": 1.939881381801263e-06, "loss": 0.5736, "step": 808 }, { "epoch": 0.13796043656207366, "grad_norm": 0.39386627078056335, "learning_rate": 1.9396926207859087e-06, "loss": 0.5685, "step": 809 }, { "epoch": 0.13813096862210095, "grad_norm": 0.3702477812767029, "learning_rate": 1.9395035731110303e-06, "loss": 0.5601, "step": 810 }, { "epoch": 0.13830150068212824, "grad_norm": 0.3687109649181366, "learning_rate": 1.9393142388342982e-06, "loss": 0.5639, "step": 811 }, { "epoch": 0.13847203274215553, "grad_norm": 0.42839211225509644, "learning_rate": 1.9391246180134703e-06, "loss": 0.553, "step": 812 }, { "epoch": 0.13864256480218282, "grad_norm": 0.3752674162387848, "learning_rate": 1.9389347107063913e-06, "loss": 0.5514, "step": 813 }, { "epoch": 0.1388130968622101, "grad_norm": 0.38368934392929077, "learning_rate": 1.938744516970994e-06, "loss": 0.5716, "step": 814 }, { "epoch": 0.13898362892223737, "grad_norm": 0.3754592835903168, "learning_rate": 1.938554036865298e-06, "loss": 0.5458, "step": 815 }, { "epoch": 0.13915416098226466, "grad_norm": 0.3761025667190552, "learning_rate": 1.9383632704474107e-06, "loss": 0.563, "step": 816 }, { "epoch": 0.13932469304229195, "grad_norm": 0.36144253611564636, "learning_rate": 1.9381722177755267e-06, "loss": 0.5773, "step": 817 }, { "epoch": 0.13949522510231924, "grad_norm": 0.33363327383995056, "learning_rate": 1.9379808789079276e-06, "loss": 0.5337, "step": 818 }, { "epoch": 0.13966575716234653, "grad_norm": 0.4120798110961914, "learning_rate": 1.937789253902983e-06, "loss": 0.5612, "step": 819 }, { "epoch": 0.13983628922237382, "grad_norm": 0.4070011079311371, "learning_rate": 1.9375973428191492e-06, "loss": 0.5584, "step": 820 }, { "epoch": 0.14000682128240108, "grad_norm": 0.42036911845207214, "learning_rate": 1.93740514571497e-06, "loss": 0.5463, "step": 821 }, { "epoch": 0.14017735334242837, "grad_norm": 0.4309782087802887, "learning_rate": 1.9372126626490765e-06, "loss": 0.5846, "step": 822 }, { "epoch": 0.14034788540245566, "grad_norm": 0.3897610306739807, "learning_rate": 1.937019893680187e-06, "loss": 0.5504, "step": 823 }, { "epoch": 0.14051841746248295, "grad_norm": 0.3699517548084259, "learning_rate": 1.9368268388671068e-06, "loss": 0.5592, "step": 824 }, { "epoch": 0.14068894952251024, "grad_norm": 0.3632091283798218, "learning_rate": 1.9366334982687284e-06, "loss": 0.551, "step": 825 }, { "epoch": 0.14085948158253753, "grad_norm": 1.0206141471862793, "learning_rate": 1.9364398719440315e-06, "loss": 0.554, "step": 826 }, { "epoch": 0.1410300136425648, "grad_norm": 0.5427378416061401, "learning_rate": 1.936245959952084e-06, "loss": 0.5673, "step": 827 }, { "epoch": 0.14120054570259208, "grad_norm": 0.631756603717804, "learning_rate": 1.9360517623520393e-06, "loss": 0.5532, "step": 828 }, { "epoch": 0.14137107776261937, "grad_norm": 0.6074979305267334, "learning_rate": 1.9358572792031385e-06, "loss": 0.5628, "step": 829 }, { "epoch": 0.14154160982264666, "grad_norm": 0.48977214097976685, "learning_rate": 1.935662510564711e-06, "loss": 0.5575, "step": 830 }, { "epoch": 0.14171214188267395, "grad_norm": 0.4265683591365814, "learning_rate": 1.935467456496171e-06, "loss": 0.561, "step": 831 }, { "epoch": 0.14188267394270124, "grad_norm": 0.4576287865638733, "learning_rate": 1.935272117057022e-06, "loss": 0.556, "step": 832 }, { "epoch": 0.1420532060027285, "grad_norm": 0.45377016067504883, "learning_rate": 1.9350764923068535e-06, "loss": 0.5561, "step": 833 }, { "epoch": 0.1422237380627558, "grad_norm": 0.3955170214176178, "learning_rate": 1.9348805823053414e-06, "loss": 0.5769, "step": 834 }, { "epoch": 0.14239427012278308, "grad_norm": 0.4813169538974762, "learning_rate": 1.9346843871122506e-06, "loss": 0.5433, "step": 835 }, { "epoch": 0.14256480218281037, "grad_norm": 0.42308658361434937, "learning_rate": 1.93448790678743e-06, "loss": 0.5545, "step": 836 }, { "epoch": 0.14273533424283766, "grad_norm": 0.5245870351791382, "learning_rate": 1.934291141390819e-06, "loss": 0.5594, "step": 837 }, { "epoch": 0.14290586630286495, "grad_norm": 0.5994154214859009, "learning_rate": 1.9340940909824415e-06, "loss": 0.5606, "step": 838 }, { "epoch": 0.14307639836289224, "grad_norm": 0.43548011779785156, "learning_rate": 1.9338967556224087e-06, "loss": 0.5467, "step": 839 }, { "epoch": 0.1432469304229195, "grad_norm": 0.469954252243042, "learning_rate": 1.9336991353709197e-06, "loss": 0.5459, "step": 840 }, { "epoch": 0.1434174624829468, "grad_norm": 0.5122477412223816, "learning_rate": 1.933501230288259e-06, "loss": 0.5522, "step": 841 }, { "epoch": 0.14358799454297408, "grad_norm": 0.43415263295173645, "learning_rate": 1.9333030404348004e-06, "loss": 0.5567, "step": 842 }, { "epoch": 0.14375852660300137, "grad_norm": 0.42007696628570557, "learning_rate": 1.933104565871001e-06, "loss": 0.5413, "step": 843 }, { "epoch": 0.14392905866302866, "grad_norm": 0.4120525121688843, "learning_rate": 1.9329058066574088e-06, "loss": 0.553, "step": 844 }, { "epoch": 0.14409959072305595, "grad_norm": 0.34494391083717346, "learning_rate": 1.9327067628546553e-06, "loss": 0.5322, "step": 845 }, { "epoch": 0.1442701227830832, "grad_norm": 0.5468747615814209, "learning_rate": 1.9325074345234602e-06, "loss": 0.5515, "step": 846 }, { "epoch": 0.1444406548431105, "grad_norm": 0.6220876574516296, "learning_rate": 1.9323078217246308e-06, "loss": 0.5583, "step": 847 }, { "epoch": 0.1446111869031378, "grad_norm": 0.5272266268730164, "learning_rate": 1.9321079245190597e-06, "loss": 0.576, "step": 848 }, { "epoch": 0.14478171896316508, "grad_norm": 0.39103057980537415, "learning_rate": 1.931907742967727e-06, "loss": 0.5595, "step": 849 }, { "epoch": 0.14495225102319237, "grad_norm": 0.5823642611503601, "learning_rate": 1.931707277131699e-06, "loss": 0.5576, "step": 850 }, { "epoch": 0.14512278308321966, "grad_norm": 0.5365065932273865, "learning_rate": 1.93150652707213e-06, "loss": 0.5616, "step": 851 }, { "epoch": 0.14529331514324692, "grad_norm": 0.4258389174938202, "learning_rate": 1.9313054928502598e-06, "loss": 0.5504, "step": 852 }, { "epoch": 0.1454638472032742, "grad_norm": 0.5903677940368652, "learning_rate": 1.931104174527415e-06, "loss": 0.5575, "step": 853 }, { "epoch": 0.1456343792633015, "grad_norm": 0.48347654938697815, "learning_rate": 1.9309025721650092e-06, "loss": 0.5417, "step": 854 }, { "epoch": 0.1458049113233288, "grad_norm": 0.4118397533893585, "learning_rate": 1.9307006858245428e-06, "loss": 0.5493, "step": 855 }, { "epoch": 0.14597544338335608, "grad_norm": 0.4552883505821228, "learning_rate": 1.930498515567602e-06, "loss": 0.5398, "step": 856 }, { "epoch": 0.14614597544338337, "grad_norm": 0.3356091380119324, "learning_rate": 1.9302960614558604e-06, "loss": 0.5512, "step": 857 }, { "epoch": 0.14631650750341063, "grad_norm": 0.4241127073764801, "learning_rate": 1.9300933235510788e-06, "loss": 0.5621, "step": 858 }, { "epoch": 0.14648703956343792, "grad_norm": 0.39341604709625244, "learning_rate": 1.929890301915103e-06, "loss": 0.5598, "step": 859 }, { "epoch": 0.1466575716234652, "grad_norm": 0.3889201283454895, "learning_rate": 1.929686996609866e-06, "loss": 0.5463, "step": 860 }, { "epoch": 0.1468281036834925, "grad_norm": 0.40517398715019226, "learning_rate": 1.929483407697387e-06, "loss": 0.5603, "step": 861 }, { "epoch": 0.1469986357435198, "grad_norm": 0.4488038718700409, "learning_rate": 1.9292795352397737e-06, "loss": 0.5567, "step": 862 }, { "epoch": 0.14716916780354708, "grad_norm": 0.49497777223587036, "learning_rate": 1.929075379299218e-06, "loss": 0.5485, "step": 863 }, { "epoch": 0.14733969986357434, "grad_norm": 0.45604822039604187, "learning_rate": 1.9288709399379984e-06, "loss": 0.552, "step": 864 }, { "epoch": 0.14751023192360163, "grad_norm": 0.3964408040046692, "learning_rate": 1.928666217218481e-06, "loss": 0.5665, "step": 865 }, { "epoch": 0.14768076398362892, "grad_norm": 0.4897339940071106, "learning_rate": 1.928461211203118e-06, "loss": 0.5387, "step": 866 }, { "epoch": 0.1478512960436562, "grad_norm": 0.477505624294281, "learning_rate": 1.9282559219544476e-06, "loss": 0.5553, "step": 867 }, { "epoch": 0.1480218281036835, "grad_norm": 0.4992867410182953, "learning_rate": 1.9280503495350953e-06, "loss": 0.5678, "step": 868 }, { "epoch": 0.1481923601637108, "grad_norm": 0.542177140712738, "learning_rate": 1.927844494007771e-06, "loss": 0.5472, "step": 869 }, { "epoch": 0.14836289222373805, "grad_norm": 0.41191962361335754, "learning_rate": 1.9276383554352736e-06, "loss": 0.5753, "step": 870 }, { "epoch": 0.14853342428376534, "grad_norm": 0.643372118473053, "learning_rate": 1.9274319338804866e-06, "loss": 0.5607, "step": 871 }, { "epoch": 0.14870395634379263, "grad_norm": 0.757784903049469, "learning_rate": 1.92722522940638e-06, "loss": 0.5533, "step": 872 }, { "epoch": 0.14887448840381992, "grad_norm": 0.47195321321487427, "learning_rate": 1.9270182420760104e-06, "loss": 0.5521, "step": 873 }, { "epoch": 0.1490450204638472, "grad_norm": 0.5352654457092285, "learning_rate": 1.926810971952521e-06, "loss": 0.5766, "step": 874 }, { "epoch": 0.1492155525238745, "grad_norm": 0.6699765920639038, "learning_rate": 1.9266034190991415e-06, "loss": 0.574, "step": 875 }, { "epoch": 0.14938608458390176, "grad_norm": 0.44111526012420654, "learning_rate": 1.926395583579186e-06, "loss": 0.5707, "step": 876 }, { "epoch": 0.14955661664392905, "grad_norm": 0.5262956023216248, "learning_rate": 1.926187465456057e-06, "loss": 0.5551, "step": 877 }, { "epoch": 0.14972714870395634, "grad_norm": 0.5442524552345276, "learning_rate": 1.925979064793242e-06, "loss": 0.5482, "step": 878 }, { "epoch": 0.14989768076398363, "grad_norm": 0.46086442470550537, "learning_rate": 1.9257703816543146e-06, "loss": 0.5678, "step": 879 }, { "epoch": 0.15006821282401092, "grad_norm": 0.45078033208847046, "learning_rate": 1.925561416102936e-06, "loss": 0.5542, "step": 880 }, { "epoch": 0.1502387448840382, "grad_norm": 0.3949615955352783, "learning_rate": 1.9253521682028516e-06, "loss": 0.549, "step": 881 }, { "epoch": 0.15040927694406547, "grad_norm": 0.4516843855381012, "learning_rate": 1.9251426380178947e-06, "loss": 0.5714, "step": 882 }, { "epoch": 0.15057980900409276, "grad_norm": 0.3815528154373169, "learning_rate": 1.9249328256119834e-06, "loss": 0.5473, "step": 883 }, { "epoch": 0.15075034106412005, "grad_norm": 0.4536670446395874, "learning_rate": 1.9247227310491224e-06, "loss": 0.5674, "step": 884 }, { "epoch": 0.15092087312414734, "grad_norm": 0.3400889039039612, "learning_rate": 1.924512354393402e-06, "loss": 0.5777, "step": 885 }, { "epoch": 0.15109140518417463, "grad_norm": 0.485637903213501, "learning_rate": 1.9243016957090003e-06, "loss": 0.5513, "step": 886 }, { "epoch": 0.15126193724420192, "grad_norm": 0.4044179916381836, "learning_rate": 1.9240907550601787e-06, "loss": 0.5454, "step": 887 }, { "epoch": 0.15143246930422918, "grad_norm": 0.43215033411979675, "learning_rate": 1.923879532511287e-06, "loss": 0.555, "step": 888 }, { "epoch": 0.15160300136425647, "grad_norm": 0.46532517671585083, "learning_rate": 1.9236680281267597e-06, "loss": 0.5576, "step": 889 }, { "epoch": 0.15177353342428376, "grad_norm": 0.3941131830215454, "learning_rate": 1.923456241971118e-06, "loss": 0.5381, "step": 890 }, { "epoch": 0.15194406548431105, "grad_norm": 0.46986761689186096, "learning_rate": 1.923244174108968e-06, "loss": 0.5365, "step": 891 }, { "epoch": 0.15211459754433834, "grad_norm": 0.4742710292339325, "learning_rate": 1.923031824605003e-06, "loss": 0.5623, "step": 892 }, { "epoch": 0.15228512960436563, "grad_norm": 0.41251805424690247, "learning_rate": 1.922819193524001e-06, "loss": 0.5533, "step": 893 }, { "epoch": 0.1524556616643929, "grad_norm": 0.47409358620643616, "learning_rate": 1.9226062809308273e-06, "loss": 0.5622, "step": 894 }, { "epoch": 0.15262619372442018, "grad_norm": 0.4420965015888214, "learning_rate": 1.9223930868904317e-06, "loss": 0.5421, "step": 895 }, { "epoch": 0.15279672578444747, "grad_norm": 0.48168325424194336, "learning_rate": 1.922179611467851e-06, "loss": 0.5488, "step": 896 }, { "epoch": 0.15296725784447476, "grad_norm": 0.46085238456726074, "learning_rate": 1.921965854728207e-06, "loss": 0.5497, "step": 897 }, { "epoch": 0.15313778990450205, "grad_norm": 0.38156741857528687, "learning_rate": 1.921751816736708e-06, "loss": 0.5654, "step": 898 }, { "epoch": 0.15330832196452934, "grad_norm": 0.4924895167350769, "learning_rate": 1.9215374975586467e-06, "loss": 0.5607, "step": 899 }, { "epoch": 0.1534788540245566, "grad_norm": 0.44584453105926514, "learning_rate": 1.9213228972594035e-06, "loss": 0.545, "step": 900 }, { "epoch": 0.1536493860845839, "grad_norm": 0.3664495348930359, "learning_rate": 1.9211080159044437e-06, "loss": 0.5444, "step": 901 }, { "epoch": 0.15381991814461118, "grad_norm": 0.5687245726585388, "learning_rate": 1.9208928535593182e-06, "loss": 0.5544, "step": 902 }, { "epoch": 0.15399045020463847, "grad_norm": 0.5331858992576599, "learning_rate": 1.920677410289663e-06, "loss": 0.5549, "step": 903 }, { "epoch": 0.15416098226466576, "grad_norm": 0.468654602766037, "learning_rate": 1.9204616861612016e-06, "loss": 0.5313, "step": 904 }, { "epoch": 0.15433151432469305, "grad_norm": 0.40355348587036133, "learning_rate": 1.920245681239741e-06, "loss": 0.5492, "step": 905 }, { "epoch": 0.15450204638472032, "grad_norm": 0.5259311199188232, "learning_rate": 1.920029395591176e-06, "loss": 0.5557, "step": 906 }, { "epoch": 0.1546725784447476, "grad_norm": 0.48963090777397156, "learning_rate": 1.9198128292814852e-06, "loss": 0.552, "step": 907 }, { "epoch": 0.1548431105047749, "grad_norm": 0.36119452118873596, "learning_rate": 1.919595982376734e-06, "loss": 0.5656, "step": 908 }, { "epoch": 0.15501364256480218, "grad_norm": 0.456949383020401, "learning_rate": 1.919378854943073e-06, "loss": 0.571, "step": 909 }, { "epoch": 0.15518417462482947, "grad_norm": 0.45524856448173523, "learning_rate": 1.9191614470467382e-06, "loss": 0.562, "step": 910 }, { "epoch": 0.15535470668485676, "grad_norm": 0.5876911878585815, "learning_rate": 1.9189437587540513e-06, "loss": 0.5733, "step": 911 }, { "epoch": 0.15552523874488403, "grad_norm": 0.48358115553855896, "learning_rate": 1.9187257901314194e-06, "loss": 0.5519, "step": 912 }, { "epoch": 0.15569577080491132, "grad_norm": 0.39478081464767456, "learning_rate": 1.918507541245336e-06, "loss": 0.5618, "step": 913 }, { "epoch": 0.1558663028649386, "grad_norm": 0.4886495769023895, "learning_rate": 1.918289012162379e-06, "loss": 0.5627, "step": 914 }, { "epoch": 0.1560368349249659, "grad_norm": 0.5879368185997009, "learning_rate": 1.918070202949212e-06, "loss": 0.5482, "step": 915 }, { "epoch": 0.15620736698499318, "grad_norm": 0.7165773510932922, "learning_rate": 1.9178511136725843e-06, "loss": 0.5584, "step": 916 }, { "epoch": 0.15637789904502047, "grad_norm": 0.7150624990463257, "learning_rate": 1.9176317443993307e-06, "loss": 0.5639, "step": 917 }, { "epoch": 0.15654843110504774, "grad_norm": 0.45642855763435364, "learning_rate": 1.9174120951963706e-06, "loss": 0.5507, "step": 918 }, { "epoch": 0.15671896316507503, "grad_norm": 0.42844220995903015, "learning_rate": 1.9171921661307105e-06, "loss": 0.5409, "step": 919 }, { "epoch": 0.15688949522510232, "grad_norm": 0.5636053085327148, "learning_rate": 1.9169719572694406e-06, "loss": 0.5448, "step": 920 }, { "epoch": 0.1570600272851296, "grad_norm": 0.4291675388813019, "learning_rate": 1.9167514686797373e-06, "loss": 0.5603, "step": 921 }, { "epoch": 0.1572305593451569, "grad_norm": 0.4401952028274536, "learning_rate": 1.916530700428862e-06, "loss": 0.5476, "step": 922 }, { "epoch": 0.15740109140518418, "grad_norm": 0.5289968252182007, "learning_rate": 1.9163096525841614e-06, "loss": 0.55, "step": 923 }, { "epoch": 0.15757162346521145, "grad_norm": 0.38858067989349365, "learning_rate": 1.916088325213068e-06, "loss": 0.5564, "step": 924 }, { "epoch": 0.15774215552523874, "grad_norm": 0.477230429649353, "learning_rate": 1.915866718383099e-06, "loss": 0.5518, "step": 925 }, { "epoch": 0.15791268758526603, "grad_norm": 0.5338437557220459, "learning_rate": 1.915644832161857e-06, "loss": 0.5662, "step": 926 }, { "epoch": 0.15808321964529332, "grad_norm": 0.4159543812274933, "learning_rate": 1.91542266661703e-06, "loss": 0.5654, "step": 927 }, { "epoch": 0.1582537517053206, "grad_norm": 0.3964542746543884, "learning_rate": 1.9152002218163904e-06, "loss": 0.5541, "step": 928 }, { "epoch": 0.1584242837653479, "grad_norm": 0.5510343909263611, "learning_rate": 1.9149774978277976e-06, "loss": 0.5575, "step": 929 }, { "epoch": 0.15859481582537516, "grad_norm": 0.4642314910888672, "learning_rate": 1.9147544947191947e-06, "loss": 0.5476, "step": 930 }, { "epoch": 0.15876534788540245, "grad_norm": 0.4070025086402893, "learning_rate": 1.9145312125586095e-06, "loss": 0.5626, "step": 931 }, { "epoch": 0.15893587994542974, "grad_norm": 0.41872039437294006, "learning_rate": 1.914307651414157e-06, "loss": 0.5482, "step": 932 }, { "epoch": 0.15910641200545703, "grad_norm": 0.3984706401824951, "learning_rate": 1.914083811354035e-06, "loss": 0.5533, "step": 933 }, { "epoch": 0.15927694406548432, "grad_norm": 0.40920400619506836, "learning_rate": 1.9138596924465277e-06, "loss": 0.5505, "step": 934 }, { "epoch": 0.1594474761255116, "grad_norm": 0.3676402270793915, "learning_rate": 1.9136352947600047e-06, "loss": 0.5634, "step": 935 }, { "epoch": 0.15961800818553887, "grad_norm": 0.4323231875896454, "learning_rate": 1.9134106183629184e-06, "loss": 0.5611, "step": 936 }, { "epoch": 0.15978854024556616, "grad_norm": 0.39513805508613586, "learning_rate": 1.9131856633238096e-06, "loss": 0.5493, "step": 937 }, { "epoch": 0.15995907230559345, "grad_norm": 0.47802916169166565, "learning_rate": 1.9129604297113015e-06, "loss": 0.5516, "step": 938 }, { "epoch": 0.16012960436562074, "grad_norm": 0.394153892993927, "learning_rate": 1.9127349175941032e-06, "loss": 0.5467, "step": 939 }, { "epoch": 0.16030013642564803, "grad_norm": 0.4861249029636383, "learning_rate": 1.9125091270410088e-06, "loss": 0.5611, "step": 940 }, { "epoch": 0.16047066848567532, "grad_norm": 0.5115260481834412, "learning_rate": 1.912283058120897e-06, "loss": 0.5379, "step": 941 }, { "epoch": 0.16064120054570258, "grad_norm": 0.3449493646621704, "learning_rate": 1.9120567109027323e-06, "loss": 0.5678, "step": 942 }, { "epoch": 0.16081173260572987, "grad_norm": 0.4588490426540375, "learning_rate": 1.9118300854555625e-06, "loss": 0.5415, "step": 943 }, { "epoch": 0.16098226466575716, "grad_norm": 0.41280704736709595, "learning_rate": 1.9116031818485217e-06, "loss": 0.5542, "step": 944 }, { "epoch": 0.16115279672578445, "grad_norm": 0.3902633786201477, "learning_rate": 1.9113760001508284e-06, "loss": 0.5663, "step": 945 }, { "epoch": 0.16132332878581174, "grad_norm": 0.3669435977935791, "learning_rate": 1.9111485404317853e-06, "loss": 0.5393, "step": 946 }, { "epoch": 0.16149386084583903, "grad_norm": 0.3833954632282257, "learning_rate": 1.9109208027607818e-06, "loss": 0.5643, "step": 947 }, { "epoch": 0.16166439290586632, "grad_norm": 0.4436415135860443, "learning_rate": 1.9106927872072893e-06, "loss": 0.54, "step": 948 }, { "epoch": 0.16183492496589358, "grad_norm": 0.36787348985671997, "learning_rate": 1.9104644938408666e-06, "loss": 0.5533, "step": 949 }, { "epoch": 0.16200545702592087, "grad_norm": 0.5090466141700745, "learning_rate": 1.9102359227311555e-06, "loss": 0.5408, "step": 950 }, { "epoch": 0.16217598908594816, "grad_norm": 0.4917638599872589, "learning_rate": 1.9100070739478833e-06, "loss": 0.5659, "step": 951 }, { "epoch": 0.16234652114597545, "grad_norm": 0.3450438678264618, "learning_rate": 1.909777947560862e-06, "loss": 0.5501, "step": 952 }, { "epoch": 0.16251705320600274, "grad_norm": 0.47705134749412537, "learning_rate": 1.909548543639988e-06, "loss": 0.563, "step": 953 }, { "epoch": 0.16268758526603003, "grad_norm": 0.41855838894844055, "learning_rate": 1.9093188622552425e-06, "loss": 0.5501, "step": 954 }, { "epoch": 0.1628581173260573, "grad_norm": 0.48679107427597046, "learning_rate": 1.9090889034766917e-06, "loss": 0.5496, "step": 955 }, { "epoch": 0.16302864938608458, "grad_norm": 0.4727509319782257, "learning_rate": 1.9088586673744853e-06, "loss": 0.5451, "step": 956 }, { "epoch": 0.16319918144611187, "grad_norm": 0.482374906539917, "learning_rate": 1.9086281540188593e-06, "loss": 0.5506, "step": 957 }, { "epoch": 0.16336971350613916, "grad_norm": 0.47090649604797363, "learning_rate": 1.9083973634801322e-06, "loss": 0.5512, "step": 958 }, { "epoch": 0.16354024556616645, "grad_norm": 0.47305747866630554, "learning_rate": 1.908166295828709e-06, "loss": 0.5626, "step": 959 }, { "epoch": 0.16371077762619374, "grad_norm": 0.39664438366889954, "learning_rate": 1.9079349511350787e-06, "loss": 0.5689, "step": 960 }, { "epoch": 0.163881309686221, "grad_norm": 0.4571145474910736, "learning_rate": 1.9077033294698137e-06, "loss": 0.5583, "step": 961 }, { "epoch": 0.1640518417462483, "grad_norm": 0.3828239142894745, "learning_rate": 1.9074714309035723e-06, "loss": 0.5401, "step": 962 }, { "epoch": 0.16422237380627558, "grad_norm": 0.4175052046775818, "learning_rate": 1.9072392555070967e-06, "loss": 0.5396, "step": 963 }, { "epoch": 0.16439290586630287, "grad_norm": 0.41923534870147705, "learning_rate": 1.9070068033512134e-06, "loss": 0.5477, "step": 964 }, { "epoch": 0.16456343792633016, "grad_norm": 0.4210037589073181, "learning_rate": 1.9067740745068335e-06, "loss": 0.5478, "step": 965 }, { "epoch": 0.16473396998635745, "grad_norm": 0.5002460479736328, "learning_rate": 1.9065410690449526e-06, "loss": 0.5392, "step": 966 }, { "epoch": 0.1649045020463847, "grad_norm": 0.34181100130081177, "learning_rate": 1.9063077870366503e-06, "loss": 0.5337, "step": 967 }, { "epoch": 0.165075034106412, "grad_norm": 0.39679914712905884, "learning_rate": 1.906074228553091e-06, "loss": 0.5657, "step": 968 }, { "epoch": 0.1652455661664393, "grad_norm": 0.40103983879089355, "learning_rate": 1.9058403936655237e-06, "loss": 0.5326, "step": 969 }, { "epoch": 0.16541609822646658, "grad_norm": 0.4794570207595825, "learning_rate": 1.9056062824452805e-06, "loss": 0.5581, "step": 970 }, { "epoch": 0.16558663028649387, "grad_norm": 0.4267297089099884, "learning_rate": 1.9053718949637792e-06, "loss": 0.5458, "step": 971 }, { "epoch": 0.16575716234652116, "grad_norm": 0.43018782138824463, "learning_rate": 1.905137231292521e-06, "loss": 0.5417, "step": 972 }, { "epoch": 0.16592769440654842, "grad_norm": 0.47038307785987854, "learning_rate": 1.9049022915030915e-06, "loss": 0.5524, "step": 973 }, { "epoch": 0.1660982264665757, "grad_norm": 0.38833221793174744, "learning_rate": 1.904667075667161e-06, "loss": 0.5526, "step": 974 }, { "epoch": 0.166268758526603, "grad_norm": 0.4255011975765228, "learning_rate": 1.9044315838564838e-06, "loss": 0.5696, "step": 975 }, { "epoch": 0.1664392905866303, "grad_norm": 0.3458400368690491, "learning_rate": 1.9041958161428976e-06, "loss": 0.5473, "step": 976 }, { "epoch": 0.16660982264665758, "grad_norm": 0.41008731722831726, "learning_rate": 1.9039597725983255e-06, "loss": 0.5409, "step": 977 }, { "epoch": 0.16678035470668487, "grad_norm": 0.47080957889556885, "learning_rate": 1.903723453294774e-06, "loss": 0.5488, "step": 978 }, { "epoch": 0.16695088676671213, "grad_norm": 0.4224494397640228, "learning_rate": 1.9034868583043335e-06, "loss": 0.5601, "step": 979 }, { "epoch": 0.16712141882673942, "grad_norm": 0.38210219144821167, "learning_rate": 1.9032499876991793e-06, "loss": 0.5615, "step": 980 }, { "epoch": 0.1672919508867667, "grad_norm": 0.5055222511291504, "learning_rate": 1.9030128415515704e-06, "loss": 0.56, "step": 981 }, { "epoch": 0.167462482946794, "grad_norm": 0.41526558995246887, "learning_rate": 1.90277541993385e-06, "loss": 0.5478, "step": 982 }, { "epoch": 0.1676330150068213, "grad_norm": 0.38019880652427673, "learning_rate": 1.9025377229184442e-06, "loss": 0.5555, "step": 983 }, { "epoch": 0.16780354706684858, "grad_norm": 0.48236194252967834, "learning_rate": 1.9022997505778653e-06, "loss": 0.5391, "step": 984 }, { "epoch": 0.16797407912687584, "grad_norm": 0.450827419757843, "learning_rate": 1.9020615029847076e-06, "loss": 0.5604, "step": 985 }, { "epoch": 0.16814461118690313, "grad_norm": 0.4144051671028137, "learning_rate": 1.9018229802116501e-06, "loss": 0.5422, "step": 986 }, { "epoch": 0.16831514324693042, "grad_norm": 0.38686126470565796, "learning_rate": 1.9015841823314562e-06, "loss": 0.5461, "step": 987 }, { "epoch": 0.1684856753069577, "grad_norm": 0.4112605154514313, "learning_rate": 1.9013451094169725e-06, "loss": 0.5329, "step": 988 }, { "epoch": 0.168656207366985, "grad_norm": 0.4212754964828491, "learning_rate": 1.9011057615411299e-06, "loss": 0.5681, "step": 989 }, { "epoch": 0.1688267394270123, "grad_norm": 0.45845523476600647, "learning_rate": 1.900866138776943e-06, "loss": 0.5573, "step": 990 }, { "epoch": 0.16899727148703955, "grad_norm": 0.5667742490768433, "learning_rate": 1.9006262411975102e-06, "loss": 0.5599, "step": 991 }, { "epoch": 0.16916780354706684, "grad_norm": 0.5878269672393799, "learning_rate": 1.9003860688760143e-06, "loss": 0.5534, "step": 992 }, { "epoch": 0.16933833560709413, "grad_norm": 0.6614935994148254, "learning_rate": 1.900145621885721e-06, "loss": 0.5464, "step": 993 }, { "epoch": 0.16950886766712142, "grad_norm": 0.44332489371299744, "learning_rate": 1.8999049002999805e-06, "loss": 0.5511, "step": 994 }, { "epoch": 0.1696793997271487, "grad_norm": 0.3465108871459961, "learning_rate": 1.8996639041922266e-06, "loss": 0.5506, "step": 995 }, { "epoch": 0.169849931787176, "grad_norm": 0.4552614688873291, "learning_rate": 1.8994226336359765e-06, "loss": 0.5495, "step": 996 }, { "epoch": 0.17002046384720326, "grad_norm": 0.502493679523468, "learning_rate": 1.8991810887048315e-06, "loss": 0.5524, "step": 997 }, { "epoch": 0.17019099590723055, "grad_norm": 0.3657921254634857, "learning_rate": 1.8989392694724766e-06, "loss": 0.5616, "step": 998 }, { "epoch": 0.17036152796725784, "grad_norm": 0.49448162317276, "learning_rate": 1.8986971760126806e-06, "loss": 0.5446, "step": 999 }, { "epoch": 0.17053206002728513, "grad_norm": 0.6634671688079834, "learning_rate": 1.8984548083992952e-06, "loss": 0.5656, "step": 1000 }, { "epoch": 0.17070259208731242, "grad_norm": 0.7345843315124512, "learning_rate": 1.8982121667062567e-06, "loss": 0.5497, "step": 1001 }, { "epoch": 0.1708731241473397, "grad_norm": 0.45212310552597046, "learning_rate": 1.8979692510075842e-06, "loss": 0.5444, "step": 1002 }, { "epoch": 0.17104365620736697, "grad_norm": 0.3672301471233368, "learning_rate": 1.8977260613773812e-06, "loss": 0.5407, "step": 1003 }, { "epoch": 0.17121418826739426, "grad_norm": 0.5045791864395142, "learning_rate": 1.8974825978898339e-06, "loss": 0.5597, "step": 1004 }, { "epoch": 0.17138472032742155, "grad_norm": 0.4292773902416229, "learning_rate": 1.8972388606192126e-06, "loss": 0.5406, "step": 1005 }, { "epoch": 0.17155525238744884, "grad_norm": 0.3314538598060608, "learning_rate": 1.896994849639871e-06, "loss": 0.5534, "step": 1006 }, { "epoch": 0.17172578444747613, "grad_norm": 0.43244096636772156, "learning_rate": 1.8967505650262465e-06, "loss": 0.5411, "step": 1007 }, { "epoch": 0.17189631650750342, "grad_norm": 0.4487178921699524, "learning_rate": 1.8965060068528594e-06, "loss": 0.5489, "step": 1008 }, { "epoch": 0.17206684856753068, "grad_norm": 0.4456799328327179, "learning_rate": 1.8962611751943143e-06, "loss": 0.5577, "step": 1009 }, { "epoch": 0.17223738062755797, "grad_norm": 0.3741670846939087, "learning_rate": 1.8960160701252982e-06, "loss": 0.5506, "step": 1010 }, { "epoch": 0.17240791268758526, "grad_norm": 0.5865883231163025, "learning_rate": 1.8957706917205822e-06, "loss": 0.5448, "step": 1011 }, { "epoch": 0.17257844474761255, "grad_norm": 0.673882782459259, "learning_rate": 1.8955250400550208e-06, "loss": 0.5612, "step": 1012 }, { "epoch": 0.17274897680763984, "grad_norm": 0.5418551564216614, "learning_rate": 1.8952791152035516e-06, "loss": 0.5577, "step": 1013 }, { "epoch": 0.17291950886766713, "grad_norm": 0.3859636187553406, "learning_rate": 1.8950329172411955e-06, "loss": 0.5432, "step": 1014 }, { "epoch": 0.1730900409276944, "grad_norm": 0.3586163818836212, "learning_rate": 1.894786446243057e-06, "loss": 0.555, "step": 1015 }, { "epoch": 0.17326057298772168, "grad_norm": 0.4536883533000946, "learning_rate": 1.8945397022843236e-06, "loss": 0.5517, "step": 1016 }, { "epoch": 0.17343110504774897, "grad_norm": 0.49440905451774597, "learning_rate": 1.8942926854402662e-06, "loss": 0.5409, "step": 1017 }, { "epoch": 0.17360163710777626, "grad_norm": 0.3685409724712372, "learning_rate": 1.894045395786239e-06, "loss": 0.5721, "step": 1018 }, { "epoch": 0.17377216916780355, "grad_norm": 0.42508232593536377, "learning_rate": 1.8937978333976797e-06, "loss": 0.557, "step": 1019 }, { "epoch": 0.17394270122783084, "grad_norm": 0.4841102659702301, "learning_rate": 1.8935499983501082e-06, "loss": 0.5488, "step": 1020 }, { "epoch": 0.1741132332878581, "grad_norm": 0.43548738956451416, "learning_rate": 1.8933018907191287e-06, "loss": 0.5481, "step": 1021 }, { "epoch": 0.1742837653478854, "grad_norm": 0.39762040972709656, "learning_rate": 1.8930535105804277e-06, "loss": 0.5654, "step": 1022 }, { "epoch": 0.17445429740791268, "grad_norm": 0.4504348337650299, "learning_rate": 1.892804858009776e-06, "loss": 0.5444, "step": 1023 }, { "epoch": 0.17462482946793997, "grad_norm": 0.5455803275108337, "learning_rate": 1.8925559330830256e-06, "loss": 0.5553, "step": 1024 }, { "epoch": 0.17479536152796726, "grad_norm": 0.551396369934082, "learning_rate": 1.8923067358761138e-06, "loss": 0.56, "step": 1025 }, { "epoch": 0.17496589358799455, "grad_norm": 0.4443368911743164, "learning_rate": 1.892057266465059e-06, "loss": 0.5432, "step": 1026 }, { "epoch": 0.17513642564802182, "grad_norm": 0.3978983759880066, "learning_rate": 1.8918075249259642e-06, "loss": 0.5501, "step": 1027 }, { "epoch": 0.1753069577080491, "grad_norm": 0.4175165891647339, "learning_rate": 1.8915575113350144e-06, "loss": 0.5499, "step": 1028 }, { "epoch": 0.1754774897680764, "grad_norm": 0.381310373544693, "learning_rate": 1.891307225768478e-06, "loss": 0.5414, "step": 1029 }, { "epoch": 0.17564802182810368, "grad_norm": 0.44892942905426025, "learning_rate": 1.891056668302706e-06, "loss": 0.5506, "step": 1030 }, { "epoch": 0.17581855388813097, "grad_norm": 0.4433884024620056, "learning_rate": 1.890805839014133e-06, "loss": 0.5556, "step": 1031 }, { "epoch": 0.17598908594815826, "grad_norm": 0.36218923330307007, "learning_rate": 1.8905547379792762e-06, "loss": 0.5377, "step": 1032 }, { "epoch": 0.17615961800818553, "grad_norm": 0.49523016810417175, "learning_rate": 1.8903033652747355e-06, "loss": 0.5572, "step": 1033 }, { "epoch": 0.17633015006821282, "grad_norm": 0.5547558069229126, "learning_rate": 1.8900517209771934e-06, "loss": 0.5579, "step": 1034 }, { "epoch": 0.1765006821282401, "grad_norm": 0.45448794960975647, "learning_rate": 1.8897998051634167e-06, "loss": 0.5451, "step": 1035 }, { "epoch": 0.1766712141882674, "grad_norm": 0.45425528287887573, "learning_rate": 1.8895476179102529e-06, "loss": 0.5616, "step": 1036 }, { "epoch": 0.17684174624829468, "grad_norm": 0.5401942729949951, "learning_rate": 1.8892951592946345e-06, "loss": 0.5648, "step": 1037 }, { "epoch": 0.17701227830832197, "grad_norm": 0.48139169812202454, "learning_rate": 1.8890424293935744e-06, "loss": 0.5333, "step": 1038 }, { "epoch": 0.17718281036834924, "grad_norm": 0.5250762701034546, "learning_rate": 1.8887894282841707e-06, "loss": 0.5572, "step": 1039 }, { "epoch": 0.17735334242837653, "grad_norm": 0.49231913685798645, "learning_rate": 1.8885361560436024e-06, "loss": 0.5411, "step": 1040 }, { "epoch": 0.17752387448840382, "grad_norm": 0.44401469826698303, "learning_rate": 1.8882826127491323e-06, "loss": 0.5631, "step": 1041 }, { "epoch": 0.1776944065484311, "grad_norm": 0.5123889446258545, "learning_rate": 1.888028798478105e-06, "loss": 0.5647, "step": 1042 }, { "epoch": 0.1778649386084584, "grad_norm": 0.48957693576812744, "learning_rate": 1.8877747133079488e-06, "loss": 0.5417, "step": 1043 }, { "epoch": 0.17803547066848568, "grad_norm": 0.5831753611564636, "learning_rate": 1.8875203573161737e-06, "loss": 0.558, "step": 1044 }, { "epoch": 0.17820600272851295, "grad_norm": 0.6821199655532837, "learning_rate": 1.8872657305803731e-06, "loss": 0.5435, "step": 1045 }, { "epoch": 0.17837653478854024, "grad_norm": 0.3453556001186371, "learning_rate": 1.887010833178222e-06, "loss": 0.5501, "step": 1046 }, { "epoch": 0.17854706684856753, "grad_norm": 0.5605396032333374, "learning_rate": 1.8867556651874792e-06, "loss": 0.5386, "step": 1047 }, { "epoch": 0.17871759890859482, "grad_norm": 0.7059539556503296, "learning_rate": 1.8865002266859847e-06, "loss": 0.5408, "step": 1048 }, { "epoch": 0.1788881309686221, "grad_norm": 0.536009669303894, "learning_rate": 1.8862445177516624e-06, "loss": 0.5471, "step": 1049 }, { "epoch": 0.1790586630286494, "grad_norm": 0.4778744578361511, "learning_rate": 1.8859885384625174e-06, "loss": 0.5323, "step": 1050 }, { "epoch": 0.17922919508867668, "grad_norm": 0.6259710788726807, "learning_rate": 1.8857322888966384e-06, "loss": 0.5486, "step": 1051 }, { "epoch": 0.17939972714870395, "grad_norm": 0.5543518662452698, "learning_rate": 1.8854757691321959e-06, "loss": 0.5742, "step": 1052 }, { "epoch": 0.17957025920873124, "grad_norm": 0.45125091075897217, "learning_rate": 1.8852189792474428e-06, "loss": 0.5356, "step": 1053 }, { "epoch": 0.17974079126875853, "grad_norm": 0.7473435401916504, "learning_rate": 1.8849619193207146e-06, "loss": 0.5473, "step": 1054 }, { "epoch": 0.17991132332878582, "grad_norm": 0.5112062692642212, "learning_rate": 1.8847045894304297e-06, "loss": 0.5491, "step": 1055 }, { "epoch": 0.1800818553888131, "grad_norm": 0.4993838369846344, "learning_rate": 1.8844469896550877e-06, "loss": 0.5469, "step": 1056 }, { "epoch": 0.1802523874488404, "grad_norm": 0.5624857544898987, "learning_rate": 1.884189120073271e-06, "loss": 0.5346, "step": 1057 }, { "epoch": 0.18042291950886766, "grad_norm": 0.506741464138031, "learning_rate": 1.8839309807636451e-06, "loss": 0.5491, "step": 1058 }, { "epoch": 0.18059345156889495, "grad_norm": 0.4264898896217346, "learning_rate": 1.8836725718049564e-06, "loss": 0.5607, "step": 1059 }, { "epoch": 0.18076398362892224, "grad_norm": 0.4436824321746826, "learning_rate": 1.883413893276035e-06, "loss": 0.537, "step": 1060 }, { "epoch": 0.18093451568894953, "grad_norm": 0.5607333183288574, "learning_rate": 1.8831549452557918e-06, "loss": 0.5567, "step": 1061 }, { "epoch": 0.18110504774897682, "grad_norm": 0.3857938349246979, "learning_rate": 1.8828957278232217e-06, "loss": 0.5405, "step": 1062 }, { "epoch": 0.1812755798090041, "grad_norm": 0.3912268280982971, "learning_rate": 1.8826362410573992e-06, "loss": 0.5401, "step": 1063 }, { "epoch": 0.18144611186903137, "grad_norm": 0.43382149934768677, "learning_rate": 1.8823764850374836e-06, "loss": 0.5541, "step": 1064 }, { "epoch": 0.18161664392905866, "grad_norm": 0.47907930612564087, "learning_rate": 1.8821164598427148e-06, "loss": 0.5404, "step": 1065 }, { "epoch": 0.18178717598908595, "grad_norm": 0.40004122257232666, "learning_rate": 1.8818561655524157e-06, "loss": 0.5373, "step": 1066 }, { "epoch": 0.18195770804911324, "grad_norm": 0.3792261481285095, "learning_rate": 1.88159560224599e-06, "loss": 0.5402, "step": 1067 }, { "epoch": 0.18212824010914053, "grad_norm": 0.42916253209114075, "learning_rate": 1.8813347700029246e-06, "loss": 0.5357, "step": 1068 }, { "epoch": 0.18229877216916782, "grad_norm": 0.4503953456878662, "learning_rate": 1.8810736689027887e-06, "loss": 0.5514, "step": 1069 }, { "epoch": 0.18246930422919508, "grad_norm": 0.4629843533039093, "learning_rate": 1.8808122990252322e-06, "loss": 0.5415, "step": 1070 }, { "epoch": 0.18263983628922237, "grad_norm": 0.5524861216545105, "learning_rate": 1.8805506604499884e-06, "loss": 0.5379, "step": 1071 }, { "epoch": 0.18281036834924966, "grad_norm": 0.5018559694290161, "learning_rate": 1.880288753256871e-06, "loss": 0.559, "step": 1072 }, { "epoch": 0.18298090040927695, "grad_norm": 0.4084283709526062, "learning_rate": 1.8800265775257772e-06, "loss": 0.5417, "step": 1073 }, { "epoch": 0.18315143246930424, "grad_norm": 0.6081492304801941, "learning_rate": 1.8797641333366854e-06, "loss": 0.5493, "step": 1074 }, { "epoch": 0.18332196452933153, "grad_norm": 0.7344405651092529, "learning_rate": 1.8795014207696557e-06, "loss": 0.5664, "step": 1075 }, { "epoch": 0.1834924965893588, "grad_norm": 0.5947568416595459, "learning_rate": 1.8792384399048306e-06, "loss": 0.5554, "step": 1076 }, { "epoch": 0.18366302864938608, "grad_norm": 0.39973506331443787, "learning_rate": 1.878975190822434e-06, "loss": 0.547, "step": 1077 }, { "epoch": 0.18383356070941337, "grad_norm": 0.559581995010376, "learning_rate": 1.8787116736027722e-06, "loss": 0.5423, "step": 1078 }, { "epoch": 0.18400409276944066, "grad_norm": 0.521888256072998, "learning_rate": 1.878447888326232e-06, "loss": 0.5455, "step": 1079 }, { "epoch": 0.18417462482946795, "grad_norm": 0.3867820203304291, "learning_rate": 1.8781838350732836e-06, "loss": 0.5397, "step": 1080 }, { "epoch": 0.18434515688949524, "grad_norm": 0.5141144394874573, "learning_rate": 1.8779195139244782e-06, "loss": 0.5558, "step": 1081 }, { "epoch": 0.1845156889495225, "grad_norm": 0.685032308101654, "learning_rate": 1.8776549249604485e-06, "loss": 0.5759, "step": 1082 }, { "epoch": 0.1846862210095498, "grad_norm": 0.43259522318840027, "learning_rate": 1.8773900682619091e-06, "loss": 0.5405, "step": 1083 }, { "epoch": 0.18485675306957708, "grad_norm": 0.3838390111923218, "learning_rate": 1.8771249439096566e-06, "loss": 0.5571, "step": 1084 }, { "epoch": 0.18502728512960437, "grad_norm": 0.5635690093040466, "learning_rate": 1.8768595519845685e-06, "loss": 0.5538, "step": 1085 }, { "epoch": 0.18519781718963166, "grad_norm": 0.5552285313606262, "learning_rate": 1.8765938925676049e-06, "loss": 0.5296, "step": 1086 }, { "epoch": 0.18536834924965895, "grad_norm": 0.39209043979644775, "learning_rate": 1.8763279657398065e-06, "loss": 0.5453, "step": 1087 }, { "epoch": 0.1855388813096862, "grad_norm": 0.458196222782135, "learning_rate": 1.8760617715822967e-06, "loss": 0.5717, "step": 1088 }, { "epoch": 0.1857094133697135, "grad_norm": 0.40345677733421326, "learning_rate": 1.8757953101762788e-06, "loss": 0.5426, "step": 1089 }, { "epoch": 0.1858799454297408, "grad_norm": 0.3410954177379608, "learning_rate": 1.8755285816030397e-06, "loss": 0.537, "step": 1090 }, { "epoch": 0.18605047748976808, "grad_norm": 0.4490777850151062, "learning_rate": 1.8752615859439462e-06, "loss": 0.5424, "step": 1091 }, { "epoch": 0.18622100954979537, "grad_norm": 0.4328539967536926, "learning_rate": 1.874994323280447e-06, "loss": 0.5557, "step": 1092 }, { "epoch": 0.18639154160982266, "grad_norm": 0.3498595058917999, "learning_rate": 1.8747267936940725e-06, "loss": 0.5373, "step": 1093 }, { "epoch": 0.18656207366984992, "grad_norm": 0.3731926381587982, "learning_rate": 1.8744589972664349e-06, "loss": 0.5309, "step": 1094 }, { "epoch": 0.1867326057298772, "grad_norm": 0.40898609161376953, "learning_rate": 1.8741909340792263e-06, "loss": 0.5525, "step": 1095 }, { "epoch": 0.1869031377899045, "grad_norm": 0.43006113171577454, "learning_rate": 1.8739226042142218e-06, "loss": 0.5428, "step": 1096 }, { "epoch": 0.1870736698499318, "grad_norm": 0.42080700397491455, "learning_rate": 1.8736540077532775e-06, "loss": 0.5514, "step": 1097 }, { "epoch": 0.18724420190995908, "grad_norm": 0.461102694272995, "learning_rate": 1.87338514477833e-06, "loss": 0.5435, "step": 1098 }, { "epoch": 0.18741473396998637, "grad_norm": 0.430658221244812, "learning_rate": 1.8731160153713978e-06, "loss": 0.5485, "step": 1099 }, { "epoch": 0.18758526603001363, "grad_norm": 0.44055265188217163, "learning_rate": 1.8728466196145808e-06, "loss": 0.5456, "step": 1100 }, { "epoch": 0.18775579809004092, "grad_norm": 0.4936978816986084, "learning_rate": 1.8725769575900601e-06, "loss": 0.5392, "step": 1101 }, { "epoch": 0.1879263301500682, "grad_norm": 0.5400622487068176, "learning_rate": 1.8723070293800976e-06, "loss": 0.5406, "step": 1102 }, { "epoch": 0.1880968622100955, "grad_norm": 0.35937973856925964, "learning_rate": 1.8720368350670364e-06, "loss": 0.5331, "step": 1103 }, { "epoch": 0.1882673942701228, "grad_norm": 0.48796388506889343, "learning_rate": 1.871766374733302e-06, "loss": 0.5527, "step": 1104 }, { "epoch": 0.18843792633015008, "grad_norm": 0.6496586203575134, "learning_rate": 1.8714956484613997e-06, "loss": 0.5523, "step": 1105 }, { "epoch": 0.18860845839017734, "grad_norm": 0.6348623037338257, "learning_rate": 1.8712246563339161e-06, "loss": 0.5529, "step": 1106 }, { "epoch": 0.18877899045020463, "grad_norm": 0.5044506788253784, "learning_rate": 1.8709533984335194e-06, "loss": 0.5482, "step": 1107 }, { "epoch": 0.18894952251023192, "grad_norm": 0.5128569602966309, "learning_rate": 1.870681874842959e-06, "loss": 0.5724, "step": 1108 }, { "epoch": 0.1891200545702592, "grad_norm": 0.6813385486602783, "learning_rate": 1.8704100856450645e-06, "loss": 0.5376, "step": 1109 }, { "epoch": 0.1892905866302865, "grad_norm": 0.5284555554389954, "learning_rate": 1.8701380309227467e-06, "loss": 0.5366, "step": 1110 }, { "epoch": 0.1894611186903138, "grad_norm": 0.4766194522380829, "learning_rate": 1.8698657107589985e-06, "loss": 0.5472, "step": 1111 }, { "epoch": 0.18963165075034105, "grad_norm": 0.5370320081710815, "learning_rate": 1.8695931252368923e-06, "loss": 0.5468, "step": 1112 }, { "epoch": 0.18980218281036834, "grad_norm": 0.5143731832504272, "learning_rate": 1.869320274439583e-06, "loss": 0.5733, "step": 1113 }, { "epoch": 0.18997271487039563, "grad_norm": 0.38538476824760437, "learning_rate": 1.8690471584503047e-06, "loss": 0.5756, "step": 1114 }, { "epoch": 0.19014324693042292, "grad_norm": 0.556102454662323, "learning_rate": 1.8687737773523734e-06, "loss": 0.5558, "step": 1115 }, { "epoch": 0.1903137789904502, "grad_norm": 0.4762594699859619, "learning_rate": 1.8685001312291863e-06, "loss": 0.5392, "step": 1116 }, { "epoch": 0.1904843110504775, "grad_norm": 0.35018035769462585, "learning_rate": 1.8682262201642206e-06, "loss": 0.5344, "step": 1117 }, { "epoch": 0.19065484311050476, "grad_norm": 0.4512125551700592, "learning_rate": 1.867952044241035e-06, "loss": 0.5407, "step": 1118 }, { "epoch": 0.19082537517053205, "grad_norm": 0.39985963702201843, "learning_rate": 1.8676776035432682e-06, "loss": 0.5528, "step": 1119 }, { "epoch": 0.19099590723055934, "grad_norm": 0.538284182548523, "learning_rate": 1.8674028981546407e-06, "loss": 0.5543, "step": 1120 }, { "epoch": 0.19116643929058663, "grad_norm": 0.45395785570144653, "learning_rate": 1.8671279281589531e-06, "loss": 0.5509, "step": 1121 }, { "epoch": 0.19133697135061392, "grad_norm": 0.46653100848197937, "learning_rate": 1.8668526936400864e-06, "loss": 0.5499, "step": 1122 }, { "epoch": 0.1915075034106412, "grad_norm": 0.5032127499580383, "learning_rate": 1.8665771946820033e-06, "loss": 0.5548, "step": 1123 }, { "epoch": 0.19167803547066847, "grad_norm": 0.43430373072624207, "learning_rate": 1.8663014313687463e-06, "loss": 0.541, "step": 1124 }, { "epoch": 0.19184856753069576, "grad_norm": 0.6326541304588318, "learning_rate": 1.866025403784439e-06, "loss": 0.5392, "step": 1125 }, { "epoch": 0.19201909959072305, "grad_norm": 0.5785826444625854, "learning_rate": 1.8657491120132853e-06, "loss": 0.5416, "step": 1126 }, { "epoch": 0.19218963165075034, "grad_norm": 0.35987457633018494, "learning_rate": 1.8654725561395698e-06, "loss": 0.5389, "step": 1127 }, { "epoch": 0.19236016371077763, "grad_norm": 0.5511410236358643, "learning_rate": 1.8651957362476582e-06, "loss": 0.5232, "step": 1128 }, { "epoch": 0.19253069577080492, "grad_norm": 0.5044106245040894, "learning_rate": 1.8649186524219957e-06, "loss": 0.543, "step": 1129 }, { "epoch": 0.19270122783083218, "grad_norm": 0.442216157913208, "learning_rate": 1.8646413047471087e-06, "loss": 0.5511, "step": 1130 }, { "epoch": 0.19287175989085947, "grad_norm": 0.40444454550743103, "learning_rate": 1.864363693307604e-06, "loss": 0.5422, "step": 1131 }, { "epoch": 0.19304229195088676, "grad_norm": 0.4315738081932068, "learning_rate": 1.8640858181881693e-06, "loss": 0.5516, "step": 1132 }, { "epoch": 0.19321282401091405, "grad_norm": 0.380001425743103, "learning_rate": 1.8638076794735715e-06, "loss": 0.5345, "step": 1133 }, { "epoch": 0.19338335607094134, "grad_norm": 0.4576973617076874, "learning_rate": 1.8635292772486593e-06, "loss": 0.5418, "step": 1134 }, { "epoch": 0.19355388813096863, "grad_norm": 0.4385460317134857, "learning_rate": 1.8632506115983605e-06, "loss": 0.5425, "step": 1135 }, { "epoch": 0.1937244201909959, "grad_norm": 0.37570932507514954, "learning_rate": 1.8629716826076845e-06, "loss": 0.5611, "step": 1136 }, { "epoch": 0.19389495225102318, "grad_norm": 0.46132612228393555, "learning_rate": 1.8626924903617202e-06, "loss": 0.5384, "step": 1137 }, { "epoch": 0.19406548431105047, "grad_norm": 0.4091589152812958, "learning_rate": 1.8624130349456371e-06, "loss": 0.5387, "step": 1138 }, { "epoch": 0.19423601637107776, "grad_norm": 0.4387233257293701, "learning_rate": 1.862133316444685e-06, "loss": 0.5389, "step": 1139 }, { "epoch": 0.19440654843110505, "grad_norm": 0.3781164884567261, "learning_rate": 1.861853334944194e-06, "loss": 0.531, "step": 1140 }, { "epoch": 0.19457708049113234, "grad_norm": 0.4493245780467987, "learning_rate": 1.8615730905295742e-06, "loss": 0.5426, "step": 1141 }, { "epoch": 0.1947476125511596, "grad_norm": 0.5568568110466003, "learning_rate": 1.861292583286316e-06, "loss": 0.5501, "step": 1142 }, { "epoch": 0.1949181446111869, "grad_norm": 0.47942033410072327, "learning_rate": 1.8610118132999899e-06, "loss": 0.532, "step": 1143 }, { "epoch": 0.19508867667121418, "grad_norm": 0.40977925062179565, "learning_rate": 1.860730780656247e-06, "loss": 0.5472, "step": 1144 }, { "epoch": 0.19525920873124147, "grad_norm": 0.5302245616912842, "learning_rate": 1.8604494854408178e-06, "loss": 0.5472, "step": 1145 }, { "epoch": 0.19542974079126876, "grad_norm": 0.5703160762786865, "learning_rate": 1.8601679277395136e-06, "loss": 0.5508, "step": 1146 }, { "epoch": 0.19560027285129605, "grad_norm": 0.4289019703865051, "learning_rate": 1.8598861076382254e-06, "loss": 0.5321, "step": 1147 }, { "epoch": 0.19577080491132332, "grad_norm": 0.4585319757461548, "learning_rate": 1.8596040252229243e-06, "loss": 0.5484, "step": 1148 }, { "epoch": 0.1959413369713506, "grad_norm": 0.5592920184135437, "learning_rate": 1.8593216805796613e-06, "loss": 0.5462, "step": 1149 }, { "epoch": 0.1961118690313779, "grad_norm": 0.40145450830459595, "learning_rate": 1.8590390737945678e-06, "loss": 0.5377, "step": 1150 }, { "epoch": 0.19628240109140518, "grad_norm": 0.48376187682151794, "learning_rate": 1.8587562049538542e-06, "loss": 0.5303, "step": 1151 }, { "epoch": 0.19645293315143247, "grad_norm": 0.6458383202552795, "learning_rate": 1.8584730741438129e-06, "loss": 0.5525, "step": 1152 }, { "epoch": 0.19662346521145976, "grad_norm": 0.45522838830947876, "learning_rate": 1.8581896814508134e-06, "loss": 0.5406, "step": 1153 }, { "epoch": 0.19679399727148705, "grad_norm": 0.5150303840637207, "learning_rate": 1.857906026961307e-06, "loss": 0.5334, "step": 1154 }, { "epoch": 0.19696452933151432, "grad_norm": 0.43621817231178284, "learning_rate": 1.8576221107618246e-06, "loss": 0.5431, "step": 1155 }, { "epoch": 0.1971350613915416, "grad_norm": 0.48908352851867676, "learning_rate": 1.8573379329389768e-06, "loss": 0.5586, "step": 1156 }, { "epoch": 0.1973055934515689, "grad_norm": 0.5652856230735779, "learning_rate": 1.857053493579454e-06, "loss": 0.54, "step": 1157 }, { "epoch": 0.19747612551159618, "grad_norm": 0.5483607649803162, "learning_rate": 1.8567687927700256e-06, "loss": 0.5388, "step": 1158 }, { "epoch": 0.19764665757162347, "grad_norm": 0.47800490260124207, "learning_rate": 1.8564838305975425e-06, "loss": 0.5464, "step": 1159 }, { "epoch": 0.19781718963165076, "grad_norm": 0.5164735317230225, "learning_rate": 1.8561986071489336e-06, "loss": 0.5366, "step": 1160 }, { "epoch": 0.19798772169167803, "grad_norm": 0.712042510509491, "learning_rate": 1.8559131225112088e-06, "loss": 0.5509, "step": 1161 }, { "epoch": 0.19815825375170532, "grad_norm": 0.5521159768104553, "learning_rate": 1.8556273767714565e-06, "loss": 0.5549, "step": 1162 }, { "epoch": 0.1983287858117326, "grad_norm": 0.4541512727737427, "learning_rate": 1.8553413700168456e-06, "loss": 0.5376, "step": 1163 }, { "epoch": 0.1984993178717599, "grad_norm": 0.4656938314437866, "learning_rate": 1.8550551023346246e-06, "loss": 0.5364, "step": 1164 }, { "epoch": 0.19866984993178718, "grad_norm": 0.5346999764442444, "learning_rate": 1.8547685738121208e-06, "loss": 0.5527, "step": 1165 }, { "epoch": 0.19884038199181447, "grad_norm": 0.5368838310241699, "learning_rate": 1.8544817845367423e-06, "loss": 0.5471, "step": 1166 }, { "epoch": 0.19901091405184174, "grad_norm": 0.6363966464996338, "learning_rate": 1.8541947345959757e-06, "loss": 0.5509, "step": 1167 }, { "epoch": 0.19918144611186903, "grad_norm": 0.4643917679786682, "learning_rate": 1.8539074240773877e-06, "loss": 0.5342, "step": 1168 }, { "epoch": 0.19935197817189632, "grad_norm": 0.49029770493507385, "learning_rate": 1.8536198530686241e-06, "loss": 0.5407, "step": 1169 }, { "epoch": 0.1995225102319236, "grad_norm": 0.592406690120697, "learning_rate": 1.8533320216574103e-06, "loss": 0.5459, "step": 1170 }, { "epoch": 0.1996930422919509, "grad_norm": 0.397834837436676, "learning_rate": 1.8530439299315517e-06, "loss": 0.5466, "step": 1171 }, { "epoch": 0.19986357435197818, "grad_norm": 0.5638540983200073, "learning_rate": 1.8527555779789323e-06, "loss": 0.5371, "step": 1172 }, { "epoch": 0.20003410641200545, "grad_norm": 0.42020824551582336, "learning_rate": 1.8524669658875154e-06, "loss": 0.5408, "step": 1173 }, { "epoch": 0.20020463847203274, "grad_norm": 0.5150777101516724, "learning_rate": 1.8521780937453446e-06, "loss": 0.5405, "step": 1174 }, { "epoch": 0.20037517053206003, "grad_norm": 0.5404800176620483, "learning_rate": 1.851888961640542e-06, "loss": 0.5397, "step": 1175 }, { "epoch": 0.20054570259208732, "grad_norm": 0.4149342477321625, "learning_rate": 1.8515995696613098e-06, "loss": 0.5353, "step": 1176 }, { "epoch": 0.2007162346521146, "grad_norm": 0.521208643913269, "learning_rate": 1.8513099178959282e-06, "loss": 0.5495, "step": 1177 }, { "epoch": 0.2008867667121419, "grad_norm": 0.4055381417274475, "learning_rate": 1.8510200064327577e-06, "loss": 0.5464, "step": 1178 }, { "epoch": 0.20105729877216916, "grad_norm": 0.439575731754303, "learning_rate": 1.8507298353602376e-06, "loss": 0.5343, "step": 1179 }, { "epoch": 0.20122783083219645, "grad_norm": 0.456194669008255, "learning_rate": 1.8504394047668872e-06, "loss": 0.5445, "step": 1180 }, { "epoch": 0.20139836289222374, "grad_norm": 0.4100000262260437, "learning_rate": 1.8501487147413035e-06, "loss": 0.5419, "step": 1181 }, { "epoch": 0.20156889495225103, "grad_norm": 0.41677936911582947, "learning_rate": 1.849857765372164e-06, "loss": 0.5643, "step": 1182 }, { "epoch": 0.20173942701227832, "grad_norm": 0.404525488615036, "learning_rate": 1.849566556748224e-06, "loss": 0.5447, "step": 1183 }, { "epoch": 0.2019099590723056, "grad_norm": 0.40742960572242737, "learning_rate": 1.8492750889583194e-06, "loss": 0.5253, "step": 1184 }, { "epoch": 0.20208049113233287, "grad_norm": 0.4676227867603302, "learning_rate": 1.8489833620913644e-06, "loss": 0.5362, "step": 1185 }, { "epoch": 0.20225102319236016, "grad_norm": 0.3785782754421234, "learning_rate": 1.8486913762363517e-06, "loss": 0.5448, "step": 1186 }, { "epoch": 0.20242155525238745, "grad_norm": 0.3994957506656647, "learning_rate": 1.8483991314823536e-06, "loss": 0.5359, "step": 1187 }, { "epoch": 0.20259208731241474, "grad_norm": 0.4070039391517639, "learning_rate": 1.848106627918522e-06, "loss": 0.5461, "step": 1188 }, { "epoch": 0.20276261937244203, "grad_norm": 0.5237683057785034, "learning_rate": 1.8478138656340862e-06, "loss": 0.5494, "step": 1189 }, { "epoch": 0.20293315143246932, "grad_norm": 0.3871895670890808, "learning_rate": 1.8475208447183558e-06, "loss": 0.5684, "step": 1190 }, { "epoch": 0.20310368349249658, "grad_norm": 0.4152975082397461, "learning_rate": 1.8472275652607187e-06, "loss": 0.5337, "step": 1191 }, { "epoch": 0.20327421555252387, "grad_norm": 0.44173455238342285, "learning_rate": 1.846934027350642e-06, "loss": 0.5474, "step": 1192 }, { "epoch": 0.20344474761255116, "grad_norm": 0.4573276937007904, "learning_rate": 1.846640231077671e-06, "loss": 0.5465, "step": 1193 }, { "epoch": 0.20361527967257845, "grad_norm": 0.3855762779712677, "learning_rate": 1.8463461765314302e-06, "loss": 0.5494, "step": 1194 }, { "epoch": 0.20378581173260574, "grad_norm": 0.4517650306224823, "learning_rate": 1.846051863801623e-06, "loss": 0.5424, "step": 1195 }, { "epoch": 0.20395634379263303, "grad_norm": 0.5178718566894531, "learning_rate": 1.8457572929780317e-06, "loss": 0.5361, "step": 1196 }, { "epoch": 0.2041268758526603, "grad_norm": 0.3623640835285187, "learning_rate": 1.845462464150517e-06, "loss": 0.5471, "step": 1197 }, { "epoch": 0.20429740791268758, "grad_norm": 0.4305116832256317, "learning_rate": 1.8451673774090185e-06, "loss": 0.5439, "step": 1198 }, { "epoch": 0.20446793997271487, "grad_norm": 0.4251005947589874, "learning_rate": 1.844872032843554e-06, "loss": 0.5418, "step": 1199 }, { "epoch": 0.20463847203274216, "grad_norm": 0.40618592500686646, "learning_rate": 1.8445764305442205e-06, "loss": 0.5442, "step": 1200 }, { "epoch": 0.20480900409276945, "grad_norm": 0.4994547963142395, "learning_rate": 1.8442805706011938e-06, "loss": 0.5606, "step": 1201 }, { "epoch": 0.20497953615279674, "grad_norm": 0.38456836342811584, "learning_rate": 1.8439844531047277e-06, "loss": 0.5436, "step": 1202 }, { "epoch": 0.205150068212824, "grad_norm": 0.3920612335205078, "learning_rate": 1.8436880781451547e-06, "loss": 0.5288, "step": 1203 }, { "epoch": 0.2053206002728513, "grad_norm": 0.38321173191070557, "learning_rate": 1.843391445812886e-06, "loss": 0.5412, "step": 1204 }, { "epoch": 0.20549113233287858, "grad_norm": 0.4655633568763733, "learning_rate": 1.8430945561984115e-06, "loss": 0.5461, "step": 1205 }, { "epoch": 0.20566166439290587, "grad_norm": 0.404006689786911, "learning_rate": 1.842797409392299e-06, "loss": 0.5531, "step": 1206 }, { "epoch": 0.20583219645293316, "grad_norm": 0.4156908094882965, "learning_rate": 1.8425000054851956e-06, "loss": 0.5405, "step": 1207 }, { "epoch": 0.20600272851296045, "grad_norm": 0.4246964156627655, "learning_rate": 1.842202344567826e-06, "loss": 0.5354, "step": 1208 }, { "epoch": 0.2061732605729877, "grad_norm": 0.3740110993385315, "learning_rate": 1.841904426730994e-06, "loss": 0.5334, "step": 1209 }, { "epoch": 0.206343792633015, "grad_norm": 0.40257853269577026, "learning_rate": 1.8416062520655808e-06, "loss": 0.5448, "step": 1210 }, { "epoch": 0.2065143246930423, "grad_norm": 0.42841023206710815, "learning_rate": 1.841307820662547e-06, "loss": 0.5467, "step": 1211 }, { "epoch": 0.20668485675306958, "grad_norm": 0.4049186408519745, "learning_rate": 1.8410091326129314e-06, "loss": 0.5371, "step": 1212 }, { "epoch": 0.20685538881309687, "grad_norm": 0.44290080666542053, "learning_rate": 1.8407101880078502e-06, "loss": 0.5429, "step": 1213 }, { "epoch": 0.20702592087312416, "grad_norm": 0.3257935643196106, "learning_rate": 1.8404109869384988e-06, "loss": 0.5436, "step": 1214 }, { "epoch": 0.20719645293315142, "grad_norm": 0.410832941532135, "learning_rate": 1.8401115294961501e-06, "loss": 0.5453, "step": 1215 }, { "epoch": 0.2073669849931787, "grad_norm": 0.3964151442050934, "learning_rate": 1.839811815772156e-06, "loss": 0.5411, "step": 1216 }, { "epoch": 0.207537517053206, "grad_norm": 0.3891664147377014, "learning_rate": 1.8395118458579461e-06, "loss": 0.5293, "step": 1217 }, { "epoch": 0.2077080491132333, "grad_norm": 0.4442242681980133, "learning_rate": 1.8392116198450285e-06, "loss": 0.5433, "step": 1218 }, { "epoch": 0.20787858117326058, "grad_norm": 0.4433388411998749, "learning_rate": 1.8389111378249886e-06, "loss": 0.5403, "step": 1219 }, { "epoch": 0.20804911323328787, "grad_norm": 0.3714314103126526, "learning_rate": 1.8386103998894909e-06, "loss": 0.5255, "step": 1220 }, { "epoch": 0.20821964529331513, "grad_norm": 0.4090864062309265, "learning_rate": 1.8383094061302767e-06, "loss": 0.5419, "step": 1221 }, { "epoch": 0.20839017735334242, "grad_norm": 0.4515744745731354, "learning_rate": 1.8380081566391674e-06, "loss": 0.5469, "step": 1222 }, { "epoch": 0.2085607094133697, "grad_norm": 0.46093738079071045, "learning_rate": 1.8377066515080606e-06, "loss": 0.5332, "step": 1223 }, { "epoch": 0.208731241473397, "grad_norm": 0.4779576361179352, "learning_rate": 1.837404890828932e-06, "loss": 0.5405, "step": 1224 }, { "epoch": 0.2089017735334243, "grad_norm": 0.4057524502277374, "learning_rate": 1.837102874693836e-06, "loss": 0.5383, "step": 1225 }, { "epoch": 0.20907230559345158, "grad_norm": 0.4065392315387726, "learning_rate": 1.8368006031949054e-06, "loss": 0.5408, "step": 1226 }, { "epoch": 0.20924283765347884, "grad_norm": 0.4654337167739868, "learning_rate": 1.836498076424349e-06, "loss": 0.5321, "step": 1227 }, { "epoch": 0.20941336971350613, "grad_norm": 0.4152114689350128, "learning_rate": 1.8361952944744555e-06, "loss": 0.5488, "step": 1228 }, { "epoch": 0.20958390177353342, "grad_norm": 0.35753628611564636, "learning_rate": 1.83589225743759e-06, "loss": 0.5408, "step": 1229 }, { "epoch": 0.2097544338335607, "grad_norm": 0.4003051519393921, "learning_rate": 1.8355889654061963e-06, "loss": 0.5397, "step": 1230 }, { "epoch": 0.209924965893588, "grad_norm": 0.422453910112381, "learning_rate": 1.8352854184727954e-06, "loss": 0.5361, "step": 1231 }, { "epoch": 0.2100954979536153, "grad_norm": 0.4182709753513336, "learning_rate": 1.8349816167299866e-06, "loss": 0.5346, "step": 1232 }, { "epoch": 0.21026603001364255, "grad_norm": 0.42172756791114807, "learning_rate": 1.8346775602704465e-06, "loss": 0.54, "step": 1233 }, { "epoch": 0.21043656207366984, "grad_norm": 0.547315776348114, "learning_rate": 1.8343732491869294e-06, "loss": 0.5373, "step": 1234 }, { "epoch": 0.21060709413369713, "grad_norm": 0.48611629009246826, "learning_rate": 1.834068683572268e-06, "loss": 0.5476, "step": 1235 }, { "epoch": 0.21077762619372442, "grad_norm": 0.45138785243034363, "learning_rate": 1.8337638635193712e-06, "loss": 0.5395, "step": 1236 }, { "epoch": 0.2109481582537517, "grad_norm": 0.37378039956092834, "learning_rate": 1.8334587891212272e-06, "loss": 0.5449, "step": 1237 }, { "epoch": 0.211118690313779, "grad_norm": 0.41004690527915955, "learning_rate": 1.8331534604709002e-06, "loss": 0.54, "step": 1238 }, { "epoch": 0.21128922237380626, "grad_norm": 0.5197843313217163, "learning_rate": 1.8328478776615336e-06, "loss": 0.5211, "step": 1239 }, { "epoch": 0.21145975443383355, "grad_norm": 0.46441686153411865, "learning_rate": 1.8325420407863471e-06, "loss": 0.5462, "step": 1240 }, { "epoch": 0.21163028649386084, "grad_norm": 0.44501668214797974, "learning_rate": 1.8322359499386384e-06, "loss": 0.5453, "step": 1241 }, { "epoch": 0.21180081855388813, "grad_norm": 0.42854249477386475, "learning_rate": 1.8319296052117823e-06, "loss": 0.5192, "step": 1242 }, { "epoch": 0.21197135061391542, "grad_norm": 0.5542727708816528, "learning_rate": 1.8316230066992314e-06, "loss": 0.5319, "step": 1243 }, { "epoch": 0.2121418826739427, "grad_norm": 0.5215505957603455, "learning_rate": 1.831316154494516e-06, "loss": 0.5465, "step": 1244 }, { "epoch": 0.21231241473396997, "grad_norm": 0.4085174798965454, "learning_rate": 1.8310090486912425e-06, "loss": 0.5434, "step": 1245 }, { "epoch": 0.21248294679399726, "grad_norm": 0.477775514125824, "learning_rate": 1.8307016893830967e-06, "loss": 0.5421, "step": 1246 }, { "epoch": 0.21265347885402455, "grad_norm": 0.3507140576839447, "learning_rate": 1.8303940766638398e-06, "loss": 0.5407, "step": 1247 }, { "epoch": 0.21282401091405184, "grad_norm": 0.47276178002357483, "learning_rate": 1.8300862106273116e-06, "loss": 0.539, "step": 1248 }, { "epoch": 0.21299454297407913, "grad_norm": 0.3437632918357849, "learning_rate": 1.8297780913674288e-06, "loss": 0.5411, "step": 1249 }, { "epoch": 0.21316507503410642, "grad_norm": 0.38725805282592773, "learning_rate": 1.8294697189781844e-06, "loss": 0.5454, "step": 1250 }, { "epoch": 0.21333560709413368, "grad_norm": 0.4623619019985199, "learning_rate": 1.8291610935536504e-06, "loss": 0.5397, "step": 1251 }, { "epoch": 0.21350613915416097, "grad_norm": 0.4034901559352875, "learning_rate": 1.8288522151879744e-06, "loss": 0.5383, "step": 1252 }, { "epoch": 0.21367667121418826, "grad_norm": 0.4366649389266968, "learning_rate": 1.828543083975382e-06, "loss": 0.5511, "step": 1253 }, { "epoch": 0.21384720327421555, "grad_norm": 0.4827879071235657, "learning_rate": 1.8282337000101764e-06, "loss": 0.5471, "step": 1254 }, { "epoch": 0.21401773533424284, "grad_norm": 0.5037616491317749, "learning_rate": 1.827924063386736e-06, "loss": 0.5346, "step": 1255 }, { "epoch": 0.21418826739427013, "grad_norm": 0.401431679725647, "learning_rate": 1.8276141741995187e-06, "loss": 0.5498, "step": 1256 }, { "epoch": 0.21435879945429742, "grad_norm": 0.4014277756214142, "learning_rate": 1.8273040325430577e-06, "loss": 0.552, "step": 1257 }, { "epoch": 0.21452933151432468, "grad_norm": 0.39532434940338135, "learning_rate": 1.8269936385119635e-06, "loss": 0.5521, "step": 1258 }, { "epoch": 0.21469986357435197, "grad_norm": 0.3930286467075348, "learning_rate": 1.8266829922009248e-06, "loss": 0.5446, "step": 1259 }, { "epoch": 0.21487039563437926, "grad_norm": 0.414559543132782, "learning_rate": 1.8263720937047052e-06, "loss": 0.5623, "step": 1260 }, { "epoch": 0.21504092769440655, "grad_norm": 0.5679807662963867, "learning_rate": 1.8260609431181472e-06, "loss": 0.5411, "step": 1261 }, { "epoch": 0.21521145975443384, "grad_norm": 0.381727010011673, "learning_rate": 1.8257495405361694e-06, "loss": 0.5386, "step": 1262 }, { "epoch": 0.21538199181446113, "grad_norm": 0.5208612680435181, "learning_rate": 1.8254378860537666e-06, "loss": 0.5475, "step": 1263 }, { "epoch": 0.2155525238744884, "grad_norm": 0.44777414202690125, "learning_rate": 1.8251259797660115e-06, "loss": 0.53, "step": 1264 }, { "epoch": 0.21572305593451568, "grad_norm": 0.38557693362236023, "learning_rate": 1.8248138217680532e-06, "loss": 0.5426, "step": 1265 }, { "epoch": 0.21589358799454297, "grad_norm": 0.6629211902618408, "learning_rate": 1.8245014121551177e-06, "loss": 0.5521, "step": 1266 }, { "epoch": 0.21606412005457026, "grad_norm": 0.6797375082969666, "learning_rate": 1.8241887510225071e-06, "loss": 0.5662, "step": 1267 }, { "epoch": 0.21623465211459755, "grad_norm": 0.5131363868713379, "learning_rate": 1.8238758384656011e-06, "loss": 0.5426, "step": 1268 }, { "epoch": 0.21640518417462484, "grad_norm": 0.4549805223941803, "learning_rate": 1.8235626745798562e-06, "loss": 0.5389, "step": 1269 }, { "epoch": 0.2165757162346521, "grad_norm": 0.6096103191375732, "learning_rate": 1.8232492594608044e-06, "loss": 0.5359, "step": 1270 }, { "epoch": 0.2167462482946794, "grad_norm": 0.5440108776092529, "learning_rate": 1.8229355932040557e-06, "loss": 0.542, "step": 1271 }, { "epoch": 0.21691678035470668, "grad_norm": 0.38566699624061584, "learning_rate": 1.8226216759052958e-06, "loss": 0.5391, "step": 1272 }, { "epoch": 0.21708731241473397, "grad_norm": 0.503471314907074, "learning_rate": 1.8223075076602873e-06, "loss": 0.5427, "step": 1273 }, { "epoch": 0.21725784447476126, "grad_norm": 0.4962663948535919, "learning_rate": 1.8219930885648691e-06, "loss": 0.54, "step": 1274 }, { "epoch": 0.21742837653478855, "grad_norm": 0.417920857667923, "learning_rate": 1.8216784187149571e-06, "loss": 0.5368, "step": 1275 }, { "epoch": 0.21759890859481582, "grad_norm": 0.34477683901786804, "learning_rate": 1.8213634982065437e-06, "loss": 0.5392, "step": 1276 }, { "epoch": 0.2177694406548431, "grad_norm": 0.3951910436153412, "learning_rate": 1.8210483271356973e-06, "loss": 0.5498, "step": 1277 }, { "epoch": 0.2179399727148704, "grad_norm": 0.4015583097934723, "learning_rate": 1.8207329055985624e-06, "loss": 0.5371, "step": 1278 }, { "epoch": 0.21811050477489768, "grad_norm": 0.3596416115760803, "learning_rate": 1.8204172336913611e-06, "loss": 0.535, "step": 1279 }, { "epoch": 0.21828103683492497, "grad_norm": 0.3580999970436096, "learning_rate": 1.8201013115103913e-06, "loss": 0.547, "step": 1280 }, { "epoch": 0.21845156889495226, "grad_norm": 0.3621145486831665, "learning_rate": 1.8197851391520267e-06, "loss": 0.5491, "step": 1281 }, { "epoch": 0.21862210095497953, "grad_norm": 0.4546815752983093, "learning_rate": 1.819468716712718e-06, "loss": 0.5493, "step": 1282 }, { "epoch": 0.21879263301500682, "grad_norm": 0.37099504470825195, "learning_rate": 1.8191520442889922e-06, "loss": 0.5512, "step": 1283 }, { "epoch": 0.2189631650750341, "grad_norm": 0.444868803024292, "learning_rate": 1.818835121977452e-06, "loss": 0.5492, "step": 1284 }, { "epoch": 0.2191336971350614, "grad_norm": 0.46764564514160156, "learning_rate": 1.818517949874777e-06, "loss": 0.5284, "step": 1285 }, { "epoch": 0.21930422919508868, "grad_norm": 0.48628371953964233, "learning_rate": 1.818200528077722e-06, "loss": 0.5249, "step": 1286 }, { "epoch": 0.21947476125511597, "grad_norm": 0.5340228080749512, "learning_rate": 1.8178828566831194e-06, "loss": 0.5378, "step": 1287 }, { "epoch": 0.21964529331514324, "grad_norm": 0.4188104569911957, "learning_rate": 1.8175649357878767e-06, "loss": 0.5404, "step": 1288 }, { "epoch": 0.21981582537517053, "grad_norm": 0.5152527093887329, "learning_rate": 1.8172467654889782e-06, "loss": 0.538, "step": 1289 }, { "epoch": 0.21998635743519782, "grad_norm": 0.7427700161933899, "learning_rate": 1.8169283458834832e-06, "loss": 0.547, "step": 1290 }, { "epoch": 0.2201568894952251, "grad_norm": 0.5649572610855103, "learning_rate": 1.8166096770685279e-06, "loss": 0.5332, "step": 1291 }, { "epoch": 0.2203274215552524, "grad_norm": 0.46063774824142456, "learning_rate": 1.8162907591413248e-06, "loss": 0.5572, "step": 1292 }, { "epoch": 0.22049795361527968, "grad_norm": 0.5200372934341431, "learning_rate": 1.8159715921991613e-06, "loss": 0.5475, "step": 1293 }, { "epoch": 0.22066848567530695, "grad_norm": 0.5936065316200256, "learning_rate": 1.8156521763394019e-06, "loss": 0.5578, "step": 1294 }, { "epoch": 0.22083901773533424, "grad_norm": 0.430730402469635, "learning_rate": 1.8153325116594864e-06, "loss": 0.5383, "step": 1295 }, { "epoch": 0.22100954979536153, "grad_norm": 0.5382259488105774, "learning_rate": 1.8150125982569307e-06, "loss": 0.5297, "step": 1296 }, { "epoch": 0.22118008185538882, "grad_norm": 0.6202133297920227, "learning_rate": 1.8146924362293262e-06, "loss": 0.5409, "step": 1297 }, { "epoch": 0.2213506139154161, "grad_norm": 0.445716917514801, "learning_rate": 1.8143720256743411e-06, "loss": 0.5378, "step": 1298 }, { "epoch": 0.2215211459754434, "grad_norm": 0.46867361664772034, "learning_rate": 1.814051366689718e-06, "loss": 0.5441, "step": 1299 }, { "epoch": 0.22169167803547066, "grad_norm": 0.5404912829399109, "learning_rate": 1.813730459373277e-06, "loss": 0.5589, "step": 1300 }, { "epoch": 0.22186221009549795, "grad_norm": 0.4060252606868744, "learning_rate": 1.813409303822912e-06, "loss": 0.5414, "step": 1301 }, { "epoch": 0.22203274215552524, "grad_norm": 0.6208741664886475, "learning_rate": 1.8130879001365946e-06, "loss": 0.5467, "step": 1302 }, { "epoch": 0.22220327421555253, "grad_norm": 0.5801400542259216, "learning_rate": 1.8127662484123704e-06, "loss": 0.5449, "step": 1303 }, { "epoch": 0.22237380627557982, "grad_norm": 0.47542262077331543, "learning_rate": 1.812444348748362e-06, "loss": 0.5361, "step": 1304 }, { "epoch": 0.2225443383356071, "grad_norm": 0.5469897389411926, "learning_rate": 1.8121222012427668e-06, "loss": 0.5364, "step": 1305 }, { "epoch": 0.22271487039563437, "grad_norm": 0.477078914642334, "learning_rate": 1.8117998059938576e-06, "loss": 0.5367, "step": 1306 }, { "epoch": 0.22288540245566166, "grad_norm": 0.5777487754821777, "learning_rate": 1.811477163099984e-06, "loss": 0.5258, "step": 1307 }, { "epoch": 0.22305593451568895, "grad_norm": 0.5522773861885071, "learning_rate": 1.81115427265957e-06, "loss": 0.5483, "step": 1308 }, { "epoch": 0.22322646657571624, "grad_norm": 0.4026612341403961, "learning_rate": 1.8108311347711152e-06, "loss": 0.5373, "step": 1309 }, { "epoch": 0.22339699863574353, "grad_norm": 0.6066965460777283, "learning_rate": 1.8105077495331957e-06, "loss": 0.5359, "step": 1310 }, { "epoch": 0.22356753069577082, "grad_norm": 0.4083956480026245, "learning_rate": 1.8101841170444617e-06, "loss": 0.5222, "step": 1311 }, { "epoch": 0.22373806275579808, "grad_norm": 0.46423399448394775, "learning_rate": 1.8098602374036397e-06, "loss": 0.553, "step": 1312 }, { "epoch": 0.22390859481582537, "grad_norm": 0.5001481175422668, "learning_rate": 1.8095361107095315e-06, "loss": 0.5369, "step": 1313 }, { "epoch": 0.22407912687585266, "grad_norm": 0.351531058549881, "learning_rate": 1.8092117370610138e-06, "loss": 0.5366, "step": 1314 }, { "epoch": 0.22424965893587995, "grad_norm": 0.5711095929145813, "learning_rate": 1.8088871165570392e-06, "loss": 0.5398, "step": 1315 }, { "epoch": 0.22442019099590724, "grad_norm": 0.5152797698974609, "learning_rate": 1.808562249296635e-06, "loss": 0.5287, "step": 1316 }, { "epoch": 0.22459072305593453, "grad_norm": 0.4698987603187561, "learning_rate": 1.8082371353789048e-06, "loss": 0.5343, "step": 1317 }, { "epoch": 0.2247612551159618, "grad_norm": 0.5477645993232727, "learning_rate": 1.8079117749030259e-06, "loss": 0.5405, "step": 1318 }, { "epoch": 0.22493178717598908, "grad_norm": 0.5242092609405518, "learning_rate": 1.8075861679682526e-06, "loss": 0.5324, "step": 1319 }, { "epoch": 0.22510231923601637, "grad_norm": 0.45128878951072693, "learning_rate": 1.8072603146739128e-06, "loss": 0.5498, "step": 1320 }, { "epoch": 0.22527285129604366, "grad_norm": 0.5173705220222473, "learning_rate": 1.8069342151194106e-06, "loss": 0.5331, "step": 1321 }, { "epoch": 0.22544338335607095, "grad_norm": 0.5540408492088318, "learning_rate": 1.8066078694042245e-06, "loss": 0.5348, "step": 1322 }, { "epoch": 0.22561391541609824, "grad_norm": 0.38541969656944275, "learning_rate": 1.8062812776279088e-06, "loss": 0.5237, "step": 1323 }, { "epoch": 0.2257844474761255, "grad_norm": 0.5497429966926575, "learning_rate": 1.8059544398900925e-06, "loss": 0.5488, "step": 1324 }, { "epoch": 0.2259549795361528, "grad_norm": 0.5218777656555176, "learning_rate": 1.8056273562904792e-06, "loss": 0.5442, "step": 1325 }, { "epoch": 0.22612551159618008, "grad_norm": 0.37961360812187195, "learning_rate": 1.8053000269288487e-06, "loss": 0.5404, "step": 1326 }, { "epoch": 0.22629604365620737, "grad_norm": 0.5503019690513611, "learning_rate": 1.8049724519050538e-06, "loss": 0.5262, "step": 1327 }, { "epoch": 0.22646657571623466, "grad_norm": 0.510767936706543, "learning_rate": 1.804644631319025e-06, "loss": 0.5468, "step": 1328 }, { "epoch": 0.22663710777626195, "grad_norm": 0.4793156683444977, "learning_rate": 1.8043165652707652e-06, "loss": 0.5402, "step": 1329 }, { "epoch": 0.2268076398362892, "grad_norm": 0.40747585892677307, "learning_rate": 1.803988253860353e-06, "loss": 0.5397, "step": 1330 }, { "epoch": 0.2269781718963165, "grad_norm": 0.440603643655777, "learning_rate": 1.8036596971879426e-06, "loss": 0.5273, "step": 1331 }, { "epoch": 0.2271487039563438, "grad_norm": 0.5874107480049133, "learning_rate": 1.803330895353762e-06, "loss": 0.5544, "step": 1332 }, { "epoch": 0.22731923601637108, "grad_norm": 0.5229067802429199, "learning_rate": 1.8030018484581149e-06, "loss": 0.5341, "step": 1333 }, { "epoch": 0.22748976807639837, "grad_norm": 0.350911945104599, "learning_rate": 1.8026725566013785e-06, "loss": 0.5396, "step": 1334 }, { "epoch": 0.22766030013642566, "grad_norm": 0.5130938291549683, "learning_rate": 1.802343019884006e-06, "loss": 0.5373, "step": 1335 }, { "epoch": 0.22783083219645292, "grad_norm": 0.6306084990501404, "learning_rate": 1.8020132384065244e-06, "loss": 0.5329, "step": 1336 }, { "epoch": 0.2280013642564802, "grad_norm": 0.5357939600944519, "learning_rate": 1.8016832122695362e-06, "loss": 0.5496, "step": 1337 }, { "epoch": 0.2281718963165075, "grad_norm": 0.3664228022098541, "learning_rate": 1.801352941573718e-06, "loss": 0.531, "step": 1338 }, { "epoch": 0.2283424283765348, "grad_norm": 0.5649370551109314, "learning_rate": 1.8010224264198207e-06, "loss": 0.5354, "step": 1339 }, { "epoch": 0.22851296043656208, "grad_norm": 0.6709194779396057, "learning_rate": 1.800691666908671e-06, "loss": 0.5418, "step": 1340 }, { "epoch": 0.22868349249658937, "grad_norm": 0.441829115152359, "learning_rate": 1.800360663141168e-06, "loss": 0.5525, "step": 1341 }, { "epoch": 0.22885402455661663, "grad_norm": 0.38027212023735046, "learning_rate": 1.8000294152182878e-06, "loss": 0.5368, "step": 1342 }, { "epoch": 0.22902455661664392, "grad_norm": 0.39649662375450134, "learning_rate": 1.799697923241079e-06, "loss": 0.5308, "step": 1343 }, { "epoch": 0.2291950886766712, "grad_norm": 0.39250436425209045, "learning_rate": 1.7993661873106662e-06, "loss": 0.5328, "step": 1344 }, { "epoch": 0.2293656207366985, "grad_norm": 0.37690094113349915, "learning_rate": 1.7990342075282472e-06, "loss": 0.5258, "step": 1345 }, { "epoch": 0.2295361527967258, "grad_norm": 0.4618053436279297, "learning_rate": 1.7987019839950944e-06, "loss": 0.5412, "step": 1346 }, { "epoch": 0.22970668485675308, "grad_norm": 0.47363170981407166, "learning_rate": 1.7983695168125553e-06, "loss": 0.5292, "step": 1347 }, { "epoch": 0.22987721691678034, "grad_norm": 0.44554126262664795, "learning_rate": 1.7980368060820511e-06, "loss": 0.5278, "step": 1348 }, { "epoch": 0.23004774897680763, "grad_norm": 0.49328240752220154, "learning_rate": 1.7977038519050774e-06, "loss": 0.5424, "step": 1349 }, { "epoch": 0.23021828103683492, "grad_norm": 0.4327653646469116, "learning_rate": 1.797370654383204e-06, "loss": 0.543, "step": 1350 }, { "epoch": 0.2303888130968622, "grad_norm": 0.5031704902648926, "learning_rate": 1.7970372136180756e-06, "loss": 0.5332, "step": 1351 }, { "epoch": 0.2305593451568895, "grad_norm": 0.5221416354179382, "learning_rate": 1.7967035297114095e-06, "loss": 0.5275, "step": 1352 }, { "epoch": 0.2307298772169168, "grad_norm": 0.35783857107162476, "learning_rate": 1.796369602764999e-06, "loss": 0.5364, "step": 1353 }, { "epoch": 0.23090040927694405, "grad_norm": 0.4733849763870239, "learning_rate": 1.7960354328807106e-06, "loss": 0.5288, "step": 1354 }, { "epoch": 0.23107094133697134, "grad_norm": 0.3745383620262146, "learning_rate": 1.7957010201604847e-06, "loss": 0.5447, "step": 1355 }, { "epoch": 0.23124147339699863, "grad_norm": 0.4308125078678131, "learning_rate": 1.7953663647063365e-06, "loss": 0.5425, "step": 1356 }, { "epoch": 0.23141200545702592, "grad_norm": 0.4075262248516083, "learning_rate": 1.795031466620355e-06, "loss": 0.5344, "step": 1357 }, { "epoch": 0.2315825375170532, "grad_norm": 0.43857333064079285, "learning_rate": 1.794696326004703e-06, "loss": 0.5375, "step": 1358 }, { "epoch": 0.2317530695770805, "grad_norm": 0.5499160885810852, "learning_rate": 1.7943609429616174e-06, "loss": 0.5464, "step": 1359 }, { "epoch": 0.23192360163710776, "grad_norm": 0.5283775329589844, "learning_rate": 1.7940253175934086e-06, "loss": 0.5405, "step": 1360 }, { "epoch": 0.23209413369713505, "grad_norm": 0.558288037776947, "learning_rate": 1.7936894500024616e-06, "loss": 0.5373, "step": 1361 }, { "epoch": 0.23226466575716234, "grad_norm": 0.39163830876350403, "learning_rate": 1.7933533402912355e-06, "loss": 0.5412, "step": 1362 }, { "epoch": 0.23243519781718963, "grad_norm": 0.5775661468505859, "learning_rate": 1.7930169885622622e-06, "loss": 0.527, "step": 1363 }, { "epoch": 0.23260572987721692, "grad_norm": 0.5608419179916382, "learning_rate": 1.7926803949181486e-06, "loss": 0.5332, "step": 1364 }, { "epoch": 0.2327762619372442, "grad_norm": 0.4189216196537018, "learning_rate": 1.7923435594615744e-06, "loss": 0.5418, "step": 1365 }, { "epoch": 0.2329467939972715, "grad_norm": 0.5751291513442993, "learning_rate": 1.792006482295294e-06, "loss": 0.539, "step": 1366 }, { "epoch": 0.23311732605729876, "grad_norm": 0.4839267134666443, "learning_rate": 1.7916691635221346e-06, "loss": 0.5465, "step": 1367 }, { "epoch": 0.23328785811732605, "grad_norm": 0.4686916768550873, "learning_rate": 1.7913316032449976e-06, "loss": 0.5534, "step": 1368 }, { "epoch": 0.23345839017735334, "grad_norm": 0.4905444383621216, "learning_rate": 1.7909938015668581e-06, "loss": 0.5419, "step": 1369 }, { "epoch": 0.23362892223738063, "grad_norm": 0.5154764652252197, "learning_rate": 1.790655758590765e-06, "loss": 0.5481, "step": 1370 }, { "epoch": 0.23379945429740792, "grad_norm": 0.4774230122566223, "learning_rate": 1.79031747441984e-06, "loss": 0.5269, "step": 1371 }, { "epoch": 0.2339699863574352, "grad_norm": 0.3737574815750122, "learning_rate": 1.7899789491572794e-06, "loss": 0.5326, "step": 1372 }, { "epoch": 0.23414051841746247, "grad_norm": 0.41364020109176636, "learning_rate": 1.7896401829063524e-06, "loss": 0.5555, "step": 1373 }, { "epoch": 0.23431105047748976, "grad_norm": 0.41541293263435364, "learning_rate": 1.7893011757704024e-06, "loss": 0.5374, "step": 1374 }, { "epoch": 0.23448158253751705, "grad_norm": 0.4567691385746002, "learning_rate": 1.788961927852845e-06, "loss": 0.5438, "step": 1375 }, { "epoch": 0.23465211459754434, "grad_norm": 0.46262142062187195, "learning_rate": 1.7886224392571707e-06, "loss": 0.5419, "step": 1376 }, { "epoch": 0.23482264665757163, "grad_norm": 0.48895400762557983, "learning_rate": 1.7882827100869423e-06, "loss": 0.5562, "step": 1377 }, { "epoch": 0.23499317871759892, "grad_norm": 0.6779786348342896, "learning_rate": 1.7879427404457967e-06, "loss": 0.5455, "step": 1378 }, { "epoch": 0.23516371077762618, "grad_norm": 0.5912104249000549, "learning_rate": 1.7876025304374441e-06, "loss": 0.5356, "step": 1379 }, { "epoch": 0.23533424283765347, "grad_norm": 0.3976776599884033, "learning_rate": 1.7872620801656675e-06, "loss": 0.5336, "step": 1380 }, { "epoch": 0.23550477489768076, "grad_norm": 0.6555095314979553, "learning_rate": 1.7869213897343238e-06, "loss": 0.5451, "step": 1381 }, { "epoch": 0.23567530695770805, "grad_norm": 0.7012636661529541, "learning_rate": 1.7865804592473424e-06, "loss": 0.5406, "step": 1382 }, { "epoch": 0.23584583901773534, "grad_norm": 0.5283592343330383, "learning_rate": 1.786239288808727e-06, "loss": 0.5485, "step": 1383 }, { "epoch": 0.23601637107776263, "grad_norm": 0.4392828345298767, "learning_rate": 1.7858978785225537e-06, "loss": 0.5427, "step": 1384 }, { "epoch": 0.2361869031377899, "grad_norm": 0.5758363008499146, "learning_rate": 1.785556228492972e-06, "loss": 0.5465, "step": 1385 }, { "epoch": 0.23635743519781718, "grad_norm": 0.5296410322189331, "learning_rate": 1.7852143388242048e-06, "loss": 0.531, "step": 1386 }, { "epoch": 0.23652796725784447, "grad_norm": 0.3970211148262024, "learning_rate": 1.7848722096205473e-06, "loss": 0.5535, "step": 1387 }, { "epoch": 0.23669849931787176, "grad_norm": 0.6295623183250427, "learning_rate": 1.7845298409863684e-06, "loss": 0.5366, "step": 1388 }, { "epoch": 0.23686903137789905, "grad_norm": 0.45526471734046936, "learning_rate": 1.7841872330261104e-06, "loss": 0.5509, "step": 1389 }, { "epoch": 0.23703956343792634, "grad_norm": 0.5751361846923828, "learning_rate": 1.7838443858442876e-06, "loss": 0.5459, "step": 1390 }, { "epoch": 0.2372100954979536, "grad_norm": 0.5891697406768799, "learning_rate": 1.7835012995454885e-06, "loss": 0.5561, "step": 1391 }, { "epoch": 0.2373806275579809, "grad_norm": 0.5361023545265198, "learning_rate": 1.7831579742343731e-06, "loss": 0.537, "step": 1392 }, { "epoch": 0.23755115961800818, "grad_norm": 0.5258536338806152, "learning_rate": 1.7828144100156757e-06, "loss": 0.5172, "step": 1393 }, { "epoch": 0.23772169167803547, "grad_norm": 0.6109665632247925, "learning_rate": 1.7824706069942026e-06, "loss": 0.5425, "step": 1394 }, { "epoch": 0.23789222373806276, "grad_norm": 0.4555569291114807, "learning_rate": 1.782126565274833e-06, "loss": 0.5563, "step": 1395 }, { "epoch": 0.23806275579809005, "grad_norm": 0.4836210012435913, "learning_rate": 1.7817822849625195e-06, "loss": 0.5375, "step": 1396 }, { "epoch": 0.23823328785811732, "grad_norm": 0.5767562985420227, "learning_rate": 1.781437766162287e-06, "loss": 0.5467, "step": 1397 }, { "epoch": 0.2384038199181446, "grad_norm": 0.40231022238731384, "learning_rate": 1.781093008979233e-06, "loss": 0.5388, "step": 1398 }, { "epoch": 0.2385743519781719, "grad_norm": 0.5109986066818237, "learning_rate": 1.7807480135185285e-06, "loss": 0.5251, "step": 1399 }, { "epoch": 0.23874488403819918, "grad_norm": 0.484244167804718, "learning_rate": 1.7804027798854166e-06, "loss": 0.544, "step": 1400 }, { "epoch": 0.23891541609822647, "grad_norm": 0.515340268611908, "learning_rate": 1.7800573081852125e-06, "loss": 0.5219, "step": 1401 }, { "epoch": 0.23908594815825376, "grad_norm": 0.5590576529502869, "learning_rate": 1.7797115985233052e-06, "loss": 0.5427, "step": 1402 }, { "epoch": 0.23925648021828103, "grad_norm": 0.45182856917381287, "learning_rate": 1.7793656510051558e-06, "loss": 0.5387, "step": 1403 }, { "epoch": 0.23942701227830832, "grad_norm": 0.49502822756767273, "learning_rate": 1.7790194657362974e-06, "loss": 0.5385, "step": 1404 }, { "epoch": 0.2395975443383356, "grad_norm": 0.5761579871177673, "learning_rate": 1.7786730428223365e-06, "loss": 0.5401, "step": 1405 }, { "epoch": 0.2397680763983629, "grad_norm": 0.4353269934654236, "learning_rate": 1.7783263823689523e-06, "loss": 0.528, "step": 1406 }, { "epoch": 0.23993860845839018, "grad_norm": 0.3897313177585602, "learning_rate": 1.7779794844818947e-06, "loss": 0.5276, "step": 1407 }, { "epoch": 0.24010914051841747, "grad_norm": 0.4709970951080322, "learning_rate": 1.7776323492669878e-06, "loss": 0.5405, "step": 1408 }, { "epoch": 0.24027967257844474, "grad_norm": 0.5096486210823059, "learning_rate": 1.7772849768301277e-06, "loss": 0.5343, "step": 1409 }, { "epoch": 0.24045020463847203, "grad_norm": 0.38168245553970337, "learning_rate": 1.7769373672772824e-06, "loss": 0.5346, "step": 1410 }, { "epoch": 0.24062073669849932, "grad_norm": 0.34209996461868286, "learning_rate": 1.776589520714493e-06, "loss": 0.54, "step": 1411 }, { "epoch": 0.2407912687585266, "grad_norm": 0.44546452164649963, "learning_rate": 1.7762414372478718e-06, "loss": 0.536, "step": 1412 }, { "epoch": 0.2409618008185539, "grad_norm": 0.37246155738830566, "learning_rate": 1.7758931169836041e-06, "loss": 0.528, "step": 1413 }, { "epoch": 0.24113233287858118, "grad_norm": 0.3510386347770691, "learning_rate": 1.775544560027948e-06, "loss": 0.5315, "step": 1414 }, { "epoch": 0.24130286493860845, "grad_norm": 0.3805161714553833, "learning_rate": 1.7751957664872318e-06, "loss": 0.5311, "step": 1415 }, { "epoch": 0.24147339699863574, "grad_norm": 0.35687196254730225, "learning_rate": 1.7748467364678587e-06, "loss": 0.5518, "step": 1416 }, { "epoch": 0.24164392905866303, "grad_norm": 0.37188974022865295, "learning_rate": 1.774497470076302e-06, "loss": 0.5411, "step": 1417 }, { "epoch": 0.24181446111869032, "grad_norm": 0.3701164126396179, "learning_rate": 1.7741479674191078e-06, "loss": 0.5432, "step": 1418 }, { "epoch": 0.2419849931787176, "grad_norm": 0.4429377317428589, "learning_rate": 1.773798228602894e-06, "loss": 0.5277, "step": 1419 }, { "epoch": 0.2421555252387449, "grad_norm": 0.4325040876865387, "learning_rate": 1.7734482537343514e-06, "loss": 0.5358, "step": 1420 }, { "epoch": 0.24232605729877216, "grad_norm": 0.4019585847854614, "learning_rate": 1.7730980429202415e-06, "loss": 0.5328, "step": 1421 }, { "epoch": 0.24249658935879945, "grad_norm": 0.4692414104938507, "learning_rate": 1.7727475962673986e-06, "loss": 0.5395, "step": 1422 }, { "epoch": 0.24266712141882674, "grad_norm": 0.4960983395576477, "learning_rate": 1.7723969138827293e-06, "loss": 0.5354, "step": 1423 }, { "epoch": 0.24283765347885403, "grad_norm": 0.4993603527545929, "learning_rate": 1.7720459958732112e-06, "loss": 0.5208, "step": 1424 }, { "epoch": 0.24300818553888132, "grad_norm": 0.5154334306716919, "learning_rate": 1.771694842345894e-06, "loss": 0.5344, "step": 1425 }, { "epoch": 0.2431787175989086, "grad_norm": 0.4017626941204071, "learning_rate": 1.7713434534078998e-06, "loss": 0.5361, "step": 1426 }, { "epoch": 0.24334924965893587, "grad_norm": 0.49590057134628296, "learning_rate": 1.770991829166422e-06, "loss": 0.5545, "step": 1427 }, { "epoch": 0.24351978171896316, "grad_norm": 0.6026977896690369, "learning_rate": 1.770639969728726e-06, "loss": 0.5229, "step": 1428 }, { "epoch": 0.24369031377899045, "grad_norm": 0.4571629762649536, "learning_rate": 1.7702878752021488e-06, "loss": 0.5439, "step": 1429 }, { "epoch": 0.24386084583901774, "grad_norm": 0.3930206596851349, "learning_rate": 1.769935545694099e-06, "loss": 0.5346, "step": 1430 }, { "epoch": 0.24403137789904503, "grad_norm": 0.49052783846855164, "learning_rate": 1.7695829813120575e-06, "loss": 0.5547, "step": 1431 }, { "epoch": 0.24420190995907232, "grad_norm": 0.45432084798812866, "learning_rate": 1.7692301821635764e-06, "loss": 0.5354, "step": 1432 }, { "epoch": 0.24437244201909958, "grad_norm": 0.4165271520614624, "learning_rate": 1.768877148356279e-06, "loss": 0.536, "step": 1433 }, { "epoch": 0.24454297407912687, "grad_norm": 0.34783709049224854, "learning_rate": 1.7685238799978607e-06, "loss": 0.5441, "step": 1434 }, { "epoch": 0.24471350613915416, "grad_norm": 0.4623855948448181, "learning_rate": 1.7681703771960886e-06, "loss": 0.5444, "step": 1435 }, { "epoch": 0.24488403819918145, "grad_norm": 0.44057196378707886, "learning_rate": 1.7678166400588013e-06, "loss": 0.5268, "step": 1436 }, { "epoch": 0.24505457025920874, "grad_norm": 0.5099006295204163, "learning_rate": 1.7674626686939081e-06, "loss": 0.5346, "step": 1437 }, { "epoch": 0.24522510231923603, "grad_norm": 0.48367926478385925, "learning_rate": 1.767108463209391e-06, "loss": 0.5404, "step": 1438 }, { "epoch": 0.2453956343792633, "grad_norm": 0.4167617857456207, "learning_rate": 1.7667540237133025e-06, "loss": 0.5279, "step": 1439 }, { "epoch": 0.24556616643929058, "grad_norm": 0.37517234683036804, "learning_rate": 1.7663993503137661e-06, "loss": 0.5471, "step": 1440 }, { "epoch": 0.24573669849931787, "grad_norm": 0.4140487313270569, "learning_rate": 1.7660444431189781e-06, "loss": 0.548, "step": 1441 }, { "epoch": 0.24590723055934516, "grad_norm": 0.4195196032524109, "learning_rate": 1.7656893022372055e-06, "loss": 0.5442, "step": 1442 }, { "epoch": 0.24607776261937245, "grad_norm": 0.3868313133716583, "learning_rate": 1.7653339277767855e-06, "loss": 0.5393, "step": 1443 }, { "epoch": 0.24624829467939974, "grad_norm": 0.347720205783844, "learning_rate": 1.764978319846128e-06, "loss": 0.5369, "step": 1444 }, { "epoch": 0.246418826739427, "grad_norm": 0.41931644082069397, "learning_rate": 1.7646224785537137e-06, "loss": 0.5386, "step": 1445 }, { "epoch": 0.2465893587994543, "grad_norm": 0.36928820610046387, "learning_rate": 1.7642664040080941e-06, "loss": 0.5301, "step": 1446 }, { "epoch": 0.24675989085948158, "grad_norm": 0.4465715289115906, "learning_rate": 1.763910096317892e-06, "loss": 0.5563, "step": 1447 }, { "epoch": 0.24693042291950887, "grad_norm": 0.39555251598358154, "learning_rate": 1.763553555591802e-06, "loss": 0.5324, "step": 1448 }, { "epoch": 0.24710095497953616, "grad_norm": 0.3805316090583801, "learning_rate": 1.7631967819385885e-06, "loss": 0.539, "step": 1449 }, { "epoch": 0.24727148703956345, "grad_norm": 0.4752626121044159, "learning_rate": 1.7628397754670881e-06, "loss": 0.5375, "step": 1450 }, { "epoch": 0.2474420190995907, "grad_norm": 0.4983365833759308, "learning_rate": 1.7624825362862084e-06, "loss": 0.5298, "step": 1451 }, { "epoch": 0.247612551159618, "grad_norm": 0.4518239498138428, "learning_rate": 1.7621250645049268e-06, "loss": 0.5432, "step": 1452 }, { "epoch": 0.2477830832196453, "grad_norm": 0.4357149004936218, "learning_rate": 1.761767360232293e-06, "loss": 0.5442, "step": 1453 }, { "epoch": 0.24795361527967258, "grad_norm": 0.4708903431892395, "learning_rate": 1.761409423577427e-06, "loss": 0.5316, "step": 1454 }, { "epoch": 0.24812414733969987, "grad_norm": 0.5494210720062256, "learning_rate": 1.7610512546495196e-06, "loss": 0.5368, "step": 1455 }, { "epoch": 0.24829467939972716, "grad_norm": 0.4802795946598053, "learning_rate": 1.7606928535578328e-06, "loss": 0.5277, "step": 1456 }, { "epoch": 0.24846521145975442, "grad_norm": 0.4052109718322754, "learning_rate": 1.7603342204116992e-06, "loss": 0.5441, "step": 1457 }, { "epoch": 0.2486357435197817, "grad_norm": 0.5663003921508789, "learning_rate": 1.759975355320522e-06, "loss": 0.5519, "step": 1458 }, { "epoch": 0.248806275579809, "grad_norm": 0.4638127088546753, "learning_rate": 1.7596162583937758e-06, "loss": 0.5299, "step": 1459 }, { "epoch": 0.2489768076398363, "grad_norm": 0.4754791855812073, "learning_rate": 1.7592569297410056e-06, "loss": 0.5322, "step": 1460 }, { "epoch": 0.24914733969986358, "grad_norm": 0.43075990676879883, "learning_rate": 1.7588973694718265e-06, "loss": 0.5425, "step": 1461 }, { "epoch": 0.24931787175989087, "grad_norm": 0.567526638507843, "learning_rate": 1.7585375776959248e-06, "loss": 0.5426, "step": 1462 }, { "epoch": 0.24948840381991813, "grad_norm": 0.43639516830444336, "learning_rate": 1.758177554523058e-06, "loss": 0.5346, "step": 1463 }, { "epoch": 0.24965893587994542, "grad_norm": 0.36125215888023376, "learning_rate": 1.757817300063053e-06, "loss": 0.5348, "step": 1464 }, { "epoch": 0.2498294679399727, "grad_norm": 0.4189085066318512, "learning_rate": 1.7574568144258079e-06, "loss": 0.5315, "step": 1465 }, { "epoch": 0.25, "grad_norm": 0.39513495564460754, "learning_rate": 1.7570960977212915e-06, "loss": 0.5408, "step": 1466 }, { "epoch": 0.25017053206002726, "grad_norm": 0.3659518361091614, "learning_rate": 1.756735150059543e-06, "loss": 0.5476, "step": 1467 }, { "epoch": 0.2503410641200546, "grad_norm": 0.401780366897583, "learning_rate": 1.7563739715506713e-06, "loss": 0.5473, "step": 1468 }, { "epoch": 0.25051159618008184, "grad_norm": 0.4212344288825989, "learning_rate": 1.756012562304857e-06, "loss": 0.5269, "step": 1469 }, { "epoch": 0.25068212824010916, "grad_norm": 0.378131628036499, "learning_rate": 1.7556509224323496e-06, "loss": 0.5233, "step": 1470 }, { "epoch": 0.2508526603001364, "grad_norm": 0.3757961392402649, "learning_rate": 1.7552890520434706e-06, "loss": 0.5456, "step": 1471 }, { "epoch": 0.25102319236016374, "grad_norm": 0.47206035256385803, "learning_rate": 1.75492695124861e-06, "loss": 0.55, "step": 1472 }, { "epoch": 0.251193724420191, "grad_norm": 0.34729981422424316, "learning_rate": 1.7545646201582305e-06, "loss": 0.5406, "step": 1473 }, { "epoch": 0.25136425648021826, "grad_norm": 0.47744041681289673, "learning_rate": 1.7542020588828621e-06, "loss": 0.5344, "step": 1474 }, { "epoch": 0.2515347885402456, "grad_norm": 0.4247567653656006, "learning_rate": 1.7538392675331076e-06, "loss": 0.5597, "step": 1475 }, { "epoch": 0.25170532060027284, "grad_norm": 0.5592153072357178, "learning_rate": 1.7534762462196389e-06, "loss": 0.5418, "step": 1476 }, { "epoch": 0.25187585266030016, "grad_norm": 0.5871926546096802, "learning_rate": 1.7531129950531973e-06, "loss": 0.525, "step": 1477 }, { "epoch": 0.2520463847203274, "grad_norm": 0.3953045606613159, "learning_rate": 1.7527495141445957e-06, "loss": 0.5435, "step": 1478 }, { "epoch": 0.2522169167803547, "grad_norm": 0.48583656549453735, "learning_rate": 1.7523858036047159e-06, "loss": 0.557, "step": 1479 }, { "epoch": 0.252387448840382, "grad_norm": 0.3983100354671478, "learning_rate": 1.7520218635445108e-06, "loss": 0.5399, "step": 1480 }, { "epoch": 0.25255798090040926, "grad_norm": 0.38920262455940247, "learning_rate": 1.751657694075002e-06, "loss": 0.5276, "step": 1481 }, { "epoch": 0.2527285129604366, "grad_norm": 0.4583071172237396, "learning_rate": 1.7512932953072826e-06, "loss": 0.5263, "step": 1482 }, { "epoch": 0.25289904502046384, "grad_norm": 0.40535056591033936, "learning_rate": 1.7509286673525145e-06, "loss": 0.5359, "step": 1483 }, { "epoch": 0.25306957708049116, "grad_norm": 0.4464225172996521, "learning_rate": 1.75056381032193e-06, "loss": 0.5446, "step": 1484 }, { "epoch": 0.2532401091405184, "grad_norm": 0.5687764286994934, "learning_rate": 1.7501987243268309e-06, "loss": 0.5421, "step": 1485 }, { "epoch": 0.2534106412005457, "grad_norm": 0.4144216775894165, "learning_rate": 1.7498334094785896e-06, "loss": 0.5363, "step": 1486 }, { "epoch": 0.253581173260573, "grad_norm": 0.5526368021965027, "learning_rate": 1.7494678658886474e-06, "loss": 0.5316, "step": 1487 }, { "epoch": 0.25375170532060026, "grad_norm": 0.4659339189529419, "learning_rate": 1.749102093668516e-06, "loss": 0.5334, "step": 1488 }, { "epoch": 0.2539222373806276, "grad_norm": 0.39899182319641113, "learning_rate": 1.7487360929297768e-06, "loss": 0.5356, "step": 1489 }, { "epoch": 0.25409276944065484, "grad_norm": 0.509774386882782, "learning_rate": 1.7483698637840805e-06, "loss": 0.539, "step": 1490 }, { "epoch": 0.2542633015006821, "grad_norm": 0.5092775225639343, "learning_rate": 1.748003406343148e-06, "loss": 0.5271, "step": 1491 }, { "epoch": 0.2544338335607094, "grad_norm": 0.42660048604011536, "learning_rate": 1.7476367207187697e-06, "loss": 0.5413, "step": 1492 }, { "epoch": 0.2546043656207367, "grad_norm": 0.35999831557273865, "learning_rate": 1.7472698070228051e-06, "loss": 0.5335, "step": 1493 }, { "epoch": 0.254774897680764, "grad_norm": 0.3916492164134979, "learning_rate": 1.746902665367184e-06, "loss": 0.5439, "step": 1494 }, { "epoch": 0.25494542974079126, "grad_norm": 0.36775171756744385, "learning_rate": 1.7465352958639052e-06, "loss": 0.5386, "step": 1495 }, { "epoch": 0.2551159618008186, "grad_norm": 0.4069823622703552, "learning_rate": 1.7461676986250373e-06, "loss": 0.5463, "step": 1496 }, { "epoch": 0.25528649386084584, "grad_norm": 0.3523212671279907, "learning_rate": 1.7457998737627184e-06, "loss": 0.54, "step": 1497 }, { "epoch": 0.2554570259208731, "grad_norm": 0.4446667432785034, "learning_rate": 1.7454318213891557e-06, "loss": 0.5282, "step": 1498 }, { "epoch": 0.2556275579809004, "grad_norm": 0.398403137922287, "learning_rate": 1.7450635416166262e-06, "loss": 0.528, "step": 1499 }, { "epoch": 0.2557980900409277, "grad_norm": 0.4375839829444885, "learning_rate": 1.7446950345574764e-06, "loss": 0.5429, "step": 1500 }, { "epoch": 0.255968622100955, "grad_norm": 0.41334497928619385, "learning_rate": 1.7443263003241213e-06, "loss": 0.5182, "step": 1501 }, { "epoch": 0.25613915416098226, "grad_norm": 0.4163067936897278, "learning_rate": 1.743957339029046e-06, "loss": 0.5474, "step": 1502 }, { "epoch": 0.2563096862210095, "grad_norm": 0.44295674562454224, "learning_rate": 1.743588150784805e-06, "loss": 0.5352, "step": 1503 }, { "epoch": 0.25648021828103684, "grad_norm": 0.3465801775455475, "learning_rate": 1.7432187357040207e-06, "loss": 0.5285, "step": 1504 }, { "epoch": 0.2566507503410641, "grad_norm": 0.39809703826904297, "learning_rate": 1.7428490938993865e-06, "loss": 0.5229, "step": 1505 }, { "epoch": 0.2568212824010914, "grad_norm": 0.42837026715278625, "learning_rate": 1.7424792254836638e-06, "loss": 0.527, "step": 1506 }, { "epoch": 0.2569918144611187, "grad_norm": 0.3859080672264099, "learning_rate": 1.7421091305696835e-06, "loss": 0.5412, "step": 1507 }, { "epoch": 0.257162346521146, "grad_norm": 0.33818215131759644, "learning_rate": 1.7417388092703454e-06, "loss": 0.5227, "step": 1508 }, { "epoch": 0.25733287858117326, "grad_norm": 0.428977906703949, "learning_rate": 1.7413682616986187e-06, "loss": 0.5369, "step": 1509 }, { "epoch": 0.2575034106412005, "grad_norm": 0.3944161832332611, "learning_rate": 1.740997487967541e-06, "loss": 0.5301, "step": 1510 }, { "epoch": 0.25767394270122784, "grad_norm": 0.39399105310440063, "learning_rate": 1.7406264881902197e-06, "loss": 0.5211, "step": 1511 }, { "epoch": 0.2578444747612551, "grad_norm": 0.47329261898994446, "learning_rate": 1.7402552624798306e-06, "loss": 0.5299, "step": 1512 }, { "epoch": 0.2580150068212824, "grad_norm": 0.6779657602310181, "learning_rate": 1.7398838109496188e-06, "loss": 0.5208, "step": 1513 }, { "epoch": 0.2581855388813097, "grad_norm": 0.7127777934074402, "learning_rate": 1.739512133712898e-06, "loss": 0.5359, "step": 1514 }, { "epoch": 0.25835607094133695, "grad_norm": 0.47813621163368225, "learning_rate": 1.7391402308830507e-06, "loss": 0.5411, "step": 1515 }, { "epoch": 0.25852660300136426, "grad_norm": 0.4114779829978943, "learning_rate": 1.738768102573528e-06, "loss": 0.533, "step": 1516 }, { "epoch": 0.2586971350613915, "grad_norm": 0.572684645652771, "learning_rate": 1.738395748897851e-06, "loss": 0.5256, "step": 1517 }, { "epoch": 0.25886766712141884, "grad_norm": 0.40316465497016907, "learning_rate": 1.7380231699696081e-06, "loss": 0.5448, "step": 1518 }, { "epoch": 0.2590381991814461, "grad_norm": 0.41706883907318115, "learning_rate": 1.737650365902457e-06, "loss": 0.5454, "step": 1519 }, { "epoch": 0.2592087312414734, "grad_norm": 0.4692871868610382, "learning_rate": 1.7372773368101244e-06, "loss": 0.5417, "step": 1520 }, { "epoch": 0.2593792633015007, "grad_norm": 0.5374259352684021, "learning_rate": 1.7369040828064048e-06, "loss": 0.5369, "step": 1521 }, { "epoch": 0.25954979536152795, "grad_norm": 0.5214207172393799, "learning_rate": 1.7365306040051623e-06, "loss": 0.5342, "step": 1522 }, { "epoch": 0.25972032742155526, "grad_norm": 0.4177664518356323, "learning_rate": 1.736156900520329e-06, "loss": 0.5433, "step": 1523 }, { "epoch": 0.2598908594815825, "grad_norm": 0.42407530546188354, "learning_rate": 1.7357829724659056e-06, "loss": 0.5325, "step": 1524 }, { "epoch": 0.26006139154160984, "grad_norm": 0.3311309218406677, "learning_rate": 1.7354088199559612e-06, "loss": 0.5361, "step": 1525 }, { "epoch": 0.2602319236016371, "grad_norm": 0.5420066118240356, "learning_rate": 1.735034443104634e-06, "loss": 0.5271, "step": 1526 }, { "epoch": 0.26040245566166437, "grad_norm": 0.497405469417572, "learning_rate": 1.7346598420261298e-06, "loss": 0.5335, "step": 1527 }, { "epoch": 0.2605729877216917, "grad_norm": 0.4364621639251709, "learning_rate": 1.734285016834723e-06, "loss": 0.5524, "step": 1528 }, { "epoch": 0.26074351978171895, "grad_norm": 0.5040315985679626, "learning_rate": 1.7339099676447567e-06, "loss": 0.5319, "step": 1529 }, { "epoch": 0.26091405184174626, "grad_norm": 0.4158141613006592, "learning_rate": 1.7335346945706421e-06, "loss": 0.5485, "step": 1530 }, { "epoch": 0.2610845839017735, "grad_norm": 0.4395507276058197, "learning_rate": 1.7331591977268594e-06, "loss": 0.5188, "step": 1531 }, { "epoch": 0.26125511596180084, "grad_norm": 0.47268790006637573, "learning_rate": 1.7327834772279552e-06, "loss": 0.5307, "step": 1532 }, { "epoch": 0.2614256480218281, "grad_norm": 0.4334191381931305, "learning_rate": 1.7324075331885468e-06, "loss": 0.5362, "step": 1533 }, { "epoch": 0.26159618008185537, "grad_norm": 0.6413484811782837, "learning_rate": 1.7320313657233177e-06, "loss": 0.529, "step": 1534 }, { "epoch": 0.2617667121418827, "grad_norm": 0.3648528754711151, "learning_rate": 1.7316549749470205e-06, "loss": 0.5225, "step": 1535 }, { "epoch": 0.26193724420190995, "grad_norm": 0.4816478490829468, "learning_rate": 1.7312783609744755e-06, "loss": 0.5444, "step": 1536 }, { "epoch": 0.26210777626193726, "grad_norm": 0.470768004655838, "learning_rate": 1.7309015239205719e-06, "loss": 0.5327, "step": 1537 }, { "epoch": 0.2622783083219645, "grad_norm": 0.5041337013244629, "learning_rate": 1.730524463900266e-06, "loss": 0.539, "step": 1538 }, { "epoch": 0.2624488403819918, "grad_norm": 0.46768811345100403, "learning_rate": 1.7301471810285824e-06, "loss": 0.5355, "step": 1539 }, { "epoch": 0.2626193724420191, "grad_norm": 0.4774124026298523, "learning_rate": 1.7297696754206138e-06, "loss": 0.5454, "step": 1540 }, { "epoch": 0.26278990450204637, "grad_norm": 0.4992062449455261, "learning_rate": 1.7293919471915207e-06, "loss": 0.5319, "step": 1541 }, { "epoch": 0.2629604365620737, "grad_norm": 0.40920349955558777, "learning_rate": 1.7290139964565322e-06, "loss": 0.5432, "step": 1542 }, { "epoch": 0.26313096862210095, "grad_norm": 0.39942824840545654, "learning_rate": 1.7286358233309441e-06, "loss": 0.5403, "step": 1543 }, { "epoch": 0.26330150068212826, "grad_norm": 0.40174806118011475, "learning_rate": 1.7282574279301206e-06, "loss": 0.5395, "step": 1544 }, { "epoch": 0.2634720327421555, "grad_norm": 0.5135958790779114, "learning_rate": 1.7278788103694944e-06, "loss": 0.5331, "step": 1545 }, { "epoch": 0.2636425648021828, "grad_norm": 0.5391552448272705, "learning_rate": 1.7274999707645646e-06, "loss": 0.5385, "step": 1546 }, { "epoch": 0.2638130968622101, "grad_norm": 0.37942448258399963, "learning_rate": 1.7271209092308994e-06, "loss": 0.5223, "step": 1547 }, { "epoch": 0.26398362892223737, "grad_norm": 0.4890262484550476, "learning_rate": 1.7267416258841332e-06, "loss": 0.5287, "step": 1548 }, { "epoch": 0.2641541609822647, "grad_norm": 0.43532058596611023, "learning_rate": 1.7263621208399699e-06, "loss": 0.5188, "step": 1549 }, { "epoch": 0.26432469304229195, "grad_norm": 0.5018872618675232, "learning_rate": 1.7259823942141794e-06, "loss": 0.5317, "step": 1550 }, { "epoch": 0.2644952251023192, "grad_norm": 0.5198800563812256, "learning_rate": 1.7256024461226004e-06, "loss": 0.5359, "step": 1551 }, { "epoch": 0.2646657571623465, "grad_norm": 0.5587589740753174, "learning_rate": 1.7252222766811379e-06, "loss": 0.551, "step": 1552 }, { "epoch": 0.2648362892223738, "grad_norm": 0.5261849164962769, "learning_rate": 1.7248418860057656e-06, "loss": 0.54, "step": 1553 }, { "epoch": 0.2650068212824011, "grad_norm": 0.45196717977523804, "learning_rate": 1.7244612742125238e-06, "loss": 0.5316, "step": 1554 }, { "epoch": 0.26517735334242837, "grad_norm": 0.6772377490997314, "learning_rate": 1.7240804414175214e-06, "loss": 0.5185, "step": 1555 }, { "epoch": 0.2653478854024557, "grad_norm": 0.4614553451538086, "learning_rate": 1.7236993877369334e-06, "loss": 0.5206, "step": 1556 }, { "epoch": 0.26551841746248295, "grad_norm": 0.4851749837398529, "learning_rate": 1.7233181132870032e-06, "loss": 0.5461, "step": 1557 }, { "epoch": 0.2656889495225102, "grad_norm": 0.5044141411781311, "learning_rate": 1.7229366181840405e-06, "loss": 0.5424, "step": 1558 }, { "epoch": 0.2658594815825375, "grad_norm": 0.4354725480079651, "learning_rate": 1.7225549025444234e-06, "loss": 0.5245, "step": 1559 }, { "epoch": 0.2660300136425648, "grad_norm": 0.5553825497627258, "learning_rate": 1.7221729664845968e-06, "loss": 0.5199, "step": 1560 }, { "epoch": 0.2662005457025921, "grad_norm": 0.4498416781425476, "learning_rate": 1.7217908101210727e-06, "loss": 0.5315, "step": 1561 }, { "epoch": 0.26637107776261937, "grad_norm": 0.5130297541618347, "learning_rate": 1.7214084335704309e-06, "loss": 0.5289, "step": 1562 }, { "epoch": 0.26654160982264663, "grad_norm": 0.43296122550964355, "learning_rate": 1.7210258369493173e-06, "loss": 0.5363, "step": 1563 }, { "epoch": 0.26671214188267395, "grad_norm": 0.45668303966522217, "learning_rate": 1.7206430203744456e-06, "loss": 0.5387, "step": 1564 }, { "epoch": 0.2668826739427012, "grad_norm": 0.549055278301239, "learning_rate": 1.720259983962597e-06, "loss": 0.5297, "step": 1565 }, { "epoch": 0.2670532060027285, "grad_norm": 0.3640718460083008, "learning_rate": 1.7198767278306191e-06, "loss": 0.5298, "step": 1566 }, { "epoch": 0.2672237380627558, "grad_norm": 0.5561659932136536, "learning_rate": 1.7194932520954268e-06, "loss": 0.5207, "step": 1567 }, { "epoch": 0.2673942701227831, "grad_norm": 0.5278323888778687, "learning_rate": 1.7191095568740018e-06, "loss": 0.5263, "step": 1568 }, { "epoch": 0.26756480218281037, "grad_norm": 0.41605237126350403, "learning_rate": 1.718725642283393e-06, "loss": 0.5318, "step": 1569 }, { "epoch": 0.26773533424283763, "grad_norm": 0.3954503536224365, "learning_rate": 1.7183415084407163e-06, "loss": 0.5242, "step": 1570 }, { "epoch": 0.26790586630286495, "grad_norm": 0.44009891152381897, "learning_rate": 1.7179571554631536e-06, "loss": 0.5363, "step": 1571 }, { "epoch": 0.2680763983628922, "grad_norm": 0.580356240272522, "learning_rate": 1.7175725834679553e-06, "loss": 0.5163, "step": 1572 }, { "epoch": 0.2682469304229195, "grad_norm": 0.4962151348590851, "learning_rate": 1.717187792572437e-06, "loss": 0.5246, "step": 1573 }, { "epoch": 0.2684174624829468, "grad_norm": 0.39382004737854004, "learning_rate": 1.716802782893982e-06, "loss": 0.5356, "step": 1574 }, { "epoch": 0.2685879945429741, "grad_norm": 0.4842481017112732, "learning_rate": 1.7164175545500403e-06, "loss": 0.523, "step": 1575 }, { "epoch": 0.26875852660300137, "grad_norm": 0.5253110527992249, "learning_rate": 1.7160321076581277e-06, "loss": 0.5354, "step": 1576 }, { "epoch": 0.26892905866302863, "grad_norm": 0.39040082693099976, "learning_rate": 1.7156464423358279e-06, "loss": 0.5269, "step": 1577 }, { "epoch": 0.26909959072305595, "grad_norm": 0.39658838510513306, "learning_rate": 1.7152605587007906e-06, "loss": 0.5282, "step": 1578 }, { "epoch": 0.2692701227830832, "grad_norm": 0.5355466604232788, "learning_rate": 1.7148744568707323e-06, "loss": 0.5382, "step": 1579 }, { "epoch": 0.2694406548431105, "grad_norm": 0.38271233439445496, "learning_rate": 1.7144881369634354e-06, "loss": 0.5288, "step": 1580 }, { "epoch": 0.2696111869031378, "grad_norm": 0.39254850149154663, "learning_rate": 1.7141015990967502e-06, "loss": 0.5249, "step": 1581 }, { "epoch": 0.26978171896316505, "grad_norm": 0.5330004096031189, "learning_rate": 1.7137148433885918e-06, "loss": 0.5234, "step": 1582 }, { "epoch": 0.26995225102319237, "grad_norm": 0.5613977909088135, "learning_rate": 1.7133278699569433e-06, "loss": 0.5135, "step": 1583 }, { "epoch": 0.27012278308321963, "grad_norm": 0.36673110723495483, "learning_rate": 1.7129406789198531e-06, "loss": 0.5292, "step": 1584 }, { "epoch": 0.27029331514324695, "grad_norm": 0.405582994222641, "learning_rate": 1.7125532703954367e-06, "loss": 0.5271, "step": 1585 }, { "epoch": 0.2704638472032742, "grad_norm": 0.49316197633743286, "learning_rate": 1.712165644501876e-06, "loss": 0.5293, "step": 1586 }, { "epoch": 0.2706343792633015, "grad_norm": 0.405775785446167, "learning_rate": 1.7117778013574177e-06, "loss": 0.5323, "step": 1587 }, { "epoch": 0.2708049113233288, "grad_norm": 0.4072016775608063, "learning_rate": 1.7113897410803768e-06, "loss": 0.5351, "step": 1588 }, { "epoch": 0.27097544338335605, "grad_norm": 0.5345101952552795, "learning_rate": 1.7110014637891341e-06, "loss": 0.5429, "step": 1589 }, { "epoch": 0.27114597544338337, "grad_norm": 0.4184858798980713, "learning_rate": 1.7106129696021353e-06, "loss": 0.5286, "step": 1590 }, { "epoch": 0.27131650750341063, "grad_norm": 0.40997418761253357, "learning_rate": 1.7102242586378936e-06, "loss": 0.5256, "step": 1591 }, { "epoch": 0.27148703956343795, "grad_norm": 0.5290420055389404, "learning_rate": 1.7098353310149877e-06, "loss": 0.5406, "step": 1592 }, { "epoch": 0.2716575716234652, "grad_norm": 0.3918258845806122, "learning_rate": 1.7094461868520626e-06, "loss": 0.5471, "step": 1593 }, { "epoch": 0.2718281036834925, "grad_norm": 0.41981926560401917, "learning_rate": 1.7090568262678294e-06, "loss": 0.5435, "step": 1594 }, { "epoch": 0.2719986357435198, "grad_norm": 0.5090052485466003, "learning_rate": 1.7086672493810654e-06, "loss": 0.5339, "step": 1595 }, { "epoch": 0.27216916780354705, "grad_norm": 0.401683509349823, "learning_rate": 1.7082774563106129e-06, "loss": 0.5197, "step": 1596 }, { "epoch": 0.27233969986357437, "grad_norm": 0.3491285443305969, "learning_rate": 1.7078874471753819e-06, "loss": 0.5388, "step": 1597 }, { "epoch": 0.27251023192360163, "grad_norm": 0.4440529942512512, "learning_rate": 1.7074972220943466e-06, "loss": 0.5343, "step": 1598 }, { "epoch": 0.27268076398362895, "grad_norm": 0.4229428172111511, "learning_rate": 1.7071067811865479e-06, "loss": 0.5364, "step": 1599 }, { "epoch": 0.2728512960436562, "grad_norm": 0.4363120496273041, "learning_rate": 1.7067161245710924e-06, "loss": 0.5367, "step": 1600 }, { "epoch": 0.2730218281036835, "grad_norm": 0.4940813183784485, "learning_rate": 1.7063252523671523e-06, "loss": 0.5412, "step": 1601 }, { "epoch": 0.2731923601637108, "grad_norm": 0.4062732458114624, "learning_rate": 1.7059341646939666e-06, "loss": 0.5332, "step": 1602 }, { "epoch": 0.27336289222373805, "grad_norm": 0.45208001136779785, "learning_rate": 1.7055428616708384e-06, "loss": 0.5463, "step": 1603 }, { "epoch": 0.27353342428376537, "grad_norm": 0.44971469044685364, "learning_rate": 1.7051513434171375e-06, "loss": 0.5347, "step": 1604 }, { "epoch": 0.27370395634379263, "grad_norm": 0.4267110228538513, "learning_rate": 1.7047596100522994e-06, "loss": 0.5314, "step": 1605 }, { "epoch": 0.2738744884038199, "grad_norm": 0.42136669158935547, "learning_rate": 1.7043676616958247e-06, "loss": 0.5323, "step": 1606 }, { "epoch": 0.2740450204638472, "grad_norm": 0.53708815574646, "learning_rate": 1.7039754984672799e-06, "loss": 0.5391, "step": 1607 }, { "epoch": 0.2742155525238745, "grad_norm": 0.3569575846195221, "learning_rate": 1.7035831204862973e-06, "loss": 0.5404, "step": 1608 }, { "epoch": 0.2743860845839018, "grad_norm": 0.45229849219322205, "learning_rate": 1.703190527872574e-06, "loss": 0.5329, "step": 1609 }, { "epoch": 0.27455661664392905, "grad_norm": 0.5555593371391296, "learning_rate": 1.7027977207458733e-06, "loss": 0.52, "step": 1610 }, { "epoch": 0.27472714870395637, "grad_norm": 0.45333659648895264, "learning_rate": 1.702404699226024e-06, "loss": 0.5447, "step": 1611 }, { "epoch": 0.27489768076398363, "grad_norm": 0.37671902775764465, "learning_rate": 1.7020114634329191e-06, "loss": 0.5257, "step": 1612 }, { "epoch": 0.2750682128240109, "grad_norm": 0.5203356146812439, "learning_rate": 1.7016180134865183e-06, "loss": 0.5417, "step": 1613 }, { "epoch": 0.2752387448840382, "grad_norm": 0.44975242018699646, "learning_rate": 1.7012243495068463e-06, "loss": 0.5354, "step": 1614 }, { "epoch": 0.2754092769440655, "grad_norm": 0.4513874053955078, "learning_rate": 1.7008304716139924e-06, "loss": 0.5252, "step": 1615 }, { "epoch": 0.2755798090040928, "grad_norm": 0.3808591067790985, "learning_rate": 1.7004363799281122e-06, "loss": 0.5302, "step": 1616 }, { "epoch": 0.27575034106412005, "grad_norm": 0.4400796890258789, "learning_rate": 1.7000420745694257e-06, "loss": 0.5325, "step": 1617 }, { "epoch": 0.2759208731241473, "grad_norm": 0.4776574373245239, "learning_rate": 1.6996475556582185e-06, "loss": 0.5363, "step": 1618 }, { "epoch": 0.27609140518417463, "grad_norm": 0.3435958921909332, "learning_rate": 1.6992528233148416e-06, "loss": 0.5414, "step": 1619 }, { "epoch": 0.2762619372442019, "grad_norm": 0.41727060079574585, "learning_rate": 1.6988578776597096e-06, "loss": 0.5581, "step": 1620 }, { "epoch": 0.2764324693042292, "grad_norm": 0.37507522106170654, "learning_rate": 1.6984627188133047e-06, "loss": 0.5399, "step": 1621 }, { "epoch": 0.2766030013642565, "grad_norm": 0.4046799838542938, "learning_rate": 1.6980673468961719e-06, "loss": 0.5316, "step": 1622 }, { "epoch": 0.2767735334242838, "grad_norm": 0.569532573223114, "learning_rate": 1.6976717620289223e-06, "loss": 0.5307, "step": 1623 }, { "epoch": 0.27694406548431105, "grad_norm": 0.42097124457359314, "learning_rate": 1.6972759643322316e-06, "loss": 0.5297, "step": 1624 }, { "epoch": 0.2771145975443383, "grad_norm": 0.5242999792098999, "learning_rate": 1.6968799539268408e-06, "loss": 0.5553, "step": 1625 }, { "epoch": 0.27728512960436563, "grad_norm": 0.41940924525260925, "learning_rate": 1.6964837309335555e-06, "loss": 0.5299, "step": 1626 }, { "epoch": 0.2774556616643929, "grad_norm": 0.48386019468307495, "learning_rate": 1.696087295473246e-06, "loss": 0.5394, "step": 1627 }, { "epoch": 0.2776261937244202, "grad_norm": 0.601997971534729, "learning_rate": 1.6956906476668474e-06, "loss": 0.5269, "step": 1628 }, { "epoch": 0.2777967257844475, "grad_norm": 0.35025733709335327, "learning_rate": 1.6952937876353597e-06, "loss": 0.5362, "step": 1629 }, { "epoch": 0.27796725784447474, "grad_norm": 0.4714794456958771, "learning_rate": 1.6948967154998487e-06, "loss": 0.533, "step": 1630 }, { "epoch": 0.27813778990450205, "grad_norm": 0.35530272126197815, "learning_rate": 1.6944994313814429e-06, "loss": 0.5377, "step": 1631 }, { "epoch": 0.2783083219645293, "grad_norm": 0.5158945918083191, "learning_rate": 1.694101935401337e-06, "loss": 0.5358, "step": 1632 }, { "epoch": 0.27847885402455663, "grad_norm": 0.36836668848991394, "learning_rate": 1.6937042276807894e-06, "loss": 0.5408, "step": 1633 }, { "epoch": 0.2786493860845839, "grad_norm": 0.5216159224510193, "learning_rate": 1.6933063083411236e-06, "loss": 0.5266, "step": 1634 }, { "epoch": 0.2788199181446112, "grad_norm": 0.746419370174408, "learning_rate": 1.6929081775037278e-06, "loss": 0.5368, "step": 1635 }, { "epoch": 0.2789904502046385, "grad_norm": 0.4515080153942108, "learning_rate": 1.6925098352900545e-06, "loss": 0.5255, "step": 1636 }, { "epoch": 0.27916098226466574, "grad_norm": 0.5471440553665161, "learning_rate": 1.6921112818216203e-06, "loss": 0.5347, "step": 1637 }, { "epoch": 0.27933151432469305, "grad_norm": 0.4010627865791321, "learning_rate": 1.6917125172200067e-06, "loss": 0.545, "step": 1638 }, { "epoch": 0.2795020463847203, "grad_norm": 0.5758441686630249, "learning_rate": 1.6913135416068598e-06, "loss": 0.543, "step": 1639 }, { "epoch": 0.27967257844474763, "grad_norm": 0.39412951469421387, "learning_rate": 1.6909143551038897e-06, "loss": 0.5276, "step": 1640 }, { "epoch": 0.2798431105047749, "grad_norm": 0.6113039255142212, "learning_rate": 1.6905149578328704e-06, "loss": 0.5461, "step": 1641 }, { "epoch": 0.28001364256480216, "grad_norm": 0.5534077286720276, "learning_rate": 1.6901153499156416e-06, "loss": 0.5343, "step": 1642 }, { "epoch": 0.2801841746248295, "grad_norm": 0.3831448256969452, "learning_rate": 1.689715531474106e-06, "loss": 0.542, "step": 1643 }, { "epoch": 0.28035470668485674, "grad_norm": 0.5919682383537292, "learning_rate": 1.6893155026302302e-06, "loss": 0.526, "step": 1644 }, { "epoch": 0.28052523874488405, "grad_norm": 0.3408511281013489, "learning_rate": 1.6889152635060467e-06, "loss": 0.5294, "step": 1645 }, { "epoch": 0.2806957708049113, "grad_norm": 0.49439018964767456, "learning_rate": 1.6885148142236505e-06, "loss": 0.5378, "step": 1646 }, { "epoch": 0.28086630286493863, "grad_norm": 0.4036789834499359, "learning_rate": 1.6881141549052015e-06, "loss": 0.5322, "step": 1647 }, { "epoch": 0.2810368349249659, "grad_norm": 0.4894874095916748, "learning_rate": 1.6877132856729238e-06, "loss": 0.5385, "step": 1648 }, { "epoch": 0.28120736698499316, "grad_norm": 0.713665783405304, "learning_rate": 1.6873122066491048e-06, "loss": 0.5356, "step": 1649 }, { "epoch": 0.2813778990450205, "grad_norm": 0.4714016020298004, "learning_rate": 1.6869109179560965e-06, "loss": 0.5326, "step": 1650 }, { "epoch": 0.28154843110504774, "grad_norm": 0.5672765374183655, "learning_rate": 1.6865094197163148e-06, "loss": 0.5237, "step": 1651 }, { "epoch": 0.28171896316507505, "grad_norm": 0.4787973463535309, "learning_rate": 1.6861077120522391e-06, "loss": 0.5304, "step": 1652 }, { "epoch": 0.2818894952251023, "grad_norm": 0.5834455490112305, "learning_rate": 1.6857057950864134e-06, "loss": 0.5387, "step": 1653 }, { "epoch": 0.2820600272851296, "grad_norm": 0.44850626587867737, "learning_rate": 1.6853036689414449e-06, "loss": 0.534, "step": 1654 }, { "epoch": 0.2822305593451569, "grad_norm": 0.4338832497596741, "learning_rate": 1.6849013337400053e-06, "loss": 0.5214, "step": 1655 }, { "epoch": 0.28240109140518416, "grad_norm": 0.4489261507987976, "learning_rate": 1.684498789604829e-06, "loss": 0.5397, "step": 1656 }, { "epoch": 0.2825716234652115, "grad_norm": 0.47586989402770996, "learning_rate": 1.6840960366587152e-06, "loss": 0.5275, "step": 1657 }, { "epoch": 0.28274215552523874, "grad_norm": 0.4325336515903473, "learning_rate": 1.6836930750245265e-06, "loss": 0.5262, "step": 1658 }, { "epoch": 0.28291268758526605, "grad_norm": 0.39559781551361084, "learning_rate": 1.6832899048251887e-06, "loss": 0.5303, "step": 1659 }, { "epoch": 0.2830832196452933, "grad_norm": 0.44162189960479736, "learning_rate": 1.6828865261836919e-06, "loss": 0.5194, "step": 1660 }, { "epoch": 0.2832537517053206, "grad_norm": 0.4153518080711365, "learning_rate": 1.6824829392230888e-06, "loss": 0.524, "step": 1661 }, { "epoch": 0.2834242837653479, "grad_norm": 0.4147271513938904, "learning_rate": 1.682079144066497e-06, "loss": 0.5323, "step": 1662 }, { "epoch": 0.28359481582537516, "grad_norm": 0.5544766783714294, "learning_rate": 1.6816751408370968e-06, "loss": 0.5238, "step": 1663 }, { "epoch": 0.2837653478854025, "grad_norm": 0.4279305338859558, "learning_rate": 1.6812709296581317e-06, "loss": 0.5344, "step": 1664 }, { "epoch": 0.28393587994542974, "grad_norm": 0.3990345001220703, "learning_rate": 1.6808665106529094e-06, "loss": 0.5256, "step": 1665 }, { "epoch": 0.284106412005457, "grad_norm": 0.4134671092033386, "learning_rate": 1.680461883944801e-06, "loss": 0.5281, "step": 1666 }, { "epoch": 0.2842769440654843, "grad_norm": 0.43525567650794983, "learning_rate": 1.6800570496572397e-06, "loss": 0.5351, "step": 1667 }, { "epoch": 0.2844474761255116, "grad_norm": 0.4455080032348633, "learning_rate": 1.6796520079137233e-06, "loss": 0.5217, "step": 1668 }, { "epoch": 0.2846180081855389, "grad_norm": 0.3639788031578064, "learning_rate": 1.6792467588378125e-06, "loss": 0.5241, "step": 1669 }, { "epoch": 0.28478854024556616, "grad_norm": 0.38478967547416687, "learning_rate": 1.6788413025531312e-06, "loss": 0.5272, "step": 1670 }, { "epoch": 0.2849590723055935, "grad_norm": 0.339579701423645, "learning_rate": 1.6784356391833666e-06, "loss": 0.527, "step": 1671 }, { "epoch": 0.28512960436562074, "grad_norm": 0.4318312406539917, "learning_rate": 1.6780297688522691e-06, "loss": 0.5237, "step": 1672 }, { "epoch": 0.285300136425648, "grad_norm": 0.4525909125804901, "learning_rate": 1.677623691683652e-06, "loss": 0.5274, "step": 1673 }, { "epoch": 0.2854706684856753, "grad_norm": 0.36183860898017883, "learning_rate": 1.6772174078013922e-06, "loss": 0.5281, "step": 1674 }, { "epoch": 0.2856412005457026, "grad_norm": 0.4849749505519867, "learning_rate": 1.676810917329429e-06, "loss": 0.5271, "step": 1675 }, { "epoch": 0.2858117326057299, "grad_norm": 0.7375707626342773, "learning_rate": 1.676404220391765e-06, "loss": 0.5312, "step": 1676 }, { "epoch": 0.28598226466575716, "grad_norm": 0.6052662134170532, "learning_rate": 1.675997317112466e-06, "loss": 0.5216, "step": 1677 }, { "epoch": 0.2861527967257845, "grad_norm": 0.36949390172958374, "learning_rate": 1.6755902076156607e-06, "loss": 0.5353, "step": 1678 }, { "epoch": 0.28632332878581174, "grad_norm": 0.4570222795009613, "learning_rate": 1.6751828920255398e-06, "loss": 0.5299, "step": 1679 }, { "epoch": 0.286493860845839, "grad_norm": 0.5875330567359924, "learning_rate": 1.6747753704663586e-06, "loss": 0.5316, "step": 1680 }, { "epoch": 0.2866643929058663, "grad_norm": 0.40268129110336304, "learning_rate": 1.674367643062434e-06, "loss": 0.5277, "step": 1681 }, { "epoch": 0.2868349249658936, "grad_norm": 0.39442431926727295, "learning_rate": 1.6739597099381458e-06, "loss": 0.5238, "step": 1682 }, { "epoch": 0.2870054570259209, "grad_norm": 0.4880406856536865, "learning_rate": 1.673551571217937e-06, "loss": 0.5259, "step": 1683 }, { "epoch": 0.28717598908594816, "grad_norm": 0.4166980981826782, "learning_rate": 1.6731432270263128e-06, "loss": 0.5368, "step": 1684 }, { "epoch": 0.2873465211459754, "grad_norm": 0.33825594186782837, "learning_rate": 1.672734677487841e-06, "loss": 0.5261, "step": 1685 }, { "epoch": 0.28751705320600274, "grad_norm": 0.5011537671089172, "learning_rate": 1.672325922727153e-06, "loss": 0.5179, "step": 1686 }, { "epoch": 0.28768758526603, "grad_norm": 0.4549483358860016, "learning_rate": 1.6719169628689422e-06, "loss": 0.5306, "step": 1687 }, { "epoch": 0.2878581173260573, "grad_norm": 0.35790887475013733, "learning_rate": 1.671507798037964e-06, "loss": 0.5369, "step": 1688 }, { "epoch": 0.2880286493860846, "grad_norm": 0.5589377284049988, "learning_rate": 1.6710984283590371e-06, "loss": 0.5297, "step": 1689 }, { "epoch": 0.2881991814461119, "grad_norm": 0.5511754751205444, "learning_rate": 1.6706888539570427e-06, "loss": 0.5398, "step": 1690 }, { "epoch": 0.28836971350613916, "grad_norm": 0.4057330787181854, "learning_rate": 1.6702790749569239e-06, "loss": 0.5349, "step": 1691 }, { "epoch": 0.2885402455661664, "grad_norm": 0.4141501784324646, "learning_rate": 1.6698690914836864e-06, "loss": 0.5238, "step": 1692 }, { "epoch": 0.28871077762619374, "grad_norm": 0.44292518496513367, "learning_rate": 1.669458903662399e-06, "loss": 0.5206, "step": 1693 }, { "epoch": 0.288881309686221, "grad_norm": 0.4296228587627411, "learning_rate": 1.6690485116181917e-06, "loss": 0.5321, "step": 1694 }, { "epoch": 0.2890518417462483, "grad_norm": 0.38201814889907837, "learning_rate": 1.6686379154762576e-06, "loss": 0.5326, "step": 1695 }, { "epoch": 0.2892223738062756, "grad_norm": 0.5104760527610779, "learning_rate": 1.6682271153618518e-06, "loss": 0.5337, "step": 1696 }, { "epoch": 0.28939290586630284, "grad_norm": 0.4310562014579773, "learning_rate": 1.6678161114002917e-06, "loss": 0.5315, "step": 1697 }, { "epoch": 0.28956343792633016, "grad_norm": 0.3841532766819, "learning_rate": 1.6674049037169564e-06, "loss": 0.5177, "step": 1698 }, { "epoch": 0.2897339699863574, "grad_norm": 0.4063761532306671, "learning_rate": 1.6669934924372882e-06, "loss": 0.5239, "step": 1699 }, { "epoch": 0.28990450204638474, "grad_norm": 0.40579575300216675, "learning_rate": 1.66658187768679e-06, "loss": 0.5307, "step": 1700 }, { "epoch": 0.290075034106412, "grad_norm": 0.3862999379634857, "learning_rate": 1.6661700595910286e-06, "loss": 0.5293, "step": 1701 }, { "epoch": 0.2902455661664393, "grad_norm": 0.3648364841938019, "learning_rate": 1.6657580382756307e-06, "loss": 0.5191, "step": 1702 }, { "epoch": 0.2904160982264666, "grad_norm": 0.48320475220680237, "learning_rate": 1.6653458138662873e-06, "loss": 0.5388, "step": 1703 }, { "epoch": 0.29058663028649384, "grad_norm": 0.39256301522254944, "learning_rate": 1.66493338648875e-06, "loss": 0.5405, "step": 1704 }, { "epoch": 0.29075716234652116, "grad_norm": 0.39358004927635193, "learning_rate": 1.6645207562688322e-06, "loss": 0.5257, "step": 1705 }, { "epoch": 0.2909276944065484, "grad_norm": 0.42495185136795044, "learning_rate": 1.6641079233324093e-06, "loss": 0.5398, "step": 1706 }, { "epoch": 0.29109822646657574, "grad_norm": 0.5367429256439209, "learning_rate": 1.6636948878054193e-06, "loss": 0.5191, "step": 1707 }, { "epoch": 0.291268758526603, "grad_norm": 0.5471576452255249, "learning_rate": 1.6632816498138613e-06, "loss": 0.5229, "step": 1708 }, { "epoch": 0.29143929058663026, "grad_norm": 0.43130508065223694, "learning_rate": 1.662868209483796e-06, "loss": 0.5301, "step": 1709 }, { "epoch": 0.2916098226466576, "grad_norm": 0.3890998959541321, "learning_rate": 1.6624545669413462e-06, "loss": 0.5242, "step": 1710 }, { "epoch": 0.29178035470668484, "grad_norm": 0.3883855640888214, "learning_rate": 1.6620407223126969e-06, "loss": 0.5204, "step": 1711 }, { "epoch": 0.29195088676671216, "grad_norm": 0.3944658637046814, "learning_rate": 1.6616266757240935e-06, "loss": 0.5347, "step": 1712 }, { "epoch": 0.2921214188267394, "grad_norm": 0.45826256275177, "learning_rate": 1.6612124273018442e-06, "loss": 0.5346, "step": 1713 }, { "epoch": 0.29229195088676674, "grad_norm": 0.3530188798904419, "learning_rate": 1.6607979771723176e-06, "loss": 0.5333, "step": 1714 }, { "epoch": 0.292462482946794, "grad_norm": 0.4453744888305664, "learning_rate": 1.660383325461945e-06, "loss": 0.5352, "step": 1715 }, { "epoch": 0.29263301500682126, "grad_norm": 0.4675208628177643, "learning_rate": 1.659968472297219e-06, "loss": 0.5327, "step": 1716 }, { "epoch": 0.2928035470668486, "grad_norm": 0.5276213884353638, "learning_rate": 1.6595534178046928e-06, "loss": 0.5325, "step": 1717 }, { "epoch": 0.29297407912687584, "grad_norm": 0.3921293020248413, "learning_rate": 1.659138162110981e-06, "loss": 0.5468, "step": 1718 }, { "epoch": 0.29314461118690316, "grad_norm": 0.40789127349853516, "learning_rate": 1.6587227053427614e-06, "loss": 0.5253, "step": 1719 }, { "epoch": 0.2933151432469304, "grad_norm": 0.5174045562744141, "learning_rate": 1.6583070476267713e-06, "loss": 0.5344, "step": 1720 }, { "epoch": 0.2934856753069577, "grad_norm": 0.44151756167411804, "learning_rate": 1.6578911890898096e-06, "loss": 0.5242, "step": 1721 }, { "epoch": 0.293656207366985, "grad_norm": 0.38198962807655334, "learning_rate": 1.6574751298587371e-06, "loss": 0.5176, "step": 1722 }, { "epoch": 0.29382673942701226, "grad_norm": 0.4616943299770355, "learning_rate": 1.6570588700604753e-06, "loss": 0.519, "step": 1723 }, { "epoch": 0.2939972714870396, "grad_norm": 0.5245501399040222, "learning_rate": 1.6566424098220065e-06, "loss": 0.5216, "step": 1724 }, { "epoch": 0.29416780354706684, "grad_norm": 0.4258287847042084, "learning_rate": 1.656225749270376e-06, "loss": 0.5366, "step": 1725 }, { "epoch": 0.29433833560709416, "grad_norm": 0.49658289551734924, "learning_rate": 1.6558088885326877e-06, "loss": 0.5326, "step": 1726 }, { "epoch": 0.2945088676671214, "grad_norm": 0.5772252082824707, "learning_rate": 1.6553918277361082e-06, "loss": 0.5313, "step": 1727 }, { "epoch": 0.2946793997271487, "grad_norm": 0.47091925144195557, "learning_rate": 1.654974567007865e-06, "loss": 0.5276, "step": 1728 }, { "epoch": 0.294849931787176, "grad_norm": 0.4931386709213257, "learning_rate": 1.6545571064752455e-06, "loss": 0.5436, "step": 1729 }, { "epoch": 0.29502046384720326, "grad_norm": 0.5427204370498657, "learning_rate": 1.6541394462655996e-06, "loss": 0.5366, "step": 1730 }, { "epoch": 0.2951909959072306, "grad_norm": 0.6340658664703369, "learning_rate": 1.6537215865063367e-06, "loss": 0.518, "step": 1731 }, { "epoch": 0.29536152796725784, "grad_norm": 0.43148940801620483, "learning_rate": 1.653303527324928e-06, "loss": 0.5288, "step": 1732 }, { "epoch": 0.2955320600272851, "grad_norm": 0.47688204050064087, "learning_rate": 1.6528852688489055e-06, "loss": 0.5278, "step": 1733 }, { "epoch": 0.2957025920873124, "grad_norm": 0.6697880029678345, "learning_rate": 1.6524668112058619e-06, "loss": 0.5333, "step": 1734 }, { "epoch": 0.2958731241473397, "grad_norm": 0.5715264081954956, "learning_rate": 1.6520481545234496e-06, "loss": 0.5413, "step": 1735 }, { "epoch": 0.296043656207367, "grad_norm": 0.3737260401248932, "learning_rate": 1.6516292989293836e-06, "loss": 0.5244, "step": 1736 }, { "epoch": 0.29621418826739426, "grad_norm": 0.40294066071510315, "learning_rate": 1.6512102445514376e-06, "loss": 0.5339, "step": 1737 }, { "epoch": 0.2963847203274216, "grad_norm": 0.3857491910457611, "learning_rate": 1.6507909915174477e-06, "loss": 0.5286, "step": 1738 }, { "epoch": 0.29655525238744884, "grad_norm": 0.3942945599555969, "learning_rate": 1.65037153995531e-06, "loss": 0.5307, "step": 1739 }, { "epoch": 0.2967257844474761, "grad_norm": 0.429793119430542, "learning_rate": 1.6499518899929803e-06, "loss": 0.5377, "step": 1740 }, { "epoch": 0.2968963165075034, "grad_norm": 0.427202969789505, "learning_rate": 1.6495320417584758e-06, "loss": 0.5402, "step": 1741 }, { "epoch": 0.2970668485675307, "grad_norm": 0.3815003037452698, "learning_rate": 1.6491119953798748e-06, "loss": 0.5378, "step": 1742 }, { "epoch": 0.297237380627558, "grad_norm": 0.4911755919456482, "learning_rate": 1.6486917509853142e-06, "loss": 0.5348, "step": 1743 }, { "epoch": 0.29740791268758526, "grad_norm": 0.535512387752533, "learning_rate": 1.648271308702993e-06, "loss": 0.5341, "step": 1744 }, { "epoch": 0.2975784447476125, "grad_norm": 0.4352928698062897, "learning_rate": 1.64785066866117e-06, "loss": 0.528, "step": 1745 }, { "epoch": 0.29774897680763984, "grad_norm": 0.4337961673736572, "learning_rate": 1.6474298309881636e-06, "loss": 0.5352, "step": 1746 }, { "epoch": 0.2979195088676671, "grad_norm": 0.5219410061836243, "learning_rate": 1.6470087958123536e-06, "loss": 0.5308, "step": 1747 }, { "epoch": 0.2980900409276944, "grad_norm": 0.45884940028190613, "learning_rate": 1.6465875632621797e-06, "loss": 0.5304, "step": 1748 }, { "epoch": 0.2982605729877217, "grad_norm": 0.4260612726211548, "learning_rate": 1.6461661334661416e-06, "loss": 0.5395, "step": 1749 }, { "epoch": 0.298431105047749, "grad_norm": 0.4612407088279724, "learning_rate": 1.645744506552799e-06, "loss": 0.5343, "step": 1750 }, { "epoch": 0.29860163710777626, "grad_norm": 0.5481771230697632, "learning_rate": 1.6453226826507723e-06, "loss": 0.5383, "step": 1751 }, { "epoch": 0.2987721691678035, "grad_norm": 0.4502001702785492, "learning_rate": 1.6449006618887422e-06, "loss": 0.534, "step": 1752 }, { "epoch": 0.29894270122783084, "grad_norm": 0.48182806372642517, "learning_rate": 1.6444784443954482e-06, "loss": 0.5359, "step": 1753 }, { "epoch": 0.2991132332878581, "grad_norm": 0.4174640476703644, "learning_rate": 1.6440560302996907e-06, "loss": 0.5341, "step": 1754 }, { "epoch": 0.2992837653478854, "grad_norm": 0.5465035438537598, "learning_rate": 1.6436334197303298e-06, "loss": 0.5379, "step": 1755 }, { "epoch": 0.2994542974079127, "grad_norm": 0.41326817870140076, "learning_rate": 1.6432106128162862e-06, "loss": 0.5295, "step": 1756 }, { "epoch": 0.29962482946793995, "grad_norm": 0.3652679920196533, "learning_rate": 1.6427876096865395e-06, "loss": 0.5207, "step": 1757 }, { "epoch": 0.29979536152796726, "grad_norm": 0.3553360104560852, "learning_rate": 1.6423644104701302e-06, "loss": 0.537, "step": 1758 }, { "epoch": 0.2999658935879945, "grad_norm": 0.3630985915660858, "learning_rate": 1.6419410152961574e-06, "loss": 0.537, "step": 1759 }, { "epoch": 0.30013642564802184, "grad_norm": 0.41660600900650024, "learning_rate": 1.641517424293781e-06, "loss": 0.5434, "step": 1760 }, { "epoch": 0.3003069577080491, "grad_norm": 0.4603380858898163, "learning_rate": 1.6410936375922204e-06, "loss": 0.544, "step": 1761 }, { "epoch": 0.3004774897680764, "grad_norm": 0.35790368914604187, "learning_rate": 1.6406696553207544e-06, "loss": 0.5298, "step": 1762 }, { "epoch": 0.3006480218281037, "grad_norm": 0.35639557242393494, "learning_rate": 1.6402454776087214e-06, "loss": 0.534, "step": 1763 }, { "epoch": 0.30081855388813095, "grad_norm": 0.4377691149711609, "learning_rate": 1.6398211045855198e-06, "loss": 0.525, "step": 1764 }, { "epoch": 0.30098908594815826, "grad_norm": 0.33262577652931213, "learning_rate": 1.6393965363806075e-06, "loss": 0.5294, "step": 1765 }, { "epoch": 0.3011596180081855, "grad_norm": 0.4677214026451111, "learning_rate": 1.6389717731235022e-06, "loss": 0.5274, "step": 1766 }, { "epoch": 0.30133015006821284, "grad_norm": 0.5282312035560608, "learning_rate": 1.63854681494378e-06, "loss": 0.5314, "step": 1767 }, { "epoch": 0.3015006821282401, "grad_norm": 0.3520287573337555, "learning_rate": 1.6381216619710781e-06, "loss": 0.5272, "step": 1768 }, { "epoch": 0.30167121418826737, "grad_norm": 0.5878857374191284, "learning_rate": 1.6376963143350915e-06, "loss": 0.5206, "step": 1769 }, { "epoch": 0.3018417462482947, "grad_norm": 0.47125834226608276, "learning_rate": 1.6372707721655757e-06, "loss": 0.5354, "step": 1770 }, { "epoch": 0.30201227830832195, "grad_norm": 0.4633807837963104, "learning_rate": 1.6368450355923453e-06, "loss": 0.5275, "step": 1771 }, { "epoch": 0.30218281036834926, "grad_norm": 0.5724704265594482, "learning_rate": 1.6364191047452738e-06, "loss": 0.532, "step": 1772 }, { "epoch": 0.3023533424283765, "grad_norm": 0.5451125502586365, "learning_rate": 1.6359929797542946e-06, "loss": 0.534, "step": 1773 }, { "epoch": 0.30252387448840384, "grad_norm": 0.5832334756851196, "learning_rate": 1.6355666607493995e-06, "loss": 0.535, "step": 1774 }, { "epoch": 0.3026944065484311, "grad_norm": 0.3607366979122162, "learning_rate": 1.6351401478606406e-06, "loss": 0.5342, "step": 1775 }, { "epoch": 0.30286493860845837, "grad_norm": 0.5791069269180298, "learning_rate": 1.634713441218128e-06, "loss": 0.5453, "step": 1776 }, { "epoch": 0.3030354706684857, "grad_norm": 0.5250142812728882, "learning_rate": 1.6342865409520318e-06, "loss": 0.5248, "step": 1777 }, { "epoch": 0.30320600272851295, "grad_norm": 0.34585243463516235, "learning_rate": 1.6338594471925802e-06, "loss": 0.5264, "step": 1778 }, { "epoch": 0.30337653478854026, "grad_norm": 0.5033993124961853, "learning_rate": 1.6334321600700616e-06, "loss": 0.5311, "step": 1779 }, { "epoch": 0.3035470668485675, "grad_norm": 0.40762490034103394, "learning_rate": 1.6330046797148225e-06, "loss": 0.5348, "step": 1780 }, { "epoch": 0.30371759890859484, "grad_norm": 0.52232426404953, "learning_rate": 1.6325770062572688e-06, "loss": 0.528, "step": 1781 }, { "epoch": 0.3038881309686221, "grad_norm": 0.4686141312122345, "learning_rate": 1.6321491398278647e-06, "loss": 0.5323, "step": 1782 }, { "epoch": 0.30405866302864937, "grad_norm": 0.5295236110687256, "learning_rate": 1.6317210805571349e-06, "loss": 0.5195, "step": 1783 }, { "epoch": 0.3042291950886767, "grad_norm": 0.7101245522499084, "learning_rate": 1.63129282857566e-06, "loss": 0.5341, "step": 1784 }, { "epoch": 0.30439972714870395, "grad_norm": 0.5113368034362793, "learning_rate": 1.6308643840140829e-06, "loss": 0.5356, "step": 1785 }, { "epoch": 0.30457025920873126, "grad_norm": 0.4097968339920044, "learning_rate": 1.6304357470031021e-06, "loss": 0.5405, "step": 1786 }, { "epoch": 0.3047407912687585, "grad_norm": 0.45384204387664795, "learning_rate": 1.6300069176734768e-06, "loss": 0.549, "step": 1787 }, { "epoch": 0.3049113233287858, "grad_norm": 0.5580360889434814, "learning_rate": 1.6295778961560244e-06, "loss": 0.539, "step": 1788 }, { "epoch": 0.3050818553888131, "grad_norm": 0.3851800262928009, "learning_rate": 1.6291486825816207e-06, "loss": 0.5301, "step": 1789 }, { "epoch": 0.30525238744884037, "grad_norm": 0.45921897888183594, "learning_rate": 1.6287192770811997e-06, "loss": 0.5577, "step": 1790 }, { "epoch": 0.3054229195088677, "grad_norm": 0.469555139541626, "learning_rate": 1.6282896797857549e-06, "loss": 0.5355, "step": 1791 }, { "epoch": 0.30559345156889495, "grad_norm": 0.46167778968811035, "learning_rate": 1.6278598908263377e-06, "loss": 0.5261, "step": 1792 }, { "epoch": 0.30576398362892226, "grad_norm": 0.43485838174819946, "learning_rate": 1.627429910334058e-06, "loss": 0.5255, "step": 1793 }, { "epoch": 0.3059345156889495, "grad_norm": 0.47659534215927124, "learning_rate": 1.6269997384400846e-06, "loss": 0.5459, "step": 1794 }, { "epoch": 0.3061050477489768, "grad_norm": 0.5137026906013489, "learning_rate": 1.626569375275644e-06, "loss": 0.5347, "step": 1795 }, { "epoch": 0.3062755798090041, "grad_norm": 0.4063645899295807, "learning_rate": 1.6261388209720213e-06, "loss": 0.5282, "step": 1796 }, { "epoch": 0.30644611186903137, "grad_norm": 0.4230020344257355, "learning_rate": 1.6257080756605603e-06, "loss": 0.528, "step": 1797 }, { "epoch": 0.3066166439290587, "grad_norm": 0.4565592110157013, "learning_rate": 1.6252771394726623e-06, "loss": 0.5346, "step": 1798 }, { "epoch": 0.30678717598908595, "grad_norm": 0.46513283252716064, "learning_rate": 1.6248460125397876e-06, "loss": 0.5364, "step": 1799 }, { "epoch": 0.3069577080491132, "grad_norm": 0.3886967897415161, "learning_rate": 1.6244146949934542e-06, "loss": 0.529, "step": 1800 }, { "epoch": 0.3071282401091405, "grad_norm": 0.4297249913215637, "learning_rate": 1.6239831869652385e-06, "loss": 0.5566, "step": 1801 }, { "epoch": 0.3072987721691678, "grad_norm": 0.4115973114967346, "learning_rate": 1.6235514885867749e-06, "loss": 0.533, "step": 1802 }, { "epoch": 0.3074693042291951, "grad_norm": 0.5175999402999878, "learning_rate": 1.623119599989756e-06, "loss": 0.5493, "step": 1803 }, { "epoch": 0.30763983628922237, "grad_norm": 0.6544990539550781, "learning_rate": 1.6226875213059317e-06, "loss": 0.5272, "step": 1804 }, { "epoch": 0.3078103683492497, "grad_norm": 0.4819190502166748, "learning_rate": 1.622255252667111e-06, "loss": 0.522, "step": 1805 }, { "epoch": 0.30798090040927695, "grad_norm": 0.4651631712913513, "learning_rate": 1.6218227942051607e-06, "loss": 0.5298, "step": 1806 }, { "epoch": 0.3081514324693042, "grad_norm": 1.1320414543151855, "learning_rate": 1.6213901460520047e-06, "loss": 0.5272, "step": 1807 }, { "epoch": 0.3083219645293315, "grad_norm": 0.351107120513916, "learning_rate": 1.620957308339625e-06, "loss": 0.5476, "step": 1808 }, { "epoch": 0.3084924965893588, "grad_norm": 0.4684494733810425, "learning_rate": 1.620524281200062e-06, "loss": 0.529, "step": 1809 }, { "epoch": 0.3086630286493861, "grad_norm": 0.3853071331977844, "learning_rate": 1.6200910647654134e-06, "loss": 0.5453, "step": 1810 }, { "epoch": 0.30883356070941337, "grad_norm": 0.6147592067718506, "learning_rate": 1.6196576591678349e-06, "loss": 0.5268, "step": 1811 }, { "epoch": 0.30900409276944063, "grad_norm": 0.4014926254749298, "learning_rate": 1.6192240645395398e-06, "loss": 0.5349, "step": 1812 }, { "epoch": 0.30917462482946795, "grad_norm": 0.40421199798583984, "learning_rate": 1.6187902810127983e-06, "loss": 0.5522, "step": 1813 }, { "epoch": 0.3093451568894952, "grad_norm": 0.6183474063873291, "learning_rate": 1.6183563087199405e-06, "loss": 0.5163, "step": 1814 }, { "epoch": 0.3095156889495225, "grad_norm": 0.4411429464817047, "learning_rate": 1.617922147793351e-06, "loss": 0.5455, "step": 1815 }, { "epoch": 0.3096862210095498, "grad_norm": 0.4115869402885437, "learning_rate": 1.6174877983654744e-06, "loss": 0.5285, "step": 1816 }, { "epoch": 0.3098567530695771, "grad_norm": 0.39461439847946167, "learning_rate": 1.6170532605688114e-06, "loss": 0.534, "step": 1817 }, { "epoch": 0.31002728512960437, "grad_norm": 0.42679721117019653, "learning_rate": 1.616618534535921e-06, "loss": 0.5293, "step": 1818 }, { "epoch": 0.31019781718963163, "grad_norm": 0.5123752951622009, "learning_rate": 1.6161836203994194e-06, "loss": 0.5351, "step": 1819 }, { "epoch": 0.31036834924965895, "grad_norm": 0.6214609146118164, "learning_rate": 1.6157485182919797e-06, "loss": 0.5224, "step": 1820 }, { "epoch": 0.3105388813096862, "grad_norm": 0.5766216516494751, "learning_rate": 1.6153132283463329e-06, "loss": 0.5086, "step": 1821 }, { "epoch": 0.3107094133697135, "grad_norm": 0.4433499574661255, "learning_rate": 1.6148777506952673e-06, "loss": 0.5397, "step": 1822 }, { "epoch": 0.3108799454297408, "grad_norm": 0.3571315109729767, "learning_rate": 1.6144420854716278e-06, "loss": 0.5385, "step": 1823 }, { "epoch": 0.31105047748976805, "grad_norm": 0.4480735659599304, "learning_rate": 1.6140062328083173e-06, "loss": 0.5445, "step": 1824 }, { "epoch": 0.31122100954979537, "grad_norm": 0.4067867696285248, "learning_rate": 1.6135701928382953e-06, "loss": 0.5189, "step": 1825 }, { "epoch": 0.31139154160982263, "grad_norm": 0.38373708724975586, "learning_rate": 1.6131339656945792e-06, "loss": 0.5512, "step": 1826 }, { "epoch": 0.31156207366984995, "grad_norm": 0.4451771676540375, "learning_rate": 1.6126975515102424e-06, "loss": 0.5351, "step": 1827 }, { "epoch": 0.3117326057298772, "grad_norm": 0.3590299189090729, "learning_rate": 1.6122609504184167e-06, "loss": 0.5366, "step": 1828 }, { "epoch": 0.3119031377899045, "grad_norm": 0.4824784994125366, "learning_rate": 1.6118241625522898e-06, "loss": 0.5485, "step": 1829 }, { "epoch": 0.3120736698499318, "grad_norm": 0.4515202045440674, "learning_rate": 1.6113871880451065e-06, "loss": 0.5277, "step": 1830 }, { "epoch": 0.31224420190995905, "grad_norm": 0.4325205981731415, "learning_rate": 1.6109500270301694e-06, "loss": 0.5326, "step": 1831 }, { "epoch": 0.31241473396998637, "grad_norm": 0.4184912443161011, "learning_rate": 1.6105126796408368e-06, "loss": 0.5357, "step": 1832 }, { "epoch": 0.31258526603001363, "grad_norm": 0.46820399165153503, "learning_rate": 1.6100751460105245e-06, "loss": 0.5348, "step": 1833 }, { "epoch": 0.31275579809004095, "grad_norm": 0.40271228551864624, "learning_rate": 1.6096374262727057e-06, "loss": 0.5425, "step": 1834 }, { "epoch": 0.3129263301500682, "grad_norm": 0.38214290142059326, "learning_rate": 1.6091995205609092e-06, "loss": 0.5198, "step": 1835 }, { "epoch": 0.3130968622100955, "grad_norm": 0.5242385864257812, "learning_rate": 1.608761429008721e-06, "loss": 0.5422, "step": 1836 }, { "epoch": 0.3132673942701228, "grad_norm": 0.4800110459327698, "learning_rate": 1.608323151749784e-06, "loss": 0.5247, "step": 1837 }, { "epoch": 0.31343792633015005, "grad_norm": 0.3811364471912384, "learning_rate": 1.607884688917798e-06, "loss": 0.5411, "step": 1838 }, { "epoch": 0.31360845839017737, "grad_norm": 0.40190911293029785, "learning_rate": 1.6074460406465182e-06, "loss": 0.5282, "step": 1839 }, { "epoch": 0.31377899045020463, "grad_norm": 0.5054764747619629, "learning_rate": 1.6070072070697578e-06, "loss": 0.5547, "step": 1840 }, { "epoch": 0.31394952251023195, "grad_norm": 0.3729870915412903, "learning_rate": 1.6065681883213857e-06, "loss": 0.5306, "step": 1841 }, { "epoch": 0.3141200545702592, "grad_norm": 0.3761366605758667, "learning_rate": 1.6061289845353279e-06, "loss": 0.5319, "step": 1842 }, { "epoch": 0.3142905866302865, "grad_norm": 0.4457892179489136, "learning_rate": 1.6056895958455657e-06, "loss": 0.5406, "step": 1843 }, { "epoch": 0.3144611186903138, "grad_norm": 0.3641684949398041, "learning_rate": 1.6052500223861383e-06, "loss": 0.5264, "step": 1844 }, { "epoch": 0.31463165075034105, "grad_norm": 0.4447363615036011, "learning_rate": 1.6048102642911399e-06, "loss": 0.5265, "step": 1845 }, { "epoch": 0.31480218281036837, "grad_norm": 0.4733704626560211, "learning_rate": 1.6043703216947225e-06, "loss": 0.5254, "step": 1846 }, { "epoch": 0.31497271487039563, "grad_norm": 0.3722693920135498, "learning_rate": 1.6039301947310924e-06, "loss": 0.5337, "step": 1847 }, { "epoch": 0.3151432469304229, "grad_norm": 0.4494919180870056, "learning_rate": 1.6034898835345144e-06, "loss": 0.533, "step": 1848 }, { "epoch": 0.3153137789904502, "grad_norm": 0.42897871136665344, "learning_rate": 1.6030493882393075e-06, "loss": 0.5329, "step": 1849 }, { "epoch": 0.3154843110504775, "grad_norm": 0.45090794563293457, "learning_rate": 1.6026087089798483e-06, "loss": 0.5352, "step": 1850 }, { "epoch": 0.3156548431105048, "grad_norm": 0.37061816453933716, "learning_rate": 1.6021678458905686e-06, "loss": 0.5278, "step": 1851 }, { "epoch": 0.31582537517053205, "grad_norm": 0.4528961181640625, "learning_rate": 1.601726799105957e-06, "loss": 0.5247, "step": 1852 }, { "epoch": 0.31599590723055937, "grad_norm": 0.4259662628173828, "learning_rate": 1.6012855687605574e-06, "loss": 0.5376, "step": 1853 }, { "epoch": 0.31616643929058663, "grad_norm": 0.43850255012512207, "learning_rate": 1.6008441549889703e-06, "loss": 0.5398, "step": 1854 }, { "epoch": 0.3163369713506139, "grad_norm": 0.40549373626708984, "learning_rate": 1.600402557925852e-06, "loss": 0.5229, "step": 1855 }, { "epoch": 0.3165075034106412, "grad_norm": 0.43591824173927307, "learning_rate": 1.5999607777059141e-06, "loss": 0.5385, "step": 1856 }, { "epoch": 0.3166780354706685, "grad_norm": 0.4598686695098877, "learning_rate": 1.5995188144639252e-06, "loss": 0.5373, "step": 1857 }, { "epoch": 0.3168485675306958, "grad_norm": 0.34796208143234253, "learning_rate": 1.599076668334709e-06, "loss": 0.5257, "step": 1858 }, { "epoch": 0.31701909959072305, "grad_norm": 0.46574661135673523, "learning_rate": 1.598634339453145e-06, "loss": 0.5219, "step": 1859 }, { "epoch": 0.3171896316507503, "grad_norm": 0.4256969094276428, "learning_rate": 1.598191827954169e-06, "loss": 0.5286, "step": 1860 }, { "epoch": 0.31736016371077763, "grad_norm": 0.37936362624168396, "learning_rate": 1.5977491339727716e-06, "loss": 0.5332, "step": 1861 }, { "epoch": 0.3175306957708049, "grad_norm": 0.4368015229701996, "learning_rate": 1.5973062576439999e-06, "loss": 0.521, "step": 1862 }, { "epoch": 0.3177012278308322, "grad_norm": 0.364070862531662, "learning_rate": 1.5968631991029557e-06, "loss": 0.5412, "step": 1863 }, { "epoch": 0.3178717598908595, "grad_norm": 0.4169939160346985, "learning_rate": 1.5964199584847977e-06, "loss": 0.5329, "step": 1864 }, { "epoch": 0.3180422919508868, "grad_norm": 0.3581223487854004, "learning_rate": 1.595976535924739e-06, "loss": 0.527, "step": 1865 }, { "epoch": 0.31821282401091405, "grad_norm": 0.47882622480392456, "learning_rate": 1.595532931558049e-06, "loss": 0.5297, "step": 1866 }, { "epoch": 0.3183833560709413, "grad_norm": 0.43346402049064636, "learning_rate": 1.5950891455200516e-06, "loss": 0.5233, "step": 1867 }, { "epoch": 0.31855388813096863, "grad_norm": 0.458211749792099, "learning_rate": 1.5946451779461271e-06, "loss": 0.5312, "step": 1868 }, { "epoch": 0.3187244201909959, "grad_norm": 0.5317389965057373, "learning_rate": 1.5942010289717107e-06, "loss": 0.5225, "step": 1869 }, { "epoch": 0.3188949522510232, "grad_norm": 0.4372017979621887, "learning_rate": 1.5937566987322932e-06, "loss": 0.5266, "step": 1870 }, { "epoch": 0.3190654843110505, "grad_norm": 0.4571208655834198, "learning_rate": 1.5933121873634202e-06, "loss": 0.5217, "step": 1871 }, { "epoch": 0.31923601637107774, "grad_norm": 0.4793291389942169, "learning_rate": 1.592867495000693e-06, "loss": 0.5137, "step": 1872 }, { "epoch": 0.31940654843110505, "grad_norm": 0.56477290391922, "learning_rate": 1.592422621779768e-06, "loss": 0.5182, "step": 1873 }, { "epoch": 0.3195770804911323, "grad_norm": 0.497898131608963, "learning_rate": 1.5919775678363565e-06, "loss": 0.5215, "step": 1874 }, { "epoch": 0.31974761255115963, "grad_norm": 0.43701592087745667, "learning_rate": 1.5915323333062257e-06, "loss": 0.526, "step": 1875 }, { "epoch": 0.3199181446111869, "grad_norm": 0.6302632689476013, "learning_rate": 1.591086918325197e-06, "loss": 0.5271, "step": 1876 }, { "epoch": 0.3200886766712142, "grad_norm": 0.5440297722816467, "learning_rate": 1.5906413230291474e-06, "loss": 0.5266, "step": 1877 }, { "epoch": 0.3202592087312415, "grad_norm": 0.6256676912307739, "learning_rate": 1.5901955475540085e-06, "loss": 0.5331, "step": 1878 }, { "epoch": 0.32042974079126874, "grad_norm": 0.7379283308982849, "learning_rate": 1.5897495920357676e-06, "loss": 0.5251, "step": 1879 }, { "epoch": 0.32060027285129605, "grad_norm": 0.5450626015663147, "learning_rate": 1.5893034566104658e-06, "loss": 0.5261, "step": 1880 }, { "epoch": 0.3207708049113233, "grad_norm": 0.4530295431613922, "learning_rate": 1.5888571414141998e-06, "loss": 0.5315, "step": 1881 }, { "epoch": 0.32094133697135063, "grad_norm": 0.47940734028816223, "learning_rate": 1.5884106465831212e-06, "loss": 0.5267, "step": 1882 }, { "epoch": 0.3211118690313779, "grad_norm": 0.49188658595085144, "learning_rate": 1.5879639722534365e-06, "loss": 0.5228, "step": 1883 }, { "epoch": 0.32128240109140516, "grad_norm": 0.46712371706962585, "learning_rate": 1.5875171185614063e-06, "loss": 0.5258, "step": 1884 }, { "epoch": 0.3214529331514325, "grad_norm": 0.5874091982841492, "learning_rate": 1.5870700856433465e-06, "loss": 0.526, "step": 1885 }, { "epoch": 0.32162346521145974, "grad_norm": 0.41451871395111084, "learning_rate": 1.5866228736356271e-06, "loss": 0.5298, "step": 1886 }, { "epoch": 0.32179399727148705, "grad_norm": 0.5600169897079468, "learning_rate": 1.5861754826746738e-06, "loss": 0.5244, "step": 1887 }, { "epoch": 0.3219645293315143, "grad_norm": 0.6498214602470398, "learning_rate": 1.5857279128969657e-06, "loss": 0.5294, "step": 1888 }, { "epoch": 0.32213506139154163, "grad_norm": 0.6391829252243042, "learning_rate": 1.5852801644390368e-06, "loss": 0.5334, "step": 1889 }, { "epoch": 0.3223055934515689, "grad_norm": 0.46014702320098877, "learning_rate": 1.5848322374374766e-06, "loss": 0.5404, "step": 1890 }, { "epoch": 0.32247612551159616, "grad_norm": 0.4720998704433441, "learning_rate": 1.5843841320289273e-06, "loss": 0.5324, "step": 1891 }, { "epoch": 0.3226466575716235, "grad_norm": 0.5924428701400757, "learning_rate": 1.5839358483500872e-06, "loss": 0.5467, "step": 1892 }, { "epoch": 0.32281718963165074, "grad_norm": 0.4617968797683716, "learning_rate": 1.5834873865377077e-06, "loss": 0.5461, "step": 1893 }, { "epoch": 0.32298772169167805, "grad_norm": 0.4647984504699707, "learning_rate": 1.5830387467285952e-06, "loss": 0.5327, "step": 1894 }, { "epoch": 0.3231582537517053, "grad_norm": 0.47549039125442505, "learning_rate": 1.5825899290596103e-06, "loss": 0.525, "step": 1895 }, { "epoch": 0.32332878581173263, "grad_norm": 0.41526469588279724, "learning_rate": 1.5821409336676678e-06, "loss": 0.5148, "step": 1896 }, { "epoch": 0.3234993178717599, "grad_norm": 0.4488104581832886, "learning_rate": 1.581691760689737e-06, "loss": 0.5427, "step": 1897 }, { "epoch": 0.32366984993178716, "grad_norm": 0.3801409900188446, "learning_rate": 1.581242410262841e-06, "loss": 0.5222, "step": 1898 }, { "epoch": 0.3238403819918145, "grad_norm": 0.4768373370170593, "learning_rate": 1.5807928825240567e-06, "loss": 0.5282, "step": 1899 }, { "epoch": 0.32401091405184174, "grad_norm": 0.45949891209602356, "learning_rate": 1.5803431776105163e-06, "loss": 0.5228, "step": 1900 }, { "epoch": 0.32418144611186905, "grad_norm": 0.41923388838768005, "learning_rate": 1.579893295659405e-06, "loss": 0.529, "step": 1901 }, { "epoch": 0.3243519781718963, "grad_norm": 0.40380582213401794, "learning_rate": 1.5794432368079619e-06, "loss": 0.5313, "step": 1902 }, { "epoch": 0.3245225102319236, "grad_norm": 0.5308855772018433, "learning_rate": 1.578993001193481e-06, "loss": 0.526, "step": 1903 }, { "epoch": 0.3246930422919509, "grad_norm": 0.5592904686927795, "learning_rate": 1.5785425889533097e-06, "loss": 0.5313, "step": 1904 }, { "epoch": 0.32486357435197816, "grad_norm": 0.4044639766216278, "learning_rate": 1.5780920002248487e-06, "loss": 0.5402, "step": 1905 }, { "epoch": 0.3250341064120055, "grad_norm": 0.40651005506515503, "learning_rate": 1.5776412351455536e-06, "loss": 0.5383, "step": 1906 }, { "epoch": 0.32520463847203274, "grad_norm": 0.5054802894592285, "learning_rate": 1.5771902938529333e-06, "loss": 0.528, "step": 1907 }, { "epoch": 0.32537517053206005, "grad_norm": 0.5022133588790894, "learning_rate": 1.5767391764845502e-06, "loss": 0.5277, "step": 1908 }, { "epoch": 0.3255457025920873, "grad_norm": 0.36169329285621643, "learning_rate": 1.5762878831780207e-06, "loss": 0.5249, "step": 1909 }, { "epoch": 0.3257162346521146, "grad_norm": 0.41136419773101807, "learning_rate": 1.575836414071015e-06, "loss": 0.5265, "step": 1910 }, { "epoch": 0.3258867667121419, "grad_norm": 0.40905439853668213, "learning_rate": 1.575384769301257e-06, "loss": 0.516, "step": 1911 }, { "epoch": 0.32605729877216916, "grad_norm": 0.35940247774124146, "learning_rate": 1.5749329490065234e-06, "loss": 0.5278, "step": 1912 }, { "epoch": 0.3262278308321965, "grad_norm": 0.3744727373123169, "learning_rate": 1.5744809533246452e-06, "loss": 0.5467, "step": 1913 }, { "epoch": 0.32639836289222374, "grad_norm": 0.3644757866859436, "learning_rate": 1.5740287823935068e-06, "loss": 0.5221, "step": 1914 }, { "epoch": 0.326568894952251, "grad_norm": 0.3962186872959137, "learning_rate": 1.5735764363510464e-06, "loss": 0.5266, "step": 1915 }, { "epoch": 0.3267394270122783, "grad_norm": 0.3878701329231262, "learning_rate": 1.5731239153352546e-06, "loss": 0.5331, "step": 1916 }, { "epoch": 0.3269099590723056, "grad_norm": 0.39507779479026794, "learning_rate": 1.572671219484176e-06, "loss": 0.5265, "step": 1917 }, { "epoch": 0.3270804911323329, "grad_norm": 0.3882591128349304, "learning_rate": 1.5722183489359083e-06, "loss": 0.5404, "step": 1918 }, { "epoch": 0.32725102319236016, "grad_norm": 0.3593181371688843, "learning_rate": 1.5717653038286036e-06, "loss": 0.5336, "step": 1919 }, { "epoch": 0.3274215552523875, "grad_norm": 0.3732438087463379, "learning_rate": 1.5713120843004653e-06, "loss": 0.5382, "step": 1920 }, { "epoch": 0.32759208731241474, "grad_norm": 0.39789363741874695, "learning_rate": 1.5708586904897516e-06, "loss": 0.5301, "step": 1921 }, { "epoch": 0.327762619372442, "grad_norm": 0.3832828104496002, "learning_rate": 1.5704051225347734e-06, "loss": 0.5175, "step": 1922 }, { "epoch": 0.3279331514324693, "grad_norm": 0.37655749917030334, "learning_rate": 1.5699513805738943e-06, "loss": 0.5201, "step": 1923 }, { "epoch": 0.3281036834924966, "grad_norm": 0.4358007311820984, "learning_rate": 1.5694974647455318e-06, "loss": 0.5203, "step": 1924 }, { "epoch": 0.3282742155525239, "grad_norm": 0.4240171015262604, "learning_rate": 1.5690433751881553e-06, "loss": 0.5205, "step": 1925 }, { "epoch": 0.32844474761255116, "grad_norm": 0.5076821446418762, "learning_rate": 1.5685891120402883e-06, "loss": 0.5246, "step": 1926 }, { "epoch": 0.3286152796725784, "grad_norm": 0.41013339161872864, "learning_rate": 1.5681346754405066e-06, "loss": 0.5288, "step": 1927 }, { "epoch": 0.32878581173260574, "grad_norm": 0.42070189118385315, "learning_rate": 1.5676800655274394e-06, "loss": 0.5262, "step": 1928 }, { "epoch": 0.328956343792633, "grad_norm": 0.3666054308414459, "learning_rate": 1.5672252824397683e-06, "loss": 0.5382, "step": 1929 }, { "epoch": 0.3291268758526603, "grad_norm": 0.49871453642845154, "learning_rate": 1.5667703263162278e-06, "loss": 0.5142, "step": 1930 }, { "epoch": 0.3292974079126876, "grad_norm": 0.4860789179801941, "learning_rate": 1.5663151972956057e-06, "loss": 0.518, "step": 1931 }, { "epoch": 0.3294679399727149, "grad_norm": 0.4729858338832855, "learning_rate": 1.565859895516742e-06, "loss": 0.5248, "step": 1932 }, { "epoch": 0.32963847203274216, "grad_norm": 0.4114147424697876, "learning_rate": 1.5654044211185293e-06, "loss": 0.5318, "step": 1933 }, { "epoch": 0.3298090040927694, "grad_norm": 0.3970237672328949, "learning_rate": 1.5649487742399133e-06, "loss": 0.5412, "step": 1934 }, { "epoch": 0.32997953615279674, "grad_norm": 0.5709453821182251, "learning_rate": 1.5644929550198921e-06, "loss": 0.5333, "step": 1935 }, { "epoch": 0.330150068212824, "grad_norm": 0.4466193616390228, "learning_rate": 1.5640369635975164e-06, "loss": 0.5275, "step": 1936 }, { "epoch": 0.3303206002728513, "grad_norm": 0.46157199144363403, "learning_rate": 1.5635808001118898e-06, "loss": 0.5309, "step": 1937 }, { "epoch": 0.3304911323328786, "grad_norm": 0.4336494505405426, "learning_rate": 1.5631244647021674e-06, "loss": 0.5237, "step": 1938 }, { "epoch": 0.33066166439290584, "grad_norm": 0.4006889760494232, "learning_rate": 1.5626679575075578e-06, "loss": 0.5389, "step": 1939 }, { "epoch": 0.33083219645293316, "grad_norm": 0.5048114657402039, "learning_rate": 1.5622112786673214e-06, "loss": 0.5337, "step": 1940 }, { "epoch": 0.3310027285129604, "grad_norm": 0.6756312251091003, "learning_rate": 1.5617544283207713e-06, "loss": 0.5331, "step": 1941 }, { "epoch": 0.33117326057298774, "grad_norm": 0.6390357613563538, "learning_rate": 1.5612974066072723e-06, "loss": 0.5336, "step": 1942 }, { "epoch": 0.331343792633015, "grad_norm": 0.43792104721069336, "learning_rate": 1.5608402136662425e-06, "loss": 0.5467, "step": 1943 }, { "epoch": 0.3315143246930423, "grad_norm": 0.4027770757675171, "learning_rate": 1.5603828496371514e-06, "loss": 0.519, "step": 1944 }, { "epoch": 0.3316848567530696, "grad_norm": 0.6208730936050415, "learning_rate": 1.5599253146595213e-06, "loss": 0.5205, "step": 1945 }, { "epoch": 0.33185538881309684, "grad_norm": 0.5827608108520508, "learning_rate": 1.5594676088729253e-06, "loss": 0.5209, "step": 1946 }, { "epoch": 0.33202592087312416, "grad_norm": 0.3326941728591919, "learning_rate": 1.559009732416991e-06, "loss": 0.5336, "step": 1947 }, { "epoch": 0.3321964529331514, "grad_norm": 0.5395388603210449, "learning_rate": 1.5585516854313957e-06, "loss": 0.5229, "step": 1948 }, { "epoch": 0.33236698499317874, "grad_norm": 0.5696554780006409, "learning_rate": 1.5580934680558698e-06, "loss": 0.5303, "step": 1949 }, { "epoch": 0.332537517053206, "grad_norm": 0.5093621015548706, "learning_rate": 1.557635080430196e-06, "loss": 0.5302, "step": 1950 }, { "epoch": 0.33270804911323326, "grad_norm": 0.4694986343383789, "learning_rate": 1.5571765226942084e-06, "loss": 0.5333, "step": 1951 }, { "epoch": 0.3328785811732606, "grad_norm": 0.5953072309494019, "learning_rate": 1.5567177949877927e-06, "loss": 0.5287, "step": 1952 }, { "epoch": 0.33304911323328784, "grad_norm": 0.42887023091316223, "learning_rate": 1.5562588974508874e-06, "loss": 0.5212, "step": 1953 }, { "epoch": 0.33321964529331516, "grad_norm": 0.45148569345474243, "learning_rate": 1.5557998302234816e-06, "loss": 0.5289, "step": 1954 }, { "epoch": 0.3333901773533424, "grad_norm": 0.5752291679382324, "learning_rate": 1.555340593445618e-06, "loss": 0.539, "step": 1955 }, { "epoch": 0.33356070941336974, "grad_norm": 0.37518638372421265, "learning_rate": 1.5548811872573885e-06, "loss": 0.5322, "step": 1956 }, { "epoch": 0.333731241473397, "grad_norm": 0.47894930839538574, "learning_rate": 1.5544216117989389e-06, "loss": 0.5256, "step": 1957 }, { "epoch": 0.33390177353342426, "grad_norm": 0.5407999753952026, "learning_rate": 1.5539618672104652e-06, "loss": 0.516, "step": 1958 }, { "epoch": 0.3340723055934516, "grad_norm": 0.3796008825302124, "learning_rate": 1.5535019536322159e-06, "loss": 0.5129, "step": 1959 }, { "epoch": 0.33424283765347884, "grad_norm": 0.4745221436023712, "learning_rate": 1.5530418712044909e-06, "loss": 0.5347, "step": 1960 }, { "epoch": 0.33441336971350616, "grad_norm": 0.5195663571357727, "learning_rate": 1.552581620067641e-06, "loss": 0.5291, "step": 1961 }, { "epoch": 0.3345839017735334, "grad_norm": 0.4340425133705139, "learning_rate": 1.5521212003620693e-06, "loss": 0.5173, "step": 1962 }, { "epoch": 0.3347544338335607, "grad_norm": 0.43442365527153015, "learning_rate": 1.5516606122282298e-06, "loss": 0.5323, "step": 1963 }, { "epoch": 0.334924965893588, "grad_norm": 0.4027803838253021, "learning_rate": 1.5511998558066276e-06, "loss": 0.5329, "step": 1964 }, { "epoch": 0.33509549795361526, "grad_norm": 0.384951114654541, "learning_rate": 1.5507389312378199e-06, "loss": 0.5183, "step": 1965 }, { "epoch": 0.3352660300136426, "grad_norm": 0.43811824917793274, "learning_rate": 1.5502778386624148e-06, "loss": 0.5307, "step": 1966 }, { "epoch": 0.33543656207366984, "grad_norm": 0.3942059874534607, "learning_rate": 1.5498165782210715e-06, "loss": 0.5291, "step": 1967 }, { "epoch": 0.33560709413369716, "grad_norm": 0.5205844044685364, "learning_rate": 1.549355150054501e-06, "loss": 0.5273, "step": 1968 }, { "epoch": 0.3357776261937244, "grad_norm": 0.415243536233902, "learning_rate": 1.5488935543034646e-06, "loss": 0.5248, "step": 1969 }, { "epoch": 0.3359481582537517, "grad_norm": 0.47817909717559814, "learning_rate": 1.5484317911087752e-06, "loss": 0.5311, "step": 1970 }, { "epoch": 0.336118690313779, "grad_norm": 0.48120763897895813, "learning_rate": 1.5479698606112973e-06, "loss": 0.5255, "step": 1971 }, { "epoch": 0.33628922237380626, "grad_norm": 0.42829394340515137, "learning_rate": 1.5475077629519452e-06, "loss": 0.515, "step": 1972 }, { "epoch": 0.3364597544338336, "grad_norm": 0.48222029209136963, "learning_rate": 1.5470454982716852e-06, "loss": 0.5163, "step": 1973 }, { "epoch": 0.33663028649386084, "grad_norm": 0.5472458004951477, "learning_rate": 1.546583066711534e-06, "loss": 0.535, "step": 1974 }, { "epoch": 0.3368008185538881, "grad_norm": 0.6080836057662964, "learning_rate": 1.54612046841256e-06, "loss": 0.5256, "step": 1975 }, { "epoch": 0.3369713506139154, "grad_norm": 0.5023837685585022, "learning_rate": 1.5456577035158815e-06, "loss": 0.5301, "step": 1976 }, { "epoch": 0.3371418826739427, "grad_norm": 0.41623133420944214, "learning_rate": 1.5451947721626677e-06, "loss": 0.5393, "step": 1977 }, { "epoch": 0.33731241473397, "grad_norm": 0.6211003065109253, "learning_rate": 1.5447316744941399e-06, "loss": 0.5327, "step": 1978 }, { "epoch": 0.33748294679399726, "grad_norm": 0.5653514862060547, "learning_rate": 1.5442684106515684e-06, "loss": 0.5275, "step": 1979 }, { "epoch": 0.3376534788540246, "grad_norm": 0.4624597430229187, "learning_rate": 1.5438049807762746e-06, "loss": 0.5332, "step": 1980 }, { "epoch": 0.33782401091405184, "grad_norm": 0.39047080278396606, "learning_rate": 1.5433413850096316e-06, "loss": 0.5188, "step": 1981 }, { "epoch": 0.3379945429740791, "grad_norm": 0.4766196608543396, "learning_rate": 1.5428776234930622e-06, "loss": 0.5251, "step": 1982 }, { "epoch": 0.3381650750341064, "grad_norm": 0.45972636342048645, "learning_rate": 1.54241369636804e-06, "loss": 0.5222, "step": 1983 }, { "epoch": 0.3383356070941337, "grad_norm": 0.455423504114151, "learning_rate": 1.5419496037760887e-06, "loss": 0.5189, "step": 1984 }, { "epoch": 0.338506139154161, "grad_norm": 0.4104258120059967, "learning_rate": 1.5414853458587835e-06, "loss": 0.5182, "step": 1985 }, { "epoch": 0.33867667121418826, "grad_norm": 0.4785384237766266, "learning_rate": 1.5410209227577487e-06, "loss": 0.5242, "step": 1986 }, { "epoch": 0.3388472032742155, "grad_norm": 0.5701748132705688, "learning_rate": 1.5405563346146604e-06, "loss": 0.5248, "step": 1987 }, { "epoch": 0.33901773533424284, "grad_norm": 0.5212503671646118, "learning_rate": 1.5400915815712434e-06, "loss": 0.5267, "step": 1988 }, { "epoch": 0.3391882673942701, "grad_norm": 0.3825041353702545, "learning_rate": 1.5396266637692746e-06, "loss": 0.5258, "step": 1989 }, { "epoch": 0.3393587994542974, "grad_norm": 0.5028911232948303, "learning_rate": 1.5391615813505794e-06, "loss": 0.5435, "step": 1990 }, { "epoch": 0.3395293315143247, "grad_norm": 0.46265122294425964, "learning_rate": 1.5386963344570355e-06, "loss": 0.5167, "step": 1991 }, { "epoch": 0.339699863574352, "grad_norm": 0.511637806892395, "learning_rate": 1.5382309232305687e-06, "loss": 0.5324, "step": 1992 }, { "epoch": 0.33987039563437926, "grad_norm": 0.5030692219734192, "learning_rate": 1.5377653478131558e-06, "loss": 0.5254, "step": 1993 }, { "epoch": 0.3400409276944065, "grad_norm": 0.4326249659061432, "learning_rate": 1.5372996083468244e-06, "loss": 0.5222, "step": 1994 }, { "epoch": 0.34021145975443384, "grad_norm": 0.6164526343345642, "learning_rate": 1.5368337049736505e-06, "loss": 0.536, "step": 1995 }, { "epoch": 0.3403819918144611, "grad_norm": 0.6666930317878723, "learning_rate": 1.5363676378357615e-06, "loss": 0.5299, "step": 1996 }, { "epoch": 0.3405525238744884, "grad_norm": 0.40014538168907166, "learning_rate": 1.5359014070753346e-06, "loss": 0.5318, "step": 1997 }, { "epoch": 0.3407230559345157, "grad_norm": 0.5744419693946838, "learning_rate": 1.535435012834596e-06, "loss": 0.5231, "step": 1998 }, { "epoch": 0.340893587994543, "grad_norm": 0.5867252349853516, "learning_rate": 1.5349684552558227e-06, "loss": 0.5269, "step": 1999 }, { "epoch": 0.34106412005457026, "grad_norm": 0.4233283996582031, "learning_rate": 1.5345017344813415e-06, "loss": 0.5262, "step": 2000 }, { "epoch": 0.3412346521145975, "grad_norm": 0.5572194457054138, "learning_rate": 1.5340348506535285e-06, "loss": 0.5439, "step": 2001 }, { "epoch": 0.34140518417462484, "grad_norm": 0.46090197563171387, "learning_rate": 1.5335678039148094e-06, "loss": 0.5302, "step": 2002 }, { "epoch": 0.3415757162346521, "grad_norm": 0.3734171688556671, "learning_rate": 1.5331005944076602e-06, "loss": 0.5215, "step": 2003 }, { "epoch": 0.3417462482946794, "grad_norm": 0.6022562384605408, "learning_rate": 1.5326332222746061e-06, "loss": 0.5221, "step": 2004 }, { "epoch": 0.3419167803547067, "grad_norm": 0.4694681465625763, "learning_rate": 1.5321656876582226e-06, "loss": 0.5396, "step": 2005 }, { "epoch": 0.34208731241473395, "grad_norm": 0.41527536511421204, "learning_rate": 1.5316979907011336e-06, "loss": 0.5219, "step": 2006 }, { "epoch": 0.34225784447476126, "grad_norm": 0.6797448992729187, "learning_rate": 1.531230131546014e-06, "loss": 0.5231, "step": 2007 }, { "epoch": 0.3424283765347885, "grad_norm": 0.4598335027694702, "learning_rate": 1.5307621103355864e-06, "loss": 0.5207, "step": 2008 }, { "epoch": 0.34259890859481584, "grad_norm": 0.47826674580574036, "learning_rate": 1.5302939272126246e-06, "loss": 0.5482, "step": 2009 }, { "epoch": 0.3427694406548431, "grad_norm": 0.46293771266937256, "learning_rate": 1.5298255823199506e-06, "loss": 0.5179, "step": 2010 }, { "epoch": 0.3429399727148704, "grad_norm": 0.4489620327949524, "learning_rate": 1.5293570758004363e-06, "loss": 0.5285, "step": 2011 }, { "epoch": 0.3431105047748977, "grad_norm": 0.5135905742645264, "learning_rate": 1.5288884077970024e-06, "loss": 0.5119, "step": 2012 }, { "epoch": 0.34328103683492495, "grad_norm": 0.3585302531719208, "learning_rate": 1.5284195784526198e-06, "loss": 0.5128, "step": 2013 }, { "epoch": 0.34345156889495226, "grad_norm": 0.5710442662239075, "learning_rate": 1.5279505879103074e-06, "loss": 0.5284, "step": 2014 }, { "epoch": 0.3436221009549795, "grad_norm": 0.5375210046768188, "learning_rate": 1.5274814363131347e-06, "loss": 0.5112, "step": 2015 }, { "epoch": 0.34379263301500684, "grad_norm": 0.4037111699581146, "learning_rate": 1.5270121238042188e-06, "loss": 0.5386, "step": 2016 }, { "epoch": 0.3439631650750341, "grad_norm": 1.513051152229309, "learning_rate": 1.5265426505267266e-06, "loss": 0.52, "step": 2017 }, { "epoch": 0.34413369713506137, "grad_norm": 0.44357699155807495, "learning_rate": 1.5260730166238747e-06, "loss": 0.5279, "step": 2018 }, { "epoch": 0.3443042291950887, "grad_norm": 0.44351810216903687, "learning_rate": 1.5256032222389278e-06, "loss": 0.5325, "step": 2019 }, { "epoch": 0.34447476125511595, "grad_norm": 0.41944336891174316, "learning_rate": 1.5251332675151994e-06, "loss": 0.5376, "step": 2020 }, { "epoch": 0.34464529331514326, "grad_norm": 0.3723226487636566, "learning_rate": 1.5246631525960528e-06, "loss": 0.5357, "step": 2021 }, { "epoch": 0.3448158253751705, "grad_norm": 0.45165905356407166, "learning_rate": 1.5241928776248995e-06, "loss": 0.5277, "step": 2022 }, { "epoch": 0.34498635743519784, "grad_norm": 0.3919413685798645, "learning_rate": 1.5237224427452003e-06, "loss": 0.5169, "step": 2023 }, { "epoch": 0.3451568894952251, "grad_norm": 0.41514766216278076, "learning_rate": 1.5232518481004642e-06, "loss": 0.5273, "step": 2024 }, { "epoch": 0.34532742155525237, "grad_norm": 0.385572612285614, "learning_rate": 1.5227810938342495e-06, "loss": 0.5279, "step": 2025 }, { "epoch": 0.3454979536152797, "grad_norm": 0.42853543162345886, "learning_rate": 1.5223101800901627e-06, "loss": 0.5399, "step": 2026 }, { "epoch": 0.34566848567530695, "grad_norm": 0.39967915415763855, "learning_rate": 1.5218391070118592e-06, "loss": 0.5278, "step": 2027 }, { "epoch": 0.34583901773533426, "grad_norm": 0.44363322854042053, "learning_rate": 1.521367874743043e-06, "loss": 0.5257, "step": 2028 }, { "epoch": 0.3460095497953615, "grad_norm": 0.40447431802749634, "learning_rate": 1.520896483427467e-06, "loss": 0.5288, "step": 2029 }, { "epoch": 0.3461800818553888, "grad_norm": 0.4379490613937378, "learning_rate": 1.520424933208932e-06, "loss": 0.5274, "step": 2030 }, { "epoch": 0.3463506139154161, "grad_norm": 0.375988632440567, "learning_rate": 1.5199532242312872e-06, "loss": 0.5148, "step": 2031 }, { "epoch": 0.34652114597544337, "grad_norm": 0.38080793619155884, "learning_rate": 1.5194813566384313e-06, "loss": 0.53, "step": 2032 }, { "epoch": 0.3466916780354707, "grad_norm": 0.48653528094291687, "learning_rate": 1.5190093305743105e-06, "loss": 0.5278, "step": 2033 }, { "epoch": 0.34686221009549795, "grad_norm": 0.36069509387016296, "learning_rate": 1.5185371461829187e-06, "loss": 0.5204, "step": 2034 }, { "epoch": 0.34703274215552526, "grad_norm": 0.4373205602169037, "learning_rate": 1.5180648036083e-06, "loss": 0.531, "step": 2035 }, { "epoch": 0.3472032742155525, "grad_norm": 0.40521979331970215, "learning_rate": 1.517592302994545e-06, "loss": 0.5415, "step": 2036 }, { "epoch": 0.3473738062755798, "grad_norm": 0.37990278005599976, "learning_rate": 1.5171196444857935e-06, "loss": 0.5285, "step": 2037 }, { "epoch": 0.3475443383356071, "grad_norm": 0.4962809383869171, "learning_rate": 1.5166468282262328e-06, "loss": 0.5419, "step": 2038 }, { "epoch": 0.34771487039563437, "grad_norm": 0.37079575657844543, "learning_rate": 1.5161738543600991e-06, "loss": 0.5143, "step": 2039 }, { "epoch": 0.3478854024556617, "grad_norm": 0.4124794602394104, "learning_rate": 1.515700723031676e-06, "loss": 0.5289, "step": 2040 }, { "epoch": 0.34805593451568895, "grad_norm": 0.42842522263526917, "learning_rate": 1.5152274343852955e-06, "loss": 0.5228, "step": 2041 }, { "epoch": 0.3482264665757162, "grad_norm": 0.4419716000556946, "learning_rate": 1.5147539885653372e-06, "loss": 0.5255, "step": 2042 }, { "epoch": 0.3483969986357435, "grad_norm": 0.37058091163635254, "learning_rate": 1.514280385716229e-06, "loss": 0.5273, "step": 2043 }, { "epoch": 0.3485675306957708, "grad_norm": 0.4142124652862549, "learning_rate": 1.513806625982447e-06, "loss": 0.5241, "step": 2044 }, { "epoch": 0.3487380627557981, "grad_norm": 0.3658922016620636, "learning_rate": 1.513332709508514e-06, "loss": 0.5156, "step": 2045 }, { "epoch": 0.34890859481582537, "grad_norm": 0.41257330775260925, "learning_rate": 1.5128586364390022e-06, "loss": 0.5337, "step": 2046 }, { "epoch": 0.3490791268758527, "grad_norm": 0.40474775433540344, "learning_rate": 1.5123844069185307e-06, "loss": 0.5188, "step": 2047 }, { "epoch": 0.34924965893587995, "grad_norm": 0.4597495198249817, "learning_rate": 1.5119100210917658e-06, "loss": 0.5319, "step": 2048 }, { "epoch": 0.3494201909959072, "grad_norm": 0.4251413643360138, "learning_rate": 1.5114354791034226e-06, "loss": 0.5208, "step": 2049 }, { "epoch": 0.3495907230559345, "grad_norm": 0.37406793236732483, "learning_rate": 1.5109607810982628e-06, "loss": 0.5184, "step": 2050 }, { "epoch": 0.3497612551159618, "grad_norm": 0.47679436206817627, "learning_rate": 1.5104859272210966e-06, "loss": 0.5132, "step": 2051 }, { "epoch": 0.3499317871759891, "grad_norm": 0.45877325534820557, "learning_rate": 1.5100109176167815e-06, "loss": 0.5237, "step": 2052 }, { "epoch": 0.35010231923601637, "grad_norm": 0.4083709716796875, "learning_rate": 1.509535752430222e-06, "loss": 0.5328, "step": 2053 }, { "epoch": 0.35027285129604363, "grad_norm": 0.3754033148288727, "learning_rate": 1.5090604318063705e-06, "loss": 0.5276, "step": 2054 }, { "epoch": 0.35044338335607095, "grad_norm": 0.4184139370918274, "learning_rate": 1.5085849558902268e-06, "loss": 0.5178, "step": 2055 }, { "epoch": 0.3506139154160982, "grad_norm": 0.35778215527534485, "learning_rate": 1.5081093248268384e-06, "loss": 0.5261, "step": 2056 }, { "epoch": 0.3507844474761255, "grad_norm": 0.4060825705528259, "learning_rate": 1.507633538761299e-06, "loss": 0.5356, "step": 2057 }, { "epoch": 0.3509549795361528, "grad_norm": 0.35478487610816956, "learning_rate": 1.5071575978387506e-06, "loss": 0.5267, "step": 2058 }, { "epoch": 0.3511255115961801, "grad_norm": 0.42642742395401, "learning_rate": 1.506681502204382e-06, "loss": 0.5162, "step": 2059 }, { "epoch": 0.35129604365620737, "grad_norm": 0.4379814565181732, "learning_rate": 1.50620525200343e-06, "loss": 0.5341, "step": 2060 }, { "epoch": 0.35146657571623463, "grad_norm": 0.40005528926849365, "learning_rate": 1.5057288473811775e-06, "loss": 0.5218, "step": 2061 }, { "epoch": 0.35163710777626195, "grad_norm": 0.5353875756263733, "learning_rate": 1.5052522884829544e-06, "loss": 0.5281, "step": 2062 }, { "epoch": 0.3518076398362892, "grad_norm": 0.6442404389381409, "learning_rate": 1.5047755754541393e-06, "loss": 0.5289, "step": 2063 }, { "epoch": 0.3519781718963165, "grad_norm": 0.4252280294895172, "learning_rate": 1.5042987084401561e-06, "loss": 0.5277, "step": 2064 }, { "epoch": 0.3521487039563438, "grad_norm": 0.527929961681366, "learning_rate": 1.5038216875864758e-06, "loss": 0.5145, "step": 2065 }, { "epoch": 0.35231923601637105, "grad_norm": 0.41897308826446533, "learning_rate": 1.5033445130386176e-06, "loss": 0.5266, "step": 2066 }, { "epoch": 0.35248976807639837, "grad_norm": 0.5344181060791016, "learning_rate": 1.5028671849421465e-06, "loss": 0.5192, "step": 2067 }, { "epoch": 0.35266030013642563, "grad_norm": 0.47897011041641235, "learning_rate": 1.5023897034426743e-06, "loss": 0.5155, "step": 2068 }, { "epoch": 0.35283083219645295, "grad_norm": 0.4926980435848236, "learning_rate": 1.5019120686858605e-06, "loss": 0.5302, "step": 2069 }, { "epoch": 0.3530013642564802, "grad_norm": 0.6491872072219849, "learning_rate": 1.5014342808174105e-06, "loss": 0.5222, "step": 2070 }, { "epoch": 0.3531718963165075, "grad_norm": 0.4459693431854248, "learning_rate": 1.5009563399830766e-06, "loss": 0.5275, "step": 2071 }, { "epoch": 0.3533424283765348, "grad_norm": 0.44589418172836304, "learning_rate": 1.5004782463286582e-06, "loss": 0.5262, "step": 2072 }, { "epoch": 0.35351296043656205, "grad_norm": 0.38912469148635864, "learning_rate": 1.5000000000000002e-06, "loss": 0.5324, "step": 2073 }, { "epoch": 0.35368349249658937, "grad_norm": 0.4303382933139801, "learning_rate": 1.4995216011429958e-06, "loss": 0.5037, "step": 2074 }, { "epoch": 0.35385402455661663, "grad_norm": 0.52245032787323, "learning_rate": 1.4990430499035831e-06, "loss": 0.5237, "step": 2075 }, { "epoch": 0.35402455661664395, "grad_norm": 0.3739396333694458, "learning_rate": 1.4985643464277476e-06, "loss": 0.5173, "step": 2076 }, { "epoch": 0.3541950886766712, "grad_norm": 0.47938236594200134, "learning_rate": 1.4980854908615212e-06, "loss": 0.5119, "step": 2077 }, { "epoch": 0.3543656207366985, "grad_norm": 0.5574185252189636, "learning_rate": 1.4976064833509816e-06, "loss": 0.5169, "step": 2078 }, { "epoch": 0.3545361527967258, "grad_norm": 0.7085524797439575, "learning_rate": 1.4971273240422536e-06, "loss": 0.5241, "step": 2079 }, { "epoch": 0.35470668485675305, "grad_norm": 0.5693588256835938, "learning_rate": 1.4966480130815076e-06, "loss": 0.5168, "step": 2080 }, { "epoch": 0.35487721691678037, "grad_norm": 0.39197826385498047, "learning_rate": 1.496168550614961e-06, "loss": 0.5253, "step": 2081 }, { "epoch": 0.35504774897680763, "grad_norm": 0.43353649973869324, "learning_rate": 1.4956889367888765e-06, "loss": 0.5288, "step": 2082 }, { "epoch": 0.35521828103683495, "grad_norm": 0.41520053148269653, "learning_rate": 1.4952091717495642e-06, "loss": 0.5193, "step": 2083 }, { "epoch": 0.3553888130968622, "grad_norm": 0.5020126104354858, "learning_rate": 1.4947292556433791e-06, "loss": 0.5348, "step": 2084 }, { "epoch": 0.3555593451568895, "grad_norm": 0.3375995457172394, "learning_rate": 1.4942491886167231e-06, "loss": 0.5184, "step": 2085 }, { "epoch": 0.3557298772169168, "grad_norm": 0.5305654406547546, "learning_rate": 1.4937689708160435e-06, "loss": 0.5225, "step": 2086 }, { "epoch": 0.35590040927694405, "grad_norm": 0.3381396234035492, "learning_rate": 1.4932886023878347e-06, "loss": 0.516, "step": 2087 }, { "epoch": 0.35607094133697137, "grad_norm": 0.5799662470817566, "learning_rate": 1.4928080834786358e-06, "loss": 0.5087, "step": 2088 }, { "epoch": 0.35624147339699863, "grad_norm": 0.582531750202179, "learning_rate": 1.492327414235032e-06, "loss": 0.5208, "step": 2089 }, { "epoch": 0.3564120054570259, "grad_norm": 0.36300772428512573, "learning_rate": 1.491846594803655e-06, "loss": 0.5227, "step": 2090 }, { "epoch": 0.3565825375170532, "grad_norm": 0.5540021657943726, "learning_rate": 1.4913656253311822e-06, "loss": 0.5191, "step": 2091 }, { "epoch": 0.3567530695770805, "grad_norm": 0.4059838652610779, "learning_rate": 1.490884505964336e-06, "loss": 0.5174, "step": 2092 }, { "epoch": 0.3569236016371078, "grad_norm": 0.47956252098083496, "learning_rate": 1.4904032368498858e-06, "loss": 0.5095, "step": 2093 }, { "epoch": 0.35709413369713505, "grad_norm": 0.4530734717845917, "learning_rate": 1.4899218181346455e-06, "loss": 0.5362, "step": 2094 }, { "epoch": 0.35726466575716237, "grad_norm": 0.5519397854804993, "learning_rate": 1.4894402499654751e-06, "loss": 0.5253, "step": 2095 }, { "epoch": 0.35743519781718963, "grad_norm": 0.39683836698532104, "learning_rate": 1.4889585324892805e-06, "loss": 0.515, "step": 2096 }, { "epoch": 0.3576057298772169, "grad_norm": 0.605188250541687, "learning_rate": 1.4884766658530128e-06, "loss": 0.5167, "step": 2097 }, { "epoch": 0.3577762619372442, "grad_norm": 0.4661652445793152, "learning_rate": 1.4879946502036679e-06, "loss": 0.5241, "step": 2098 }, { "epoch": 0.3579467939972715, "grad_norm": 0.5223652720451355, "learning_rate": 1.4875124856882886e-06, "loss": 0.5177, "step": 2099 }, { "epoch": 0.3581173260572988, "grad_norm": 0.6159395575523376, "learning_rate": 1.4870301724539627e-06, "loss": 0.5001, "step": 2100 }, { "epoch": 0.35828785811732605, "grad_norm": 0.4143725633621216, "learning_rate": 1.4865477106478222e-06, "loss": 0.5057, "step": 2101 }, { "epoch": 0.35845839017735337, "grad_norm": 0.6344066262245178, "learning_rate": 1.4860651004170463e-06, "loss": 0.5356, "step": 2102 }, { "epoch": 0.35862892223738063, "grad_norm": 0.4035511910915375, "learning_rate": 1.4855823419088577e-06, "loss": 0.5212, "step": 2103 }, { "epoch": 0.3587994542974079, "grad_norm": 0.5761251449584961, "learning_rate": 1.4850994352705251e-06, "loss": 0.5194, "step": 2104 }, { "epoch": 0.3589699863574352, "grad_norm": 0.38429519534111023, "learning_rate": 1.4846163806493628e-06, "loss": 0.5078, "step": 2105 }, { "epoch": 0.3591405184174625, "grad_norm": 0.5760840177536011, "learning_rate": 1.4841331781927298e-06, "loss": 0.5188, "step": 2106 }, { "epoch": 0.3593110504774898, "grad_norm": 0.4178948700428009, "learning_rate": 1.4836498280480301e-06, "loss": 0.5174, "step": 2107 }, { "epoch": 0.35948158253751705, "grad_norm": 0.397052526473999, "learning_rate": 1.483166330362713e-06, "loss": 0.5159, "step": 2108 }, { "epoch": 0.3596521145975443, "grad_norm": 0.43821024894714355, "learning_rate": 1.4826826852842727e-06, "loss": 0.5144, "step": 2109 }, { "epoch": 0.35982264665757163, "grad_norm": 0.5855873227119446, "learning_rate": 1.4821988929602482e-06, "loss": 0.5122, "step": 2110 }, { "epoch": 0.3599931787175989, "grad_norm": 0.5009761452674866, "learning_rate": 1.4817149535382239e-06, "loss": 0.513, "step": 2111 }, { "epoch": 0.3601637107776262, "grad_norm": 0.5092044472694397, "learning_rate": 1.4812308671658285e-06, "loss": 0.5117, "step": 2112 }, { "epoch": 0.3603342428376535, "grad_norm": 0.3675675690174103, "learning_rate": 1.4807466339907358e-06, "loss": 0.5035, "step": 2113 }, { "epoch": 0.3605047748976808, "grad_norm": 0.42256852984428406, "learning_rate": 1.4802622541606645e-06, "loss": 0.5109, "step": 2114 }, { "epoch": 0.36067530695770805, "grad_norm": 0.48379117250442505, "learning_rate": 1.4797777278233779e-06, "loss": 0.5182, "step": 2115 }, { "epoch": 0.3608458390177353, "grad_norm": 0.4444460868835449, "learning_rate": 1.4792930551266836e-06, "loss": 0.522, "step": 2116 }, { "epoch": 0.36101637107776263, "grad_norm": 0.4920353889465332, "learning_rate": 1.478808236218435e-06, "loss": 0.5078, "step": 2117 }, { "epoch": 0.3611869031377899, "grad_norm": 0.45749807357788086, "learning_rate": 1.4783232712465289e-06, "loss": 0.5082, "step": 2118 }, { "epoch": 0.3613574351978172, "grad_norm": 0.48693403601646423, "learning_rate": 1.4778381603589071e-06, "loss": 0.508, "step": 2119 }, { "epoch": 0.3615279672578445, "grad_norm": 0.35000139474868774, "learning_rate": 1.477352903703556e-06, "loss": 0.5108, "step": 2120 }, { "epoch": 0.36169849931787174, "grad_norm": 0.39079880714416504, "learning_rate": 1.4768675014285065e-06, "loss": 0.5207, "step": 2121 }, { "epoch": 0.36186903137789905, "grad_norm": 0.48962411284446716, "learning_rate": 1.4763819536818332e-06, "loss": 0.5146, "step": 2122 }, { "epoch": 0.3620395634379263, "grad_norm": 0.423258900642395, "learning_rate": 1.4758962606116566e-06, "loss": 0.5147, "step": 2123 }, { "epoch": 0.36221009549795363, "grad_norm": 0.414676308631897, "learning_rate": 1.4754104223661402e-06, "loss": 0.5241, "step": 2124 }, { "epoch": 0.3623806275579809, "grad_norm": 0.4666195809841156, "learning_rate": 1.4749244390934922e-06, "loss": 0.5182, "step": 2125 }, { "epoch": 0.3625511596180082, "grad_norm": 0.4646506905555725, "learning_rate": 1.4744383109419649e-06, "loss": 0.5143, "step": 2126 }, { "epoch": 0.3627216916780355, "grad_norm": 0.48451265692710876, "learning_rate": 1.4739520380598552e-06, "loss": 0.5187, "step": 2127 }, { "epoch": 0.36289222373806274, "grad_norm": 0.4569034278392792, "learning_rate": 1.473465620595504e-06, "loss": 0.5135, "step": 2128 }, { "epoch": 0.36306275579809005, "grad_norm": 0.4225984215736389, "learning_rate": 1.4729790586972957e-06, "loss": 0.5251, "step": 2129 }, { "epoch": 0.3632332878581173, "grad_norm": 0.6046484112739563, "learning_rate": 1.4724923525136597e-06, "loss": 0.515, "step": 2130 }, { "epoch": 0.36340381991814463, "grad_norm": 0.6546425223350525, "learning_rate": 1.4720055021930688e-06, "loss": 0.5179, "step": 2131 }, { "epoch": 0.3635743519781719, "grad_norm": 0.4017772972583771, "learning_rate": 1.4715185078840402e-06, "loss": 0.5184, "step": 2132 }, { "epoch": 0.36374488403819916, "grad_norm": 0.4456183612346649, "learning_rate": 1.4710313697351343e-06, "loss": 0.5152, "step": 2133 }, { "epoch": 0.3639154160982265, "grad_norm": 0.4506753981113434, "learning_rate": 1.470544087894956e-06, "loss": 0.5225, "step": 2134 }, { "epoch": 0.36408594815825374, "grad_norm": 0.5804704427719116, "learning_rate": 1.4700566625121545e-06, "loss": 0.5212, "step": 2135 }, { "epoch": 0.36425648021828105, "grad_norm": 0.564107358455658, "learning_rate": 1.4695690937354214e-06, "loss": 0.5107, "step": 2136 }, { "epoch": 0.3644270122783083, "grad_norm": 0.3576640784740448, "learning_rate": 1.469081381713493e-06, "loss": 0.5176, "step": 2137 }, { "epoch": 0.36459754433833563, "grad_norm": 0.5520999431610107, "learning_rate": 1.4685935265951495e-06, "loss": 0.5277, "step": 2138 }, { "epoch": 0.3647680763983629, "grad_norm": 0.5637651085853577, "learning_rate": 1.468105528529214e-06, "loss": 0.5197, "step": 2139 }, { "epoch": 0.36493860845839016, "grad_norm": 0.4603327512741089, "learning_rate": 1.4676173876645537e-06, "loss": 0.5225, "step": 2140 }, { "epoch": 0.3651091405184175, "grad_norm": 0.6356070637702942, "learning_rate": 1.4671291041500792e-06, "loss": 0.5365, "step": 2141 }, { "epoch": 0.36527967257844474, "grad_norm": 0.3971726596355438, "learning_rate": 1.4666406781347446e-06, "loss": 0.5232, "step": 2142 }, { "epoch": 0.36545020463847205, "grad_norm": 0.5633137226104736, "learning_rate": 1.4661521097675479e-06, "loss": 0.5296, "step": 2143 }, { "epoch": 0.3656207366984993, "grad_norm": 0.49675676226615906, "learning_rate": 1.4656633991975295e-06, "loss": 0.5449, "step": 2144 }, { "epoch": 0.3657912687585266, "grad_norm": 0.48119738698005676, "learning_rate": 1.465174546573774e-06, "loss": 0.5168, "step": 2145 }, { "epoch": 0.3659618008185539, "grad_norm": 0.5705353617668152, "learning_rate": 1.4646855520454097e-06, "loss": 0.5262, "step": 2146 }, { "epoch": 0.36613233287858116, "grad_norm": 0.48952609300613403, "learning_rate": 1.464196415761607e-06, "loss": 0.5217, "step": 2147 }, { "epoch": 0.3663028649386085, "grad_norm": 0.5046576261520386, "learning_rate": 1.4637071378715806e-06, "loss": 0.5298, "step": 2148 }, { "epoch": 0.36647339699863574, "grad_norm": 0.4642289876937866, "learning_rate": 1.463217718524588e-06, "loss": 0.5328, "step": 2149 }, { "epoch": 0.36664392905866305, "grad_norm": 0.5076698064804077, "learning_rate": 1.46272815786993e-06, "loss": 0.5161, "step": 2150 }, { "epoch": 0.3668144611186903, "grad_norm": 0.4147416949272156, "learning_rate": 1.4622384560569495e-06, "loss": 0.523, "step": 2151 }, { "epoch": 0.3669849931787176, "grad_norm": 0.5995153188705444, "learning_rate": 1.4617486132350345e-06, "loss": 0.5182, "step": 2152 }, { "epoch": 0.3671555252387449, "grad_norm": 0.5449314713478088, "learning_rate": 1.4612586295536136e-06, "loss": 0.5188, "step": 2153 }, { "epoch": 0.36732605729877216, "grad_norm": 0.6206721067428589, "learning_rate": 1.4607685051621605e-06, "loss": 0.5346, "step": 2154 }, { "epoch": 0.3674965893587995, "grad_norm": 0.3753843307495117, "learning_rate": 1.4602782402101909e-06, "loss": 0.5253, "step": 2155 }, { "epoch": 0.36766712141882674, "grad_norm": 0.5091269016265869, "learning_rate": 1.4597878348472626e-06, "loss": 0.5293, "step": 2156 }, { "epoch": 0.367837653478854, "grad_norm": 0.46207624673843384, "learning_rate": 1.4592972892229782e-06, "loss": 0.5218, "step": 2157 }, { "epoch": 0.3680081855388813, "grad_norm": 0.47035977244377136, "learning_rate": 1.458806603486981e-06, "loss": 0.5433, "step": 2158 }, { "epoch": 0.3681787175989086, "grad_norm": 0.5492236614227295, "learning_rate": 1.4583157777889587e-06, "loss": 0.5431, "step": 2159 }, { "epoch": 0.3683492496589359, "grad_norm": 0.4612725079059601, "learning_rate": 1.4578248122786402e-06, "loss": 0.5256, "step": 2160 }, { "epoch": 0.36851978171896316, "grad_norm": 0.4277294874191284, "learning_rate": 1.4573337071057982e-06, "loss": 0.5245, "step": 2161 }, { "epoch": 0.3686903137789905, "grad_norm": 0.6318309307098389, "learning_rate": 1.4568424624202476e-06, "loss": 0.5223, "step": 2162 }, { "epoch": 0.36886084583901774, "grad_norm": 0.5150088667869568, "learning_rate": 1.456351078371846e-06, "loss": 0.5376, "step": 2163 }, { "epoch": 0.369031377899045, "grad_norm": 0.35254305601119995, "learning_rate": 1.4558595551104932e-06, "loss": 0.5078, "step": 2164 }, { "epoch": 0.3692019099590723, "grad_norm": 0.47404444217681885, "learning_rate": 1.4553678927861316e-06, "loss": 0.5224, "step": 2165 }, { "epoch": 0.3693724420190996, "grad_norm": 0.3993200957775116, "learning_rate": 1.4548760915487465e-06, "loss": 0.5354, "step": 2166 }, { "epoch": 0.3695429740791269, "grad_norm": 0.47942835092544556, "learning_rate": 1.4543841515483647e-06, "loss": 0.5307, "step": 2167 }, { "epoch": 0.36971350613915416, "grad_norm": 0.49483659863471985, "learning_rate": 1.453892072935056e-06, "loss": 0.5184, "step": 2168 }, { "epoch": 0.3698840381991814, "grad_norm": 0.35012009739875793, "learning_rate": 1.453399855858932e-06, "loss": 0.5275, "step": 2169 }, { "epoch": 0.37005457025920874, "grad_norm": 0.5223888158798218, "learning_rate": 1.4529075004701475e-06, "loss": 0.544, "step": 2170 }, { "epoch": 0.370225102319236, "grad_norm": 0.49425366520881653, "learning_rate": 1.452415006918898e-06, "loss": 0.5196, "step": 2171 }, { "epoch": 0.3703956343792633, "grad_norm": 0.4764786660671234, "learning_rate": 1.4519223753554224e-06, "loss": 0.5229, "step": 2172 }, { "epoch": 0.3705661664392906, "grad_norm": 0.4898337125778198, "learning_rate": 1.4514296059300016e-06, "loss": 0.5238, "step": 2173 }, { "epoch": 0.3707366984993179, "grad_norm": 0.41198065876960754, "learning_rate": 1.4509366987929576e-06, "loss": 0.5399, "step": 2174 }, { "epoch": 0.37090723055934516, "grad_norm": 0.5006463527679443, "learning_rate": 1.4504436540946547e-06, "loss": 0.5251, "step": 2175 }, { "epoch": 0.3710777626193724, "grad_norm": 0.3775595426559448, "learning_rate": 1.4499504719855003e-06, "loss": 0.5187, "step": 2176 }, { "epoch": 0.37124829467939974, "grad_norm": 0.4422166645526886, "learning_rate": 1.4494571526159427e-06, "loss": 0.5182, "step": 2177 }, { "epoch": 0.371418826739427, "grad_norm": 0.5421523451805115, "learning_rate": 1.448963696136472e-06, "loss": 0.5196, "step": 2178 }, { "epoch": 0.3715893587994543, "grad_norm": 0.43927666544914246, "learning_rate": 1.4484701026976207e-06, "loss": 0.5293, "step": 2179 }, { "epoch": 0.3717598908594816, "grad_norm": 0.48039036989212036, "learning_rate": 1.4479763724499627e-06, "loss": 0.5324, "step": 2180 }, { "epoch": 0.37193042291950884, "grad_norm": 0.46991389989852905, "learning_rate": 1.4474825055441137e-06, "loss": 0.5313, "step": 2181 }, { "epoch": 0.37210095497953616, "grad_norm": 0.45620325207710266, "learning_rate": 1.446988502130731e-06, "loss": 0.5234, "step": 2182 }, { "epoch": 0.3722714870395634, "grad_norm": 0.514532208442688, "learning_rate": 1.4464943623605139e-06, "loss": 0.5377, "step": 2183 }, { "epoch": 0.37244201909959074, "grad_norm": 0.44968950748443604, "learning_rate": 1.4460000863842024e-06, "loss": 0.5345, "step": 2184 }, { "epoch": 0.372612551159618, "grad_norm": 0.4595887362957001, "learning_rate": 1.4455056743525794e-06, "loss": 0.5111, "step": 2185 }, { "epoch": 0.3727830832196453, "grad_norm": 0.5584328174591064, "learning_rate": 1.4450111264164684e-06, "loss": 0.5284, "step": 2186 }, { "epoch": 0.3729536152796726, "grad_norm": 0.394270658493042, "learning_rate": 1.4445164427267346e-06, "loss": 0.5194, "step": 2187 }, { "epoch": 0.37312414733969984, "grad_norm": 0.4683585464954376, "learning_rate": 1.4440216234342842e-06, "loss": 0.5328, "step": 2188 }, { "epoch": 0.37329467939972716, "grad_norm": 0.4643068313598633, "learning_rate": 1.443526668690066e-06, "loss": 0.5389, "step": 2189 }, { "epoch": 0.3734652114597544, "grad_norm": 0.38309845328330994, "learning_rate": 1.4430315786450685e-06, "loss": 0.5294, "step": 2190 }, { "epoch": 0.37363574351978174, "grad_norm": 0.46844732761383057, "learning_rate": 1.442536353450322e-06, "loss": 0.5072, "step": 2191 }, { "epoch": 0.373806275579809, "grad_norm": 0.48929598927497864, "learning_rate": 1.4420409932568992e-06, "loss": 0.5241, "step": 2192 }, { "epoch": 0.37397680763983626, "grad_norm": 0.5985404849052429, "learning_rate": 1.4415454982159123e-06, "loss": 0.5243, "step": 2193 }, { "epoch": 0.3741473396998636, "grad_norm": 0.4248568117618561, "learning_rate": 1.4410498684785153e-06, "loss": 0.5336, "step": 2194 }, { "epoch": 0.37431787175989084, "grad_norm": 0.4180203676223755, "learning_rate": 1.440554104195904e-06, "loss": 0.5286, "step": 2195 }, { "epoch": 0.37448840381991816, "grad_norm": 0.4288659393787384, "learning_rate": 1.4400582055193143e-06, "loss": 0.5372, "step": 2196 }, { "epoch": 0.3746589358799454, "grad_norm": 0.4723179340362549, "learning_rate": 1.4395621726000235e-06, "loss": 0.5226, "step": 2197 }, { "epoch": 0.37482946793997274, "grad_norm": 0.48728570342063904, "learning_rate": 1.4390660055893496e-06, "loss": 0.5294, "step": 2198 }, { "epoch": 0.375, "grad_norm": 0.4278232157230377, "learning_rate": 1.4385697046386515e-06, "loss": 0.52, "step": 2199 }, { "epoch": 0.37517053206002726, "grad_norm": 0.37193500995635986, "learning_rate": 1.4380732698993294e-06, "loss": 0.5189, "step": 2200 }, { "epoch": 0.3753410641200546, "grad_norm": 0.5276831388473511, "learning_rate": 1.437576701522824e-06, "loss": 0.543, "step": 2201 }, { "epoch": 0.37551159618008184, "grad_norm": 0.485494464635849, "learning_rate": 1.437079999660617e-06, "loss": 0.5206, "step": 2202 }, { "epoch": 0.37568212824010916, "grad_norm": 0.4124785363674164, "learning_rate": 1.4365831644642302e-06, "loss": 0.5146, "step": 2203 }, { "epoch": 0.3758526603001364, "grad_norm": 0.4600485563278198, "learning_rate": 1.436086196085227e-06, "loss": 0.5115, "step": 2204 }, { "epoch": 0.37602319236016374, "grad_norm": 0.4196353256702423, "learning_rate": 1.4355890946752103e-06, "loss": 0.5269, "step": 2205 }, { "epoch": 0.376193724420191, "grad_norm": 0.4327806532382965, "learning_rate": 1.435091860385825e-06, "loss": 0.5141, "step": 2206 }, { "epoch": 0.37636425648021826, "grad_norm": 0.3661247491836548, "learning_rate": 1.434594493368755e-06, "loss": 0.5096, "step": 2207 }, { "epoch": 0.3765347885402456, "grad_norm": 0.45045968890190125, "learning_rate": 1.4340969937757259e-06, "loss": 0.5304, "step": 2208 }, { "epoch": 0.37670532060027284, "grad_norm": 0.4632346034049988, "learning_rate": 1.433599361758503e-06, "loss": 0.5364, "step": 2209 }, { "epoch": 0.37687585266030016, "grad_norm": 0.44703951478004456, "learning_rate": 1.4331015974688925e-06, "loss": 0.5356, "step": 2210 }, { "epoch": 0.3770463847203274, "grad_norm": 0.5135074257850647, "learning_rate": 1.4326037010587407e-06, "loss": 0.5191, "step": 2211 }, { "epoch": 0.3772169167803547, "grad_norm": 0.407526433467865, "learning_rate": 1.432105672679934e-06, "loss": 0.5211, "step": 2212 }, { "epoch": 0.377387448840382, "grad_norm": 0.4435472786426544, "learning_rate": 1.4316075124843996e-06, "loss": 0.5214, "step": 2213 }, { "epoch": 0.37755798090040926, "grad_norm": 0.5043038725852966, "learning_rate": 1.4311092206241048e-06, "loss": 0.5227, "step": 2214 }, { "epoch": 0.3777285129604366, "grad_norm": 0.3871316611766815, "learning_rate": 1.430610797251056e-06, "loss": 0.5277, "step": 2215 }, { "epoch": 0.37789904502046384, "grad_norm": 0.48304349184036255, "learning_rate": 1.4301122425173015e-06, "loss": 0.5209, "step": 2216 }, { "epoch": 0.37806957708049116, "grad_norm": 0.5158309936523438, "learning_rate": 1.4296135565749281e-06, "loss": 0.5238, "step": 2217 }, { "epoch": 0.3782401091405184, "grad_norm": 0.3965489864349365, "learning_rate": 1.4291147395760637e-06, "loss": 0.5383, "step": 2218 }, { "epoch": 0.3784106412005457, "grad_norm": 0.47193005681037903, "learning_rate": 1.4286157916728757e-06, "loss": 0.5146, "step": 2219 }, { "epoch": 0.378581173260573, "grad_norm": 0.4689446985721588, "learning_rate": 1.4281167130175714e-06, "loss": 0.5243, "step": 2220 }, { "epoch": 0.37875170532060026, "grad_norm": 0.43518558144569397, "learning_rate": 1.4276175037623982e-06, "loss": 0.521, "step": 2221 }, { "epoch": 0.3789222373806276, "grad_norm": 0.34477195143699646, "learning_rate": 1.4271181640596429e-06, "loss": 0.5194, "step": 2222 }, { "epoch": 0.37909276944065484, "grad_norm": 0.538991391658783, "learning_rate": 1.426618694061633e-06, "loss": 0.5217, "step": 2223 }, { "epoch": 0.3792633015006821, "grad_norm": 0.5617110729217529, "learning_rate": 1.4261190939207345e-06, "loss": 0.5058, "step": 2224 }, { "epoch": 0.3794338335607094, "grad_norm": 0.6033090353012085, "learning_rate": 1.4256193637893542e-06, "loss": 0.5331, "step": 2225 }, { "epoch": 0.3796043656207367, "grad_norm": 0.5407723188400269, "learning_rate": 1.425119503819938e-06, "loss": 0.5353, "step": 2226 }, { "epoch": 0.379774897680764, "grad_norm": 0.5833804607391357, "learning_rate": 1.4246195141649717e-06, "loss": 0.5204, "step": 2227 }, { "epoch": 0.37994542974079126, "grad_norm": 0.727768063545227, "learning_rate": 1.4241193949769802e-06, "loss": 0.5283, "step": 2228 }, { "epoch": 0.3801159618008186, "grad_norm": 0.4322068989276886, "learning_rate": 1.4236191464085287e-06, "loss": 0.5353, "step": 2229 }, { "epoch": 0.38028649386084584, "grad_norm": 0.4498087167739868, "learning_rate": 1.4231187686122205e-06, "loss": 0.5273, "step": 2230 }, { "epoch": 0.3804570259208731, "grad_norm": 0.6119716763496399, "learning_rate": 1.4226182617406998e-06, "loss": 0.5331, "step": 2231 }, { "epoch": 0.3806275579809004, "grad_norm": 0.3565307855606079, "learning_rate": 1.4221176259466495e-06, "loss": 0.5248, "step": 2232 }, { "epoch": 0.3807980900409277, "grad_norm": 0.5091411471366882, "learning_rate": 1.421616861382792e-06, "loss": 0.5385, "step": 2233 }, { "epoch": 0.380968622100955, "grad_norm": 0.5057311058044434, "learning_rate": 1.4211159682018886e-06, "loss": 0.5155, "step": 2234 }, { "epoch": 0.38113915416098226, "grad_norm": 0.37856343388557434, "learning_rate": 1.4206149465567404e-06, "loss": 0.5205, "step": 2235 }, { "epoch": 0.3813096862210095, "grad_norm": 0.6066394448280334, "learning_rate": 1.4201137966001875e-06, "loss": 0.5244, "step": 2236 }, { "epoch": 0.38148021828103684, "grad_norm": 0.4703941345214844, "learning_rate": 1.4196125184851085e-06, "loss": 0.5485, "step": 2237 }, { "epoch": 0.3816507503410641, "grad_norm": 0.402211993932724, "learning_rate": 1.4191111123644223e-06, "loss": 0.526, "step": 2238 }, { "epoch": 0.3818212824010914, "grad_norm": 0.5045869946479797, "learning_rate": 1.4186095783910856e-06, "loss": 0.5225, "step": 2239 }, { "epoch": 0.3819918144611187, "grad_norm": 0.5181368589401245, "learning_rate": 1.418107916718095e-06, "loss": 0.529, "step": 2240 }, { "epoch": 0.382162346521146, "grad_norm": 0.4280795753002167, "learning_rate": 1.417606127498486e-06, "loss": 0.5219, "step": 2241 }, { "epoch": 0.38233287858117326, "grad_norm": 0.526380717754364, "learning_rate": 1.417104210885332e-06, "loss": 0.5159, "step": 2242 }, { "epoch": 0.3825034106412005, "grad_norm": 0.5254361033439636, "learning_rate": 1.4166021670317466e-06, "loss": 0.522, "step": 2243 }, { "epoch": 0.38267394270122784, "grad_norm": 0.626717209815979, "learning_rate": 1.416099996090882e-06, "loss": 0.5169, "step": 2244 }, { "epoch": 0.3828444747612551, "grad_norm": 0.546101450920105, "learning_rate": 1.4155976982159281e-06, "loss": 0.5239, "step": 2245 }, { "epoch": 0.3830150068212824, "grad_norm": 0.48548194766044617, "learning_rate": 1.4150952735601144e-06, "loss": 0.5138, "step": 2246 }, { "epoch": 0.3831855388813097, "grad_norm": 0.43900060653686523, "learning_rate": 1.4145927222767092e-06, "loss": 0.5182, "step": 2247 }, { "epoch": 0.38335607094133695, "grad_norm": 0.44476473331451416, "learning_rate": 1.4140900445190188e-06, "loss": 0.5223, "step": 2248 }, { "epoch": 0.38352660300136426, "grad_norm": 0.4795719385147095, "learning_rate": 1.4135872404403883e-06, "loss": 0.5306, "step": 2249 }, { "epoch": 0.3836971350613915, "grad_norm": 0.39385172724723816, "learning_rate": 1.413084310194202e-06, "loss": 0.5283, "step": 2250 }, { "epoch": 0.38386766712141884, "grad_norm": 0.5722368955612183, "learning_rate": 1.4125812539338817e-06, "loss": 0.5322, "step": 2251 }, { "epoch": 0.3840381991814461, "grad_norm": 0.42854925990104675, "learning_rate": 1.412078071812888e-06, "loss": 0.5328, "step": 2252 }, { "epoch": 0.3842087312414734, "grad_norm": 0.4990580677986145, "learning_rate": 1.4115747639847205e-06, "loss": 0.529, "step": 2253 }, { "epoch": 0.3843792633015007, "grad_norm": 0.38250014185905457, "learning_rate": 1.411071330602916e-06, "loss": 0.5127, "step": 2254 }, { "epoch": 0.38454979536152795, "grad_norm": 0.5111120939254761, "learning_rate": 1.4105677718210508e-06, "loss": 0.5226, "step": 2255 }, { "epoch": 0.38472032742155526, "grad_norm": 0.3982873260974884, "learning_rate": 1.4100640877927385e-06, "loss": 0.5284, "step": 2256 }, { "epoch": 0.3848908594815825, "grad_norm": 0.44653573632240295, "learning_rate": 1.4095602786716312e-06, "loss": 0.5231, "step": 2257 }, { "epoch": 0.38506139154160984, "grad_norm": 0.4471384286880493, "learning_rate": 1.4090563446114197e-06, "loss": 0.5169, "step": 2258 }, { "epoch": 0.3852319236016371, "grad_norm": 0.48839473724365234, "learning_rate": 1.4085522857658321e-06, "loss": 0.5165, "step": 2259 }, { "epoch": 0.38540245566166437, "grad_norm": 0.5165295004844666, "learning_rate": 1.408048102288635e-06, "loss": 0.5246, "step": 2260 }, { "epoch": 0.3855729877216917, "grad_norm": 0.45982906222343445, "learning_rate": 1.4075437943336328e-06, "loss": 0.5046, "step": 2261 }, { "epoch": 0.38574351978171895, "grad_norm": 0.5657344460487366, "learning_rate": 1.4070393620546685e-06, "loss": 0.5252, "step": 2262 }, { "epoch": 0.38591405184174626, "grad_norm": 0.40879642963409424, "learning_rate": 1.406534805605622e-06, "loss": 0.514, "step": 2263 }, { "epoch": 0.3860845839017735, "grad_norm": 0.48264119029045105, "learning_rate": 1.4060301251404117e-06, "loss": 0.5148, "step": 2264 }, { "epoch": 0.38625511596180084, "grad_norm": 0.47972968220710754, "learning_rate": 1.4055253208129942e-06, "loss": 0.5274, "step": 2265 }, { "epoch": 0.3864256480218281, "grad_norm": 0.4307866394519806, "learning_rate": 1.4050203927773628e-06, "loss": 0.5125, "step": 2266 }, { "epoch": 0.38659618008185537, "grad_norm": 0.5022274255752563, "learning_rate": 1.4045153411875497e-06, "loss": 0.5162, "step": 2267 }, { "epoch": 0.3867667121418827, "grad_norm": 0.413129597902298, "learning_rate": 1.404010166197624e-06, "loss": 0.5293, "step": 2268 }, { "epoch": 0.38693724420190995, "grad_norm": 0.4377501606941223, "learning_rate": 1.403504867961693e-06, "loss": 0.5411, "step": 2269 }, { "epoch": 0.38710777626193726, "grad_norm": 0.501990020275116, "learning_rate": 1.4029994466339009e-06, "loss": 0.5214, "step": 2270 }, { "epoch": 0.3872783083219645, "grad_norm": 0.49853450059890747, "learning_rate": 1.40249390236843e-06, "loss": 0.5194, "step": 2271 }, { "epoch": 0.3874488403819918, "grad_norm": 0.46902474761009216, "learning_rate": 1.4019882353195003e-06, "loss": 0.5256, "step": 2272 }, { "epoch": 0.3876193724420191, "grad_norm": 0.46112629771232605, "learning_rate": 1.401482445641368e-06, "loss": 0.5245, "step": 2273 }, { "epoch": 0.38778990450204637, "grad_norm": 0.39838969707489014, "learning_rate": 1.4009765334883288e-06, "loss": 0.5142, "step": 2274 }, { "epoch": 0.3879604365620737, "grad_norm": 0.39665982127189636, "learning_rate": 1.4004704990147142e-06, "loss": 0.5362, "step": 2275 }, { "epoch": 0.38813096862210095, "grad_norm": 0.4456673860549927, "learning_rate": 1.3999643423748932e-06, "loss": 0.524, "step": 2276 }, { "epoch": 0.38830150068212826, "grad_norm": 0.48728182911872864, "learning_rate": 1.3994580637232718e-06, "loss": 0.5105, "step": 2277 }, { "epoch": 0.3884720327421555, "grad_norm": 0.4686523973941803, "learning_rate": 1.3989516632142946e-06, "loss": 0.529, "step": 2278 }, { "epoch": 0.3886425648021828, "grad_norm": 0.41218599677085876, "learning_rate": 1.3984451410024422e-06, "loss": 0.5275, "step": 2279 }, { "epoch": 0.3888130968622101, "grad_norm": 0.6312074661254883, "learning_rate": 1.397938497242232e-06, "loss": 0.5303, "step": 2280 }, { "epoch": 0.38898362892223737, "grad_norm": 0.541376531124115, "learning_rate": 1.3974317320882203e-06, "loss": 0.5208, "step": 2281 }, { "epoch": 0.3891541609822647, "grad_norm": 0.3768637776374817, "learning_rate": 1.3969248456949977e-06, "loss": 0.538, "step": 2282 }, { "epoch": 0.38932469304229195, "grad_norm": 0.5026829838752747, "learning_rate": 1.3964178382171943e-06, "loss": 0.5389, "step": 2283 }, { "epoch": 0.3894952251023192, "grad_norm": 0.4570540487766266, "learning_rate": 1.3959107098094758e-06, "loss": 0.5123, "step": 2284 }, { "epoch": 0.3896657571623465, "grad_norm": 0.4274302124977112, "learning_rate": 1.3954034606265453e-06, "loss": 0.5098, "step": 2285 }, { "epoch": 0.3898362892223738, "grad_norm": 0.4339488744735718, "learning_rate": 1.3948960908231424e-06, "loss": 0.5349, "step": 2286 }, { "epoch": 0.3900068212824011, "grad_norm": 0.3499389588832855, "learning_rate": 1.3943886005540437e-06, "loss": 0.5277, "step": 2287 }, { "epoch": 0.39017735334242837, "grad_norm": 0.3941383361816406, "learning_rate": 1.3938809899740623e-06, "loss": 0.5294, "step": 2288 }, { "epoch": 0.3903478854024557, "grad_norm": 0.39110586047172546, "learning_rate": 1.3933732592380486e-06, "loss": 0.5139, "step": 2289 }, { "epoch": 0.39051841746248295, "grad_norm": 0.3773321211338043, "learning_rate": 1.3928654085008888e-06, "loss": 0.5284, "step": 2290 }, { "epoch": 0.3906889495225102, "grad_norm": 0.4338504374027252, "learning_rate": 1.392357437917507e-06, "loss": 0.5233, "step": 2291 }, { "epoch": 0.3908594815825375, "grad_norm": 0.4193192720413208, "learning_rate": 1.391849347642862e-06, "loss": 0.5075, "step": 2292 }, { "epoch": 0.3910300136425648, "grad_norm": 0.49013859033584595, "learning_rate": 1.3913411378319504e-06, "loss": 0.5208, "step": 2293 }, { "epoch": 0.3912005457025921, "grad_norm": 0.5191242098808289, "learning_rate": 1.3908328086398056e-06, "loss": 0.5271, "step": 2294 }, { "epoch": 0.39137107776261937, "grad_norm": 0.43833374977111816, "learning_rate": 1.390324360221496e-06, "loss": 0.52, "step": 2295 }, { "epoch": 0.39154160982264663, "grad_norm": 0.35007816553115845, "learning_rate": 1.389815792732128e-06, "loss": 0.5254, "step": 2296 }, { "epoch": 0.39171214188267395, "grad_norm": 0.5012977123260498, "learning_rate": 1.3893071063268433e-06, "loss": 0.5052, "step": 2297 }, { "epoch": 0.3918826739427012, "grad_norm": 0.4835526943206787, "learning_rate": 1.3887983011608194e-06, "loss": 0.5206, "step": 2298 }, { "epoch": 0.3920532060027285, "grad_norm": 0.5067761540412903, "learning_rate": 1.3882893773892716e-06, "loss": 0.5223, "step": 2299 }, { "epoch": 0.3922237380627558, "grad_norm": 0.4298695921897888, "learning_rate": 1.3877803351674497e-06, "loss": 0.515, "step": 2300 }, { "epoch": 0.3923942701227831, "grad_norm": 0.6273713707923889, "learning_rate": 1.3872711746506415e-06, "loss": 0.5162, "step": 2301 }, { "epoch": 0.39256480218281037, "grad_norm": 0.6779302954673767, "learning_rate": 1.3867618959941687e-06, "loss": 0.5213, "step": 2302 }, { "epoch": 0.39273533424283763, "grad_norm": 0.3425716757774353, "learning_rate": 1.386252499353391e-06, "loss": 0.5204, "step": 2303 }, { "epoch": 0.39290586630286495, "grad_norm": 0.5272434949874878, "learning_rate": 1.3857429848837025e-06, "loss": 0.5177, "step": 2304 }, { "epoch": 0.3930763983628922, "grad_norm": 0.363675594329834, "learning_rate": 1.3852333527405348e-06, "loss": 0.5113, "step": 2305 }, { "epoch": 0.3932469304229195, "grad_norm": 0.5462455153465271, "learning_rate": 1.3847236030793539e-06, "loss": 0.5254, "step": 2306 }, { "epoch": 0.3934174624829468, "grad_norm": 0.3669333755970001, "learning_rate": 1.384213736055663e-06, "loss": 0.5248, "step": 2307 }, { "epoch": 0.3935879945429741, "grad_norm": 0.46187612414360046, "learning_rate": 1.3837037518249999e-06, "loss": 0.525, "step": 2308 }, { "epoch": 0.39375852660300137, "grad_norm": 0.40820184350013733, "learning_rate": 1.3831936505429392e-06, "loss": 0.5158, "step": 2309 }, { "epoch": 0.39392905866302863, "grad_norm": 0.3934749662876129, "learning_rate": 1.38268343236509e-06, "loss": 0.5195, "step": 2310 }, { "epoch": 0.39409959072305595, "grad_norm": 0.5167784094810486, "learning_rate": 1.3821730974470983e-06, "loss": 0.5304, "step": 2311 }, { "epoch": 0.3942701227830832, "grad_norm": 0.3810725808143616, "learning_rate": 1.3816626459446452e-06, "loss": 0.5276, "step": 2312 }, { "epoch": 0.3944406548431105, "grad_norm": 0.49950870871543884, "learning_rate": 1.3811520780134473e-06, "loss": 0.5093, "step": 2313 }, { "epoch": 0.3946111869031378, "grad_norm": 0.4600581228733063, "learning_rate": 1.3806413938092568e-06, "loss": 0.5229, "step": 2314 }, { "epoch": 0.39478171896316505, "grad_norm": 0.3954922556877136, "learning_rate": 1.380130593487861e-06, "loss": 0.5319, "step": 2315 }, { "epoch": 0.39495225102319237, "grad_norm": 0.43245062232017517, "learning_rate": 1.379619677205083e-06, "loss": 0.5283, "step": 2316 }, { "epoch": 0.39512278308321963, "grad_norm": 0.43941789865493774, "learning_rate": 1.3791086451167815e-06, "loss": 0.5174, "step": 2317 }, { "epoch": 0.39529331514324695, "grad_norm": 0.4048894941806793, "learning_rate": 1.3785974973788507e-06, "loss": 0.507, "step": 2318 }, { "epoch": 0.3954638472032742, "grad_norm": 0.36088645458221436, "learning_rate": 1.3780862341472184e-06, "loss": 0.5296, "step": 2319 }, { "epoch": 0.3956343792633015, "grad_norm": 0.5440859198570251, "learning_rate": 1.3775748555778498e-06, "loss": 0.5291, "step": 2320 }, { "epoch": 0.3958049113233288, "grad_norm": 0.4294169843196869, "learning_rate": 1.3770633618267445e-06, "loss": 0.5162, "step": 2321 }, { "epoch": 0.39597544338335605, "grad_norm": 0.4461040496826172, "learning_rate": 1.3765517530499365e-06, "loss": 0.5187, "step": 2322 }, { "epoch": 0.39614597544338337, "grad_norm": 0.4923355281352997, "learning_rate": 1.3760400294034957e-06, "loss": 0.5143, "step": 2323 }, { "epoch": 0.39631650750341063, "grad_norm": 0.3550982177257538, "learning_rate": 1.3755281910435268e-06, "loss": 0.5266, "step": 2324 }, { "epoch": 0.39648703956343795, "grad_norm": 0.44544970989227295, "learning_rate": 1.3750162381261694e-06, "loss": 0.5328, "step": 2325 }, { "epoch": 0.3966575716234652, "grad_norm": 0.47301942110061646, "learning_rate": 1.3745041708075985e-06, "loss": 0.5281, "step": 2326 }, { "epoch": 0.3968281036834925, "grad_norm": 0.4111325740814209, "learning_rate": 1.3739919892440236e-06, "loss": 0.5214, "step": 2327 }, { "epoch": 0.3969986357435198, "grad_norm": 0.3858391344547272, "learning_rate": 1.373479693591689e-06, "loss": 0.5268, "step": 2328 }, { "epoch": 0.39716916780354705, "grad_norm": 0.5584406852722168, "learning_rate": 1.3729672840068734e-06, "loss": 0.5264, "step": 2329 }, { "epoch": 0.39733969986357437, "grad_norm": 0.5933360457420349, "learning_rate": 1.3724547606458918e-06, "loss": 0.5241, "step": 2330 }, { "epoch": 0.39751023192360163, "grad_norm": 0.6099199056625366, "learning_rate": 1.3719421236650923e-06, "loss": 0.5174, "step": 2331 }, { "epoch": 0.39768076398362895, "grad_norm": 0.48414215445518494, "learning_rate": 1.3714293732208584e-06, "loss": 0.5181, "step": 2332 }, { "epoch": 0.3978512960436562, "grad_norm": 0.5974043011665344, "learning_rate": 1.3709165094696077e-06, "loss": 0.5216, "step": 2333 }, { "epoch": 0.3980218281036835, "grad_norm": 0.4411584734916687, "learning_rate": 1.3704035325677932e-06, "loss": 0.5177, "step": 2334 }, { "epoch": 0.3981923601637108, "grad_norm": 0.49411454796791077, "learning_rate": 1.3698904426719015e-06, "loss": 0.5123, "step": 2335 }, { "epoch": 0.39836289222373805, "grad_norm": 0.3845353126525879, "learning_rate": 1.3693772399384548e-06, "loss": 0.5159, "step": 2336 }, { "epoch": 0.39853342428376537, "grad_norm": 0.47084566950798035, "learning_rate": 1.368863924524008e-06, "loss": 0.5309, "step": 2337 }, { "epoch": 0.39870395634379263, "grad_norm": 0.39838436245918274, "learning_rate": 1.3683504965851523e-06, "loss": 0.5172, "step": 2338 }, { "epoch": 0.3988744884038199, "grad_norm": 0.39336255192756653, "learning_rate": 1.367836956278512e-06, "loss": 0.5219, "step": 2339 }, { "epoch": 0.3990450204638472, "grad_norm": 0.4916491210460663, "learning_rate": 1.3673233037607459e-06, "loss": 0.5018, "step": 2340 }, { "epoch": 0.3992155525238745, "grad_norm": 0.3593173325061798, "learning_rate": 1.3668095391885474e-06, "loss": 0.5197, "step": 2341 }, { "epoch": 0.3993860845839018, "grad_norm": 0.3894543945789337, "learning_rate": 1.3662956627186434e-06, "loss": 0.5207, "step": 2342 }, { "epoch": 0.39955661664392905, "grad_norm": 0.40112966299057007, "learning_rate": 1.3657816745077958e-06, "loss": 0.5118, "step": 2343 }, { "epoch": 0.39972714870395637, "grad_norm": 0.41793084144592285, "learning_rate": 1.3652675747127996e-06, "loss": 0.5173, "step": 2344 }, { "epoch": 0.39989768076398363, "grad_norm": 0.4209323227405548, "learning_rate": 1.3647533634904852e-06, "loss": 0.5166, "step": 2345 }, { "epoch": 0.4000682128240109, "grad_norm": 0.3716372549533844, "learning_rate": 1.3642390409977156e-06, "loss": 0.5062, "step": 2346 }, { "epoch": 0.4002387448840382, "grad_norm": 0.3547699749469757, "learning_rate": 1.3637246073913885e-06, "loss": 0.5066, "step": 2347 }, { "epoch": 0.4004092769440655, "grad_norm": 0.3524262607097626, "learning_rate": 1.363210062828435e-06, "loss": 0.5227, "step": 2348 }, { "epoch": 0.4005798090040928, "grad_norm": 0.3743390440940857, "learning_rate": 1.3626954074658212e-06, "loss": 0.5013, "step": 2349 }, { "epoch": 0.40075034106412005, "grad_norm": 0.41511663794517517, "learning_rate": 1.3621806414605448e-06, "loss": 0.5232, "step": 2350 }, { "epoch": 0.4009208731241473, "grad_norm": 0.4181976616382599, "learning_rate": 1.3616657649696398e-06, "loss": 0.513, "step": 2351 }, { "epoch": 0.40109140518417463, "grad_norm": 0.44195061922073364, "learning_rate": 1.3611507781501723e-06, "loss": 0.5193, "step": 2352 }, { "epoch": 0.4012619372442019, "grad_norm": 0.40733179450035095, "learning_rate": 1.360635681159243e-06, "loss": 0.5096, "step": 2353 }, { "epoch": 0.4014324693042292, "grad_norm": 0.49988704919815063, "learning_rate": 1.3601204741539843e-06, "loss": 0.511, "step": 2354 }, { "epoch": 0.4016030013642565, "grad_norm": 0.43027475476264954, "learning_rate": 1.359605157291565e-06, "loss": 0.5181, "step": 2355 }, { "epoch": 0.4017735334242838, "grad_norm": 0.4969724714756012, "learning_rate": 1.359089730729185e-06, "loss": 0.5138, "step": 2356 }, { "epoch": 0.40194406548431105, "grad_norm": 0.7880139946937561, "learning_rate": 1.3585741946240793e-06, "loss": 0.5093, "step": 2357 }, { "epoch": 0.4021145975443383, "grad_norm": 0.47539666295051575, "learning_rate": 1.3580585491335153e-06, "loss": 0.5032, "step": 2358 }, { "epoch": 0.40228512960436563, "grad_norm": 0.4887872040271759, "learning_rate": 1.3575427944147939e-06, "loss": 0.5026, "step": 2359 }, { "epoch": 0.4024556616643929, "grad_norm": 0.4302110970020294, "learning_rate": 1.35702693062525e-06, "loss": 0.5176, "step": 2360 }, { "epoch": 0.4026261937244202, "grad_norm": 0.44501402974128723, "learning_rate": 1.3565109579222513e-06, "loss": 0.5192, "step": 2361 }, { "epoch": 0.4027967257844475, "grad_norm": 0.5135705471038818, "learning_rate": 1.3559948764631983e-06, "loss": 0.53, "step": 2362 }, { "epoch": 0.40296725784447474, "grad_norm": 0.35561659932136536, "learning_rate": 1.3554786864055252e-06, "loss": 0.5132, "step": 2363 }, { "epoch": 0.40313778990450205, "grad_norm": 0.532179594039917, "learning_rate": 1.3549623879066996e-06, "loss": 0.5234, "step": 2364 }, { "epoch": 0.4033083219645293, "grad_norm": 0.5692497491836548, "learning_rate": 1.3544459811242213e-06, "loss": 0.5352, "step": 2365 }, { "epoch": 0.40347885402455663, "grad_norm": 0.3840283453464508, "learning_rate": 1.3539294662156241e-06, "loss": 0.5233, "step": 2366 }, { "epoch": 0.4036493860845839, "grad_norm": 0.4098012447357178, "learning_rate": 1.3534128433384743e-06, "loss": 0.5163, "step": 2367 }, { "epoch": 0.4038199181446112, "grad_norm": 0.4464457035064697, "learning_rate": 1.3528961126503704e-06, "loss": 0.5119, "step": 2368 }, { "epoch": 0.4039904502046385, "grad_norm": 0.4604474902153015, "learning_rate": 1.3523792743089457e-06, "loss": 0.544, "step": 2369 }, { "epoch": 0.40416098226466574, "grad_norm": 0.36774614453315735, "learning_rate": 1.3518623284718645e-06, "loss": 0.5302, "step": 2370 }, { "epoch": 0.40433151432469305, "grad_norm": 0.4567425549030304, "learning_rate": 1.351345275296825e-06, "loss": 0.5336, "step": 2371 }, { "epoch": 0.4045020463847203, "grad_norm": 0.42046618461608887, "learning_rate": 1.3508281149415571e-06, "loss": 0.5257, "step": 2372 }, { "epoch": 0.40467257844474763, "grad_norm": 0.5821329951286316, "learning_rate": 1.3503108475638246e-06, "loss": 0.5263, "step": 2373 }, { "epoch": 0.4048431105047749, "grad_norm": 0.40758076310157776, "learning_rate": 1.3497934733214233e-06, "loss": 0.5071, "step": 2374 }, { "epoch": 0.40501364256480216, "grad_norm": 0.39630794525146484, "learning_rate": 1.349275992372181e-06, "loss": 0.5252, "step": 2375 }, { "epoch": 0.4051841746248295, "grad_norm": 0.4020107388496399, "learning_rate": 1.34875840487396e-06, "loss": 0.5133, "step": 2376 }, { "epoch": 0.40535470668485674, "grad_norm": 0.39645326137542725, "learning_rate": 1.3482407109846529e-06, "loss": 0.52, "step": 2377 }, { "epoch": 0.40552523874488405, "grad_norm": 0.4198255240917206, "learning_rate": 1.3477229108621856e-06, "loss": 0.5201, "step": 2378 }, { "epoch": 0.4056957708049113, "grad_norm": 0.3982833921909332, "learning_rate": 1.3472050046645168e-06, "loss": 0.5325, "step": 2379 }, { "epoch": 0.40586630286493863, "grad_norm": 0.43688592314720154, "learning_rate": 1.3466869925496373e-06, "loss": 0.4985, "step": 2380 }, { "epoch": 0.4060368349249659, "grad_norm": 0.4188794791698456, "learning_rate": 1.3461688746755705e-06, "loss": 0.5165, "step": 2381 }, { "epoch": 0.40620736698499316, "grad_norm": 0.40829628705978394, "learning_rate": 1.3456506512003708e-06, "loss": 0.5237, "step": 2382 }, { "epoch": 0.4063778990450205, "grad_norm": 0.3171619176864624, "learning_rate": 1.3451323222821268e-06, "loss": 0.5105, "step": 2383 }, { "epoch": 0.40654843110504774, "grad_norm": 0.4364809989929199, "learning_rate": 1.3446138880789572e-06, "loss": 0.5233, "step": 2384 }, { "epoch": 0.40671896316507505, "grad_norm": 0.3572435677051544, "learning_rate": 1.3440953487490145e-06, "loss": 0.518, "step": 2385 }, { "epoch": 0.4068894952251023, "grad_norm": 0.4391252100467682, "learning_rate": 1.3435767044504827e-06, "loss": 0.5117, "step": 2386 }, { "epoch": 0.4070600272851296, "grad_norm": 0.4334906339645386, "learning_rate": 1.3430579553415772e-06, "loss": 0.5136, "step": 2387 }, { "epoch": 0.4072305593451569, "grad_norm": 0.3785998821258545, "learning_rate": 1.342539101580546e-06, "loss": 0.5311, "step": 2388 }, { "epoch": 0.40740109140518416, "grad_norm": 0.40511175990104675, "learning_rate": 1.342020143325669e-06, "loss": 0.5063, "step": 2389 }, { "epoch": 0.4075716234652115, "grad_norm": 0.42973968386650085, "learning_rate": 1.341501080735258e-06, "loss": 0.5109, "step": 2390 }, { "epoch": 0.40774215552523874, "grad_norm": 0.38002124428749084, "learning_rate": 1.3409819139676562e-06, "loss": 0.5143, "step": 2391 }, { "epoch": 0.40791268758526605, "grad_norm": 0.4136904776096344, "learning_rate": 1.3404626431812395e-06, "loss": 0.5093, "step": 2392 }, { "epoch": 0.4080832196452933, "grad_norm": 0.3670330345630646, "learning_rate": 1.3399432685344144e-06, "loss": 0.5153, "step": 2393 }, { "epoch": 0.4082537517053206, "grad_norm": 0.3782718777656555, "learning_rate": 1.3394237901856196e-06, "loss": 0.5128, "step": 2394 }, { "epoch": 0.4084242837653479, "grad_norm": 0.39012637734413147, "learning_rate": 1.3389042082933258e-06, "loss": 0.5225, "step": 2395 }, { "epoch": 0.40859481582537516, "grad_norm": 0.38971012830734253, "learning_rate": 1.3383845230160348e-06, "loss": 0.5148, "step": 2396 }, { "epoch": 0.4087653478854025, "grad_norm": 0.3932477831840515, "learning_rate": 1.3378647345122797e-06, "loss": 0.5271, "step": 2397 }, { "epoch": 0.40893587994542974, "grad_norm": 0.37541669607162476, "learning_rate": 1.337344842940626e-06, "loss": 0.5174, "step": 2398 }, { "epoch": 0.409106412005457, "grad_norm": 0.43585965037345886, "learning_rate": 1.33682484845967e-06, "loss": 0.5162, "step": 2399 }, { "epoch": 0.4092769440654843, "grad_norm": 0.6322606205940247, "learning_rate": 1.3363047512280393e-06, "loss": 0.5239, "step": 2400 }, { "epoch": 0.4094474761255116, "grad_norm": 0.636260986328125, "learning_rate": 1.335784551404393e-06, "loss": 0.5231, "step": 2401 }, { "epoch": 0.4096180081855389, "grad_norm": 0.34062281250953674, "learning_rate": 1.3352642491474216e-06, "loss": 0.5235, "step": 2402 }, { "epoch": 0.40978854024556616, "grad_norm": 0.5656194090843201, "learning_rate": 1.3347438446158469e-06, "loss": 0.5166, "step": 2403 }, { "epoch": 0.4099590723055935, "grad_norm": 0.758741557598114, "learning_rate": 1.3342233379684218e-06, "loss": 0.5187, "step": 2404 }, { "epoch": 0.41012960436562074, "grad_norm": 0.4431105852127075, "learning_rate": 1.3337027293639303e-06, "loss": 0.5174, "step": 2405 }, { "epoch": 0.410300136425648, "grad_norm": 0.5212686061859131, "learning_rate": 1.3331820189611874e-06, "loss": 0.5238, "step": 2406 }, { "epoch": 0.4104706684856753, "grad_norm": 0.6115703582763672, "learning_rate": 1.3326612069190395e-06, "loss": 0.5142, "step": 2407 }, { "epoch": 0.4106412005457026, "grad_norm": 0.5647085309028625, "learning_rate": 1.3321402933963638e-06, "loss": 0.5204, "step": 2408 }, { "epoch": 0.4108117326057299, "grad_norm": 0.5248672962188721, "learning_rate": 1.3316192785520683e-06, "loss": 0.5131, "step": 2409 }, { "epoch": 0.41098226466575716, "grad_norm": 0.41708463430404663, "learning_rate": 1.3310981625450917e-06, "loss": 0.5285, "step": 2410 }, { "epoch": 0.4111527967257845, "grad_norm": 0.5683760643005371, "learning_rate": 1.330576945534405e-06, "loss": 0.5241, "step": 2411 }, { "epoch": 0.41132332878581174, "grad_norm": 0.38032862544059753, "learning_rate": 1.330055627679008e-06, "loss": 0.5105, "step": 2412 }, { "epoch": 0.411493860845839, "grad_norm": 0.42650604248046875, "learning_rate": 1.3295342091379324e-06, "loss": 0.5221, "step": 2413 }, { "epoch": 0.4116643929058663, "grad_norm": 0.5264064073562622, "learning_rate": 1.3290126900702404e-06, "loss": 0.5186, "step": 2414 }, { "epoch": 0.4118349249658936, "grad_norm": 0.42799603939056396, "learning_rate": 1.3284910706350251e-06, "loss": 0.5217, "step": 2415 }, { "epoch": 0.4120054570259209, "grad_norm": 0.4704800248146057, "learning_rate": 1.3279693509914102e-06, "loss": 0.5118, "step": 2416 }, { "epoch": 0.41217598908594816, "grad_norm": 0.41373327374458313, "learning_rate": 1.3274475312985492e-06, "loss": 0.5147, "step": 2417 }, { "epoch": 0.4123465211459754, "grad_norm": 0.44518589973449707, "learning_rate": 1.326925611715627e-06, "loss": 0.5247, "step": 2418 }, { "epoch": 0.41251705320600274, "grad_norm": 0.6880121231079102, "learning_rate": 1.3264035924018585e-06, "loss": 0.5348, "step": 2419 }, { "epoch": 0.41268758526603, "grad_norm": 0.5007564425468445, "learning_rate": 1.3258814735164893e-06, "loss": 0.5354, "step": 2420 }, { "epoch": 0.4128581173260573, "grad_norm": 0.481301486492157, "learning_rate": 1.3253592552187951e-06, "loss": 0.5053, "step": 2421 }, { "epoch": 0.4130286493860846, "grad_norm": 0.46144989132881165, "learning_rate": 1.3248369376680825e-06, "loss": 0.5065, "step": 2422 }, { "epoch": 0.4131991814461119, "grad_norm": 0.5711754560470581, "learning_rate": 1.3243145210236877e-06, "loss": 0.5348, "step": 2423 }, { "epoch": 0.41336971350613916, "grad_norm": 0.6206766963005066, "learning_rate": 1.3237920054449776e-06, "loss": 0.5256, "step": 2424 }, { "epoch": 0.4135402455661664, "grad_norm": 0.43933790922164917, "learning_rate": 1.3232693910913486e-06, "loss": 0.522, "step": 2425 }, { "epoch": 0.41371077762619374, "grad_norm": 0.4370189905166626, "learning_rate": 1.322746678122228e-06, "loss": 0.5255, "step": 2426 }, { "epoch": 0.413881309686221, "grad_norm": 0.5346240401268005, "learning_rate": 1.322223866697073e-06, "loss": 0.5282, "step": 2427 }, { "epoch": 0.4140518417462483, "grad_norm": 0.46231934428215027, "learning_rate": 1.3217009569753704e-06, "loss": 0.5201, "step": 2428 }, { "epoch": 0.4142223738062756, "grad_norm": 0.40670233964920044, "learning_rate": 1.3211779491166376e-06, "loss": 0.5192, "step": 2429 }, { "epoch": 0.41439290586630284, "grad_norm": 0.42665842175483704, "learning_rate": 1.3206548432804216e-06, "loss": 0.5177, "step": 2430 }, { "epoch": 0.41456343792633016, "grad_norm": 0.5745131373405457, "learning_rate": 1.3201316396262993e-06, "loss": 0.5196, "step": 2431 }, { "epoch": 0.4147339699863574, "grad_norm": 0.48258480429649353, "learning_rate": 1.3196083383138776e-06, "loss": 0.5214, "step": 2432 }, { "epoch": 0.41490450204638474, "grad_norm": 0.4157755672931671, "learning_rate": 1.3190849395027927e-06, "loss": 0.5276, "step": 2433 }, { "epoch": 0.415075034106412, "grad_norm": 0.5874139070510864, "learning_rate": 1.318561443352711e-06, "loss": 0.5085, "step": 2434 }, { "epoch": 0.4152455661664393, "grad_norm": 0.44920822978019714, "learning_rate": 1.3180378500233292e-06, "loss": 0.5122, "step": 2435 }, { "epoch": 0.4154160982264666, "grad_norm": 0.42331570386886597, "learning_rate": 1.317514159674372e-06, "loss": 0.5165, "step": 2436 }, { "epoch": 0.41558663028649384, "grad_norm": 0.5718460083007812, "learning_rate": 1.3169903724655952e-06, "loss": 0.523, "step": 2437 }, { "epoch": 0.41575716234652116, "grad_norm": 0.4036332964897156, "learning_rate": 1.3164664885567833e-06, "loss": 0.5158, "step": 2438 }, { "epoch": 0.4159276944065484, "grad_norm": 0.5522937178611755, "learning_rate": 1.315942508107751e-06, "loss": 0.5006, "step": 2439 }, { "epoch": 0.41609822646657574, "grad_norm": 0.46217483282089233, "learning_rate": 1.3154184312783418e-06, "loss": 0.5188, "step": 2440 }, { "epoch": 0.416268758526603, "grad_norm": 0.3863103985786438, "learning_rate": 1.3148942582284287e-06, "loss": 0.5265, "step": 2441 }, { "epoch": 0.41643929058663026, "grad_norm": 0.45400503277778625, "learning_rate": 1.3143699891179141e-06, "loss": 0.5169, "step": 2442 }, { "epoch": 0.4166098226466576, "grad_norm": 0.3837527632713318, "learning_rate": 1.3138456241067303e-06, "loss": 0.5172, "step": 2443 }, { "epoch": 0.41678035470668484, "grad_norm": 0.45465466380119324, "learning_rate": 1.313321163354838e-06, "loss": 0.5099, "step": 2444 }, { "epoch": 0.41695088676671216, "grad_norm": 0.3569267988204956, "learning_rate": 1.3127966070222274e-06, "loss": 0.5144, "step": 2445 }, { "epoch": 0.4171214188267394, "grad_norm": 0.40313395857810974, "learning_rate": 1.312271955268918e-06, "loss": 0.5311, "step": 2446 }, { "epoch": 0.41729195088676674, "grad_norm": 0.4062875509262085, "learning_rate": 1.3117472082549588e-06, "loss": 0.508, "step": 2447 }, { "epoch": 0.417462482946794, "grad_norm": 0.4219088852405548, "learning_rate": 1.311222366140427e-06, "loss": 0.5047, "step": 2448 }, { "epoch": 0.41763301500682126, "grad_norm": 0.3890495002269745, "learning_rate": 1.3106974290854287e-06, "loss": 0.5218, "step": 2449 }, { "epoch": 0.4178035470668486, "grad_norm": 0.36203303933143616, "learning_rate": 1.3101723972501003e-06, "loss": 0.516, "step": 2450 }, { "epoch": 0.41797407912687584, "grad_norm": 0.343011736869812, "learning_rate": 1.309647270794606e-06, "loss": 0.5083, "step": 2451 }, { "epoch": 0.41814461118690316, "grad_norm": 0.3661821782588959, "learning_rate": 1.3091220498791388e-06, "loss": 0.5202, "step": 2452 }, { "epoch": 0.4183151432469304, "grad_norm": 0.37110432982444763, "learning_rate": 1.3085967346639214e-06, "loss": 0.522, "step": 2453 }, { "epoch": 0.4184856753069577, "grad_norm": 0.4287030100822449, "learning_rate": 1.308071325309204e-06, "loss": 0.5193, "step": 2454 }, { "epoch": 0.418656207366985, "grad_norm": 0.4393904507160187, "learning_rate": 1.3075458219752672e-06, "loss": 0.5035, "step": 2455 }, { "epoch": 0.41882673942701226, "grad_norm": 0.33759546279907227, "learning_rate": 1.3070202248224182e-06, "loss": 0.5124, "step": 2456 }, { "epoch": 0.4189972714870396, "grad_norm": 0.4863134026527405, "learning_rate": 1.306494534010995e-06, "loss": 0.5139, "step": 2457 }, { "epoch": 0.41916780354706684, "grad_norm": 0.43133077025413513, "learning_rate": 1.3059687497013627e-06, "loss": 0.5197, "step": 2458 }, { "epoch": 0.41933833560709416, "grad_norm": 0.3589963912963867, "learning_rate": 1.3054428720539148e-06, "loss": 0.5283, "step": 2459 }, { "epoch": 0.4195088676671214, "grad_norm": 0.3672996461391449, "learning_rate": 1.3049169012290748e-06, "loss": 0.5174, "step": 2460 }, { "epoch": 0.4196793997271487, "grad_norm": 0.39421847462654114, "learning_rate": 1.3043908373872923e-06, "loss": 0.5183, "step": 2461 }, { "epoch": 0.419849931787176, "grad_norm": 0.38718149065971375, "learning_rate": 1.3038646806890479e-06, "loss": 0.5291, "step": 2462 }, { "epoch": 0.42002046384720326, "grad_norm": 0.3574490249156952, "learning_rate": 1.3033384312948488e-06, "loss": 0.5189, "step": 2463 }, { "epoch": 0.4201909959072306, "grad_norm": 0.38356050848960876, "learning_rate": 1.3028120893652306e-06, "loss": 0.51, "step": 2464 }, { "epoch": 0.42036152796725784, "grad_norm": 0.3885401487350464, "learning_rate": 1.3022856550607574e-06, "loss": 0.5261, "step": 2465 }, { "epoch": 0.4205320600272851, "grad_norm": 0.37062355875968933, "learning_rate": 1.301759128542022e-06, "loss": 0.5205, "step": 2466 }, { "epoch": 0.4207025920873124, "grad_norm": 0.4637162685394287, "learning_rate": 1.3012325099696444e-06, "loss": 0.5163, "step": 2467 }, { "epoch": 0.4208731241473397, "grad_norm": 0.4647408127784729, "learning_rate": 1.3007057995042733e-06, "loss": 0.5172, "step": 2468 }, { "epoch": 0.421043656207367, "grad_norm": 0.3651617765426636, "learning_rate": 1.3001789973065853e-06, "loss": 0.5168, "step": 2469 }, { "epoch": 0.42121418826739426, "grad_norm": 0.3944820165634155, "learning_rate": 1.2996521035372847e-06, "loss": 0.5259, "step": 2470 }, { "epoch": 0.4213847203274216, "grad_norm": 0.5131377577781677, "learning_rate": 1.2991251183571045e-06, "loss": 0.5259, "step": 2471 }, { "epoch": 0.42155525238744884, "grad_norm": 0.6908408999443054, "learning_rate": 1.2985980419268045e-06, "loss": 0.5264, "step": 2472 }, { "epoch": 0.4217257844474761, "grad_norm": 0.5145502090454102, "learning_rate": 1.298070874407173e-06, "loss": 0.5227, "step": 2473 }, { "epoch": 0.4218963165075034, "grad_norm": 0.42013034224510193, "learning_rate": 1.2975436159590262e-06, "loss": 0.5315, "step": 2474 }, { "epoch": 0.4220668485675307, "grad_norm": 0.6404533982276917, "learning_rate": 1.2970162667432076e-06, "loss": 0.5218, "step": 2475 }, { "epoch": 0.422237380627558, "grad_norm": 0.7172034382820129, "learning_rate": 1.2964888269205888e-06, "loss": 0.5202, "step": 2476 }, { "epoch": 0.42240791268758526, "grad_norm": 0.4771793484687805, "learning_rate": 1.2959612966520684e-06, "loss": 0.5105, "step": 2477 }, { "epoch": 0.4225784447476125, "grad_norm": 0.39662736654281616, "learning_rate": 1.2954336760985738e-06, "loss": 0.5223, "step": 2478 }, { "epoch": 0.42274897680763984, "grad_norm": 0.6462126970291138, "learning_rate": 1.2949059654210588e-06, "loss": 0.5211, "step": 2479 }, { "epoch": 0.4229195088676671, "grad_norm": 0.4810219705104828, "learning_rate": 1.294378164780505e-06, "loss": 0.5328, "step": 2480 }, { "epoch": 0.4230900409276944, "grad_norm": 0.5060651302337646, "learning_rate": 1.2938502743379213e-06, "loss": 0.5133, "step": 2481 }, { "epoch": 0.4232605729877217, "grad_norm": 0.6474683880805969, "learning_rate": 1.2933222942543445e-06, "loss": 0.516, "step": 2482 }, { "epoch": 0.423431105047749, "grad_norm": 0.380156546831131, "learning_rate": 1.2927942246908383e-06, "loss": 0.5237, "step": 2483 }, { "epoch": 0.42360163710777626, "grad_norm": 0.5847602486610413, "learning_rate": 1.2922660658084946e-06, "loss": 0.5206, "step": 2484 }, { "epoch": 0.4237721691678035, "grad_norm": 0.5852933526039124, "learning_rate": 1.2917378177684305e-06, "loss": 0.5333, "step": 2485 }, { "epoch": 0.42394270122783084, "grad_norm": 0.3461321294307709, "learning_rate": 1.291209480731793e-06, "loss": 0.4982, "step": 2486 }, { "epoch": 0.4241132332878581, "grad_norm": 0.5730729103088379, "learning_rate": 1.2906810548597533e-06, "loss": 0.5151, "step": 2487 }, { "epoch": 0.4242837653478854, "grad_norm": 0.4181745946407318, "learning_rate": 1.2901525403135127e-06, "loss": 0.5277, "step": 2488 }, { "epoch": 0.4244542974079127, "grad_norm": 0.4739188849925995, "learning_rate": 1.2896239372542973e-06, "loss": 0.5342, "step": 2489 }, { "epoch": 0.42462482946793995, "grad_norm": 0.5515470504760742, "learning_rate": 1.2890952458433611e-06, "loss": 0.5276, "step": 2490 }, { "epoch": 0.42479536152796726, "grad_norm": 0.3359604775905609, "learning_rate": 1.2885664662419852e-06, "loss": 0.5377, "step": 2491 }, { "epoch": 0.4249658935879945, "grad_norm": 0.5798554420471191, "learning_rate": 1.2880375986114771e-06, "loss": 0.5217, "step": 2492 }, { "epoch": 0.42513642564802184, "grad_norm": 0.4647768437862396, "learning_rate": 1.2875086431131717e-06, "loss": 0.5191, "step": 2493 }, { "epoch": 0.4253069577080491, "grad_norm": 0.38144662976264954, "learning_rate": 1.28697959990843e-06, "loss": 0.5183, "step": 2494 }, { "epoch": 0.4254774897680764, "grad_norm": 0.504442572593689, "learning_rate": 1.2864504691586405e-06, "loss": 0.5129, "step": 2495 }, { "epoch": 0.4256480218281037, "grad_norm": 0.3849034011363983, "learning_rate": 1.2859212510252178e-06, "loss": 0.5212, "step": 2496 }, { "epoch": 0.42581855388813095, "grad_norm": 0.3967511057853699, "learning_rate": 1.2853919456696038e-06, "loss": 0.5092, "step": 2497 }, { "epoch": 0.42598908594815826, "grad_norm": 0.4667202830314636, "learning_rate": 1.2848625532532664e-06, "loss": 0.5361, "step": 2498 }, { "epoch": 0.4261596180081855, "grad_norm": 0.4540242850780487, "learning_rate": 1.2843330739377004e-06, "loss": 0.5316, "step": 2499 }, { "epoch": 0.42633015006821284, "grad_norm": 0.4949699342250824, "learning_rate": 1.283803507884427e-06, "loss": 0.5095, "step": 2500 }, { "epoch": 0.4265006821282401, "grad_norm": 0.4083558917045593, "learning_rate": 1.2832738552549935e-06, "loss": 0.522, "step": 2501 }, { "epoch": 0.42667121418826737, "grad_norm": 0.44709694385528564, "learning_rate": 1.282744116210975e-06, "loss": 0.5333, "step": 2502 }, { "epoch": 0.4268417462482947, "grad_norm": 0.4692583680152893, "learning_rate": 1.282214290913971e-06, "loss": 0.5329, "step": 2503 }, { "epoch": 0.42701227830832195, "grad_norm": 0.48816362023353577, "learning_rate": 1.281684379525608e-06, "loss": 0.5374, "step": 2504 }, { "epoch": 0.42718281036834926, "grad_norm": 0.42191752791404724, "learning_rate": 1.2811543822075398e-06, "loss": 0.5149, "step": 2505 }, { "epoch": 0.4273533424283765, "grad_norm": 0.5076993107795715, "learning_rate": 1.2806242991214455e-06, "loss": 0.5166, "step": 2506 }, { "epoch": 0.42752387448840384, "grad_norm": 0.49512800574302673, "learning_rate": 1.2800941304290302e-06, "loss": 0.5253, "step": 2507 }, { "epoch": 0.4276944065484311, "grad_norm": 0.3681822419166565, "learning_rate": 1.2795638762920254e-06, "loss": 0.5305, "step": 2508 }, { "epoch": 0.42786493860845837, "grad_norm": 0.4675699770450592, "learning_rate": 1.2790335368721892e-06, "loss": 0.529, "step": 2509 }, { "epoch": 0.4280354706684857, "grad_norm": 0.4286367893218994, "learning_rate": 1.2785031123313046e-06, "loss": 0.5161, "step": 2510 }, { "epoch": 0.42820600272851295, "grad_norm": 0.35007959604263306, "learning_rate": 1.2779726028311814e-06, "loss": 0.5233, "step": 2511 }, { "epoch": 0.42837653478854026, "grad_norm": 0.4429203271865845, "learning_rate": 1.277442008533655e-06, "loss": 0.5355, "step": 2512 }, { "epoch": 0.4285470668485675, "grad_norm": 0.34723106026649475, "learning_rate": 1.276911329600587e-06, "loss": 0.5098, "step": 2513 }, { "epoch": 0.42871759890859484, "grad_norm": 0.39496925473213196, "learning_rate": 1.276380566193864e-06, "loss": 0.517, "step": 2514 }, { "epoch": 0.4288881309686221, "grad_norm": 0.42757344245910645, "learning_rate": 1.2758497184753996e-06, "loss": 0.5079, "step": 2515 }, { "epoch": 0.42905866302864937, "grad_norm": 0.36008015275001526, "learning_rate": 1.2753187866071319e-06, "loss": 0.5226, "step": 2516 }, { "epoch": 0.4292291950886767, "grad_norm": 0.4112105369567871, "learning_rate": 1.2747877707510253e-06, "loss": 0.5095, "step": 2517 }, { "epoch": 0.42939972714870395, "grad_norm": 0.354653924703598, "learning_rate": 1.2742566710690703e-06, "loss": 0.5151, "step": 2518 }, { "epoch": 0.42957025920873126, "grad_norm": 0.41790133714675903, "learning_rate": 1.273725487723282e-06, "loss": 0.5117, "step": 2519 }, { "epoch": 0.4297407912687585, "grad_norm": 0.34194713830947876, "learning_rate": 1.2731942208757011e-06, "loss": 0.5109, "step": 2520 }, { "epoch": 0.4299113233287858, "grad_norm": 0.435222327709198, "learning_rate": 1.2726628706883948e-06, "loss": 0.5324, "step": 2521 }, { "epoch": 0.4300818553888131, "grad_norm": 0.4597698450088501, "learning_rate": 1.2721314373234544e-06, "loss": 0.5238, "step": 2522 }, { "epoch": 0.43025238744884037, "grad_norm": 0.43926915526390076, "learning_rate": 1.2715999209429975e-06, "loss": 0.5098, "step": 2523 }, { "epoch": 0.4304229195088677, "grad_norm": 0.600695788860321, "learning_rate": 1.2710683217091669e-06, "loss": 0.5158, "step": 2524 }, { "epoch": 0.43059345156889495, "grad_norm": 0.549557626247406, "learning_rate": 1.2705366397841304e-06, "loss": 0.5343, "step": 2525 }, { "epoch": 0.43076398362892226, "grad_norm": 0.43904390931129456, "learning_rate": 1.2700048753300808e-06, "loss": 0.5275, "step": 2526 }, { "epoch": 0.4309345156889495, "grad_norm": 0.446802020072937, "learning_rate": 1.2694730285092365e-06, "loss": 0.5184, "step": 2527 }, { "epoch": 0.4311050477489768, "grad_norm": 0.4266647696495056, "learning_rate": 1.2689410994838415e-06, "loss": 0.5134, "step": 2528 }, { "epoch": 0.4312755798090041, "grad_norm": 0.3999023735523224, "learning_rate": 1.2684090884161635e-06, "loss": 0.5227, "step": 2529 }, { "epoch": 0.43144611186903137, "grad_norm": 0.4615280330181122, "learning_rate": 1.2678769954684967e-06, "loss": 0.5178, "step": 2530 }, { "epoch": 0.4316166439290587, "grad_norm": 0.37461206316947937, "learning_rate": 1.2673448208031593e-06, "loss": 0.5245, "step": 2531 }, { "epoch": 0.43178717598908595, "grad_norm": 0.6492916345596313, "learning_rate": 1.2668125645824945e-06, "loss": 0.5193, "step": 2532 }, { "epoch": 0.4319577080491132, "grad_norm": 0.3368079662322998, "learning_rate": 1.2662802269688715e-06, "loss": 0.5156, "step": 2533 }, { "epoch": 0.4321282401091405, "grad_norm": 0.37977778911590576, "learning_rate": 1.2657478081246826e-06, "loss": 0.5198, "step": 2534 }, { "epoch": 0.4322987721691678, "grad_norm": 0.39551234245300293, "learning_rate": 1.2652153082123459e-06, "loss": 0.5198, "step": 2535 }, { "epoch": 0.4324693042291951, "grad_norm": 0.4476735591888428, "learning_rate": 1.2646827273943043e-06, "loss": 0.5185, "step": 2536 }, { "epoch": 0.43263983628922237, "grad_norm": 0.45589369535446167, "learning_rate": 1.2641500658330248e-06, "loss": 0.5189, "step": 2537 }, { "epoch": 0.4328103683492497, "grad_norm": 0.4167502224445343, "learning_rate": 1.2636173236909996e-06, "loss": 0.5158, "step": 2538 }, { "epoch": 0.43298090040927695, "grad_norm": 0.42350873351097107, "learning_rate": 1.2630845011307453e-06, "loss": 0.5093, "step": 2539 }, { "epoch": 0.4331514324693042, "grad_norm": 0.4852334260940552, "learning_rate": 1.262551598314803e-06, "loss": 0.5202, "step": 2540 }, { "epoch": 0.4333219645293315, "grad_norm": 0.3522724509239197, "learning_rate": 1.2620186154057383e-06, "loss": 0.515, "step": 2541 }, { "epoch": 0.4334924965893588, "grad_norm": 0.4680606424808502, "learning_rate": 1.261485552566141e-06, "loss": 0.5082, "step": 2542 }, { "epoch": 0.4336630286493861, "grad_norm": 0.46284425258636475, "learning_rate": 1.2609524099586256e-06, "loss": 0.5163, "step": 2543 }, { "epoch": 0.43383356070941337, "grad_norm": 0.35307639837265015, "learning_rate": 1.2604191877458308e-06, "loss": 0.5184, "step": 2544 }, { "epoch": 0.43400409276944063, "grad_norm": 0.4327385723590851, "learning_rate": 1.2598858860904193e-06, "loss": 0.5291, "step": 2545 }, { "epoch": 0.43417462482946795, "grad_norm": 0.4670164883136749, "learning_rate": 1.2593525051550792e-06, "loss": 0.5163, "step": 2546 }, { "epoch": 0.4343451568894952, "grad_norm": 0.39771389961242676, "learning_rate": 1.258819045102521e-06, "loss": 0.5164, "step": 2547 }, { "epoch": 0.4345156889495225, "grad_norm": 0.4475443363189697, "learning_rate": 1.2582855060954808e-06, "loss": 0.5067, "step": 2548 }, { "epoch": 0.4346862210095498, "grad_norm": 0.4804600179195404, "learning_rate": 1.2577518882967182e-06, "loss": 0.5181, "step": 2549 }, { "epoch": 0.4348567530695771, "grad_norm": 0.35155707597732544, "learning_rate": 1.2572181918690164e-06, "loss": 0.5177, "step": 2550 }, { "epoch": 0.43502728512960437, "grad_norm": 0.41589272022247314, "learning_rate": 1.2566844169751837e-06, "loss": 0.5078, "step": 2551 }, { "epoch": 0.43519781718963163, "grad_norm": 0.43547019362449646, "learning_rate": 1.2561505637780515e-06, "loss": 0.5179, "step": 2552 }, { "epoch": 0.43536834924965895, "grad_norm": 0.35825809836387634, "learning_rate": 1.255616632440475e-06, "loss": 0.5254, "step": 2553 }, { "epoch": 0.4355388813096862, "grad_norm": 0.3896865248680115, "learning_rate": 1.2550826231253336e-06, "loss": 0.5141, "step": 2554 }, { "epoch": 0.4357094133697135, "grad_norm": 0.3662707209587097, "learning_rate": 1.254548535995531e-06, "loss": 0.5197, "step": 2555 }, { "epoch": 0.4358799454297408, "grad_norm": 0.4130113422870636, "learning_rate": 1.2540143712139935e-06, "loss": 0.5106, "step": 2556 }, { "epoch": 0.43605047748976805, "grad_norm": 0.4342820942401886, "learning_rate": 1.2534801289436716e-06, "loss": 0.5146, "step": 2557 }, { "epoch": 0.43622100954979537, "grad_norm": 0.4437672793865204, "learning_rate": 1.25294580934754e-06, "loss": 0.5218, "step": 2558 }, { "epoch": 0.43639154160982263, "grad_norm": 0.43240147829055786, "learning_rate": 1.252411412588596e-06, "loss": 0.5192, "step": 2559 }, { "epoch": 0.43656207366984995, "grad_norm": 0.4971958100795746, "learning_rate": 1.2518769388298606e-06, "loss": 0.5165, "step": 2560 }, { "epoch": 0.4367326057298772, "grad_norm": 0.4937379062175751, "learning_rate": 1.2513423882343797e-06, "loss": 0.5088, "step": 2561 }, { "epoch": 0.4369031377899045, "grad_norm": 0.44213834404945374, "learning_rate": 1.2508077609652204e-06, "loss": 0.5179, "step": 2562 }, { "epoch": 0.4370736698499318, "grad_norm": 0.4229736924171448, "learning_rate": 1.2502730571854747e-06, "loss": 0.5228, "step": 2563 }, { "epoch": 0.43724420190995905, "grad_norm": 0.5028852224349976, "learning_rate": 1.2497382770582582e-06, "loss": 0.5113, "step": 2564 }, { "epoch": 0.43741473396998637, "grad_norm": 0.4949003756046295, "learning_rate": 1.2492034207467082e-06, "loss": 0.5146, "step": 2565 }, { "epoch": 0.43758526603001363, "grad_norm": 0.43527311086654663, "learning_rate": 1.2486684884139868e-06, "loss": 0.522, "step": 2566 }, { "epoch": 0.43775579809004095, "grad_norm": 0.4403958022594452, "learning_rate": 1.2481334802232782e-06, "loss": 0.5138, "step": 2567 }, { "epoch": 0.4379263301500682, "grad_norm": 0.5198670029640198, "learning_rate": 1.2475983963377908e-06, "loss": 0.5127, "step": 2568 }, { "epoch": 0.4380968622100955, "grad_norm": 0.4271129369735718, "learning_rate": 1.247063236920755e-06, "loss": 0.5112, "step": 2569 }, { "epoch": 0.4382673942701228, "grad_norm": 0.4282945692539215, "learning_rate": 1.2465280021354255e-06, "loss": 0.5124, "step": 2570 }, { "epoch": 0.43843792633015005, "grad_norm": 0.5314650535583496, "learning_rate": 1.245992692145078e-06, "loss": 0.5135, "step": 2571 }, { "epoch": 0.43860845839017737, "grad_norm": 0.3869926631450653, "learning_rate": 1.245457307113014e-06, "loss": 0.5148, "step": 2572 }, { "epoch": 0.43877899045020463, "grad_norm": 0.3884088397026062, "learning_rate": 1.2449218472025549e-06, "loss": 0.5271, "step": 2573 }, { "epoch": 0.43894952251023195, "grad_norm": 0.48845013976097107, "learning_rate": 1.2443863125770473e-06, "loss": 0.5215, "step": 2574 }, { "epoch": 0.4391200545702592, "grad_norm": 0.44390156865119934, "learning_rate": 1.243850703399859e-06, "loss": 0.5128, "step": 2575 }, { "epoch": 0.4392905866302865, "grad_norm": 0.37384307384490967, "learning_rate": 1.243315019834381e-06, "loss": 0.5167, "step": 2576 }, { "epoch": 0.4394611186903138, "grad_norm": 0.45294705033302307, "learning_rate": 1.242779262044028e-06, "loss": 0.5217, "step": 2577 }, { "epoch": 0.43963165075034105, "grad_norm": 0.5091094970703125, "learning_rate": 1.2422434301922358e-06, "loss": 0.5254, "step": 2578 }, { "epoch": 0.43980218281036837, "grad_norm": 0.364459365606308, "learning_rate": 1.2417075244424638e-06, "loss": 0.5058, "step": 2579 }, { "epoch": 0.43997271487039563, "grad_norm": 0.4908876419067383, "learning_rate": 1.241171544958194e-06, "loss": 0.4999, "step": 2580 }, { "epoch": 0.4401432469304229, "grad_norm": 0.6493481993675232, "learning_rate": 1.2406354919029294e-06, "loss": 0.5307, "step": 2581 }, { "epoch": 0.4403137789904502, "grad_norm": 0.5132988095283508, "learning_rate": 1.2400993654401974e-06, "loss": 0.5156, "step": 2582 }, { "epoch": 0.4404843110504775, "grad_norm": 0.4408731758594513, "learning_rate": 1.2395631657335471e-06, "loss": 0.5192, "step": 2583 }, { "epoch": 0.4406548431105048, "grad_norm": 0.524540901184082, "learning_rate": 1.2390268929465494e-06, "loss": 0.5092, "step": 2584 }, { "epoch": 0.44082537517053205, "grad_norm": 0.5012420415878296, "learning_rate": 1.2384905472427975e-06, "loss": 0.5114, "step": 2585 }, { "epoch": 0.44099590723055937, "grad_norm": 0.5423673987388611, "learning_rate": 1.2379541287859086e-06, "loss": 0.5128, "step": 2586 }, { "epoch": 0.44116643929058663, "grad_norm": 0.48322269320487976, "learning_rate": 1.2374176377395196e-06, "loss": 0.522, "step": 2587 }, { "epoch": 0.4413369713506139, "grad_norm": 0.5741340517997742, "learning_rate": 1.2368810742672911e-06, "loss": 0.5064, "step": 2588 }, { "epoch": 0.4415075034106412, "grad_norm": 0.4697413444519043, "learning_rate": 1.2363444385329052e-06, "loss": 0.5216, "step": 2589 }, { "epoch": 0.4416780354706685, "grad_norm": 0.4826829135417938, "learning_rate": 1.2358077307000663e-06, "loss": 0.5178, "step": 2590 }, { "epoch": 0.4418485675306958, "grad_norm": 0.45279660820961, "learning_rate": 1.2352709509325006e-06, "loss": 0.5115, "step": 2591 }, { "epoch": 0.44201909959072305, "grad_norm": 0.38878384232521057, "learning_rate": 1.2347340993939569e-06, "loss": 0.5173, "step": 2592 }, { "epoch": 0.4421896316507503, "grad_norm": 0.5535585880279541, "learning_rate": 1.2341971762482045e-06, "loss": 0.5106, "step": 2593 }, { "epoch": 0.44236016371077763, "grad_norm": 0.4223726689815521, "learning_rate": 1.233660181659036e-06, "loss": 0.5172, "step": 2594 }, { "epoch": 0.4425306957708049, "grad_norm": 0.42020705342292786, "learning_rate": 1.233123115790265e-06, "loss": 0.539, "step": 2595 }, { "epoch": 0.4427012278308322, "grad_norm": 0.43967166543006897, "learning_rate": 1.2325859788057274e-06, "loss": 0.5054, "step": 2596 }, { "epoch": 0.4428717598908595, "grad_norm": 0.3231273889541626, "learning_rate": 1.2320487708692797e-06, "loss": 0.5142, "step": 2597 }, { "epoch": 0.4430422919508868, "grad_norm": 0.4471146762371063, "learning_rate": 1.2315114921448013e-06, "loss": 0.5186, "step": 2598 }, { "epoch": 0.44321282401091405, "grad_norm": 0.40211814641952515, "learning_rate": 1.2309741427961927e-06, "loss": 0.5255, "step": 2599 }, { "epoch": 0.4433833560709413, "grad_norm": 0.42954233288764954, "learning_rate": 1.2304367229873756e-06, "loss": 0.5116, "step": 2600 }, { "epoch": 0.44355388813096863, "grad_norm": 0.6420926451683044, "learning_rate": 1.2298992328822937e-06, "loss": 0.5109, "step": 2601 }, { "epoch": 0.4437244201909959, "grad_norm": 0.5584779381752014, "learning_rate": 1.229361672644912e-06, "loss": 0.5143, "step": 2602 }, { "epoch": 0.4438949522510232, "grad_norm": 0.3985927700996399, "learning_rate": 1.2288240424392168e-06, "loss": 0.5024, "step": 2603 }, { "epoch": 0.4440654843110505, "grad_norm": 0.44909727573394775, "learning_rate": 1.2282863424292157e-06, "loss": 0.5189, "step": 2604 }, { "epoch": 0.44423601637107774, "grad_norm": 0.5488520860671997, "learning_rate": 1.227748572778938e-06, "loss": 0.5198, "step": 2605 }, { "epoch": 0.44440654843110505, "grad_norm": 0.45999622344970703, "learning_rate": 1.2272107336524334e-06, "loss": 0.5198, "step": 2606 }, { "epoch": 0.4445770804911323, "grad_norm": 0.5359815955162048, "learning_rate": 1.2266728252137736e-06, "loss": 0.5169, "step": 2607 }, { "epoch": 0.44474761255115963, "grad_norm": 0.5039993524551392, "learning_rate": 1.2261348476270514e-06, "loss": 0.5265, "step": 2608 }, { "epoch": 0.4449181446111869, "grad_norm": 0.46197831630706787, "learning_rate": 1.2255968010563801e-06, "loss": 0.5165, "step": 2609 }, { "epoch": 0.4450886766712142, "grad_norm": 0.44239184260368347, "learning_rate": 1.2250586856658946e-06, "loss": 0.5149, "step": 2610 }, { "epoch": 0.4452592087312415, "grad_norm": 0.5294963717460632, "learning_rate": 1.224520501619751e-06, "loss": 0.5126, "step": 2611 }, { "epoch": 0.44542974079126874, "grad_norm": 0.40589872002601624, "learning_rate": 1.2239822490821249e-06, "loss": 0.5245, "step": 2612 }, { "epoch": 0.44560027285129605, "grad_norm": 0.4539930522441864, "learning_rate": 1.2234439282172145e-06, "loss": 0.5163, "step": 2613 }, { "epoch": 0.4457708049113233, "grad_norm": 0.44857555627822876, "learning_rate": 1.2229055391892384e-06, "loss": 0.5104, "step": 2614 }, { "epoch": 0.44594133697135063, "grad_norm": 0.5156710147857666, "learning_rate": 1.2223670821624355e-06, "loss": 0.5081, "step": 2615 }, { "epoch": 0.4461118690313779, "grad_norm": 0.34579339623451233, "learning_rate": 1.2218285573010654e-06, "loss": 0.5181, "step": 2616 }, { "epoch": 0.44628240109140516, "grad_norm": 0.5912550687789917, "learning_rate": 1.2212899647694094e-06, "loss": 0.5162, "step": 2617 }, { "epoch": 0.4464529331514325, "grad_norm": 0.4505324959754944, "learning_rate": 1.2207513047317685e-06, "loss": 0.5228, "step": 2618 }, { "epoch": 0.44662346521145974, "grad_norm": 0.42937779426574707, "learning_rate": 1.2202125773524641e-06, "loss": 0.5093, "step": 2619 }, { "epoch": 0.44679399727148705, "grad_norm": 0.4112337529659271, "learning_rate": 1.2196737827958393e-06, "loss": 0.5122, "step": 2620 }, { "epoch": 0.4469645293315143, "grad_norm": 0.3574349880218506, "learning_rate": 1.2191349212262563e-06, "loss": 0.5197, "step": 2621 }, { "epoch": 0.44713506139154163, "grad_norm": 0.48161396384239197, "learning_rate": 1.218595992808099e-06, "loss": 0.5111, "step": 2622 }, { "epoch": 0.4473055934515689, "grad_norm": 0.6249895095825195, "learning_rate": 1.2180569977057707e-06, "loss": 0.5032, "step": 2623 }, { "epoch": 0.44747612551159616, "grad_norm": 0.45782434940338135, "learning_rate": 1.2175179360836956e-06, "loss": 0.531, "step": 2624 }, { "epoch": 0.4476466575716235, "grad_norm": 0.3763248026371002, "learning_rate": 1.2169788081063183e-06, "loss": 0.5087, "step": 2625 }, { "epoch": 0.44781718963165074, "grad_norm": 0.5124301910400391, "learning_rate": 1.216439613938103e-06, "loss": 0.5101, "step": 2626 }, { "epoch": 0.44798772169167805, "grad_norm": 0.41541916131973267, "learning_rate": 1.215900353743535e-06, "loss": 0.5108, "step": 2627 }, { "epoch": 0.4481582537517053, "grad_norm": 0.37283164262771606, "learning_rate": 1.2153610276871187e-06, "loss": 0.5183, "step": 2628 }, { "epoch": 0.44832878581173263, "grad_norm": 0.5663114786148071, "learning_rate": 1.2148216359333795e-06, "loss": 0.5059, "step": 2629 }, { "epoch": 0.4484993178717599, "grad_norm": 0.5109935998916626, "learning_rate": 1.214282178646862e-06, "loss": 0.4996, "step": 2630 }, { "epoch": 0.44866984993178716, "grad_norm": 0.45246654748916626, "learning_rate": 1.2137426559921318e-06, "loss": 0.5014, "step": 2631 }, { "epoch": 0.4488403819918145, "grad_norm": 0.498762309551239, "learning_rate": 1.2132030681337738e-06, "loss": 0.5135, "step": 2632 }, { "epoch": 0.44901091405184174, "grad_norm": 0.6093807220458984, "learning_rate": 1.2126634152363925e-06, "loss": 0.5157, "step": 2633 }, { "epoch": 0.44918144611186905, "grad_norm": 0.4820914566516876, "learning_rate": 1.2121236974646127e-06, "loss": 0.5281, "step": 2634 }, { "epoch": 0.4493519781718963, "grad_norm": 0.40519341826438904, "learning_rate": 1.2115839149830798e-06, "loss": 0.5134, "step": 2635 }, { "epoch": 0.4495225102319236, "grad_norm": 0.5242198705673218, "learning_rate": 1.2110440679564567e-06, "loss": 0.5173, "step": 2636 }, { "epoch": 0.4496930422919509, "grad_norm": 0.379910945892334, "learning_rate": 1.210504156549428e-06, "loss": 0.5074, "step": 2637 }, { "epoch": 0.44986357435197816, "grad_norm": 0.4137760102748871, "learning_rate": 1.2099641809266978e-06, "loss": 0.5283, "step": 2638 }, { "epoch": 0.4500341064120055, "grad_norm": 0.4397674798965454, "learning_rate": 1.2094241412529884e-06, "loss": 0.5227, "step": 2639 }, { "epoch": 0.45020463847203274, "grad_norm": 0.42571043968200684, "learning_rate": 1.208884037693043e-06, "loss": 0.5006, "step": 2640 }, { "epoch": 0.45037517053206005, "grad_norm": 0.4621511995792389, "learning_rate": 1.2083438704116235e-06, "loss": 0.5283, "step": 2641 }, { "epoch": 0.4505457025920873, "grad_norm": 0.4140154719352722, "learning_rate": 1.207803639573512e-06, "loss": 0.5192, "step": 2642 }, { "epoch": 0.4507162346521146, "grad_norm": 0.4587588906288147, "learning_rate": 1.2072633453435093e-06, "loss": 0.5205, "step": 2643 }, { "epoch": 0.4508867667121419, "grad_norm": 0.3948979377746582, "learning_rate": 1.2067229878864355e-06, "loss": 0.5086, "step": 2644 }, { "epoch": 0.45105729877216916, "grad_norm": 0.4351895749568939, "learning_rate": 1.206182567367131e-06, "loss": 0.5336, "step": 2645 }, { "epoch": 0.4512278308321965, "grad_norm": 0.4272501766681671, "learning_rate": 1.2056420839504536e-06, "loss": 0.5174, "step": 2646 }, { "epoch": 0.45139836289222374, "grad_norm": 0.3314935564994812, "learning_rate": 1.2051015378012824e-06, "loss": 0.5191, "step": 2647 }, { "epoch": 0.451568894952251, "grad_norm": 0.3617965579032898, "learning_rate": 1.2045609290845139e-06, "loss": 0.5259, "step": 2648 }, { "epoch": 0.4517394270122783, "grad_norm": 0.3362520635128021, "learning_rate": 1.204020257965065e-06, "loss": 0.5148, "step": 2649 }, { "epoch": 0.4519099590723056, "grad_norm": 0.389750212430954, "learning_rate": 1.2034795246078707e-06, "loss": 0.5092, "step": 2650 }, { "epoch": 0.4520804911323329, "grad_norm": 0.4418506622314453, "learning_rate": 1.2029387291778855e-06, "loss": 0.5287, "step": 2651 }, { "epoch": 0.45225102319236016, "grad_norm": 0.349391371011734, "learning_rate": 1.2023978718400821e-06, "loss": 0.526, "step": 2652 }, { "epoch": 0.4524215552523875, "grad_norm": 0.533689558506012, "learning_rate": 1.2018569527594533e-06, "loss": 0.5168, "step": 2653 }, { "epoch": 0.45259208731241474, "grad_norm": 0.7081760168075562, "learning_rate": 1.2013159721010103e-06, "loss": 0.5263, "step": 2654 }, { "epoch": 0.452762619372442, "grad_norm": 0.5865058302879333, "learning_rate": 1.2007749300297818e-06, "loss": 0.5188, "step": 2655 }, { "epoch": 0.4529331514324693, "grad_norm": 0.39208078384399414, "learning_rate": 1.2002338267108172e-06, "loss": 0.5193, "step": 2656 }, { "epoch": 0.4531036834924966, "grad_norm": 0.5570546984672546, "learning_rate": 1.199692662309184e-06, "loss": 0.5234, "step": 2657 }, { "epoch": 0.4532742155525239, "grad_norm": 0.5408545136451721, "learning_rate": 1.199151436989967e-06, "loss": 0.5138, "step": 2658 }, { "epoch": 0.45344474761255116, "grad_norm": 0.33498716354370117, "learning_rate": 1.1986101509182712e-06, "loss": 0.5095, "step": 2659 }, { "epoch": 0.4536152796725784, "grad_norm": 0.46534663438796997, "learning_rate": 1.1980688042592193e-06, "loss": 0.5018, "step": 2660 }, { "epoch": 0.45378581173260574, "grad_norm": 0.4153357446193695, "learning_rate": 1.1975273971779529e-06, "loss": 0.5273, "step": 2661 }, { "epoch": 0.453956343792633, "grad_norm": 0.43084970116615295, "learning_rate": 1.1969859298396318e-06, "loss": 0.515, "step": 2662 }, { "epoch": 0.4541268758526603, "grad_norm": 0.5067896842956543, "learning_rate": 1.1964444024094346e-06, "loss": 0.4999, "step": 2663 }, { "epoch": 0.4542974079126876, "grad_norm": 0.41986194252967834, "learning_rate": 1.195902815052557e-06, "loss": 0.5285, "step": 2664 }, { "epoch": 0.4544679399727149, "grad_norm": 0.4705030918121338, "learning_rate": 1.1953611679342143e-06, "loss": 0.5474, "step": 2665 }, { "epoch": 0.45463847203274216, "grad_norm": 0.4286784529685974, "learning_rate": 1.1948194612196403e-06, "loss": 0.5154, "step": 2666 }, { "epoch": 0.4548090040927694, "grad_norm": 0.3951047360897064, "learning_rate": 1.194277695074085e-06, "loss": 0.5075, "step": 2667 }, { "epoch": 0.45497953615279674, "grad_norm": 0.44156450033187866, "learning_rate": 1.1937358696628186e-06, "loss": 0.5234, "step": 2668 }, { "epoch": 0.455150068212824, "grad_norm": 0.45110470056533813, "learning_rate": 1.1931939851511284e-06, "loss": 0.531, "step": 2669 }, { "epoch": 0.4553206002728513, "grad_norm": 0.3986232578754425, "learning_rate": 1.1926520417043196e-06, "loss": 0.5176, "step": 2670 }, { "epoch": 0.4554911323328786, "grad_norm": 0.38686272501945496, "learning_rate": 1.192110039487716e-06, "loss": 0.5148, "step": 2671 }, { "epoch": 0.45566166439290584, "grad_norm": 0.486208975315094, "learning_rate": 1.1915679786666592e-06, "loss": 0.5293, "step": 2672 }, { "epoch": 0.45583219645293316, "grad_norm": 0.5004016160964966, "learning_rate": 1.1910258594065078e-06, "loss": 0.5232, "step": 2673 }, { "epoch": 0.4560027285129604, "grad_norm": 0.3850324749946594, "learning_rate": 1.1904836818726399e-06, "loss": 0.5284, "step": 2674 }, { "epoch": 0.45617326057298774, "grad_norm": 0.36427977681159973, "learning_rate": 1.1899414462304495e-06, "loss": 0.5296, "step": 2675 }, { "epoch": 0.456343792633015, "grad_norm": 0.42257794737815857, "learning_rate": 1.1893991526453494e-06, "loss": 0.5167, "step": 2676 }, { "epoch": 0.4565143246930423, "grad_norm": 0.3928245007991791, "learning_rate": 1.18885680128277e-06, "loss": 0.5142, "step": 2677 }, { "epoch": 0.4566848567530696, "grad_norm": 0.39062950015068054, "learning_rate": 1.1883143923081593e-06, "loss": 0.5094, "step": 2678 }, { "epoch": 0.45685538881309684, "grad_norm": 0.4366256594657898, "learning_rate": 1.1877719258869828e-06, "loss": 0.5274, "step": 2679 }, { "epoch": 0.45702592087312416, "grad_norm": 0.48190537095069885, "learning_rate": 1.1872294021847232e-06, "loss": 0.5197, "step": 2680 }, { "epoch": 0.4571964529331514, "grad_norm": 0.34900742769241333, "learning_rate": 1.1866868213668815e-06, "loss": 0.5189, "step": 2681 }, { "epoch": 0.45736698499317874, "grad_norm": 0.45446139574050903, "learning_rate": 1.1861441835989754e-06, "loss": 0.5291, "step": 2682 }, { "epoch": 0.457537517053206, "grad_norm": 0.47677281498908997, "learning_rate": 1.1856014890465397e-06, "loss": 0.5234, "step": 2683 }, { "epoch": 0.45770804911323326, "grad_norm": 0.38069313764572144, "learning_rate": 1.1850587378751276e-06, "loss": 0.5248, "step": 2684 }, { "epoch": 0.4578785811732606, "grad_norm": 0.36604219675064087, "learning_rate": 1.1845159302503086e-06, "loss": 0.5036, "step": 2685 }, { "epoch": 0.45804911323328784, "grad_norm": 0.42271777987480164, "learning_rate": 1.1839730663376703e-06, "loss": 0.5144, "step": 2686 }, { "epoch": 0.45821964529331516, "grad_norm": 0.42497578263282776, "learning_rate": 1.1834301463028167e-06, "loss": 0.5205, "step": 2687 }, { "epoch": 0.4583901773533424, "grad_norm": 0.4204833507537842, "learning_rate": 1.1828871703113689e-06, "loss": 0.5284, "step": 2688 }, { "epoch": 0.45856070941336974, "grad_norm": 0.4158252477645874, "learning_rate": 1.1823441385289656e-06, "loss": 0.5157, "step": 2689 }, { "epoch": 0.458731241473397, "grad_norm": 0.46866708993911743, "learning_rate": 1.1818010511212625e-06, "loss": 0.5224, "step": 2690 }, { "epoch": 0.45890177353342426, "grad_norm": 0.4711799621582031, "learning_rate": 1.1812579082539318e-06, "loss": 0.513, "step": 2691 }, { "epoch": 0.4590723055934516, "grad_norm": 0.6736330389976501, "learning_rate": 1.1807147100926626e-06, "loss": 0.5061, "step": 2692 }, { "epoch": 0.45924283765347884, "grad_norm": 0.6674357056617737, "learning_rate": 1.1801714568031617e-06, "loss": 0.5326, "step": 2693 }, { "epoch": 0.45941336971350616, "grad_norm": 0.6352670192718506, "learning_rate": 1.179628148551152e-06, "loss": 0.5358, "step": 2694 }, { "epoch": 0.4595839017735334, "grad_norm": 0.5269036889076233, "learning_rate": 1.1790847855023727e-06, "loss": 0.5132, "step": 2695 }, { "epoch": 0.4597544338335607, "grad_norm": 0.46895119547843933, "learning_rate": 1.178541367822581e-06, "loss": 0.5118, "step": 2696 }, { "epoch": 0.459924965893588, "grad_norm": 0.6236005425453186, "learning_rate": 1.1779978956775506e-06, "loss": 0.5222, "step": 2697 }, { "epoch": 0.46009549795361526, "grad_norm": 0.48662877082824707, "learning_rate": 1.1774543692330701e-06, "loss": 0.5203, "step": 2698 }, { "epoch": 0.4602660300136426, "grad_norm": 0.4667477309703827, "learning_rate": 1.1769107886549465e-06, "loss": 0.5168, "step": 2699 }, { "epoch": 0.46043656207366984, "grad_norm": 0.4196608066558838, "learning_rate": 1.1763671541090027e-06, "loss": 0.5126, "step": 2700 }, { "epoch": 0.46060709413369716, "grad_norm": 0.5549945831298828, "learning_rate": 1.1758234657610779e-06, "loss": 0.528, "step": 2701 }, { "epoch": 0.4607776261937244, "grad_norm": 0.3792808949947357, "learning_rate": 1.175279723777028e-06, "loss": 0.5097, "step": 2702 }, { "epoch": 0.4609481582537517, "grad_norm": 0.5246736407279968, "learning_rate": 1.1747359283227251e-06, "loss": 0.5034, "step": 2703 }, { "epoch": 0.461118690313779, "grad_norm": 0.5328319668769836, "learning_rate": 1.174192079564058e-06, "loss": 0.5206, "step": 2704 }, { "epoch": 0.46128922237380626, "grad_norm": 0.42339950799942017, "learning_rate": 1.1736481776669307e-06, "loss": 0.5107, "step": 2705 }, { "epoch": 0.4614597544338336, "grad_norm": 0.4282023310661316, "learning_rate": 1.1731042227972646e-06, "loss": 0.5278, "step": 2706 }, { "epoch": 0.46163028649386084, "grad_norm": 0.42935076355934143, "learning_rate": 1.1725602151209963e-06, "loss": 0.524, "step": 2707 }, { "epoch": 0.4618008185538881, "grad_norm": 0.5306663513183594, "learning_rate": 1.1720161548040794e-06, "loss": 0.5255, "step": 2708 }, { "epoch": 0.4619713506139154, "grad_norm": 0.43583446741104126, "learning_rate": 1.1714720420124833e-06, "loss": 0.5233, "step": 2709 }, { "epoch": 0.4621418826739427, "grad_norm": 0.4610784947872162, "learning_rate": 1.1709278769121929e-06, "loss": 0.5121, "step": 2710 }, { "epoch": 0.46231241473397, "grad_norm": 0.414095014333725, "learning_rate": 1.1703836596692094e-06, "loss": 0.5149, "step": 2711 }, { "epoch": 0.46248294679399726, "grad_norm": 0.4780326783657074, "learning_rate": 1.16983939044955e-06, "loss": 0.5058, "step": 2712 }, { "epoch": 0.4626534788540246, "grad_norm": 0.5305455923080444, "learning_rate": 1.169295069419248e-06, "loss": 0.5088, "step": 2713 }, { "epoch": 0.46282401091405184, "grad_norm": 0.36969780921936035, "learning_rate": 1.1687506967443516e-06, "loss": 0.5167, "step": 2714 }, { "epoch": 0.4629945429740791, "grad_norm": 0.47688403725624084, "learning_rate": 1.168206272590926e-06, "loss": 0.5223, "step": 2715 }, { "epoch": 0.4631650750341064, "grad_norm": 0.43216776847839355, "learning_rate": 1.1676617971250505e-06, "loss": 0.5096, "step": 2716 }, { "epoch": 0.4633356070941337, "grad_norm": 0.49492067098617554, "learning_rate": 1.1671172705128217e-06, "loss": 0.513, "step": 2717 }, { "epoch": 0.463506139154161, "grad_norm": 0.4933083951473236, "learning_rate": 1.166572692920351e-06, "loss": 0.508, "step": 2718 }, { "epoch": 0.46367667121418826, "grad_norm": 0.4092305302619934, "learning_rate": 1.166028064513765e-06, "loss": 0.5214, "step": 2719 }, { "epoch": 0.4638472032742155, "grad_norm": 0.5383063554763794, "learning_rate": 1.1654833854592067e-06, "loss": 0.5273, "step": 2720 }, { "epoch": 0.46401773533424284, "grad_norm": 0.3836551606655121, "learning_rate": 1.1649386559228343e-06, "loss": 0.5189, "step": 2721 }, { "epoch": 0.4641882673942701, "grad_norm": 0.5678832530975342, "learning_rate": 1.1643938760708207e-06, "loss": 0.5089, "step": 2722 }, { "epoch": 0.4643587994542974, "grad_norm": 0.4550630748271942, "learning_rate": 1.1638490460693547e-06, "loss": 0.5319, "step": 2723 }, { "epoch": 0.4645293315143247, "grad_norm": 0.431937575340271, "learning_rate": 1.1633041660846406e-06, "loss": 0.5343, "step": 2724 }, { "epoch": 0.464699863574352, "grad_norm": 0.47891420125961304, "learning_rate": 1.1627592362828974e-06, "loss": 0.5196, "step": 2725 }, { "epoch": 0.46487039563437926, "grad_norm": 0.4315849244594574, "learning_rate": 1.1622142568303596e-06, "loss": 0.5291, "step": 2726 }, { "epoch": 0.4650409276944065, "grad_norm": 0.5645219087600708, "learning_rate": 1.1616692278932775e-06, "loss": 0.5213, "step": 2727 }, { "epoch": 0.46521145975443384, "grad_norm": 0.41248124837875366, "learning_rate": 1.1611241496379145e-06, "loss": 0.5216, "step": 2728 }, { "epoch": 0.4653819918144611, "grad_norm": 0.5293885469436646, "learning_rate": 1.1605790222305517e-06, "loss": 0.5101, "step": 2729 }, { "epoch": 0.4655525238744884, "grad_norm": 0.5205586552619934, "learning_rate": 1.160033845837483e-06, "loss": 0.5221, "step": 2730 }, { "epoch": 0.4657230559345157, "grad_norm": 0.573829710483551, "learning_rate": 1.1594886206250187e-06, "loss": 0.5098, "step": 2731 }, { "epoch": 0.465893587994543, "grad_norm": 0.5038020610809326, "learning_rate": 1.1589433467594827e-06, "loss": 0.5069, "step": 2732 }, { "epoch": 0.46606412005457026, "grad_norm": 0.5627439022064209, "learning_rate": 1.1583980244072151e-06, "loss": 0.5098, "step": 2733 }, { "epoch": 0.4662346521145975, "grad_norm": 0.45539039373397827, "learning_rate": 1.1578526537345699e-06, "loss": 0.5081, "step": 2734 }, { "epoch": 0.46640518417462484, "grad_norm": 0.5840868949890137, "learning_rate": 1.157307234907916e-06, "loss": 0.5194, "step": 2735 }, { "epoch": 0.4665757162346521, "grad_norm": 0.4483746290206909, "learning_rate": 1.1567617680936373e-06, "loss": 0.5116, "step": 2736 }, { "epoch": 0.4667462482946794, "grad_norm": 0.4605935215950012, "learning_rate": 1.156216253458132e-06, "loss": 0.5073, "step": 2737 }, { "epoch": 0.4669167803547067, "grad_norm": 0.3922935426235199, "learning_rate": 1.1556706911678126e-06, "loss": 0.5125, "step": 2738 }, { "epoch": 0.46708731241473395, "grad_norm": 0.4241859018802643, "learning_rate": 1.1551250813891068e-06, "loss": 0.5119, "step": 2739 }, { "epoch": 0.46725784447476126, "grad_norm": 0.4191175103187561, "learning_rate": 1.1545794242884569e-06, "loss": 0.5128, "step": 2740 }, { "epoch": 0.4674283765347885, "grad_norm": 0.40228796005249023, "learning_rate": 1.1540337200323188e-06, "loss": 0.51, "step": 2741 }, { "epoch": 0.46759890859481584, "grad_norm": 0.45950260758399963, "learning_rate": 1.153487968787163e-06, "loss": 0.5123, "step": 2742 }, { "epoch": 0.4677694406548431, "grad_norm": 0.4424385726451874, "learning_rate": 1.1529421707194754e-06, "loss": 0.5178, "step": 2743 }, { "epoch": 0.4679399727148704, "grad_norm": 0.4888211190700531, "learning_rate": 1.1523963259957549e-06, "loss": 0.5218, "step": 2744 }, { "epoch": 0.4681105047748977, "grad_norm": 0.4428039789199829, "learning_rate": 1.1518504347825146e-06, "loss": 0.5114, "step": 2745 }, { "epoch": 0.46828103683492495, "grad_norm": 0.4225022494792938, "learning_rate": 1.151304497246283e-06, "loss": 0.5083, "step": 2746 }, { "epoch": 0.46845156889495226, "grad_norm": 0.46187952160835266, "learning_rate": 1.1507585135536017e-06, "loss": 0.5126, "step": 2747 }, { "epoch": 0.4686221009549795, "grad_norm": 0.4135882556438446, "learning_rate": 1.1502124838710267e-06, "loss": 0.5216, "step": 2748 }, { "epoch": 0.46879263301500684, "grad_norm": 0.45884090662002563, "learning_rate": 1.1496664083651283e-06, "loss": 0.5152, "step": 2749 }, { "epoch": 0.4689631650750341, "grad_norm": 0.40598103404045105, "learning_rate": 1.1491202872024897e-06, "loss": 0.5108, "step": 2750 }, { "epoch": 0.46913369713506137, "grad_norm": 0.36626097559928894, "learning_rate": 1.1485741205497096e-06, "loss": 0.5047, "step": 2751 }, { "epoch": 0.4693042291950887, "grad_norm": 0.43675321340560913, "learning_rate": 1.1480279085733997e-06, "loss": 0.5075, "step": 2752 }, { "epoch": 0.46947476125511595, "grad_norm": 0.415915310382843, "learning_rate": 1.1474816514401853e-06, "loss": 0.5203, "step": 2753 }, { "epoch": 0.46964529331514326, "grad_norm": 0.4475071430206299, "learning_rate": 1.1469353493167058e-06, "loss": 0.5092, "step": 2754 }, { "epoch": 0.4698158253751705, "grad_norm": 0.43330270051956177, "learning_rate": 1.1463890023696145e-06, "loss": 0.509, "step": 2755 }, { "epoch": 0.46998635743519784, "grad_norm": 0.4516720771789551, "learning_rate": 1.1458426107655781e-06, "loss": 0.5084, "step": 2756 }, { "epoch": 0.4701568894952251, "grad_norm": 0.5397230982780457, "learning_rate": 1.145296174671277e-06, "loss": 0.5206, "step": 2757 }, { "epoch": 0.47032742155525237, "grad_norm": 0.5378764867782593, "learning_rate": 1.1447496942534057e-06, "loss": 0.5257, "step": 2758 }, { "epoch": 0.4704979536152797, "grad_norm": 0.5214230418205261, "learning_rate": 1.1442031696786707e-06, "loss": 0.5277, "step": 2759 }, { "epoch": 0.47066848567530695, "grad_norm": 0.4218697249889374, "learning_rate": 1.143656601113794e-06, "loss": 0.5091, "step": 2760 }, { "epoch": 0.47083901773533426, "grad_norm": 0.46463119983673096, "learning_rate": 1.143109988725509e-06, "loss": 0.5135, "step": 2761 }, { "epoch": 0.4710095497953615, "grad_norm": 0.4969916343688965, "learning_rate": 1.1425633326805646e-06, "loss": 0.5159, "step": 2762 }, { "epoch": 0.4711800818553888, "grad_norm": 0.4337003827095032, "learning_rate": 1.142016633145721e-06, "loss": 0.5173, "step": 2763 }, { "epoch": 0.4713506139154161, "grad_norm": 0.500644326210022, "learning_rate": 1.1414698902877528e-06, "loss": 0.5197, "step": 2764 }, { "epoch": 0.47152114597544337, "grad_norm": 0.553901731967926, "learning_rate": 1.1409231042734478e-06, "loss": 0.5137, "step": 2765 }, { "epoch": 0.4716916780354707, "grad_norm": 0.47816386818885803, "learning_rate": 1.1403762752696063e-06, "loss": 0.5124, "step": 2766 }, { "epoch": 0.47186221009549795, "grad_norm": 0.3969438970088959, "learning_rate": 1.1398294034430424e-06, "loss": 0.5041, "step": 2767 }, { "epoch": 0.47203274215552526, "grad_norm": 0.4518330991268158, "learning_rate": 1.1392824889605835e-06, "loss": 0.5071, "step": 2768 }, { "epoch": 0.4722032742155525, "grad_norm": 0.63393634557724, "learning_rate": 1.1387355319890685e-06, "loss": 0.5224, "step": 2769 }, { "epoch": 0.4723738062755798, "grad_norm": 0.5123917460441589, "learning_rate": 1.1381885326953508e-06, "loss": 0.5103, "step": 2770 }, { "epoch": 0.4725443383356071, "grad_norm": 0.3764648139476776, "learning_rate": 1.1376414912462966e-06, "loss": 0.5102, "step": 2771 }, { "epoch": 0.47271487039563437, "grad_norm": 0.523368239402771, "learning_rate": 1.137094407808784e-06, "loss": 0.5276, "step": 2772 }, { "epoch": 0.4728854024556617, "grad_norm": 0.3376198709011078, "learning_rate": 1.1365472825497043e-06, "loss": 0.5134, "step": 2773 }, { "epoch": 0.47305593451568895, "grad_norm": 0.5027055144309998, "learning_rate": 1.1360001156359624e-06, "loss": 0.5068, "step": 2774 }, { "epoch": 0.4732264665757162, "grad_norm": 0.45107799768447876, "learning_rate": 1.135452907234475e-06, "loss": 0.5024, "step": 2775 }, { "epoch": 0.4733969986357435, "grad_norm": 0.47594237327575684, "learning_rate": 1.1349056575121714e-06, "loss": 0.5217, "step": 2776 }, { "epoch": 0.4735675306957708, "grad_norm": 0.495340496301651, "learning_rate": 1.134358366635994e-06, "loss": 0.5101, "step": 2777 }, { "epoch": 0.4737380627557981, "grad_norm": 0.4050142765045166, "learning_rate": 1.1338110347728973e-06, "loss": 0.5053, "step": 2778 }, { "epoch": 0.47390859481582537, "grad_norm": 0.4666402041912079, "learning_rate": 1.1332636620898487e-06, "loss": 0.5138, "step": 2779 }, { "epoch": 0.4740791268758527, "grad_norm": 0.403601735830307, "learning_rate": 1.1327162487538284e-06, "loss": 0.5143, "step": 2780 }, { "epoch": 0.47424965893587995, "grad_norm": 0.4783070981502533, "learning_rate": 1.1321687949318277e-06, "loss": 0.5007, "step": 2781 }, { "epoch": 0.4744201909959072, "grad_norm": 0.6048984527587891, "learning_rate": 1.1316213007908514e-06, "loss": 0.5092, "step": 2782 }, { "epoch": 0.4745907230559345, "grad_norm": 0.39854034781455994, "learning_rate": 1.1310737664979169e-06, "loss": 0.5126, "step": 2783 }, { "epoch": 0.4747612551159618, "grad_norm": 0.5207147002220154, "learning_rate": 1.130526192220052e-06, "loss": 0.514, "step": 2784 }, { "epoch": 0.4749317871759891, "grad_norm": 0.6135899424552917, "learning_rate": 1.1299785781242983e-06, "loss": 0.5014, "step": 2785 }, { "epoch": 0.47510231923601637, "grad_norm": 0.5457688570022583, "learning_rate": 1.1294309243777097e-06, "loss": 0.5098, "step": 2786 }, { "epoch": 0.47527285129604363, "grad_norm": 0.44323474168777466, "learning_rate": 1.1288832311473509e-06, "loss": 0.5142, "step": 2787 }, { "epoch": 0.47544338335607095, "grad_norm": 0.40029415488243103, "learning_rate": 1.1283354986002997e-06, "loss": 0.5173, "step": 2788 }, { "epoch": 0.4756139154160982, "grad_norm": 0.4944052994251251, "learning_rate": 1.1277877269036463e-06, "loss": 0.5206, "step": 2789 }, { "epoch": 0.4757844474761255, "grad_norm": 0.37801826000213623, "learning_rate": 1.1272399162244908e-06, "loss": 0.5143, "step": 2790 }, { "epoch": 0.4759549795361528, "grad_norm": 0.4556177258491516, "learning_rate": 1.1266920667299473e-06, "loss": 0.5117, "step": 2791 }, { "epoch": 0.4761255115961801, "grad_norm": 0.4916275441646576, "learning_rate": 1.126144178587141e-06, "loss": 0.5183, "step": 2792 }, { "epoch": 0.47629604365620737, "grad_norm": 0.3971802294254303, "learning_rate": 1.1255962519632083e-06, "loss": 0.5098, "step": 2793 }, { "epoch": 0.47646657571623463, "grad_norm": 0.5458121299743652, "learning_rate": 1.1250482870252985e-06, "loss": 0.5121, "step": 2794 }, { "epoch": 0.47663710777626195, "grad_norm": 0.7432170510292053, "learning_rate": 1.1245002839405715e-06, "loss": 0.5216, "step": 2795 }, { "epoch": 0.4768076398362892, "grad_norm": 0.5885338187217712, "learning_rate": 1.1239522428761996e-06, "loss": 0.52, "step": 2796 }, { "epoch": 0.4769781718963165, "grad_norm": 0.36348477005958557, "learning_rate": 1.1234041639993664e-06, "loss": 0.5196, "step": 2797 }, { "epoch": 0.4771487039563438, "grad_norm": 0.6516478061676025, "learning_rate": 1.122856047477267e-06, "loss": 0.5212, "step": 2798 }, { "epoch": 0.47731923601637105, "grad_norm": 0.5506260395050049, "learning_rate": 1.122307893477108e-06, "loss": 0.5197, "step": 2799 }, { "epoch": 0.47748976807639837, "grad_norm": 0.34980344772338867, "learning_rate": 1.1217597021661071e-06, "loss": 0.5132, "step": 2800 }, { "epoch": 0.47766030013642563, "grad_norm": 0.5539247989654541, "learning_rate": 1.121211473711494e-06, "loss": 0.5019, "step": 2801 }, { "epoch": 0.47783083219645295, "grad_norm": 0.40467381477355957, "learning_rate": 1.1206632082805099e-06, "loss": 0.5137, "step": 2802 }, { "epoch": 0.4780013642564802, "grad_norm": 0.3608384132385254, "learning_rate": 1.120114906040406e-06, "loss": 0.5236, "step": 2803 }, { "epoch": 0.4781718963165075, "grad_norm": 0.41437652707099915, "learning_rate": 1.1195665671584464e-06, "loss": 0.5126, "step": 2804 }, { "epoch": 0.4783424283765348, "grad_norm": 0.35578078031539917, "learning_rate": 1.119018191801905e-06, "loss": 0.518, "step": 2805 }, { "epoch": 0.47851296043656205, "grad_norm": 0.43420663475990295, "learning_rate": 1.1184697801380675e-06, "loss": 0.5169, "step": 2806 }, { "epoch": 0.47868349249658937, "grad_norm": 0.4014913737773895, "learning_rate": 1.1179213323342305e-06, "loss": 0.5162, "step": 2807 }, { "epoch": 0.47885402455661663, "grad_norm": 0.4695773124694824, "learning_rate": 1.117372848557702e-06, "loss": 0.5227, "step": 2808 }, { "epoch": 0.47902455661664395, "grad_norm": 0.5101913213729858, "learning_rate": 1.1168243289758002e-06, "loss": 0.5246, "step": 2809 }, { "epoch": 0.4791950886766712, "grad_norm": 0.46013885736465454, "learning_rate": 1.1162757737558546e-06, "loss": 0.5141, "step": 2810 }, { "epoch": 0.4793656207366985, "grad_norm": 0.46474412083625793, "learning_rate": 1.1157271830652065e-06, "loss": 0.5162, "step": 2811 }, { "epoch": 0.4795361527967258, "grad_norm": 0.38749903440475464, "learning_rate": 1.115178557071206e-06, "loss": 0.5156, "step": 2812 }, { "epoch": 0.47970668485675305, "grad_norm": 0.43065133690834045, "learning_rate": 1.114629895941216e-06, "loss": 0.5211, "step": 2813 }, { "epoch": 0.47987721691678037, "grad_norm": 0.4182431399822235, "learning_rate": 1.114081199842609e-06, "loss": 0.531, "step": 2814 }, { "epoch": 0.48004774897680763, "grad_norm": 0.49661538004875183, "learning_rate": 1.113532468942768e-06, "loss": 0.509, "step": 2815 }, { "epoch": 0.48021828103683495, "grad_norm": 0.5234668254852295, "learning_rate": 1.1129837034090877e-06, "loss": 0.5221, "step": 2816 }, { "epoch": 0.4803888130968622, "grad_norm": 0.6002328991889954, "learning_rate": 1.1124349034089725e-06, "loss": 0.5253, "step": 2817 }, { "epoch": 0.4805593451568895, "grad_norm": 0.3524320721626282, "learning_rate": 1.1118860691098372e-06, "loss": 0.5361, "step": 2818 }, { "epoch": 0.4807298772169168, "grad_norm": 0.5052375793457031, "learning_rate": 1.1113372006791075e-06, "loss": 0.5061, "step": 2819 }, { "epoch": 0.48090040927694405, "grad_norm": 0.4445066452026367, "learning_rate": 1.1107882982842197e-06, "loss": 0.525, "step": 2820 }, { "epoch": 0.48107094133697137, "grad_norm": 0.3796360492706299, "learning_rate": 1.1102393620926197e-06, "loss": 0.5083, "step": 2821 }, { "epoch": 0.48124147339699863, "grad_norm": 0.4080047905445099, "learning_rate": 1.1096903922717646e-06, "loss": 0.5205, "step": 2822 }, { "epoch": 0.4814120054570259, "grad_norm": 0.4210074245929718, "learning_rate": 1.1091413889891213e-06, "loss": 0.523, "step": 2823 }, { "epoch": 0.4815825375170532, "grad_norm": 0.44352221488952637, "learning_rate": 1.1085923524121663e-06, "loss": 0.5274, "step": 2824 }, { "epoch": 0.4817530695770805, "grad_norm": 0.46211662888526917, "learning_rate": 1.1080432827083875e-06, "loss": 0.5199, "step": 2825 }, { "epoch": 0.4819236016371078, "grad_norm": 0.4778159260749817, "learning_rate": 1.107494180045282e-06, "loss": 0.5188, "step": 2826 }, { "epoch": 0.48209413369713505, "grad_norm": 0.47870731353759766, "learning_rate": 1.1069450445903575e-06, "loss": 0.5114, "step": 2827 }, { "epoch": 0.48226466575716237, "grad_norm": 0.44184383749961853, "learning_rate": 1.106395876511131e-06, "loss": 0.5096, "step": 2828 }, { "epoch": 0.48243519781718963, "grad_norm": 0.5257557034492493, "learning_rate": 1.1058466759751305e-06, "loss": 0.5197, "step": 2829 }, { "epoch": 0.4826057298772169, "grad_norm": 0.4767327308654785, "learning_rate": 1.105297443149893e-06, "loss": 0.5233, "step": 2830 }, { "epoch": 0.4827762619372442, "grad_norm": 0.5272330045700073, "learning_rate": 1.1047481782029655e-06, "loss": 0.527, "step": 2831 }, { "epoch": 0.4829467939972715, "grad_norm": 0.5258830785751343, "learning_rate": 1.1041988813019052e-06, "loss": 0.5189, "step": 2832 }, { "epoch": 0.4831173260572988, "grad_norm": 0.4522515833377838, "learning_rate": 1.1036495526142783e-06, "loss": 0.5173, "step": 2833 }, { "epoch": 0.48328785811732605, "grad_norm": 0.6141322255134583, "learning_rate": 1.103100192307662e-06, "loss": 0.541, "step": 2834 }, { "epoch": 0.48345839017735337, "grad_norm": 0.37719348073005676, "learning_rate": 1.1025508005496419e-06, "loss": 0.5002, "step": 2835 }, { "epoch": 0.48362892223738063, "grad_norm": 0.6246870160102844, "learning_rate": 1.1020013775078134e-06, "loss": 0.519, "step": 2836 }, { "epoch": 0.4837994542974079, "grad_norm": 0.5842008590698242, "learning_rate": 1.1014519233497822e-06, "loss": 0.5316, "step": 2837 }, { "epoch": 0.4839699863574352, "grad_norm": 0.46794211864471436, "learning_rate": 1.1009024382431625e-06, "loss": 0.5244, "step": 2838 }, { "epoch": 0.4841405184174625, "grad_norm": 0.4208543598651886, "learning_rate": 1.1003529223555787e-06, "loss": 0.5145, "step": 2839 }, { "epoch": 0.4843110504774898, "grad_norm": 0.429081529378891, "learning_rate": 1.0998033758546642e-06, "loss": 0.5112, "step": 2840 }, { "epoch": 0.48448158253751705, "grad_norm": 0.44880661368370056, "learning_rate": 1.099253798908062e-06, "loss": 0.5245, "step": 2841 }, { "epoch": 0.4846521145975443, "grad_norm": 0.4802000820636749, "learning_rate": 1.0987041916834237e-06, "loss": 0.5206, "step": 2842 }, { "epoch": 0.48482264665757163, "grad_norm": 0.5355414152145386, "learning_rate": 1.0981545543484111e-06, "loss": 0.5095, "step": 2843 }, { "epoch": 0.4849931787175989, "grad_norm": 0.434264212846756, "learning_rate": 1.0976048870706952e-06, "loss": 0.5206, "step": 2844 }, { "epoch": 0.4851637107776262, "grad_norm": 0.5525639653205872, "learning_rate": 1.0970551900179545e-06, "loss": 0.5338, "step": 2845 }, { "epoch": 0.4853342428376535, "grad_norm": 0.4568883180618286, "learning_rate": 1.0965054633578789e-06, "loss": 0.5131, "step": 2846 }, { "epoch": 0.4855047748976808, "grad_norm": 0.5602957606315613, "learning_rate": 1.0959557072581654e-06, "loss": 0.4977, "step": 2847 }, { "epoch": 0.48567530695770805, "grad_norm": 0.552498459815979, "learning_rate": 1.0954059218865213e-06, "loss": 0.5246, "step": 2848 }, { "epoch": 0.4858458390177353, "grad_norm": 0.5079182982444763, "learning_rate": 1.094856107410662e-06, "loss": 0.5097, "step": 2849 }, { "epoch": 0.48601637107776263, "grad_norm": 0.4334324300289154, "learning_rate": 1.094306263998312e-06, "loss": 0.5152, "step": 2850 }, { "epoch": 0.4861869031377899, "grad_norm": 0.5188226103782654, "learning_rate": 1.0937563918172052e-06, "loss": 0.5159, "step": 2851 }, { "epoch": 0.4863574351978172, "grad_norm": 0.4685499370098114, "learning_rate": 1.0932064910350836e-06, "loss": 0.5064, "step": 2852 }, { "epoch": 0.4865279672578445, "grad_norm": 0.4312712252140045, "learning_rate": 1.092656561819698e-06, "loss": 0.5146, "step": 2853 }, { "epoch": 0.48669849931787174, "grad_norm": 0.4952917993068695, "learning_rate": 1.0921066043388078e-06, "loss": 0.5091, "step": 2854 }, { "epoch": 0.48686903137789905, "grad_norm": 0.36304834485054016, "learning_rate": 1.0915566187601814e-06, "loss": 0.5082, "step": 2855 }, { "epoch": 0.4870395634379263, "grad_norm": 0.46254342794418335, "learning_rate": 1.0910066052515956e-06, "loss": 0.519, "step": 2856 }, { "epoch": 0.48721009549795363, "grad_norm": 0.3810098171234131, "learning_rate": 1.0904565639808358e-06, "loss": 0.4963, "step": 2857 }, { "epoch": 0.4873806275579809, "grad_norm": 0.42867311835289, "learning_rate": 1.0899064951156957e-06, "loss": 0.4969, "step": 2858 }, { "epoch": 0.4875511596180082, "grad_norm": 0.3933941721916199, "learning_rate": 1.0893563988239773e-06, "loss": 0.5132, "step": 2859 }, { "epoch": 0.4877216916780355, "grad_norm": 0.3573196828365326, "learning_rate": 1.0888062752734915e-06, "loss": 0.521, "step": 2860 }, { "epoch": 0.48789222373806274, "grad_norm": 0.3725636601448059, "learning_rate": 1.0882561246320572e-06, "loss": 0.52, "step": 2861 }, { "epoch": 0.48806275579809005, "grad_norm": 0.39118441939353943, "learning_rate": 1.0877059470675007e-06, "loss": 0.5227, "step": 2862 }, { "epoch": 0.4882332878581173, "grad_norm": 0.42825067043304443, "learning_rate": 1.0871557427476585e-06, "loss": 0.5184, "step": 2863 }, { "epoch": 0.48840381991814463, "grad_norm": 0.4308590590953827, "learning_rate": 1.0866055118403735e-06, "loss": 0.5167, "step": 2864 }, { "epoch": 0.4885743519781719, "grad_norm": 0.39540690183639526, "learning_rate": 1.0860552545134972e-06, "loss": 0.5036, "step": 2865 }, { "epoch": 0.48874488403819916, "grad_norm": 0.4011569917201996, "learning_rate": 1.0855049709348896e-06, "loss": 0.515, "step": 2866 }, { "epoch": 0.4889154160982265, "grad_norm": 0.4647868275642395, "learning_rate": 1.084954661272418e-06, "loss": 0.5212, "step": 2867 }, { "epoch": 0.48908594815825374, "grad_norm": 0.39145034551620483, "learning_rate": 1.0844043256939585e-06, "loss": 0.5219, "step": 2868 }, { "epoch": 0.48925648021828105, "grad_norm": 0.40879708528518677, "learning_rate": 1.083853964367395e-06, "loss": 0.52, "step": 2869 }, { "epoch": 0.4894270122783083, "grad_norm": 0.425218403339386, "learning_rate": 1.0833035774606176e-06, "loss": 0.5027, "step": 2870 }, { "epoch": 0.48959754433833563, "grad_norm": 0.36446821689605713, "learning_rate": 1.0827531651415268e-06, "loss": 0.5184, "step": 2871 }, { "epoch": 0.4897680763983629, "grad_norm": 0.43742719292640686, "learning_rate": 1.0822027275780289e-06, "loss": 0.5025, "step": 2872 }, { "epoch": 0.48993860845839016, "grad_norm": 0.4060668647289276, "learning_rate": 1.0816522649380384e-06, "loss": 0.5188, "step": 2873 }, { "epoch": 0.4901091405184175, "grad_norm": 0.4716636836528778, "learning_rate": 1.0811017773894781e-06, "loss": 0.4979, "step": 2874 }, { "epoch": 0.49027967257844474, "grad_norm": 0.4533891975879669, "learning_rate": 1.080551265100278e-06, "loss": 0.5021, "step": 2875 }, { "epoch": 0.49045020463847205, "grad_norm": 0.35617199540138245, "learning_rate": 1.080000728238375e-06, "loss": 0.5142, "step": 2876 }, { "epoch": 0.4906207366984993, "grad_norm": 0.3525446951389313, "learning_rate": 1.0794501669717146e-06, "loss": 0.5241, "step": 2877 }, { "epoch": 0.4907912687585266, "grad_norm": 0.38873711228370667, "learning_rate": 1.0788995814682487e-06, "loss": 0.5047, "step": 2878 }, { "epoch": 0.4909618008185539, "grad_norm": 0.3586026430130005, "learning_rate": 1.0783489718959379e-06, "loss": 0.5123, "step": 2879 }, { "epoch": 0.49113233287858116, "grad_norm": 0.4532160460948944, "learning_rate": 1.077798338422748e-06, "loss": 0.5145, "step": 2880 }, { "epoch": 0.4913028649386085, "grad_norm": 0.4291527569293976, "learning_rate": 1.0772476812166546e-06, "loss": 0.5198, "step": 2881 }, { "epoch": 0.49147339699863574, "grad_norm": 0.41730403900146484, "learning_rate": 1.076697000445639e-06, "loss": 0.5257, "step": 2882 }, { "epoch": 0.49164392905866305, "grad_norm": 0.49823060631752014, "learning_rate": 1.0761462962776898e-06, "loss": 0.5069, "step": 2883 }, { "epoch": 0.4918144611186903, "grad_norm": 0.635140061378479, "learning_rate": 1.0755955688808037e-06, "loss": 0.5228, "step": 2884 }, { "epoch": 0.4919849931787176, "grad_norm": 0.5624722242355347, "learning_rate": 1.075044818422983e-06, "loss": 0.526, "step": 2885 }, { "epoch": 0.4921555252387449, "grad_norm": 0.6729494333267212, "learning_rate": 1.0744940450722379e-06, "loss": 0.5223, "step": 2886 }, { "epoch": 0.49232605729877216, "grad_norm": 0.6167219877243042, "learning_rate": 1.073943248996586e-06, "loss": 0.5068, "step": 2887 }, { "epoch": 0.4924965893587995, "grad_norm": 0.45822858810424805, "learning_rate": 1.073392430364051e-06, "loss": 0.5097, "step": 2888 }, { "epoch": 0.49266712141882674, "grad_norm": 0.7486544847488403, "learning_rate": 1.0728415893426637e-06, "loss": 0.502, "step": 2889 }, { "epoch": 0.492837653478854, "grad_norm": 0.4141033887863159, "learning_rate": 1.0722907261004618e-06, "loss": 0.5138, "step": 2890 }, { "epoch": 0.4930081855388813, "grad_norm": 0.6413578987121582, "learning_rate": 1.0717398408054906e-06, "loss": 0.5168, "step": 2891 }, { "epoch": 0.4931787175989086, "grad_norm": 0.49279406666755676, "learning_rate": 1.0711889336258004e-06, "loss": 0.5076, "step": 2892 }, { "epoch": 0.4933492496589359, "grad_norm": 0.4992629587650299, "learning_rate": 1.0706380047294497e-06, "loss": 0.5015, "step": 2893 }, { "epoch": 0.49351978171896316, "grad_norm": 0.506038248538971, "learning_rate": 1.0700870542845027e-06, "loss": 0.5209, "step": 2894 }, { "epoch": 0.4936903137789905, "grad_norm": 0.48432669043540955, "learning_rate": 1.0695360824590306e-06, "loss": 0.5105, "step": 2895 }, { "epoch": 0.49386084583901774, "grad_norm": 0.6199078559875488, "learning_rate": 1.068985089421111e-06, "loss": 0.5154, "step": 2896 }, { "epoch": 0.494031377899045, "grad_norm": 0.4056002199649811, "learning_rate": 1.0684340753388283e-06, "loss": 0.5193, "step": 2897 }, { "epoch": 0.4942019099590723, "grad_norm": 0.5460501313209534, "learning_rate": 1.0678830403802728e-06, "loss": 0.509, "step": 2898 }, { "epoch": 0.4943724420190996, "grad_norm": 0.4023360013961792, "learning_rate": 1.067331984713541e-06, "loss": 0.5092, "step": 2899 }, { "epoch": 0.4945429740791269, "grad_norm": 0.42605215311050415, "learning_rate": 1.0667809085067376e-06, "loss": 0.523, "step": 2900 }, { "epoch": 0.49471350613915416, "grad_norm": 0.4854925572872162, "learning_rate": 1.0662298119279702e-06, "loss": 0.515, "step": 2901 }, { "epoch": 0.4948840381991814, "grad_norm": 0.3809516131877899, "learning_rate": 1.0656786951453556e-06, "loss": 0.5149, "step": 2902 }, { "epoch": 0.49505457025920874, "grad_norm": 0.4749661087989807, "learning_rate": 1.0651275583270155e-06, "loss": 0.4991, "step": 2903 }, { "epoch": 0.495225102319236, "grad_norm": 0.35364648699760437, "learning_rate": 1.0645764016410777e-06, "loss": 0.5192, "step": 2904 }, { "epoch": 0.4953956343792633, "grad_norm": 0.51173996925354, "learning_rate": 1.064025225255676e-06, "loss": 0.5171, "step": 2905 }, { "epoch": 0.4955661664392906, "grad_norm": 0.47044792771339417, "learning_rate": 1.0634740293389512e-06, "loss": 0.5209, "step": 2906 }, { "epoch": 0.4957366984993179, "grad_norm": 0.3833619952201843, "learning_rate": 1.0629228140590488e-06, "loss": 0.509, "step": 2907 }, { "epoch": 0.49590723055934516, "grad_norm": 0.49732497334480286, "learning_rate": 1.0623715795841208e-06, "loss": 0.5132, "step": 2908 }, { "epoch": 0.4960777626193724, "grad_norm": 0.3693459630012512, "learning_rate": 1.061820326082325e-06, "loss": 0.5199, "step": 2909 }, { "epoch": 0.49624829467939974, "grad_norm": 0.4614149034023285, "learning_rate": 1.0612690537218247e-06, "loss": 0.5199, "step": 2910 }, { "epoch": 0.496418826739427, "grad_norm": 0.4367601275444031, "learning_rate": 1.0607177626707894e-06, "loss": 0.5109, "step": 2911 }, { "epoch": 0.4965893587994543, "grad_norm": 0.4477463960647583, "learning_rate": 1.0601664530973946e-06, "loss": 0.5001, "step": 2912 }, { "epoch": 0.4967598908594816, "grad_norm": 0.44696980714797974, "learning_rate": 1.05961512516982e-06, "loss": 0.5143, "step": 2913 }, { "epoch": 0.49693042291950884, "grad_norm": 0.3601173460483551, "learning_rate": 1.0590637790562527e-06, "loss": 0.5097, "step": 2914 }, { "epoch": 0.49710095497953616, "grad_norm": 0.49771782755851746, "learning_rate": 1.0585124149248842e-06, "loss": 0.5046, "step": 2915 }, { "epoch": 0.4972714870395634, "grad_norm": 0.4949286878108978, "learning_rate": 1.057961032943912e-06, "loss": 0.528, "step": 2916 }, { "epoch": 0.49744201909959074, "grad_norm": 0.37806761264801025, "learning_rate": 1.0574096332815389e-06, "loss": 0.5117, "step": 2917 }, { "epoch": 0.497612551159618, "grad_norm": 0.574327290058136, "learning_rate": 1.0568582161059725e-06, "loss": 0.5078, "step": 2918 }, { "epoch": 0.4977830832196453, "grad_norm": 0.38559508323669434, "learning_rate": 1.0563067815854267e-06, "loss": 0.5031, "step": 2919 }, { "epoch": 0.4979536152796726, "grad_norm": 0.5216695666313171, "learning_rate": 1.0557553298881204e-06, "loss": 0.5249, "step": 2920 }, { "epoch": 0.49812414733969984, "grad_norm": 0.5521080493927002, "learning_rate": 1.0552038611822775e-06, "loss": 0.5216, "step": 2921 }, { "epoch": 0.49829467939972716, "grad_norm": 0.4828493893146515, "learning_rate": 1.0546523756361272e-06, "loss": 0.5181, "step": 2922 }, { "epoch": 0.4984652114597544, "grad_norm": 0.5427225828170776, "learning_rate": 1.054100873417904e-06, "loss": 0.5269, "step": 2923 }, { "epoch": 0.49863574351978174, "grad_norm": 0.48165130615234375, "learning_rate": 1.0535493546958471e-06, "loss": 0.523, "step": 2924 }, { "epoch": 0.498806275579809, "grad_norm": 0.41212841868400574, "learning_rate": 1.0529978196382012e-06, "loss": 0.5248, "step": 2925 }, { "epoch": 0.49897680763983626, "grad_norm": 0.5029284954071045, "learning_rate": 1.0524462684132157e-06, "loss": 0.5132, "step": 2926 }, { "epoch": 0.4991473396998636, "grad_norm": 0.4131450355052948, "learning_rate": 1.0518947011891448e-06, "loss": 0.5092, "step": 2927 }, { "epoch": 0.49931787175989084, "grad_norm": 0.44756412506103516, "learning_rate": 1.051343118134248e-06, "loss": 0.5216, "step": 2928 }, { "epoch": 0.49948840381991816, "grad_norm": 0.4106735289096832, "learning_rate": 1.0507915194167893e-06, "loss": 0.5198, "step": 2929 }, { "epoch": 0.4996589358799454, "grad_norm": 0.5048739314079285, "learning_rate": 1.0502399052050378e-06, "loss": 0.5186, "step": 2930 }, { "epoch": 0.49982946793997274, "grad_norm": 0.547248125076294, "learning_rate": 1.0496882756672667e-06, "loss": 0.5242, "step": 2931 }, { "epoch": 0.5, "grad_norm": 0.41109588742256165, "learning_rate": 1.0491366309717549e-06, "loss": 0.5322, "step": 2932 }, { "epoch": 0.5001705320600273, "grad_norm": 0.4631933271884918, "learning_rate": 1.0485849712867844e-06, "loss": 0.5025, "step": 2933 }, { "epoch": 0.5003410641200545, "grad_norm": 0.3782828748226166, "learning_rate": 1.0480332967806436e-06, "loss": 0.5161, "step": 2934 }, { "epoch": 0.5005115961800819, "grad_norm": 0.43556368350982666, "learning_rate": 1.0474816076216237e-06, "loss": 0.53, "step": 2935 }, { "epoch": 0.5006821282401092, "grad_norm": 0.4012020230293274, "learning_rate": 1.0469299039780218e-06, "loss": 0.5137, "step": 2936 }, { "epoch": 0.5008526603001364, "grad_norm": 0.4638964831829071, "learning_rate": 1.0463781860181387e-06, "loss": 0.5277, "step": 2937 }, { "epoch": 0.5010231923601637, "grad_norm": 0.4636898636817932, "learning_rate": 1.0458264539102791e-06, "loss": 0.5314, "step": 2938 }, { "epoch": 0.501193724420191, "grad_norm": 0.363491415977478, "learning_rate": 1.0452747078227536e-06, "loss": 0.5171, "step": 2939 }, { "epoch": 0.5013642564802183, "grad_norm": 0.4326697885990143, "learning_rate": 1.044722947923875e-06, "loss": 0.5218, "step": 2940 }, { "epoch": 0.5015347885402456, "grad_norm": 0.5075788497924805, "learning_rate": 1.0441711743819618e-06, "loss": 0.5256, "step": 2941 }, { "epoch": 0.5017053206002728, "grad_norm": 0.6462679505348206, "learning_rate": 1.0436193873653363e-06, "loss": 0.534, "step": 2942 }, { "epoch": 0.5018758526603001, "grad_norm": 0.5000764727592468, "learning_rate": 1.0430675870423246e-06, "loss": 0.528, "step": 2943 }, { "epoch": 0.5020463847203275, "grad_norm": 0.45578262209892273, "learning_rate": 1.0425157735812572e-06, "loss": 0.5266, "step": 2944 }, { "epoch": 0.5022169167803547, "grad_norm": 0.6543422341346741, "learning_rate": 1.0419639471504683e-06, "loss": 0.5154, "step": 2945 }, { "epoch": 0.502387448840382, "grad_norm": 0.6309098601341248, "learning_rate": 1.0414121079182969e-06, "loss": 0.5236, "step": 2946 }, { "epoch": 0.5025579809004093, "grad_norm": 0.5663894414901733, "learning_rate": 1.0408602560530845e-06, "loss": 0.523, "step": 2947 }, { "epoch": 0.5027285129604365, "grad_norm": 0.454391211271286, "learning_rate": 1.0403083917231775e-06, "loss": 0.527, "step": 2948 }, { "epoch": 0.5028990450204639, "grad_norm": 0.4640673100948334, "learning_rate": 1.039756515096926e-06, "loss": 0.5185, "step": 2949 }, { "epoch": 0.5030695770804912, "grad_norm": 0.3818853795528412, "learning_rate": 1.0392046263426833e-06, "loss": 0.5175, "step": 2950 }, { "epoch": 0.5032401091405184, "grad_norm": 0.43370091915130615, "learning_rate": 1.0386527256288066e-06, "loss": 0.51, "step": 2951 }, { "epoch": 0.5034106412005457, "grad_norm": 0.49242711067199707, "learning_rate": 1.0381008131236576e-06, "loss": 0.5152, "step": 2952 }, { "epoch": 0.503581173260573, "grad_norm": 0.41316694021224976, "learning_rate": 1.0375488889956003e-06, "loss": 0.5147, "step": 2953 }, { "epoch": 0.5037517053206003, "grad_norm": 0.4294221103191376, "learning_rate": 1.0369969534130031e-06, "loss": 0.5175, "step": 2954 }, { "epoch": 0.5039222373806276, "grad_norm": 0.3391348421573639, "learning_rate": 1.0364450065442379e-06, "loss": 0.5031, "step": 2955 }, { "epoch": 0.5040927694406548, "grad_norm": 0.6170563697814941, "learning_rate": 1.0358930485576794e-06, "loss": 0.5033, "step": 2956 }, { "epoch": 0.5042633015006821, "grad_norm": 0.5732797980308533, "learning_rate": 1.035341079621706e-06, "loss": 0.5173, "step": 2957 }, { "epoch": 0.5044338335607094, "grad_norm": 0.3765585422515869, "learning_rate": 1.0347890999047e-06, "loss": 0.5338, "step": 2958 }, { "epoch": 0.5046043656207367, "grad_norm": 0.5756282806396484, "learning_rate": 1.0342371095750458e-06, "loss": 0.5016, "step": 2959 }, { "epoch": 0.504774897680764, "grad_norm": 0.5575646758079529, "learning_rate": 1.0336851088011322e-06, "loss": 0.5021, "step": 2960 }, { "epoch": 0.5049454297407913, "grad_norm": 0.4088723957538605, "learning_rate": 1.033133097751351e-06, "loss": 0.5094, "step": 2961 }, { "epoch": 0.5051159618008185, "grad_norm": 0.6527062058448792, "learning_rate": 1.0325810765940962e-06, "loss": 0.5093, "step": 2962 }, { "epoch": 0.5052864938608458, "grad_norm": 0.591107964515686, "learning_rate": 1.0320290454977662e-06, "loss": 0.5152, "step": 2963 }, { "epoch": 0.5054570259208732, "grad_norm": 0.5586540102958679, "learning_rate": 1.0314770046307613e-06, "loss": 0.5035, "step": 2964 }, { "epoch": 0.5056275579809004, "grad_norm": 0.49104535579681396, "learning_rate": 1.0309249541614854e-06, "loss": 0.5141, "step": 2965 }, { "epoch": 0.5057980900409277, "grad_norm": 0.6136547327041626, "learning_rate": 1.0303728942583453e-06, "loss": 0.5035, "step": 2966 }, { "epoch": 0.505968622100955, "grad_norm": 0.5435359477996826, "learning_rate": 1.0298208250897505e-06, "loss": 0.5019, "step": 2967 }, { "epoch": 0.5061391541609823, "grad_norm": 0.6272081732749939, "learning_rate": 1.0292687468241137e-06, "loss": 0.5139, "step": 2968 }, { "epoch": 0.5063096862210096, "grad_norm": 0.5421731472015381, "learning_rate": 1.0287166596298492e-06, "loss": 0.5028, "step": 2969 }, { "epoch": 0.5064802182810368, "grad_norm": 0.5001494884490967, "learning_rate": 1.028164563675376e-06, "loss": 0.5092, "step": 2970 }, { "epoch": 0.5066507503410641, "grad_norm": 0.6292235255241394, "learning_rate": 1.027612459129114e-06, "loss": 0.5036, "step": 2971 }, { "epoch": 0.5068212824010914, "grad_norm": 0.37526097893714905, "learning_rate": 1.0270603461594862e-06, "loss": 0.516, "step": 2972 }, { "epoch": 0.5069918144611187, "grad_norm": 0.6769919395446777, "learning_rate": 1.0265082249349188e-06, "loss": 0.5046, "step": 2973 }, { "epoch": 0.507162346521146, "grad_norm": 0.39580002427101135, "learning_rate": 1.02595609562384e-06, "loss": 0.5182, "step": 2974 }, { "epoch": 0.5073328785811733, "grad_norm": 0.6619483232498169, "learning_rate": 1.0254039583946804e-06, "loss": 0.5132, "step": 2975 }, { "epoch": 0.5075034106412005, "grad_norm": 0.5659598708152771, "learning_rate": 1.0248518134158732e-06, "loss": 0.503, "step": 2976 }, { "epoch": 0.5076739427012278, "grad_norm": 0.5417669415473938, "learning_rate": 1.0242996608558543e-06, "loss": 0.5119, "step": 2977 }, { "epoch": 0.5078444747612552, "grad_norm": 0.5509529709815979, "learning_rate": 1.0237475008830608e-06, "loss": 0.5094, "step": 2978 }, { "epoch": 0.5080150068212824, "grad_norm": 0.4370351731777191, "learning_rate": 1.0231953336659336e-06, "loss": 0.5269, "step": 2979 }, { "epoch": 0.5081855388813097, "grad_norm": 0.5920512080192566, "learning_rate": 1.0226431593729146e-06, "loss": 0.5026, "step": 2980 }, { "epoch": 0.508356070941337, "grad_norm": 0.4498980939388275, "learning_rate": 1.0220909781724479e-06, "loss": 0.5119, "step": 2981 }, { "epoch": 0.5085266030013642, "grad_norm": 0.654874861240387, "learning_rate": 1.0215387902329806e-06, "loss": 0.5116, "step": 2982 }, { "epoch": 0.5086971350613916, "grad_norm": 0.4357649087905884, "learning_rate": 1.0209865957229613e-06, "loss": 0.5143, "step": 2983 }, { "epoch": 0.5088676671214188, "grad_norm": 0.5545060038566589, "learning_rate": 1.0204343948108403e-06, "loss": 0.5138, "step": 2984 }, { "epoch": 0.5090381991814461, "grad_norm": 0.552129328250885, "learning_rate": 1.0198821876650702e-06, "loss": 0.5121, "step": 2985 }, { "epoch": 0.5092087312414734, "grad_norm": 0.6152744889259338, "learning_rate": 1.0193299744541065e-06, "loss": 0.5171, "step": 2986 }, { "epoch": 0.5093792633015006, "grad_norm": 0.42170122265815735, "learning_rate": 1.0187777553464038e-06, "loss": 0.5061, "step": 2987 }, { "epoch": 0.509549795361528, "grad_norm": 0.5506285429000854, "learning_rate": 1.0182255305104215e-06, "loss": 0.5162, "step": 2988 }, { "epoch": 0.5097203274215553, "grad_norm": 0.5527845025062561, "learning_rate": 1.0176733001146192e-06, "loss": 0.511, "step": 2989 }, { "epoch": 0.5098908594815825, "grad_norm": 0.43512266874313354, "learning_rate": 1.0171210643274582e-06, "loss": 0.4988, "step": 2990 }, { "epoch": 0.5100613915416098, "grad_norm": 0.4943109452724457, "learning_rate": 1.016568823317402e-06, "loss": 0.5137, "step": 2991 }, { "epoch": 0.5102319236016372, "grad_norm": 0.3899883031845093, "learning_rate": 1.0160165772529151e-06, "loss": 0.5275, "step": 2992 }, { "epoch": 0.5104024556616644, "grad_norm": 0.45650139451026917, "learning_rate": 1.0154643263024645e-06, "loss": 0.5107, "step": 2993 }, { "epoch": 0.5105729877216917, "grad_norm": 0.38634830713272095, "learning_rate": 1.0149120706345176e-06, "loss": 0.5121, "step": 2994 }, { "epoch": 0.510743519781719, "grad_norm": 0.46154528856277466, "learning_rate": 1.0143598104175435e-06, "loss": 0.5104, "step": 2995 }, { "epoch": 0.5109140518417462, "grad_norm": 0.5653091073036194, "learning_rate": 1.0138075458200133e-06, "loss": 0.5148, "step": 2996 }, { "epoch": 0.5110845839017736, "grad_norm": 0.47336509823799133, "learning_rate": 1.0132552770103988e-06, "loss": 0.5131, "step": 2997 }, { "epoch": 0.5112551159618008, "grad_norm": 0.4555085599422455, "learning_rate": 1.0127030041571733e-06, "loss": 0.5052, "step": 2998 }, { "epoch": 0.5114256480218281, "grad_norm": 0.395914226770401, "learning_rate": 1.0121507274288112e-06, "loss": 0.5144, "step": 2999 }, { "epoch": 0.5115961800818554, "grad_norm": 0.466209352016449, "learning_rate": 1.0115984469937885e-06, "loss": 0.5233, "step": 3000 }, { "epoch": 0.5117667121418826, "grad_norm": 0.4672727882862091, "learning_rate": 1.011046163020582e-06, "loss": 0.5094, "step": 3001 }, { "epoch": 0.51193724420191, "grad_norm": 0.38614600896835327, "learning_rate": 1.0104938756776696e-06, "loss": 0.5152, "step": 3002 }, { "epoch": 0.5121077762619373, "grad_norm": 0.4924212396144867, "learning_rate": 1.00994158513353e-06, "loss": 0.5141, "step": 3003 }, { "epoch": 0.5122783083219645, "grad_norm": 0.4234980642795563, "learning_rate": 1.009389291556643e-06, "loss": 0.5138, "step": 3004 }, { "epoch": 0.5124488403819918, "grad_norm": 0.5439965128898621, "learning_rate": 1.0088369951154904e-06, "loss": 0.5159, "step": 3005 }, { "epoch": 0.512619372442019, "grad_norm": 0.4107781648635864, "learning_rate": 1.0082846959785528e-06, "loss": 0.5081, "step": 3006 }, { "epoch": 0.5127899045020464, "grad_norm": 0.4752189517021179, "learning_rate": 1.0077323943143135e-06, "loss": 0.5137, "step": 3007 }, { "epoch": 0.5129604365620737, "grad_norm": 0.43996524810791016, "learning_rate": 1.0071800902912555e-06, "loss": 0.5139, "step": 3008 }, { "epoch": 0.513130968622101, "grad_norm": 0.34770911931991577, "learning_rate": 1.0066277840778627e-06, "loss": 0.515, "step": 3009 }, { "epoch": 0.5133015006821282, "grad_norm": 0.5615414977073669, "learning_rate": 1.0060754758426204e-06, "loss": 0.5093, "step": 3010 }, { "epoch": 0.5134720327421555, "grad_norm": 0.5643600225448608, "learning_rate": 1.0055231657540133e-06, "loss": 0.5072, "step": 3011 }, { "epoch": 0.5136425648021828, "grad_norm": 0.4654306173324585, "learning_rate": 1.0049708539805274e-06, "loss": 0.5197, "step": 3012 }, { "epoch": 0.5138130968622101, "grad_norm": 0.4522657096385956, "learning_rate": 1.0044185406906493e-06, "loss": 0.5174, "step": 3013 }, { "epoch": 0.5139836289222374, "grad_norm": 0.4825378656387329, "learning_rate": 1.003866226052866e-06, "loss": 0.5215, "step": 3014 }, { "epoch": 0.5141541609822646, "grad_norm": 0.40646103024482727, "learning_rate": 1.0033139102356643e-06, "loss": 0.5123, "step": 3015 }, { "epoch": 0.514324693042292, "grad_norm": 0.35144925117492676, "learning_rate": 1.0027615934075322e-06, "loss": 0.5184, "step": 3016 }, { "epoch": 0.5144952251023193, "grad_norm": 0.46346837282180786, "learning_rate": 1.0022092757369576e-06, "loss": 0.5117, "step": 3017 }, { "epoch": 0.5146657571623465, "grad_norm": 0.42966172099113464, "learning_rate": 1.0016569573924288e-06, "loss": 0.5138, "step": 3018 }, { "epoch": 0.5148362892223738, "grad_norm": 0.37160927057266235, "learning_rate": 1.0011046385424335e-06, "loss": 0.5076, "step": 3019 }, { "epoch": 0.515006821282401, "grad_norm": 0.5897586941719055, "learning_rate": 1.0005523193554615e-06, "loss": 0.5164, "step": 3020 }, { "epoch": 0.5151773533424284, "grad_norm": 0.45426005125045776, "learning_rate": 1.0000000000000002e-06, "loss": 0.5091, "step": 3021 }, { "epoch": 0.5153478854024557, "grad_norm": 0.415068656206131, "learning_rate": 9.994476806445394e-07, "loss": 0.5107, "step": 3022 }, { "epoch": 0.515518417462483, "grad_norm": 0.5580230951309204, "learning_rate": 9.988953614575669e-07, "loss": 0.5052, "step": 3023 }, { "epoch": 0.5156889495225102, "grad_norm": 0.4167662560939789, "learning_rate": 9.98343042607572e-07, "loss": 0.5057, "step": 3024 }, { "epoch": 0.5158594815825375, "grad_norm": 0.41590461134910583, "learning_rate": 9.977907242630427e-07, "loss": 0.5214, "step": 3025 }, { "epoch": 0.5160300136425648, "grad_norm": 0.4774099588394165, "learning_rate": 9.972384065924684e-07, "loss": 0.5142, "step": 3026 }, { "epoch": 0.5162005457025921, "grad_norm": 0.34568390250205994, "learning_rate": 9.96686089764336e-07, "loss": 0.508, "step": 3027 }, { "epoch": 0.5163710777626194, "grad_norm": 0.4375649094581604, "learning_rate": 9.961337739471345e-07, "loss": 0.5079, "step": 3028 }, { "epoch": 0.5165416098226466, "grad_norm": 0.35460013151168823, "learning_rate": 9.95581459309351e-07, "loss": 0.524, "step": 3029 }, { "epoch": 0.5167121418826739, "grad_norm": 0.42628592252731323, "learning_rate": 9.95029146019473e-07, "loss": 0.5116, "step": 3030 }, { "epoch": 0.5168826739427013, "grad_norm": 0.40556424856185913, "learning_rate": 9.944768342459873e-07, "loss": 0.5251, "step": 3031 }, { "epoch": 0.5170532060027285, "grad_norm": 0.36979591846466064, "learning_rate": 9.939245241573801e-07, "loss": 0.5207, "step": 3032 }, { "epoch": 0.5172237380627558, "grad_norm": 0.4147460162639618, "learning_rate": 9.933722159221377e-07, "loss": 0.5219, "step": 3033 }, { "epoch": 0.517394270122783, "grad_norm": 0.40708544850349426, "learning_rate": 9.928199097087448e-07, "loss": 0.5118, "step": 3034 }, { "epoch": 0.5175648021828103, "grad_norm": 0.37231746315956116, "learning_rate": 9.92267605685687e-07, "loss": 0.511, "step": 3035 }, { "epoch": 0.5177353342428377, "grad_norm": 0.3565663695335388, "learning_rate": 9.917153040214475e-07, "loss": 0.5111, "step": 3036 }, { "epoch": 0.517905866302865, "grad_norm": 0.36910760402679443, "learning_rate": 9.911630048845101e-07, "loss": 0.5102, "step": 3037 }, { "epoch": 0.5180763983628922, "grad_norm": 0.3910926580429077, "learning_rate": 9.906107084433573e-07, "loss": 0.4988, "step": 3038 }, { "epoch": 0.5182469304229195, "grad_norm": 0.47788098454475403, "learning_rate": 9.900584148664706e-07, "loss": 0.5046, "step": 3039 }, { "epoch": 0.5184174624829468, "grad_norm": 0.4352160096168518, "learning_rate": 9.895061243223312e-07, "loss": 0.5179, "step": 3040 }, { "epoch": 0.5185879945429741, "grad_norm": 0.4005039632320404, "learning_rate": 9.889538369794185e-07, "loss": 0.5084, "step": 3041 }, { "epoch": 0.5187585266030014, "grad_norm": 0.3612534999847412, "learning_rate": 9.884015530062119e-07, "loss": 0.5088, "step": 3042 }, { "epoch": 0.5189290586630286, "grad_norm": 0.49642056226730347, "learning_rate": 9.87849272571189e-07, "loss": 0.5052, "step": 3043 }, { "epoch": 0.5190995907230559, "grad_norm": 0.3578031063079834, "learning_rate": 9.87296995842827e-07, "loss": 0.5041, "step": 3044 }, { "epoch": 0.5192701227830833, "grad_norm": 0.4707255959510803, "learning_rate": 9.86744722989602e-07, "loss": 0.5136, "step": 3045 }, { "epoch": 0.5194406548431105, "grad_norm": 0.46732622385025024, "learning_rate": 9.86192454179987e-07, "loss": 0.4998, "step": 3046 }, { "epoch": 0.5196111869031378, "grad_norm": 0.36986300349235535, "learning_rate": 9.85640189582457e-07, "loss": 0.5026, "step": 3047 }, { "epoch": 0.519781718963165, "grad_norm": 0.39670634269714355, "learning_rate": 9.85087929365483e-07, "loss": 0.5172, "step": 3048 }, { "epoch": 0.5199522510231923, "grad_norm": 0.46700161695480347, "learning_rate": 9.84535673697536e-07, "loss": 0.5174, "step": 3049 }, { "epoch": 0.5201227830832197, "grad_norm": 0.5552010536193848, "learning_rate": 9.839834227470852e-07, "loss": 0.5088, "step": 3050 }, { "epoch": 0.520293315143247, "grad_norm": 0.4279073476791382, "learning_rate": 9.834311766825986e-07, "loss": 0.5052, "step": 3051 }, { "epoch": 0.5204638472032742, "grad_norm": 0.3493303656578064, "learning_rate": 9.828789356725421e-07, "loss": 0.5204, "step": 3052 }, { "epoch": 0.5206343792633015, "grad_norm": 0.4930592477321625, "learning_rate": 9.823266998853814e-07, "loss": 0.5065, "step": 3053 }, { "epoch": 0.5208049113233287, "grad_norm": 0.46885567903518677, "learning_rate": 9.817744694895792e-07, "loss": 0.5048, "step": 3054 }, { "epoch": 0.5209754433833561, "grad_norm": 0.42892614006996155, "learning_rate": 9.812222446535965e-07, "loss": 0.5076, "step": 3055 }, { "epoch": 0.5211459754433834, "grad_norm": 0.4651287794113159, "learning_rate": 9.806700255458943e-07, "loss": 0.5122, "step": 3056 }, { "epoch": 0.5213165075034106, "grad_norm": 0.4076344668865204, "learning_rate": 9.8011781233493e-07, "loss": 0.5085, "step": 3057 }, { "epoch": 0.5214870395634379, "grad_norm": 0.6352848410606384, "learning_rate": 9.795656051891602e-07, "loss": 0.5072, "step": 3058 }, { "epoch": 0.5216575716234653, "grad_norm": 0.4328773319721222, "learning_rate": 9.790134042770393e-07, "loss": 0.5162, "step": 3059 }, { "epoch": 0.5218281036834925, "grad_norm": 0.3829416036605835, "learning_rate": 9.7846120976702e-07, "loss": 0.5197, "step": 3060 }, { "epoch": 0.5219986357435198, "grad_norm": 0.3622390925884247, "learning_rate": 9.779090218275527e-07, "loss": 0.5038, "step": 3061 }, { "epoch": 0.522169167803547, "grad_norm": 0.38840627670288086, "learning_rate": 9.77356840627086e-07, "loss": 0.5145, "step": 3062 }, { "epoch": 0.5223396998635743, "grad_norm": 0.41121238470077515, "learning_rate": 9.768046663340671e-07, "loss": 0.5084, "step": 3063 }, { "epoch": 0.5225102319236017, "grad_norm": 0.3751465082168579, "learning_rate": 9.762524991169393e-07, "loss": 0.5109, "step": 3064 }, { "epoch": 0.522680763983629, "grad_norm": 0.49738723039627075, "learning_rate": 9.757003391441463e-07, "loss": 0.4974, "step": 3065 }, { "epoch": 0.5228512960436562, "grad_norm": 0.43950966000556946, "learning_rate": 9.75148186584127e-07, "loss": 0.5168, "step": 3066 }, { "epoch": 0.5230218281036835, "grad_norm": 0.4372667372226715, "learning_rate": 9.745960416053201e-07, "loss": 0.5041, "step": 3067 }, { "epoch": 0.5231923601637107, "grad_norm": 0.46111032366752625, "learning_rate": 9.740439043761605e-07, "loss": 0.5105, "step": 3068 }, { "epoch": 0.5233628922237381, "grad_norm": 0.5779291391372681, "learning_rate": 9.734917750650818e-07, "loss": 0.5112, "step": 3069 }, { "epoch": 0.5235334242837654, "grad_norm": 0.5116357803344727, "learning_rate": 9.729396538405144e-07, "loss": 0.5141, "step": 3070 }, { "epoch": 0.5237039563437926, "grad_norm": 0.44367215037345886, "learning_rate": 9.723875408708866e-07, "loss": 0.5067, "step": 3071 }, { "epoch": 0.5238744884038199, "grad_norm": 0.5452920198440552, "learning_rate": 9.718354363246245e-07, "loss": 0.5052, "step": 3072 }, { "epoch": 0.5240450204638472, "grad_norm": 0.5436156988143921, "learning_rate": 9.712833403701511e-07, "loss": 0.5013, "step": 3073 }, { "epoch": 0.5242155525238745, "grad_norm": 0.5032275915145874, "learning_rate": 9.707312531758869e-07, "loss": 0.5187, "step": 3074 }, { "epoch": 0.5243860845839018, "grad_norm": 0.4169192314147949, "learning_rate": 9.701791749102497e-07, "loss": 0.5139, "step": 3075 }, { "epoch": 0.524556616643929, "grad_norm": 0.4356226623058319, "learning_rate": 9.69627105741655e-07, "loss": 0.5155, "step": 3076 }, { "epoch": 0.5247271487039563, "grad_norm": 0.497597873210907, "learning_rate": 9.690750458385151e-07, "loss": 0.5078, "step": 3077 }, { "epoch": 0.5248976807639836, "grad_norm": 0.360411673784256, "learning_rate": 9.685229953692392e-07, "loss": 0.5152, "step": 3078 }, { "epoch": 0.525068212824011, "grad_norm": 0.5131139755249023, "learning_rate": 9.679709545022344e-07, "loss": 0.5061, "step": 3079 }, { "epoch": 0.5252387448840382, "grad_norm": 0.44146037101745605, "learning_rate": 9.67418923405904e-07, "loss": 0.4981, "step": 3080 }, { "epoch": 0.5254092769440655, "grad_norm": 0.3964713513851166, "learning_rate": 9.668669022486495e-07, "loss": 0.5057, "step": 3081 }, { "epoch": 0.5255798090040927, "grad_norm": 0.42470380663871765, "learning_rate": 9.66314891198868e-07, "loss": 0.5104, "step": 3082 }, { "epoch": 0.5257503410641201, "grad_norm": 0.488445520401001, "learning_rate": 9.657628904249547e-07, "loss": 0.5013, "step": 3083 }, { "epoch": 0.5259208731241474, "grad_norm": 0.5390302538871765, "learning_rate": 9.652109000953008e-07, "loss": 0.5103, "step": 3084 }, { "epoch": 0.5260914051841746, "grad_norm": 0.37049973011016846, "learning_rate": 9.646589203782943e-07, "loss": 0.5156, "step": 3085 }, { "epoch": 0.5262619372442019, "grad_norm": 0.4628862142562866, "learning_rate": 9.641069514423214e-07, "loss": 0.5112, "step": 3086 }, { "epoch": 0.5264324693042292, "grad_norm": 0.5830252766609192, "learning_rate": 9.635549934557627e-07, "loss": 0.5076, "step": 3087 }, { "epoch": 0.5266030013642565, "grad_norm": 0.46300166845321655, "learning_rate": 9.630030465869974e-07, "loss": 0.5254, "step": 3088 }, { "epoch": 0.5267735334242838, "grad_norm": 0.44175946712493896, "learning_rate": 9.624511110043998e-07, "loss": 0.523, "step": 3089 }, { "epoch": 0.526944065484311, "grad_norm": 0.41492757201194763, "learning_rate": 9.61899186876343e-07, "loss": 0.5051, "step": 3090 }, { "epoch": 0.5271145975443383, "grad_norm": 0.4990520477294922, "learning_rate": 9.613472743711937e-07, "loss": 0.502, "step": 3091 }, { "epoch": 0.5272851296043656, "grad_norm": 0.425994336605072, "learning_rate": 9.607953736573173e-07, "loss": 0.5174, "step": 3092 }, { "epoch": 0.527455661664393, "grad_norm": 0.4710950255393982, "learning_rate": 9.602434849030747e-07, "loss": 0.5128, "step": 3093 }, { "epoch": 0.5276261937244202, "grad_norm": 0.42161130905151367, "learning_rate": 9.59691608276823e-07, "loss": 0.5136, "step": 3094 }, { "epoch": 0.5277967257844475, "grad_norm": 0.36253276467323303, "learning_rate": 9.59139743946916e-07, "loss": 0.5055, "step": 3095 }, { "epoch": 0.5279672578444747, "grad_norm": 0.5080360174179077, "learning_rate": 9.585878920817032e-07, "loss": 0.5085, "step": 3096 }, { "epoch": 0.528137789904502, "grad_norm": 0.48823845386505127, "learning_rate": 9.58036052849532e-07, "loss": 0.5119, "step": 3097 }, { "epoch": 0.5283083219645294, "grad_norm": 0.38697031140327454, "learning_rate": 9.574842264187431e-07, "loss": 0.5214, "step": 3098 }, { "epoch": 0.5284788540245566, "grad_norm": 0.5833395719528198, "learning_rate": 9.569324129576757e-07, "loss": 0.5383, "step": 3099 }, { "epoch": 0.5286493860845839, "grad_norm": 0.5464732646942139, "learning_rate": 9.563806126346643e-07, "loss": 0.5107, "step": 3100 }, { "epoch": 0.5288199181446112, "grad_norm": 0.44349968433380127, "learning_rate": 9.558288256180387e-07, "loss": 0.5251, "step": 3101 }, { "epoch": 0.5289904502046384, "grad_norm": 0.4714204668998718, "learning_rate": 9.552770520761256e-07, "loss": 0.5167, "step": 3102 }, { "epoch": 0.5291609822646658, "grad_norm": 0.4778265953063965, "learning_rate": 9.54725292177247e-07, "loss": 0.5194, "step": 3103 }, { "epoch": 0.529331514324693, "grad_norm": 0.39685267210006714, "learning_rate": 9.541735460897212e-07, "loss": 0.5108, "step": 3104 }, { "epoch": 0.5295020463847203, "grad_norm": 0.47264039516448975, "learning_rate": 9.536218139818615e-07, "loss": 0.5057, "step": 3105 }, { "epoch": 0.5296725784447476, "grad_norm": 0.39579394459724426, "learning_rate": 9.530700960219787e-07, "loss": 0.5103, "step": 3106 }, { "epoch": 0.529843110504775, "grad_norm": 0.4496646523475647, "learning_rate": 9.525183923783769e-07, "loss": 0.5122, "step": 3107 }, { "epoch": 0.5300136425648022, "grad_norm": 0.5286594033241272, "learning_rate": 9.519667032193568e-07, "loss": 0.5137, "step": 3108 }, { "epoch": 0.5301841746248295, "grad_norm": 0.38828226923942566, "learning_rate": 9.514150287132161e-07, "loss": 0.5097, "step": 3109 }, { "epoch": 0.5303547066848567, "grad_norm": 0.39556238055229187, "learning_rate": 9.508633690282457e-07, "loss": 0.5174, "step": 3110 }, { "epoch": 0.530525238744884, "grad_norm": 0.3622030019760132, "learning_rate": 9.503117243327337e-07, "loss": 0.5115, "step": 3111 }, { "epoch": 0.5306957708049114, "grad_norm": 0.3833901584148407, "learning_rate": 9.497600947949626e-07, "loss": 0.5142, "step": 3112 }, { "epoch": 0.5308663028649386, "grad_norm": 0.38286980986595154, "learning_rate": 9.492084805832111e-07, "loss": 0.5074, "step": 3113 }, { "epoch": 0.5310368349249659, "grad_norm": 0.3525489270687103, "learning_rate": 9.486568818657522e-07, "loss": 0.5191, "step": 3114 }, { "epoch": 0.5312073669849932, "grad_norm": 0.39206886291503906, "learning_rate": 9.481052988108558e-07, "loss": 0.5167, "step": 3115 }, { "epoch": 0.5313778990450204, "grad_norm": 0.3706628978252411, "learning_rate": 9.475537315867851e-07, "loss": 0.5194, "step": 3116 }, { "epoch": 0.5315484311050478, "grad_norm": 0.4675542116165161, "learning_rate": 9.470021803617991e-07, "loss": 0.5163, "step": 3117 }, { "epoch": 0.531718963165075, "grad_norm": 0.4427764117717743, "learning_rate": 9.464506453041533e-07, "loss": 0.5143, "step": 3118 }, { "epoch": 0.5318894952251023, "grad_norm": 0.3776489794254303, "learning_rate": 9.458991265820963e-07, "loss": 0.5236, "step": 3119 }, { "epoch": 0.5320600272851296, "grad_norm": 0.40652182698249817, "learning_rate": 9.453476243638731e-07, "loss": 0.5171, "step": 3120 }, { "epoch": 0.5322305593451568, "grad_norm": 0.41259488463401794, "learning_rate": 9.447961388177228e-07, "loss": 0.5131, "step": 3121 }, { "epoch": 0.5324010914051842, "grad_norm": 0.4117606282234192, "learning_rate": 9.4424467011188e-07, "loss": 0.5285, "step": 3122 }, { "epoch": 0.5325716234652115, "grad_norm": 0.4624188542366028, "learning_rate": 9.436932184145738e-07, "loss": 0.5124, "step": 3123 }, { "epoch": 0.5327421555252387, "grad_norm": 0.3732365369796753, "learning_rate": 9.431417838940281e-07, "loss": 0.5117, "step": 3124 }, { "epoch": 0.532912687585266, "grad_norm": 0.42948514223098755, "learning_rate": 9.425903667184619e-07, "loss": 0.5089, "step": 3125 }, { "epoch": 0.5330832196452933, "grad_norm": 0.5732792615890503, "learning_rate": 9.420389670560881e-07, "loss": 0.5171, "step": 3126 }, { "epoch": 0.5332537517053206, "grad_norm": 0.701641857624054, "learning_rate": 9.41487585075116e-07, "loss": 0.5106, "step": 3127 }, { "epoch": 0.5334242837653479, "grad_norm": 0.6061239838600159, "learning_rate": 9.409362209437475e-07, "loss": 0.519, "step": 3128 }, { "epoch": 0.5335948158253752, "grad_norm": 0.40224212408065796, "learning_rate": 9.403848748301804e-07, "loss": 0.5081, "step": 3129 }, { "epoch": 0.5337653478854024, "grad_norm": 0.5239095687866211, "learning_rate": 9.39833546902606e-07, "loss": 0.5132, "step": 3130 }, { "epoch": 0.5339358799454298, "grad_norm": 0.6446024775505066, "learning_rate": 9.39282237329211e-07, "loss": 0.5107, "step": 3131 }, { "epoch": 0.534106412005457, "grad_norm": 0.43921467661857605, "learning_rate": 9.387309462781759e-07, "loss": 0.509, "step": 3132 }, { "epoch": 0.5342769440654843, "grad_norm": 0.45637884736061096, "learning_rate": 9.381796739176754e-07, "loss": 0.5099, "step": 3133 }, { "epoch": 0.5344474761255116, "grad_norm": 0.5327367782592773, "learning_rate": 9.376284204158799e-07, "loss": 0.505, "step": 3134 }, { "epoch": 0.5346180081855388, "grad_norm": 0.34699317812919617, "learning_rate": 9.370771859409515e-07, "loss": 0.5003, "step": 3135 }, { "epoch": 0.5347885402455662, "grad_norm": 0.4476794898509979, "learning_rate": 9.365259706610492e-07, "loss": 0.5071, "step": 3136 }, { "epoch": 0.5349590723055935, "grad_norm": 0.5096878409385681, "learning_rate": 9.359747747443242e-07, "loss": 0.5055, "step": 3137 }, { "epoch": 0.5351296043656207, "grad_norm": 0.33670952916145325, "learning_rate": 9.354235983589229e-07, "loss": 0.5079, "step": 3138 }, { "epoch": 0.535300136425648, "grad_norm": 0.41531407833099365, "learning_rate": 9.348724416729853e-07, "loss": 0.516, "step": 3139 }, { "epoch": 0.5354706684856753, "grad_norm": 0.41998550295829773, "learning_rate": 9.34321304854645e-07, "loss": 0.5047, "step": 3140 }, { "epoch": 0.5356412005457026, "grad_norm": 0.3687032163143158, "learning_rate": 9.337701880720304e-07, "loss": 0.4934, "step": 3141 }, { "epoch": 0.5358117326057299, "grad_norm": 0.36565807461738586, "learning_rate": 9.332190914932629e-07, "loss": 0.4982, "step": 3142 }, { "epoch": 0.5359822646657572, "grad_norm": 0.389960914850235, "learning_rate": 9.326680152864592e-07, "loss": 0.5048, "step": 3143 }, { "epoch": 0.5361527967257844, "grad_norm": 0.44277599453926086, "learning_rate": 9.321169596197276e-07, "loss": 0.5068, "step": 3144 }, { "epoch": 0.5363233287858117, "grad_norm": 0.4231550991535187, "learning_rate": 9.315659246611721e-07, "loss": 0.4989, "step": 3145 }, { "epoch": 0.536493860845839, "grad_norm": 0.4637570083141327, "learning_rate": 9.310149105788893e-07, "loss": 0.5062, "step": 3146 }, { "epoch": 0.5366643929058663, "grad_norm": 0.34833332896232605, "learning_rate": 9.3046391754097e-07, "loss": 0.5156, "step": 3147 }, { "epoch": 0.5368349249658936, "grad_norm": 0.41452687978744507, "learning_rate": 9.299129457154979e-07, "loss": 0.5069, "step": 3148 }, { "epoch": 0.5370054570259208, "grad_norm": 0.42182138562202454, "learning_rate": 9.293619952705509e-07, "loss": 0.4999, "step": 3149 }, { "epoch": 0.5371759890859482, "grad_norm": 0.36753180623054504, "learning_rate": 9.288110663742001e-07, "loss": 0.4943, "step": 3150 }, { "epoch": 0.5373465211459755, "grad_norm": 0.417085200548172, "learning_rate": 9.282601591945097e-07, "loss": 0.513, "step": 3151 }, { "epoch": 0.5375170532060027, "grad_norm": 0.5050751566886902, "learning_rate": 9.277092738995385e-07, "loss": 0.4978, "step": 3152 }, { "epoch": 0.53768758526603, "grad_norm": 0.3634929656982422, "learning_rate": 9.271584106573365e-07, "loss": 0.5179, "step": 3153 }, { "epoch": 0.5378581173260573, "grad_norm": 0.4662638306617737, "learning_rate": 9.266075696359496e-07, "loss": 0.5064, "step": 3154 }, { "epoch": 0.5380286493860846, "grad_norm": 0.47530871629714966, "learning_rate": 9.260567510034146e-07, "loss": 0.5048, "step": 3155 }, { "epoch": 0.5381991814461119, "grad_norm": 0.3750818073749542, "learning_rate": 9.255059549277625e-07, "loss": 0.4968, "step": 3156 }, { "epoch": 0.5383697135061392, "grad_norm": 0.4817650318145752, "learning_rate": 9.249551815770178e-07, "loss": 0.5161, "step": 3157 }, { "epoch": 0.5385402455661664, "grad_norm": 0.385273277759552, "learning_rate": 9.24404431119197e-07, "loss": 0.5048, "step": 3158 }, { "epoch": 0.5387107776261937, "grad_norm": 0.39786481857299805, "learning_rate": 9.238537037223106e-07, "loss": 0.5026, "step": 3159 }, { "epoch": 0.538881309686221, "grad_norm": 0.38584035634994507, "learning_rate": 9.233029995543612e-07, "loss": 0.5089, "step": 3160 }, { "epoch": 0.5390518417462483, "grad_norm": 0.36149051785469055, "learning_rate": 9.227523187833456e-07, "loss": 0.5134, "step": 3161 }, { "epoch": 0.5392223738062756, "grad_norm": 0.3750452697277069, "learning_rate": 9.222016615772527e-07, "loss": 0.5117, "step": 3162 }, { "epoch": 0.5393929058663028, "grad_norm": 0.33727264404296875, "learning_rate": 9.216510281040628e-07, "loss": 0.5042, "step": 3163 }, { "epoch": 0.5395634379263301, "grad_norm": 0.39926877617836, "learning_rate": 9.211004185317517e-07, "loss": 0.5012, "step": 3164 }, { "epoch": 0.5397339699863575, "grad_norm": 0.4951111078262329, "learning_rate": 9.205498330282858e-07, "loss": 0.5006, "step": 3165 }, { "epoch": 0.5399045020463847, "grad_norm": 0.3622974157333374, "learning_rate": 9.199992717616253e-07, "loss": 0.5041, "step": 3166 }, { "epoch": 0.540075034106412, "grad_norm": 0.40538978576660156, "learning_rate": 9.194487348997224e-07, "loss": 0.4917, "step": 3167 }, { "epoch": 0.5402455661664393, "grad_norm": 0.470736563205719, "learning_rate": 9.188982226105223e-07, "loss": 0.5079, "step": 3168 }, { "epoch": 0.5404160982264665, "grad_norm": 0.33788180351257324, "learning_rate": 9.183477350619617e-07, "loss": 0.5116, "step": 3169 }, { "epoch": 0.5405866302864939, "grad_norm": 0.4198651611804962, "learning_rate": 9.177972724219717e-07, "loss": 0.5108, "step": 3170 }, { "epoch": 0.5407571623465212, "grad_norm": 0.42386049032211304, "learning_rate": 9.172468348584741e-07, "loss": 0.5058, "step": 3171 }, { "epoch": 0.5409276944065484, "grad_norm": 0.4214495122432709, "learning_rate": 9.166964225393827e-07, "loss": 0.508, "step": 3172 }, { "epoch": 0.5410982264665757, "grad_norm": 0.3338247239589691, "learning_rate": 9.161460356326058e-07, "loss": 0.5041, "step": 3173 }, { "epoch": 0.541268758526603, "grad_norm": 0.37674450874328613, "learning_rate": 9.155956743060417e-07, "loss": 0.504, "step": 3174 }, { "epoch": 0.5414392905866303, "grad_norm": 0.3888840675354004, "learning_rate": 9.150453387275823e-07, "loss": 0.5033, "step": 3175 }, { "epoch": 0.5416098226466576, "grad_norm": 0.3640449047088623, "learning_rate": 9.14495029065111e-07, "loss": 0.5128, "step": 3176 }, { "epoch": 0.5417803547066848, "grad_norm": 0.40192922949790955, "learning_rate": 9.139447454865035e-07, "loss": 0.5085, "step": 3177 }, { "epoch": 0.5419508867667121, "grad_norm": 0.3610800802707672, "learning_rate": 9.133944881596273e-07, "loss": 0.5013, "step": 3178 }, { "epoch": 0.5421214188267395, "grad_norm": 0.5778458118438721, "learning_rate": 9.128442572523419e-07, "loss": 0.51, "step": 3179 }, { "epoch": 0.5422919508867667, "grad_norm": 0.6906993985176086, "learning_rate": 9.122940529324998e-07, "loss": 0.5077, "step": 3180 }, { "epoch": 0.542462482946794, "grad_norm": 0.5404031276702881, "learning_rate": 9.117438753679434e-07, "loss": 0.506, "step": 3181 }, { "epoch": 0.5426330150068213, "grad_norm": 0.41175949573516846, "learning_rate": 9.111937247265089e-07, "loss": 0.5104, "step": 3182 }, { "epoch": 0.5428035470668485, "grad_norm": 0.37495842576026917, "learning_rate": 9.10643601176023e-07, "loss": 0.4939, "step": 3183 }, { "epoch": 0.5429740791268759, "grad_norm": 0.5369127988815308, "learning_rate": 9.100935048843048e-07, "loss": 0.5115, "step": 3184 }, { "epoch": 0.5431446111869032, "grad_norm": 0.45206519961357117, "learning_rate": 9.095434360191643e-07, "loss": 0.5149, "step": 3185 }, { "epoch": 0.5433151432469304, "grad_norm": 0.3371720612049103, "learning_rate": 9.089933947484048e-07, "loss": 0.5028, "step": 3186 }, { "epoch": 0.5434856753069577, "grad_norm": 0.4170704483985901, "learning_rate": 9.084433812398192e-07, "loss": 0.4902, "step": 3187 }, { "epoch": 0.543656207366985, "grad_norm": 0.40767478942871094, "learning_rate": 9.078933956611925e-07, "loss": 0.505, "step": 3188 }, { "epoch": 0.5438267394270123, "grad_norm": 0.4488968253135681, "learning_rate": 9.073434381803025e-07, "loss": 0.4982, "step": 3189 }, { "epoch": 0.5439972714870396, "grad_norm": 0.3908664584159851, "learning_rate": 9.067935089649167e-07, "loss": 0.5007, "step": 3190 }, { "epoch": 0.5441678035470668, "grad_norm": 0.3626483678817749, "learning_rate": 9.062436081827951e-07, "loss": 0.5117, "step": 3191 }, { "epoch": 0.5443383356070941, "grad_norm": 0.4268817901611328, "learning_rate": 9.056937360016881e-07, "loss": 0.5233, "step": 3192 }, { "epoch": 0.5445088676671214, "grad_norm": 0.35921770334243774, "learning_rate": 9.051438925893385e-07, "loss": 0.51, "step": 3193 }, { "epoch": 0.5446793997271487, "grad_norm": 0.4715425670146942, "learning_rate": 9.045940781134794e-07, "loss": 0.5065, "step": 3194 }, { "epoch": 0.544849931787176, "grad_norm": 0.4177314341068268, "learning_rate": 9.040442927418352e-07, "loss": 0.5079, "step": 3195 }, { "epoch": 0.5450204638472033, "grad_norm": 0.4322715401649475, "learning_rate": 9.034945366421219e-07, "loss": 0.5081, "step": 3196 }, { "epoch": 0.5451909959072305, "grad_norm": 0.4056400656700134, "learning_rate": 9.029448099820457e-07, "loss": 0.4995, "step": 3197 }, { "epoch": 0.5453615279672579, "grad_norm": 0.4988594949245453, "learning_rate": 9.023951129293055e-07, "loss": 0.5139, "step": 3198 }, { "epoch": 0.5455320600272852, "grad_norm": 0.5313045382499695, "learning_rate": 9.01845445651589e-07, "loss": 0.5142, "step": 3199 }, { "epoch": 0.5457025920873124, "grad_norm": 0.44585803151130676, "learning_rate": 9.012958083165766e-07, "loss": 0.5125, "step": 3200 }, { "epoch": 0.5458731241473397, "grad_norm": 0.37890028953552246, "learning_rate": 9.007462010919388e-07, "loss": 0.5197, "step": 3201 }, { "epoch": 0.546043656207367, "grad_norm": 0.41216591000556946, "learning_rate": 9.001966241453363e-07, "loss": 0.5216, "step": 3202 }, { "epoch": 0.5462141882673943, "grad_norm": 0.3926418125629425, "learning_rate": 8.996470776444218e-07, "loss": 0.509, "step": 3203 }, { "epoch": 0.5463847203274216, "grad_norm": 0.4098254442214966, "learning_rate": 8.990975617568379e-07, "loss": 0.5067, "step": 3204 }, { "epoch": 0.5465552523874488, "grad_norm": 0.4524468779563904, "learning_rate": 8.985480766502185e-07, "loss": 0.4996, "step": 3205 }, { "epoch": 0.5467257844474761, "grad_norm": 0.3585079610347748, "learning_rate": 8.979986224921868e-07, "loss": 0.528, "step": 3206 }, { "epoch": 0.5468963165075034, "grad_norm": 0.44177594780921936, "learning_rate": 8.974491994503586e-07, "loss": 0.5174, "step": 3207 }, { "epoch": 0.5470668485675307, "grad_norm": 0.5257750153541565, "learning_rate": 8.968998076923382e-07, "loss": 0.5132, "step": 3208 }, { "epoch": 0.547237380627558, "grad_norm": 0.5164710879325867, "learning_rate": 8.963504473857219e-07, "loss": 0.5038, "step": 3209 }, { "epoch": 0.5474079126875853, "grad_norm": 0.47033828496932983, "learning_rate": 8.958011186980954e-07, "loss": 0.5092, "step": 3210 }, { "epoch": 0.5475784447476125, "grad_norm": 0.34907910227775574, "learning_rate": 8.95251821797035e-07, "loss": 0.5168, "step": 3211 }, { "epoch": 0.5477489768076398, "grad_norm": 0.5167688727378845, "learning_rate": 8.947025568501076e-07, "loss": 0.5157, "step": 3212 }, { "epoch": 0.5479195088676672, "grad_norm": 0.4435700476169586, "learning_rate": 8.9415332402487e-07, "loss": 0.5127, "step": 3213 }, { "epoch": 0.5480900409276944, "grad_norm": 0.38165926933288574, "learning_rate": 8.936041234888694e-07, "loss": 0.5167, "step": 3214 }, { "epoch": 0.5482605729877217, "grad_norm": 0.4699343740940094, "learning_rate": 8.930549554096429e-07, "loss": 0.5038, "step": 3215 }, { "epoch": 0.548431105047749, "grad_norm": 0.3915722370147705, "learning_rate": 8.925058199547183e-07, "loss": 0.5098, "step": 3216 }, { "epoch": 0.5486016371077762, "grad_norm": 0.3906923532485962, "learning_rate": 8.919567172916131e-07, "loss": 0.5075, "step": 3217 }, { "epoch": 0.5487721691678036, "grad_norm": 0.45521485805511475, "learning_rate": 8.914076475878342e-07, "loss": 0.4987, "step": 3218 }, { "epoch": 0.5489427012278308, "grad_norm": 0.41876670718193054, "learning_rate": 8.908586110108795e-07, "loss": 0.5032, "step": 3219 }, { "epoch": 0.5491132332878581, "grad_norm": 0.5294049978256226, "learning_rate": 8.903096077282358e-07, "loss": 0.5062, "step": 3220 }, { "epoch": 0.5492837653478854, "grad_norm": 0.450884610414505, "learning_rate": 8.897606379073807e-07, "loss": 0.5055, "step": 3221 }, { "epoch": 0.5494542974079127, "grad_norm": 0.5044273138046265, "learning_rate": 8.892117017157806e-07, "loss": 0.5038, "step": 3222 }, { "epoch": 0.54962482946794, "grad_norm": 0.42872220277786255, "learning_rate": 8.88662799320893e-07, "loss": 0.5085, "step": 3223 }, { "epoch": 0.5497953615279673, "grad_norm": 0.42034220695495605, "learning_rate": 8.881139308901632e-07, "loss": 0.5109, "step": 3224 }, { "epoch": 0.5499658935879945, "grad_norm": 0.42412039637565613, "learning_rate": 8.87565096591028e-07, "loss": 0.5038, "step": 3225 }, { "epoch": 0.5501364256480218, "grad_norm": 0.4287491738796234, "learning_rate": 8.870162965909128e-07, "loss": 0.5076, "step": 3226 }, { "epoch": 0.5503069577080492, "grad_norm": 0.39096325635910034, "learning_rate": 8.864675310572324e-07, "loss": 0.52, "step": 3227 }, { "epoch": 0.5504774897680764, "grad_norm": 0.4827026426792145, "learning_rate": 8.859188001573917e-07, "loss": 0.5066, "step": 3228 }, { "epoch": 0.5506480218281037, "grad_norm": 0.41472142934799194, "learning_rate": 8.853701040587844e-07, "loss": 0.5058, "step": 3229 }, { "epoch": 0.550818553888131, "grad_norm": 0.37448644638061523, "learning_rate": 8.848214429287945e-07, "loss": 0.5129, "step": 3230 }, { "epoch": 0.5509890859481582, "grad_norm": 0.4399203658103943, "learning_rate": 8.842728169347939e-07, "loss": 0.5163, "step": 3231 }, { "epoch": 0.5511596180081856, "grad_norm": 0.3696085810661316, "learning_rate": 8.837242262441457e-07, "loss": 0.5135, "step": 3232 }, { "epoch": 0.5513301500682128, "grad_norm": 0.44104957580566406, "learning_rate": 8.831756710242006e-07, "loss": 0.4954, "step": 3233 }, { "epoch": 0.5515006821282401, "grad_norm": 0.368233323097229, "learning_rate": 8.826271514422984e-07, "loss": 0.514, "step": 3234 }, { "epoch": 0.5516712141882674, "grad_norm": 0.4140407145023346, "learning_rate": 8.820786676657698e-07, "loss": 0.5028, "step": 3235 }, { "epoch": 0.5518417462482946, "grad_norm": 0.4295588433742523, "learning_rate": 8.815302198619329e-07, "loss": 0.4929, "step": 3236 }, { "epoch": 0.552012278308322, "grad_norm": 0.4363129436969757, "learning_rate": 8.809818081980955e-07, "loss": 0.5101, "step": 3237 }, { "epoch": 0.5521828103683493, "grad_norm": 0.45414286851882935, "learning_rate": 8.80433432841554e-07, "loss": 0.5178, "step": 3238 }, { "epoch": 0.5523533424283765, "grad_norm": 0.5862538814544678, "learning_rate": 8.798850939595943e-07, "loss": 0.5096, "step": 3239 }, { "epoch": 0.5525238744884038, "grad_norm": 0.5525916218757629, "learning_rate": 8.793367917194908e-07, "loss": 0.5001, "step": 3240 }, { "epoch": 0.552694406548431, "grad_norm": 0.5342692136764526, "learning_rate": 8.787885262885064e-07, "loss": 0.5042, "step": 3241 }, { "epoch": 0.5528649386084584, "grad_norm": 0.4479496479034424, "learning_rate": 8.782402978338936e-07, "loss": 0.5152, "step": 3242 }, { "epoch": 0.5530354706684857, "grad_norm": 0.46658965945243835, "learning_rate": 8.776921065228924e-07, "loss": 0.5169, "step": 3243 }, { "epoch": 0.553206002728513, "grad_norm": 0.49106746912002563, "learning_rate": 8.771439525227335e-07, "loss": 0.5107, "step": 3244 }, { "epoch": 0.5533765347885402, "grad_norm": 0.5296341776847839, "learning_rate": 8.765958360006338e-07, "loss": 0.5091, "step": 3245 }, { "epoch": 0.5535470668485676, "grad_norm": 0.5267268419265747, "learning_rate": 8.760477571238007e-07, "loss": 0.5118, "step": 3246 }, { "epoch": 0.5537175989085948, "grad_norm": 0.41154804825782776, "learning_rate": 8.754997160594288e-07, "loss": 0.4995, "step": 3247 }, { "epoch": 0.5538881309686221, "grad_norm": 0.38790273666381836, "learning_rate": 8.749517129747021e-07, "loss": 0.4999, "step": 3248 }, { "epoch": 0.5540586630286494, "grad_norm": 0.442150741815567, "learning_rate": 8.744037480367922e-07, "loss": 0.5054, "step": 3249 }, { "epoch": 0.5542291950886766, "grad_norm": 0.36079126596450806, "learning_rate": 8.738558214128595e-07, "loss": 0.5083, "step": 3250 }, { "epoch": 0.554399727148704, "grad_norm": 0.6051453948020935, "learning_rate": 8.733079332700533e-07, "loss": 0.505, "step": 3251 }, { "epoch": 0.5545702592087313, "grad_norm": 0.533747673034668, "learning_rate": 8.727600837755096e-07, "loss": 0.4941, "step": 3252 }, { "epoch": 0.5547407912687585, "grad_norm": 0.4203532934188843, "learning_rate": 8.722122730963544e-07, "loss": 0.5084, "step": 3253 }, { "epoch": 0.5549113233287858, "grad_norm": 0.5379196405410767, "learning_rate": 8.716645013997004e-07, "loss": 0.5148, "step": 3254 }, { "epoch": 0.555081855388813, "grad_norm": 0.4198959469795227, "learning_rate": 8.711167688526495e-07, "loss": 0.5177, "step": 3255 }, { "epoch": 0.5552523874488404, "grad_norm": 0.47099459171295166, "learning_rate": 8.70569075622291e-07, "loss": 0.5112, "step": 3256 }, { "epoch": 0.5554229195088677, "grad_norm": 0.5614515542984009, "learning_rate": 8.700214218757021e-07, "loss": 0.5151, "step": 3257 }, { "epoch": 0.555593451568895, "grad_norm": 0.3508898913860321, "learning_rate": 8.694738077799487e-07, "loss": 0.5176, "step": 3258 }, { "epoch": 0.5557639836289222, "grad_norm": 0.5202228426933289, "learning_rate": 8.689262335020836e-07, "loss": 0.5096, "step": 3259 }, { "epoch": 0.5559345156889495, "grad_norm": 0.4458746910095215, "learning_rate": 8.683786992091489e-07, "loss": 0.5188, "step": 3260 }, { "epoch": 0.5561050477489768, "grad_norm": 0.3903205692768097, "learning_rate": 8.678312050681725e-07, "loss": 0.5101, "step": 3261 }, { "epoch": 0.5562755798090041, "grad_norm": 0.40033742785453796, "learning_rate": 8.67283751246172e-07, "loss": 0.5097, "step": 3262 }, { "epoch": 0.5564461118690314, "grad_norm": 0.4283085763454437, "learning_rate": 8.667363379101514e-07, "loss": 0.5046, "step": 3263 }, { "epoch": 0.5566166439290586, "grad_norm": 0.5091626644134521, "learning_rate": 8.661889652271031e-07, "loss": 0.5058, "step": 3264 }, { "epoch": 0.556787175989086, "grad_norm": 0.4430004358291626, "learning_rate": 8.656416333640068e-07, "loss": 0.517, "step": 3265 }, { "epoch": 0.5569577080491133, "grad_norm": 0.41451820731163025, "learning_rate": 8.650943424878291e-07, "loss": 0.5153, "step": 3266 }, { "epoch": 0.5571282401091405, "grad_norm": 0.43322592973709106, "learning_rate": 8.645470927655256e-07, "loss": 0.5051, "step": 3267 }, { "epoch": 0.5572987721691678, "grad_norm": 0.535474419593811, "learning_rate": 8.639998843640377e-07, "loss": 0.5031, "step": 3268 }, { "epoch": 0.557469304229195, "grad_norm": 0.46225863695144653, "learning_rate": 8.634527174502961e-07, "loss": 0.5047, "step": 3269 }, { "epoch": 0.5576398362892224, "grad_norm": 0.44309374690055847, "learning_rate": 8.629055921912165e-07, "loss": 0.5109, "step": 3270 }, { "epoch": 0.5578103683492497, "grad_norm": 0.5729724764823914, "learning_rate": 8.623585087537039e-07, "loss": 0.5114, "step": 3271 }, { "epoch": 0.557980900409277, "grad_norm": 0.4734056890010834, "learning_rate": 8.618114673046495e-07, "loss": 0.5318, "step": 3272 }, { "epoch": 0.5581514324693042, "grad_norm": 0.41645896434783936, "learning_rate": 8.61264468010932e-07, "loss": 0.515, "step": 3273 }, { "epoch": 0.5583219645293315, "grad_norm": 0.6118432283401489, "learning_rate": 8.607175110394172e-07, "loss": 0.5029, "step": 3274 }, { "epoch": 0.5584924965893588, "grad_norm": 0.5564768314361572, "learning_rate": 8.601705965569578e-07, "loss": 0.5058, "step": 3275 }, { "epoch": 0.5586630286493861, "grad_norm": 0.41912588477134705, "learning_rate": 8.596237247303942e-07, "loss": 0.5096, "step": 3276 }, { "epoch": 0.5588335607094134, "grad_norm": 0.5847219228744507, "learning_rate": 8.590768957265526e-07, "loss": 0.5122, "step": 3277 }, { "epoch": 0.5590040927694406, "grad_norm": 0.5370835661888123, "learning_rate": 8.585301097122474e-07, "loss": 0.5121, "step": 3278 }, { "epoch": 0.5591746248294679, "grad_norm": 0.4499013125896454, "learning_rate": 8.579833668542797e-07, "loss": 0.5173, "step": 3279 }, { "epoch": 0.5593451568894953, "grad_norm": 0.5028849244117737, "learning_rate": 8.574366673194358e-07, "loss": 0.5184, "step": 3280 }, { "epoch": 0.5595156889495225, "grad_norm": 0.6504407525062561, "learning_rate": 8.568900112744913e-07, "loss": 0.5077, "step": 3281 }, { "epoch": 0.5596862210095498, "grad_norm": 0.3862699270248413, "learning_rate": 8.563433988862065e-07, "loss": 0.5235, "step": 3282 }, { "epoch": 0.559856753069577, "grad_norm": 0.5601797699928284, "learning_rate": 8.557968303213297e-07, "loss": 0.508, "step": 3283 }, { "epoch": 0.5600272851296043, "grad_norm": 0.5876139998435974, "learning_rate": 8.552503057465949e-07, "loss": 0.5148, "step": 3284 }, { "epoch": 0.5601978171896317, "grad_norm": 0.36474472284317017, "learning_rate": 8.547038253287234e-07, "loss": 0.5152, "step": 3285 }, { "epoch": 0.560368349249659, "grad_norm": 0.5364213585853577, "learning_rate": 8.541573892344221e-07, "loss": 0.5174, "step": 3286 }, { "epoch": 0.5605388813096862, "grad_norm": 0.46430879831314087, "learning_rate": 8.536109976303859e-07, "loss": 0.5031, "step": 3287 }, { "epoch": 0.5607094133697135, "grad_norm": 0.4579286575317383, "learning_rate": 8.530646506832949e-07, "loss": 0.5067, "step": 3288 }, { "epoch": 0.5608799454297408, "grad_norm": 0.4855574071407318, "learning_rate": 8.525183485598152e-07, "loss": 0.5078, "step": 3289 }, { "epoch": 0.5610504774897681, "grad_norm": 0.40624135732650757, "learning_rate": 8.519720914266009e-07, "loss": 0.5156, "step": 3290 }, { "epoch": 0.5612210095497954, "grad_norm": 0.49204781651496887, "learning_rate": 8.514258794502906e-07, "loss": 0.5055, "step": 3291 }, { "epoch": 0.5613915416098226, "grad_norm": 0.4669218063354492, "learning_rate": 8.508797127975106e-07, "loss": 0.509, "step": 3292 }, { "epoch": 0.5615620736698499, "grad_norm": 0.5329267978668213, "learning_rate": 8.503335916348723e-07, "loss": 0.5175, "step": 3293 }, { "epoch": 0.5617326057298773, "grad_norm": 0.44461092352867126, "learning_rate": 8.497875161289738e-07, "loss": 0.5104, "step": 3294 }, { "epoch": 0.5619031377899045, "grad_norm": 0.47285303473472595, "learning_rate": 8.492414864463988e-07, "loss": 0.5246, "step": 3295 }, { "epoch": 0.5620736698499318, "grad_norm": 0.43100637197494507, "learning_rate": 8.486955027537172e-07, "loss": 0.5081, "step": 3296 }, { "epoch": 0.562244201909959, "grad_norm": 0.535839319229126, "learning_rate": 8.48149565217486e-07, "loss": 0.5141, "step": 3297 }, { "epoch": 0.5624147339699863, "grad_norm": 0.4091265797615051, "learning_rate": 8.476036740042457e-07, "loss": 0.5224, "step": 3298 }, { "epoch": 0.5625852660300137, "grad_norm": 0.4831198751926422, "learning_rate": 8.470578292805251e-07, "loss": 0.5016, "step": 3299 }, { "epoch": 0.562755798090041, "grad_norm": 0.49579057097435, "learning_rate": 8.465120312128371e-07, "loss": 0.5192, "step": 3300 }, { "epoch": 0.5629263301500682, "grad_norm": 0.5047213435173035, "learning_rate": 8.459662799676819e-07, "loss": 0.525, "step": 3301 }, { "epoch": 0.5630968622100955, "grad_norm": 0.44361287355422974, "learning_rate": 8.454205757115435e-07, "loss": 0.5213, "step": 3302 }, { "epoch": 0.5632673942701227, "grad_norm": 0.4202507734298706, "learning_rate": 8.448749186108936e-07, "loss": 0.5083, "step": 3303 }, { "epoch": 0.5634379263301501, "grad_norm": 0.5279480814933777, "learning_rate": 8.443293088321881e-07, "loss": 0.5062, "step": 3304 }, { "epoch": 0.5636084583901774, "grad_norm": 0.5536814332008362, "learning_rate": 8.437837465418686e-07, "loss": 0.5097, "step": 3305 }, { "epoch": 0.5637789904502046, "grad_norm": 0.5440229177474976, "learning_rate": 8.432382319063631e-07, "loss": 0.5128, "step": 3306 }, { "epoch": 0.5639495225102319, "grad_norm": 0.3901594579219818, "learning_rate": 8.426927650920841e-07, "loss": 0.5143, "step": 3307 }, { "epoch": 0.5641200545702592, "grad_norm": 0.49661266803741455, "learning_rate": 8.421473462654304e-07, "loss": 0.5159, "step": 3308 }, { "epoch": 0.5642905866302865, "grad_norm": 0.5394790768623352, "learning_rate": 8.416019755927852e-07, "loss": 0.5162, "step": 3309 }, { "epoch": 0.5644611186903138, "grad_norm": 0.4680979549884796, "learning_rate": 8.410566532405176e-07, "loss": 0.519, "step": 3310 }, { "epoch": 0.564631650750341, "grad_norm": 0.4116993248462677, "learning_rate": 8.40511379374982e-07, "loss": 0.5165, "step": 3311 }, { "epoch": 0.5648021828103683, "grad_norm": 0.5312186479568481, "learning_rate": 8.399661541625174e-07, "loss": 0.5268, "step": 3312 }, { "epoch": 0.5649727148703957, "grad_norm": 0.5763075947761536, "learning_rate": 8.394209777694489e-07, "loss": 0.5081, "step": 3313 }, { "epoch": 0.565143246930423, "grad_norm": 0.47177785634994507, "learning_rate": 8.388758503620856e-07, "loss": 0.512, "step": 3314 }, { "epoch": 0.5653137789904502, "grad_norm": 0.39427313208580017, "learning_rate": 8.383307721067232e-07, "loss": 0.51, "step": 3315 }, { "epoch": 0.5654843110504775, "grad_norm": 0.4980703890323639, "learning_rate": 8.377857431696405e-07, "loss": 0.5093, "step": 3316 }, { "epoch": 0.5656548431105047, "grad_norm": 0.3834083676338196, "learning_rate": 8.37240763717103e-07, "loss": 0.4996, "step": 3317 }, { "epoch": 0.5658253751705321, "grad_norm": 0.518470287322998, "learning_rate": 8.3669583391536e-07, "loss": 0.5063, "step": 3318 }, { "epoch": 0.5659959072305594, "grad_norm": 0.50276780128479, "learning_rate": 8.361509539306457e-07, "loss": 0.5073, "step": 3319 }, { "epoch": 0.5661664392905866, "grad_norm": 0.4729515612125397, "learning_rate": 8.356061239291799e-07, "loss": 0.5109, "step": 3320 }, { "epoch": 0.5663369713506139, "grad_norm": 0.46731480956077576, "learning_rate": 8.350613440771662e-07, "loss": 0.5067, "step": 3321 }, { "epoch": 0.5665075034106412, "grad_norm": 0.4468371272087097, "learning_rate": 8.345166145407936e-07, "loss": 0.5031, "step": 3322 }, { "epoch": 0.5666780354706685, "grad_norm": 0.424031525850296, "learning_rate": 8.339719354862352e-07, "loss": 0.5042, "step": 3323 }, { "epoch": 0.5668485675306958, "grad_norm": 0.3681753873825073, "learning_rate": 8.334273070796496e-07, "loss": 0.4957, "step": 3324 }, { "epoch": 0.567019099590723, "grad_norm": 0.519213080406189, "learning_rate": 8.328827294871787e-07, "loss": 0.5096, "step": 3325 }, { "epoch": 0.5671896316507503, "grad_norm": 0.4725252091884613, "learning_rate": 8.3233820287495e-07, "loss": 0.5221, "step": 3326 }, { "epoch": 0.5673601637107776, "grad_norm": 0.378584086894989, "learning_rate": 8.317937274090748e-07, "loss": 0.5168, "step": 3327 }, { "epoch": 0.567530695770805, "grad_norm": 0.46946248412132263, "learning_rate": 8.312493032556488e-07, "loss": 0.5068, "step": 3328 }, { "epoch": 0.5677012278308322, "grad_norm": 0.4181266129016876, "learning_rate": 8.307049305807526e-07, "loss": 0.5022, "step": 3329 }, { "epoch": 0.5678717598908595, "grad_norm": 0.46339544653892517, "learning_rate": 8.301606095504502e-07, "loss": 0.5052, "step": 3330 }, { "epoch": 0.5680422919508867, "grad_norm": 0.5504735708236694, "learning_rate": 8.296163403307912e-07, "loss": 0.4989, "step": 3331 }, { "epoch": 0.568212824010914, "grad_norm": 0.40414270758628845, "learning_rate": 8.290721230878075e-07, "loss": 0.5136, "step": 3332 }, { "epoch": 0.5683833560709414, "grad_norm": 0.4675517678260803, "learning_rate": 8.285279579875171e-07, "loss": 0.4997, "step": 3333 }, { "epoch": 0.5685538881309686, "grad_norm": 0.40143638849258423, "learning_rate": 8.27983845195921e-07, "loss": 0.5199, "step": 3334 }, { "epoch": 0.5687244201909959, "grad_norm": 0.43805834650993347, "learning_rate": 8.274397848790042e-07, "loss": 0.515, "step": 3335 }, { "epoch": 0.5688949522510232, "grad_norm": 0.48092415928840637, "learning_rate": 8.268957772027362e-07, "loss": 0.4987, "step": 3336 }, { "epoch": 0.5690654843110505, "grad_norm": 0.3663955330848694, "learning_rate": 8.263518223330698e-07, "loss": 0.5049, "step": 3337 }, { "epoch": 0.5692360163710778, "grad_norm": 0.5174563527107239, "learning_rate": 8.258079204359427e-07, "loss": 0.5111, "step": 3338 }, { "epoch": 0.569406548431105, "grad_norm": 0.4845089912414551, "learning_rate": 8.252640716772749e-07, "loss": 0.5083, "step": 3339 }, { "epoch": 0.5695770804911323, "grad_norm": 0.48515674471855164, "learning_rate": 8.247202762229724e-07, "loss": 0.5043, "step": 3340 }, { "epoch": 0.5697476125511596, "grad_norm": 0.4666123688220978, "learning_rate": 8.241765342389223e-07, "loss": 0.5016, "step": 3341 }, { "epoch": 0.569918144611187, "grad_norm": 0.3873993754386902, "learning_rate": 8.236328458909976e-07, "loss": 0.5087, "step": 3342 }, { "epoch": 0.5700886766712142, "grad_norm": 0.3890726864337921, "learning_rate": 8.23089211345054e-07, "loss": 0.5068, "step": 3343 }, { "epoch": 0.5702592087312415, "grad_norm": 0.3836226165294647, "learning_rate": 8.225456307669302e-07, "loss": 0.5125, "step": 3344 }, { "epoch": 0.5704297407912687, "grad_norm": 0.45001837611198425, "learning_rate": 8.2200210432245e-07, "loss": 0.5034, "step": 3345 }, { "epoch": 0.570600272851296, "grad_norm": 0.43816789984703064, "learning_rate": 8.214586321774191e-07, "loss": 0.5026, "step": 3346 }, { "epoch": 0.5707708049113234, "grad_norm": 0.4553149342536926, "learning_rate": 8.209152144976276e-07, "loss": 0.513, "step": 3347 }, { "epoch": 0.5709413369713506, "grad_norm": 0.38841933012008667, "learning_rate": 8.203718514488484e-07, "loss": 0.5038, "step": 3348 }, { "epoch": 0.5711118690313779, "grad_norm": 0.479274719953537, "learning_rate": 8.198285431968389e-07, "loss": 0.4954, "step": 3349 }, { "epoch": 0.5712824010914052, "grad_norm": 0.5238628387451172, "learning_rate": 8.192852899073381e-07, "loss": 0.5164, "step": 3350 }, { "epoch": 0.5714529331514324, "grad_norm": 0.38953498005867004, "learning_rate": 8.187420917460687e-07, "loss": 0.5083, "step": 3351 }, { "epoch": 0.5716234652114598, "grad_norm": 0.407196044921875, "learning_rate": 8.181989488787382e-07, "loss": 0.5073, "step": 3352 }, { "epoch": 0.571793997271487, "grad_norm": 0.43075326085090637, "learning_rate": 8.176558614710348e-07, "loss": 0.5148, "step": 3353 }, { "epoch": 0.5719645293315143, "grad_norm": 0.46021196246147156, "learning_rate": 8.171128296886317e-07, "loss": 0.5198, "step": 3354 }, { "epoch": 0.5721350613915416, "grad_norm": 0.46036338806152344, "learning_rate": 8.165698536971839e-07, "loss": 0.4978, "step": 3355 }, { "epoch": 0.572305593451569, "grad_norm": 0.4110967516899109, "learning_rate": 8.160269336623302e-07, "loss": 0.503, "step": 3356 }, { "epoch": 0.5724761255115962, "grad_norm": 0.43071249127388, "learning_rate": 8.154840697496918e-07, "loss": 0.5049, "step": 3357 }, { "epoch": 0.5726466575716235, "grad_norm": 0.4496152698993683, "learning_rate": 8.14941262124873e-07, "loss": 0.4995, "step": 3358 }, { "epoch": 0.5728171896316507, "grad_norm": 0.4697989225387573, "learning_rate": 8.143985109534609e-07, "loss": 0.5092, "step": 3359 }, { "epoch": 0.572987721691678, "grad_norm": 0.4309699833393097, "learning_rate": 8.138558164010252e-07, "loss": 0.4953, "step": 3360 }, { "epoch": 0.5731582537517054, "grad_norm": 0.38893890380859375, "learning_rate": 8.13313178633119e-07, "loss": 0.4957, "step": 3361 }, { "epoch": 0.5733287858117326, "grad_norm": 0.497585654258728, "learning_rate": 8.12770597815277e-07, "loss": 0.5056, "step": 3362 }, { "epoch": 0.5734993178717599, "grad_norm": 0.43927890062332153, "learning_rate": 8.122280741130177e-07, "loss": 0.5109, "step": 3363 }, { "epoch": 0.5736698499317872, "grad_norm": 0.48195308446884155, "learning_rate": 8.116856076918409e-07, "loss": 0.5158, "step": 3364 }, { "epoch": 0.5738403819918144, "grad_norm": 0.5446553230285645, "learning_rate": 8.111431987172304e-07, "loss": 0.5075, "step": 3365 }, { "epoch": 0.5740109140518418, "grad_norm": 0.39556846022605896, "learning_rate": 8.106008473546512e-07, "loss": 0.5109, "step": 3366 }, { "epoch": 0.574181446111869, "grad_norm": 0.5156859755516052, "learning_rate": 8.10058553769551e-07, "loss": 0.5011, "step": 3367 }, { "epoch": 0.5743519781718963, "grad_norm": 0.5255439877510071, "learning_rate": 8.095163181273609e-07, "loss": 0.5039, "step": 3368 }, { "epoch": 0.5745225102319236, "grad_norm": 0.5791894793510437, "learning_rate": 8.089741405934923e-07, "loss": 0.511, "step": 3369 }, { "epoch": 0.5746930422919508, "grad_norm": 0.4920649826526642, "learning_rate": 8.084320213333413e-07, "loss": 0.5049, "step": 3370 }, { "epoch": 0.5748635743519782, "grad_norm": 0.4737277328968048, "learning_rate": 8.078899605122842e-07, "loss": 0.4991, "step": 3371 }, { "epoch": 0.5750341064120055, "grad_norm": 0.47556519508361816, "learning_rate": 8.073479582956808e-07, "loss": 0.5036, "step": 3372 }, { "epoch": 0.5752046384720327, "grad_norm": 0.5832779407501221, "learning_rate": 8.068060148488723e-07, "loss": 0.5038, "step": 3373 }, { "epoch": 0.57537517053206, "grad_norm": 0.6054734587669373, "learning_rate": 8.062641303371819e-07, "loss": 0.5144, "step": 3374 }, { "epoch": 0.5755457025920873, "grad_norm": 0.4068281054496765, "learning_rate": 8.057223049259156e-07, "loss": 0.5004, "step": 3375 }, { "epoch": 0.5757162346521146, "grad_norm": 0.4555928111076355, "learning_rate": 8.051805387803603e-07, "loss": 0.5097, "step": 3376 }, { "epoch": 0.5758867667121419, "grad_norm": 0.5093322396278381, "learning_rate": 8.046388320657861e-07, "loss": 0.5004, "step": 3377 }, { "epoch": 0.5760572987721692, "grad_norm": 0.38450372219085693, "learning_rate": 8.040971849474433e-07, "loss": 0.5005, "step": 3378 }, { "epoch": 0.5762278308321964, "grad_norm": 0.5079644322395325, "learning_rate": 8.035555975905661e-07, "loss": 0.5154, "step": 3379 }, { "epoch": 0.5763983628922238, "grad_norm": 0.37952208518981934, "learning_rate": 8.030140701603684e-07, "loss": 0.5094, "step": 3380 }, { "epoch": 0.576568894952251, "grad_norm": 0.49966394901275635, "learning_rate": 8.024726028220475e-07, "loss": 0.4944, "step": 3381 }, { "epoch": 0.5767394270122783, "grad_norm": 0.44472551345825195, "learning_rate": 8.019311957407812e-07, "loss": 0.5114, "step": 3382 }, { "epoch": 0.5769099590723056, "grad_norm": 0.42356055974960327, "learning_rate": 8.013898490817295e-07, "loss": 0.4964, "step": 3383 }, { "epoch": 0.5770804911323328, "grad_norm": 0.49199751019477844, "learning_rate": 8.008485630100336e-07, "loss": 0.5038, "step": 3384 }, { "epoch": 0.5772510231923602, "grad_norm": 0.3819704055786133, "learning_rate": 8.003073376908165e-07, "loss": 0.5036, "step": 3385 }, { "epoch": 0.5774215552523875, "grad_norm": 0.47768130898475647, "learning_rate": 7.997661732891831e-07, "loss": 0.5118, "step": 3386 }, { "epoch": 0.5775920873124147, "grad_norm": 0.42203018069267273, "learning_rate": 7.992250699702184e-07, "loss": 0.4919, "step": 3387 }, { "epoch": 0.577762619372442, "grad_norm": 0.41574037075042725, "learning_rate": 7.986840278989903e-07, "loss": 0.5117, "step": 3388 }, { "epoch": 0.5779331514324693, "grad_norm": 0.4440769553184509, "learning_rate": 7.981430472405471e-07, "loss": 0.5062, "step": 3389 }, { "epoch": 0.5781036834924966, "grad_norm": 0.377749502658844, "learning_rate": 7.976021281599183e-07, "loss": 0.5134, "step": 3390 }, { "epoch": 0.5782742155525239, "grad_norm": 0.375430166721344, "learning_rate": 7.970612708221153e-07, "loss": 0.4975, "step": 3391 }, { "epoch": 0.5784447476125512, "grad_norm": 0.43011754751205444, "learning_rate": 7.9652047539213e-07, "loss": 0.5051, "step": 3392 }, { "epoch": 0.5786152796725784, "grad_norm": 0.3825616240501404, "learning_rate": 7.959797420349356e-07, "loss": 0.4956, "step": 3393 }, { "epoch": 0.5787858117326057, "grad_norm": 0.38164180517196655, "learning_rate": 7.954390709154863e-07, "loss": 0.5092, "step": 3394 }, { "epoch": 0.578956343792633, "grad_norm": 0.35513877868652344, "learning_rate": 7.948984621987181e-07, "loss": 0.5056, "step": 3395 }, { "epoch": 0.5791268758526603, "grad_norm": 0.41221168637275696, "learning_rate": 7.943579160495466e-07, "loss": 0.5131, "step": 3396 }, { "epoch": 0.5792974079126876, "grad_norm": 0.37688201665878296, "learning_rate": 7.938174326328696e-07, "loss": 0.5038, "step": 3397 }, { "epoch": 0.5794679399727148, "grad_norm": 0.4648706912994385, "learning_rate": 7.932770121135648e-07, "loss": 0.5067, "step": 3398 }, { "epoch": 0.5796384720327421, "grad_norm": 0.49630671739578247, "learning_rate": 7.927366546564911e-07, "loss": 0.5002, "step": 3399 }, { "epoch": 0.5798090040927695, "grad_norm": 0.4030620753765106, "learning_rate": 7.921963604264884e-07, "loss": 0.5017, "step": 3400 }, { "epoch": 0.5799795361527967, "grad_norm": 0.4664936065673828, "learning_rate": 7.916561295883768e-07, "loss": 0.5088, "step": 3401 }, { "epoch": 0.580150068212824, "grad_norm": 0.49963656067848206, "learning_rate": 7.911159623069576e-07, "loss": 0.4964, "step": 3402 }, { "epoch": 0.5803206002728513, "grad_norm": 0.4847971796989441, "learning_rate": 7.905758587470119e-07, "loss": 0.501, "step": 3403 }, { "epoch": 0.5804911323328786, "grad_norm": 0.48904651403427124, "learning_rate": 7.900358190733027e-07, "loss": 0.4932, "step": 3404 }, { "epoch": 0.5806616643929059, "grad_norm": 0.39514946937561035, "learning_rate": 7.894958434505726e-07, "loss": 0.5011, "step": 3405 }, { "epoch": 0.5808321964529332, "grad_norm": 0.5680859088897705, "learning_rate": 7.889559320435437e-07, "loss": 0.5034, "step": 3406 }, { "epoch": 0.5810027285129604, "grad_norm": 0.5151458978652954, "learning_rate": 7.88416085016921e-07, "loss": 0.4995, "step": 3407 }, { "epoch": 0.5811732605729877, "grad_norm": 0.4129141569137573, "learning_rate": 7.878763025353875e-07, "loss": 0.5064, "step": 3408 }, { "epoch": 0.581343792633015, "grad_norm": 0.5038077235221863, "learning_rate": 7.873365847636081e-07, "loss": 0.4989, "step": 3409 }, { "epoch": 0.5815143246930423, "grad_norm": 0.42516499757766724, "learning_rate": 7.867969318662268e-07, "loss": 0.5146, "step": 3410 }, { "epoch": 0.5816848567530696, "grad_norm": 0.5718380808830261, "learning_rate": 7.862573440078688e-07, "loss": 0.5106, "step": 3411 }, { "epoch": 0.5818553888130968, "grad_norm": 0.43516722321510315, "learning_rate": 7.857178213531386e-07, "loss": 0.5091, "step": 3412 }, { "epoch": 0.5820259208731241, "grad_norm": 0.5345479249954224, "learning_rate": 7.851783640666209e-07, "loss": 0.5034, "step": 3413 }, { "epoch": 0.5821964529331515, "grad_norm": 0.47487872838974, "learning_rate": 7.84638972312882e-07, "loss": 0.5073, "step": 3414 }, { "epoch": 0.5823669849931787, "grad_norm": 0.42536306381225586, "learning_rate": 7.840996462564655e-07, "loss": 0.5094, "step": 3415 }, { "epoch": 0.582537517053206, "grad_norm": 0.5384988784790039, "learning_rate": 7.835603860618972e-07, "loss": 0.4991, "step": 3416 }, { "epoch": 0.5827080491132333, "grad_norm": 0.4918755888938904, "learning_rate": 7.830211918936821e-07, "loss": 0.5217, "step": 3417 }, { "epoch": 0.5828785811732605, "grad_norm": 0.4351602792739868, "learning_rate": 7.824820639163049e-07, "loss": 0.5083, "step": 3418 }, { "epoch": 0.5830491132332879, "grad_norm": 0.4212780296802521, "learning_rate": 7.819430022942298e-07, "loss": 0.5074, "step": 3419 }, { "epoch": 0.5832196452933152, "grad_norm": 0.439581960439682, "learning_rate": 7.814040071919016e-07, "loss": 0.5052, "step": 3420 }, { "epoch": 0.5833901773533424, "grad_norm": 0.4218754470348358, "learning_rate": 7.808650787737443e-07, "loss": 0.4963, "step": 3421 }, { "epoch": 0.5835607094133697, "grad_norm": 0.441415935754776, "learning_rate": 7.803262172041611e-07, "loss": 0.517, "step": 3422 }, { "epoch": 0.583731241473397, "grad_norm": 0.5194700360298157, "learning_rate": 7.797874226475362e-07, "loss": 0.5146, "step": 3423 }, { "epoch": 0.5839017735334243, "grad_norm": 0.3435308039188385, "learning_rate": 7.79248695268232e-07, "loss": 0.5244, "step": 3424 }, { "epoch": 0.5840723055934516, "grad_norm": 0.44093021750450134, "learning_rate": 7.787100352305909e-07, "loss": 0.5132, "step": 3425 }, { "epoch": 0.5842428376534788, "grad_norm": 0.44838279485702515, "learning_rate": 7.781714426989347e-07, "loss": 0.508, "step": 3426 }, { "epoch": 0.5844133697135061, "grad_norm": 1.4488683938980103, "learning_rate": 7.776329178375651e-07, "loss": 0.5336, "step": 3427 }, { "epoch": 0.5845839017735335, "grad_norm": 0.48720383644104004, "learning_rate": 7.770944608107622e-07, "loss": 0.5148, "step": 3428 }, { "epoch": 0.5847544338335607, "grad_norm": 0.392868310213089, "learning_rate": 7.765560717827858e-07, "loss": 0.5061, "step": 3429 }, { "epoch": 0.584924965893588, "grad_norm": 0.3861992359161377, "learning_rate": 7.760177509178758e-07, "loss": 0.5166, "step": 3430 }, { "epoch": 0.5850954979536153, "grad_norm": 0.5674988031387329, "learning_rate": 7.754794983802496e-07, "loss": 0.5126, "step": 3431 }, { "epoch": 0.5852660300136425, "grad_norm": 0.4710623025894165, "learning_rate": 7.749413143341056e-07, "loss": 0.5124, "step": 3432 }, { "epoch": 0.5854365620736699, "grad_norm": 0.3838975131511688, "learning_rate": 7.744031989436201e-07, "loss": 0.5087, "step": 3433 }, { "epoch": 0.5856070941336972, "grad_norm": 0.45589226484298706, "learning_rate": 7.738651523729491e-07, "loss": 0.512, "step": 3434 }, { "epoch": 0.5857776261937244, "grad_norm": 0.3960130214691162, "learning_rate": 7.733271747862266e-07, "loss": 0.5034, "step": 3435 }, { "epoch": 0.5859481582537517, "grad_norm": 0.4465065002441406, "learning_rate": 7.727892663475672e-07, "loss": 0.5236, "step": 3436 }, { "epoch": 0.586118690313779, "grad_norm": 0.47854310274124146, "learning_rate": 7.722514272210628e-07, "loss": 0.5036, "step": 3437 }, { "epoch": 0.5862892223738063, "grad_norm": 0.38538694381713867, "learning_rate": 7.717136575707847e-07, "loss": 0.5217, "step": 3438 }, { "epoch": 0.5864597544338336, "grad_norm": 0.38084253668785095, "learning_rate": 7.711759575607838e-07, "loss": 0.53, "step": 3439 }, { "epoch": 0.5866302864938608, "grad_norm": 0.4159857928752899, "learning_rate": 7.706383273550883e-07, "loss": 0.5142, "step": 3440 }, { "epoch": 0.5868008185538881, "grad_norm": 0.40294331312179565, "learning_rate": 7.701007671177067e-07, "loss": 0.512, "step": 3441 }, { "epoch": 0.5869713506139154, "grad_norm": 0.39000409841537476, "learning_rate": 7.695632770126247e-07, "loss": 0.5046, "step": 3442 }, { "epoch": 0.5871418826739427, "grad_norm": 0.41626620292663574, "learning_rate": 7.690258572038078e-07, "loss": 0.5216, "step": 3443 }, { "epoch": 0.58731241473397, "grad_norm": 0.3855079114437103, "learning_rate": 7.684885078551991e-07, "loss": 0.5061, "step": 3444 }, { "epoch": 0.5874829467939973, "grad_norm": 0.38733789324760437, "learning_rate": 7.679512291307208e-07, "loss": 0.5116, "step": 3445 }, { "epoch": 0.5876534788540245, "grad_norm": 0.4045097827911377, "learning_rate": 7.674140211942732e-07, "loss": 0.5064, "step": 3446 }, { "epoch": 0.5878240109140518, "grad_norm": 0.40880081057548523, "learning_rate": 7.668768842097353e-07, "loss": 0.5273, "step": 3447 }, { "epoch": 0.5879945429740792, "grad_norm": 0.45862430334091187, "learning_rate": 7.663398183409645e-07, "loss": 0.5319, "step": 3448 }, { "epoch": 0.5881650750341064, "grad_norm": 0.35381215810775757, "learning_rate": 7.658028237517958e-07, "loss": 0.5178, "step": 3449 }, { "epoch": 0.5883356070941337, "grad_norm": 0.4584713280200958, "learning_rate": 7.652659006060436e-07, "loss": 0.5144, "step": 3450 }, { "epoch": 0.588506139154161, "grad_norm": 0.47925877571105957, "learning_rate": 7.647290490674997e-07, "loss": 0.5002, "step": 3451 }, { "epoch": 0.5886766712141883, "grad_norm": 0.36030012369155884, "learning_rate": 7.641922692999341e-07, "loss": 0.5165, "step": 3452 }, { "epoch": 0.5888472032742156, "grad_norm": 0.4329701066017151, "learning_rate": 7.636555614670954e-07, "loss": 0.5127, "step": 3453 }, { "epoch": 0.5890177353342428, "grad_norm": 0.4515969157218933, "learning_rate": 7.631189257327093e-07, "loss": 0.5046, "step": 3454 }, { "epoch": 0.5891882673942701, "grad_norm": 0.504779577255249, "learning_rate": 7.625823622604809e-07, "loss": 0.5058, "step": 3455 }, { "epoch": 0.5893587994542974, "grad_norm": 0.5348877310752869, "learning_rate": 7.620458712140916e-07, "loss": 0.5176, "step": 3456 }, { "epoch": 0.5895293315143247, "grad_norm": 0.4561511278152466, "learning_rate": 7.615094527572027e-07, "loss": 0.5042, "step": 3457 }, { "epoch": 0.589699863574352, "grad_norm": 0.5060758590698242, "learning_rate": 7.60973107053451e-07, "loss": 0.5286, "step": 3458 }, { "epoch": 0.5898703956343793, "grad_norm": 0.4909929037094116, "learning_rate": 7.604368342664534e-07, "loss": 0.5105, "step": 3459 }, { "epoch": 0.5900409276944065, "grad_norm": 0.4966579079627991, "learning_rate": 7.599006345598031e-07, "loss": 0.5055, "step": 3460 }, { "epoch": 0.5902114597544338, "grad_norm": 0.6382631063461304, "learning_rate": 7.59364508097071e-07, "loss": 0.5035, "step": 3461 }, { "epoch": 0.5903819918144612, "grad_norm": 0.575469970703125, "learning_rate": 7.588284550418068e-07, "loss": 0.5102, "step": 3462 }, { "epoch": 0.5905525238744884, "grad_norm": 0.5283661484718323, "learning_rate": 7.582924755575366e-07, "loss": 0.5117, "step": 3463 }, { "epoch": 0.5907230559345157, "grad_norm": 0.5372909903526306, "learning_rate": 7.577565698077647e-07, "loss": 0.5043, "step": 3464 }, { "epoch": 0.590893587994543, "grad_norm": 0.5969436764717102, "learning_rate": 7.572207379559723e-07, "loss": 0.5133, "step": 3465 }, { "epoch": 0.5910641200545702, "grad_norm": 0.5289788842201233, "learning_rate": 7.566849801656193e-07, "loss": 0.508, "step": 3466 }, { "epoch": 0.5912346521145976, "grad_norm": 0.4017581343650818, "learning_rate": 7.561492966001418e-07, "loss": 0.5134, "step": 3467 }, { "epoch": 0.5914051841746248, "grad_norm": 0.6081069707870483, "learning_rate": 7.556136874229532e-07, "loss": 0.5159, "step": 3468 }, { "epoch": 0.5915757162346521, "grad_norm": 0.5566324591636658, "learning_rate": 7.550781527974455e-07, "loss": 0.5205, "step": 3469 }, { "epoch": 0.5917462482946794, "grad_norm": 0.5150846838951111, "learning_rate": 7.545426928869864e-07, "loss": 0.5025, "step": 3470 }, { "epoch": 0.5919167803547067, "grad_norm": 0.4910479784011841, "learning_rate": 7.540073078549223e-07, "loss": 0.5104, "step": 3471 }, { "epoch": 0.592087312414734, "grad_norm": 0.4645948112010956, "learning_rate": 7.534719978645751e-07, "loss": 0.5118, "step": 3472 }, { "epoch": 0.5922578444747613, "grad_norm": 0.4650384485721588, "learning_rate": 7.529367630792454e-07, "loss": 0.5212, "step": 3473 }, { "epoch": 0.5924283765347885, "grad_norm": 0.4093935787677765, "learning_rate": 7.524016036622095e-07, "loss": 0.5123, "step": 3474 }, { "epoch": 0.5925989085948158, "grad_norm": 0.5681697130203247, "learning_rate": 7.518665197767224e-07, "loss": 0.5034, "step": 3475 }, { "epoch": 0.5927694406548432, "grad_norm": 0.5197209715843201, "learning_rate": 7.513315115860141e-07, "loss": 0.5123, "step": 3476 }, { "epoch": 0.5929399727148704, "grad_norm": 0.6382885575294495, "learning_rate": 7.507965792532922e-07, "loss": 0.5143, "step": 3477 }, { "epoch": 0.5931105047748977, "grad_norm": 0.6706441044807434, "learning_rate": 7.502617229417424e-07, "loss": 0.5073, "step": 3478 }, { "epoch": 0.593281036834925, "grad_norm": 0.48597392439842224, "learning_rate": 7.497269428145256e-07, "loss": 0.5143, "step": 3479 }, { "epoch": 0.5934515688949522, "grad_norm": 0.4961046576499939, "learning_rate": 7.491922390347802e-07, "loss": 0.5279, "step": 3480 }, { "epoch": 0.5936221009549796, "grad_norm": 0.578697919845581, "learning_rate": 7.486576117656209e-07, "loss": 0.5149, "step": 3481 }, { "epoch": 0.5937926330150068, "grad_norm": 0.6151126623153687, "learning_rate": 7.481230611701397e-07, "loss": 0.5216, "step": 3482 }, { "epoch": 0.5939631650750341, "grad_norm": 0.37831974029541016, "learning_rate": 7.475885874114048e-07, "loss": 0.5036, "step": 3483 }, { "epoch": 0.5941336971350614, "grad_norm": 0.507587194442749, "learning_rate": 7.470541906524603e-07, "loss": 0.5156, "step": 3484 }, { "epoch": 0.5943042291950886, "grad_norm": 0.5136541724205017, "learning_rate": 7.465198710563289e-07, "loss": 0.5046, "step": 3485 }, { "epoch": 0.594474761255116, "grad_norm": 0.44345173239707947, "learning_rate": 7.459856287860069e-07, "loss": 0.4937, "step": 3486 }, { "epoch": 0.5946452933151433, "grad_norm": 0.462137907743454, "learning_rate": 7.454514640044694e-07, "loss": 0.5011, "step": 3487 }, { "epoch": 0.5948158253751705, "grad_norm": 0.40550386905670166, "learning_rate": 7.449173768746665e-07, "loss": 0.5083, "step": 3488 }, { "epoch": 0.5949863574351978, "grad_norm": 0.4405902028083801, "learning_rate": 7.443833675595255e-07, "loss": 0.5087, "step": 3489 }, { "epoch": 0.595156889495225, "grad_norm": 0.4645247757434845, "learning_rate": 7.438494362219493e-07, "loss": 0.52, "step": 3490 }, { "epoch": 0.5953274215552524, "grad_norm": 0.423967570066452, "learning_rate": 7.433155830248168e-07, "loss": 0.5037, "step": 3491 }, { "epoch": 0.5954979536152797, "grad_norm": 0.4495370090007782, "learning_rate": 7.427818081309842e-07, "loss": 0.5106, "step": 3492 }, { "epoch": 0.595668485675307, "grad_norm": 0.38820216059684753, "learning_rate": 7.422481117032823e-07, "loss": 0.512, "step": 3493 }, { "epoch": 0.5958390177353342, "grad_norm": 0.4023274779319763, "learning_rate": 7.417144939045198e-07, "loss": 0.5024, "step": 3494 }, { "epoch": 0.5960095497953616, "grad_norm": 0.4757567346096039, "learning_rate": 7.411809548974793e-07, "loss": 0.5166, "step": 3495 }, { "epoch": 0.5961800818553888, "grad_norm": 0.48980042338371277, "learning_rate": 7.406474948449214e-07, "loss": 0.5129, "step": 3496 }, { "epoch": 0.5963506139154161, "grad_norm": 0.4435463845729828, "learning_rate": 7.401141139095809e-07, "loss": 0.5018, "step": 3497 }, { "epoch": 0.5965211459754434, "grad_norm": 0.4933076500892639, "learning_rate": 7.395808122541697e-07, "loss": 0.5087, "step": 3498 }, { "epoch": 0.5966916780354706, "grad_norm": 0.5443232655525208, "learning_rate": 7.39047590041375e-07, "loss": 0.5108, "step": 3499 }, { "epoch": 0.596862210095498, "grad_norm": 0.487313836812973, "learning_rate": 7.385144474338595e-07, "loss": 0.517, "step": 3500 }, { "epoch": 0.5970327421555253, "grad_norm": 0.5534808039665222, "learning_rate": 7.379813845942623e-07, "loss": 0.5103, "step": 3501 }, { "epoch": 0.5972032742155525, "grad_norm": 0.5094048380851746, "learning_rate": 7.374484016851971e-07, "loss": 0.5048, "step": 3502 }, { "epoch": 0.5973738062755798, "grad_norm": 0.4158516526222229, "learning_rate": 7.369154988692552e-07, "loss": 0.5028, "step": 3503 }, { "epoch": 0.597544338335607, "grad_norm": 0.6141080856323242, "learning_rate": 7.363826763090006e-07, "loss": 0.5068, "step": 3504 }, { "epoch": 0.5977148703956344, "grad_norm": 0.4984683096408844, "learning_rate": 7.358499341669757e-07, "loss": 0.5051, "step": 3505 }, { "epoch": 0.5978854024556617, "grad_norm": 0.5119192004203796, "learning_rate": 7.353172726056964e-07, "loss": 0.5099, "step": 3506 }, { "epoch": 0.598055934515689, "grad_norm": 0.5512797832489014, "learning_rate": 7.347846917876546e-07, "loss": 0.4988, "step": 3507 }, { "epoch": 0.5982264665757162, "grad_norm": 0.4553689658641815, "learning_rate": 7.342521918753181e-07, "loss": 0.5025, "step": 3508 }, { "epoch": 0.5983969986357435, "grad_norm": 0.6097546219825745, "learning_rate": 7.33719773031129e-07, "loss": 0.5177, "step": 3509 }, { "epoch": 0.5985675306957708, "grad_norm": 0.41999533772468567, "learning_rate": 7.331874354175058e-07, "loss": 0.5155, "step": 3510 }, { "epoch": 0.5987380627557981, "grad_norm": 0.5839874744415283, "learning_rate": 7.326551791968409e-07, "loss": 0.5068, "step": 3511 }, { "epoch": 0.5989085948158254, "grad_norm": 0.4493328630924225, "learning_rate": 7.321230045315036e-07, "loss": 0.5115, "step": 3512 }, { "epoch": 0.5990791268758526, "grad_norm": 0.3619157373905182, "learning_rate": 7.315909115838367e-07, "loss": 0.5035, "step": 3513 }, { "epoch": 0.5992496589358799, "grad_norm": 0.508014440536499, "learning_rate": 7.31058900516159e-07, "loss": 0.5265, "step": 3514 }, { "epoch": 0.5994201909959073, "grad_norm": 0.43933480978012085, "learning_rate": 7.305269714907639e-07, "loss": 0.5091, "step": 3515 }, { "epoch": 0.5995907230559345, "grad_norm": 0.3917537033557892, "learning_rate": 7.299951246699198e-07, "loss": 0.5156, "step": 3516 }, { "epoch": 0.5997612551159618, "grad_norm": 0.4158930480480194, "learning_rate": 7.294633602158703e-07, "loss": 0.4951, "step": 3517 }, { "epoch": 0.599931787175989, "grad_norm": 0.4387792944908142, "learning_rate": 7.289316782908335e-07, "loss": 0.501, "step": 3518 }, { "epoch": 0.6001023192360164, "grad_norm": 0.42189764976501465, "learning_rate": 7.28400079057003e-07, "loss": 0.5104, "step": 3519 }, { "epoch": 0.6002728512960437, "grad_norm": 0.3482825756072998, "learning_rate": 7.278685626765459e-07, "loss": 0.4977, "step": 3520 }, { "epoch": 0.600443383356071, "grad_norm": 0.4253080487251282, "learning_rate": 7.273371293116056e-07, "loss": 0.4884, "step": 3521 }, { "epoch": 0.6006139154160982, "grad_norm": 0.42421674728393555, "learning_rate": 7.268057791242994e-07, "loss": 0.5102, "step": 3522 }, { "epoch": 0.6007844474761255, "grad_norm": 0.4532250761985779, "learning_rate": 7.262745122767184e-07, "loss": 0.5064, "step": 3523 }, { "epoch": 0.6009549795361528, "grad_norm": 0.49927690625190735, "learning_rate": 7.257433289309301e-07, "loss": 0.5113, "step": 3524 }, { "epoch": 0.6011255115961801, "grad_norm": 0.45842334628105164, "learning_rate": 7.252122292489748e-07, "loss": 0.5072, "step": 3525 }, { "epoch": 0.6012960436562074, "grad_norm": 0.5091456174850464, "learning_rate": 7.246812133928686e-07, "loss": 0.5005, "step": 3526 }, { "epoch": 0.6014665757162346, "grad_norm": 0.5681532025337219, "learning_rate": 7.241502815246009e-07, "loss": 0.5031, "step": 3527 }, { "epoch": 0.6016371077762619, "grad_norm": 0.37529727816581726, "learning_rate": 7.236194338061365e-07, "loss": 0.5053, "step": 3528 }, { "epoch": 0.6018076398362893, "grad_norm": 0.6965998411178589, "learning_rate": 7.230886703994137e-07, "loss": 0.4997, "step": 3529 }, { "epoch": 0.6019781718963165, "grad_norm": 0.5670928359031677, "learning_rate": 7.225579914663453e-07, "loss": 0.4948, "step": 3530 }, { "epoch": 0.6021487039563438, "grad_norm": 0.5892690420150757, "learning_rate": 7.220273971688193e-07, "loss": 0.5054, "step": 3531 }, { "epoch": 0.602319236016371, "grad_norm": 0.45653554797172546, "learning_rate": 7.214968876686957e-07, "loss": 0.5055, "step": 3532 }, { "epoch": 0.6024897680763983, "grad_norm": 0.44902780652046204, "learning_rate": 7.209664631278113e-07, "loss": 0.5134, "step": 3533 }, { "epoch": 0.6026603001364257, "grad_norm": 0.4176679849624634, "learning_rate": 7.204361237079747e-07, "loss": 0.4995, "step": 3534 }, { "epoch": 0.602830832196453, "grad_norm": 0.39181068539619446, "learning_rate": 7.199058695709703e-07, "loss": 0.4956, "step": 3535 }, { "epoch": 0.6030013642564802, "grad_norm": 0.40236714482307434, "learning_rate": 7.19375700878555e-07, "loss": 0.5061, "step": 3536 }, { "epoch": 0.6031718963165075, "grad_norm": 0.4550640881061554, "learning_rate": 7.188456177924606e-07, "loss": 0.4971, "step": 3537 }, { "epoch": 0.6033424283765347, "grad_norm": 0.5484567880630493, "learning_rate": 7.183156204743926e-07, "loss": 0.5072, "step": 3538 }, { "epoch": 0.6035129604365621, "grad_norm": 0.4102098047733307, "learning_rate": 7.177857090860297e-07, "loss": 0.4999, "step": 3539 }, { "epoch": 0.6036834924965894, "grad_norm": 0.45663487911224365, "learning_rate": 7.172558837890256e-07, "loss": 0.4946, "step": 3540 }, { "epoch": 0.6038540245566166, "grad_norm": 0.43537938594818115, "learning_rate": 7.167261447450065e-07, "loss": 0.4949, "step": 3541 }, { "epoch": 0.6040245566166439, "grad_norm": 0.4498659670352936, "learning_rate": 7.161964921155733e-07, "loss": 0.5055, "step": 3542 }, { "epoch": 0.6041950886766713, "grad_norm": 0.538729727268219, "learning_rate": 7.156669260622998e-07, "loss": 0.5177, "step": 3543 }, { "epoch": 0.6043656207366985, "grad_norm": 0.3828926384449005, "learning_rate": 7.151374467467339e-07, "loss": 0.5022, "step": 3544 }, { "epoch": 0.6045361527967258, "grad_norm": 0.5743893384933472, "learning_rate": 7.146080543303966e-07, "loss": 0.496, "step": 3545 }, { "epoch": 0.604706684856753, "grad_norm": 0.46721726655960083, "learning_rate": 7.140787489747825e-07, "loss": 0.503, "step": 3546 }, { "epoch": 0.6048772169167803, "grad_norm": 0.4808953106403351, "learning_rate": 7.135495308413601e-07, "loss": 0.5032, "step": 3547 }, { "epoch": 0.6050477489768077, "grad_norm": 0.5239036083221436, "learning_rate": 7.130204000915702e-07, "loss": 0.4965, "step": 3548 }, { "epoch": 0.605218281036835, "grad_norm": 0.3505937457084656, "learning_rate": 7.124913568868288e-07, "loss": 0.5052, "step": 3549 }, { "epoch": 0.6053888130968622, "grad_norm": 0.5914868712425232, "learning_rate": 7.119624013885232e-07, "loss": 0.4989, "step": 3550 }, { "epoch": 0.6055593451568895, "grad_norm": 0.5071284770965576, "learning_rate": 7.114335337580153e-07, "loss": 0.498, "step": 3551 }, { "epoch": 0.6057298772169167, "grad_norm": 0.574246883392334, "learning_rate": 7.109047541566392e-07, "loss": 0.4953, "step": 3552 }, { "epoch": 0.6059004092769441, "grad_norm": 0.4426918923854828, "learning_rate": 7.103760627457031e-07, "loss": 0.4996, "step": 3553 }, { "epoch": 0.6060709413369714, "grad_norm": 0.5500320196151733, "learning_rate": 7.098474596864879e-07, "loss": 0.5078, "step": 3554 }, { "epoch": 0.6062414733969986, "grad_norm": 0.6294930577278137, "learning_rate": 7.09318945140247e-07, "loss": 0.5169, "step": 3555 }, { "epoch": 0.6064120054570259, "grad_norm": 0.4358702600002289, "learning_rate": 7.087905192682079e-07, "loss": 0.5118, "step": 3556 }, { "epoch": 0.6065825375170532, "grad_norm": 0.5665766000747681, "learning_rate": 7.082621822315697e-07, "loss": 0.4931, "step": 3557 }, { "epoch": 0.6067530695770805, "grad_norm": 0.5058866739273071, "learning_rate": 7.07733934191506e-07, "loss": 0.5063, "step": 3558 }, { "epoch": 0.6069236016371078, "grad_norm": 0.40108829736709595, "learning_rate": 7.072057753091617e-07, "loss": 0.52, "step": 3559 }, { "epoch": 0.607094133697135, "grad_norm": 0.5396955609321594, "learning_rate": 7.066777057456561e-07, "loss": 0.5025, "step": 3560 }, { "epoch": 0.6072646657571623, "grad_norm": 0.4852167069911957, "learning_rate": 7.061497256620793e-07, "loss": 0.5017, "step": 3561 }, { "epoch": 0.6074351978171897, "grad_norm": 0.39662599563598633, "learning_rate": 7.056218352194958e-07, "loss": 0.5023, "step": 3562 }, { "epoch": 0.607605729877217, "grad_norm": 0.4817587733268738, "learning_rate": 7.050940345789419e-07, "loss": 0.5086, "step": 3563 }, { "epoch": 0.6077762619372442, "grad_norm": 0.42608875036239624, "learning_rate": 7.045663239014266e-07, "loss": 0.4994, "step": 3564 }, { "epoch": 0.6079467939972715, "grad_norm": 0.36719074845314026, "learning_rate": 7.04038703347932e-07, "loss": 0.5057, "step": 3565 }, { "epoch": 0.6081173260572987, "grad_norm": 0.4195724129676819, "learning_rate": 7.035111730794116e-07, "loss": 0.4947, "step": 3566 }, { "epoch": 0.6082878581173261, "grad_norm": 0.40635064244270325, "learning_rate": 7.029837332567929e-07, "loss": 0.5008, "step": 3567 }, { "epoch": 0.6084583901773534, "grad_norm": 0.45059990882873535, "learning_rate": 7.024563840409746e-07, "loss": 0.4961, "step": 3568 }, { "epoch": 0.6086289222373806, "grad_norm": 0.42194345593452454, "learning_rate": 7.019291255928274e-07, "loss": 0.4958, "step": 3569 }, { "epoch": 0.6087994542974079, "grad_norm": 0.39710208773612976, "learning_rate": 7.01401958073196e-07, "loss": 0.5048, "step": 3570 }, { "epoch": 0.6089699863574352, "grad_norm": 0.4051033556461334, "learning_rate": 7.008748816428959e-07, "loss": 0.4988, "step": 3571 }, { "epoch": 0.6091405184174625, "grad_norm": 0.34922194480895996, "learning_rate": 7.003478964627157e-07, "loss": 0.502, "step": 3572 }, { "epoch": 0.6093110504774898, "grad_norm": 0.44243019819259644, "learning_rate": 6.998210026934149e-07, "loss": 0.5043, "step": 3573 }, { "epoch": 0.609481582537517, "grad_norm": 0.4132860600948334, "learning_rate": 6.992942004957271e-07, "loss": 0.4976, "step": 3574 }, { "epoch": 0.6096521145975443, "grad_norm": 0.49820494651794434, "learning_rate": 6.987674900303559e-07, "loss": 0.5086, "step": 3575 }, { "epoch": 0.6098226466575716, "grad_norm": 0.42352399230003357, "learning_rate": 6.982408714579783e-07, "loss": 0.5066, "step": 3576 }, { "epoch": 0.609993178717599, "grad_norm": 0.3854990303516388, "learning_rate": 6.97714344939243e-07, "loss": 0.5026, "step": 3577 }, { "epoch": 0.6101637107776262, "grad_norm": 0.39532220363616943, "learning_rate": 6.971879106347699e-07, "loss": 0.5048, "step": 3578 }, { "epoch": 0.6103342428376535, "grad_norm": 0.4156137704849243, "learning_rate": 6.966615687051517e-07, "loss": 0.499, "step": 3579 }, { "epoch": 0.6105047748976807, "grad_norm": 0.4135255813598633, "learning_rate": 6.961353193109524e-07, "loss": 0.5068, "step": 3580 }, { "epoch": 0.610675306957708, "grad_norm": 0.4143788516521454, "learning_rate": 6.95609162612708e-07, "loss": 0.4992, "step": 3581 }, { "epoch": 0.6108458390177354, "grad_norm": 0.42542439699172974, "learning_rate": 6.950830987709257e-07, "loss": 0.4977, "step": 3582 }, { "epoch": 0.6110163710777626, "grad_norm": 0.42292311787605286, "learning_rate": 6.945571279460857e-07, "loss": 0.495, "step": 3583 }, { "epoch": 0.6111869031377899, "grad_norm": 0.46934792399406433, "learning_rate": 6.940312502986381e-07, "loss": 0.5119, "step": 3584 }, { "epoch": 0.6113574351978172, "grad_norm": 0.4840346574783325, "learning_rate": 6.935054659890053e-07, "loss": 0.4886, "step": 3585 }, { "epoch": 0.6115279672578445, "grad_norm": 0.3723541498184204, "learning_rate": 6.92979775177582e-07, "loss": 0.5021, "step": 3586 }, { "epoch": 0.6116984993178718, "grad_norm": 0.4317921996116638, "learning_rate": 6.924541780247334e-07, "loss": 0.4999, "step": 3587 }, { "epoch": 0.611869031377899, "grad_norm": 0.5158261656761169, "learning_rate": 6.919286746907964e-07, "loss": 0.4978, "step": 3588 }, { "epoch": 0.6120395634379263, "grad_norm": 0.5118577480316162, "learning_rate": 6.914032653360792e-07, "loss": 0.5101, "step": 3589 }, { "epoch": 0.6122100954979536, "grad_norm": 0.4414294362068176, "learning_rate": 6.908779501208617e-07, "loss": 0.4923, "step": 3590 }, { "epoch": 0.612380627557981, "grad_norm": 0.36989259719848633, "learning_rate": 6.903527292053943e-07, "loss": 0.5025, "step": 3591 }, { "epoch": 0.6125511596180082, "grad_norm": 0.47652095556259155, "learning_rate": 6.898276027499003e-07, "loss": 0.5042, "step": 3592 }, { "epoch": 0.6127216916780355, "grad_norm": 0.4678318202495575, "learning_rate": 6.893025709145718e-07, "loss": 0.506, "step": 3593 }, { "epoch": 0.6128922237380627, "grad_norm": 0.33233606815338135, "learning_rate": 6.887776338595736e-07, "loss": 0.4991, "step": 3594 }, { "epoch": 0.61306275579809, "grad_norm": 0.41297683119773865, "learning_rate": 6.882527917450416e-07, "loss": 0.509, "step": 3595 }, { "epoch": 0.6132332878581174, "grad_norm": 0.374800443649292, "learning_rate": 6.877280447310822e-07, "loss": 0.4957, "step": 3596 }, { "epoch": 0.6134038199181446, "grad_norm": 0.40728360414505005, "learning_rate": 6.872033929777731e-07, "loss": 0.511, "step": 3597 }, { "epoch": 0.6135743519781719, "grad_norm": 0.4163961112499237, "learning_rate": 6.866788366451625e-07, "loss": 0.5038, "step": 3598 }, { "epoch": 0.6137448840381992, "grad_norm": 0.39934197068214417, "learning_rate": 6.861543758932703e-07, "loss": 0.4998, "step": 3599 }, { "epoch": 0.6139154160982264, "grad_norm": 0.41028591990470886, "learning_rate": 6.856300108820865e-07, "loss": 0.5018, "step": 3600 }, { "epoch": 0.6140859481582538, "grad_norm": 0.5780900120735168, "learning_rate": 6.851057417715719e-07, "loss": 0.5085, "step": 3601 }, { "epoch": 0.614256480218281, "grad_norm": 0.39337801933288574, "learning_rate": 6.845815687216591e-07, "loss": 0.5039, "step": 3602 }, { "epoch": 0.6144270122783083, "grad_norm": 0.42160749435424805, "learning_rate": 6.840574918922494e-07, "loss": 0.5064, "step": 3603 }, { "epoch": 0.6145975443383356, "grad_norm": 0.3935078978538513, "learning_rate": 6.83533511443217e-07, "loss": 0.5063, "step": 3604 }, { "epoch": 0.6147680763983628, "grad_norm": 0.4175839424133301, "learning_rate": 6.830096275344052e-07, "loss": 0.5025, "step": 3605 }, { "epoch": 0.6149386084583902, "grad_norm": 0.4644913673400879, "learning_rate": 6.824858403256284e-07, "loss": 0.5059, "step": 3606 }, { "epoch": 0.6151091405184175, "grad_norm": 0.5089910626411438, "learning_rate": 6.819621499766714e-07, "loss": 0.5097, "step": 3607 }, { "epoch": 0.6152796725784447, "grad_norm": 0.39027276635169983, "learning_rate": 6.814385566472893e-07, "loss": 0.5003, "step": 3608 }, { "epoch": 0.615450204638472, "grad_norm": 0.5722939372062683, "learning_rate": 6.809150604972079e-07, "loss": 0.506, "step": 3609 }, { "epoch": 0.6156207366984994, "grad_norm": 0.8283845782279968, "learning_rate": 6.803916616861228e-07, "loss": 0.5132, "step": 3610 }, { "epoch": 0.6157912687585266, "grad_norm": 0.5132255554199219, "learning_rate": 6.798683603737013e-07, "loss": 0.5057, "step": 3611 }, { "epoch": 0.6159618008185539, "grad_norm": 0.46312084794044495, "learning_rate": 6.793451567195786e-07, "loss": 0.5008, "step": 3612 }, { "epoch": 0.6161323328785812, "grad_norm": 0.6205770373344421, "learning_rate": 6.788220508833628e-07, "loss": 0.498, "step": 3613 }, { "epoch": 0.6163028649386084, "grad_norm": 0.4256286025047302, "learning_rate": 6.7829904302463e-07, "loss": 0.5111, "step": 3614 }, { "epoch": 0.6164733969986358, "grad_norm": 0.5326489806175232, "learning_rate": 6.777761333029275e-07, "loss": 0.5034, "step": 3615 }, { "epoch": 0.616643929058663, "grad_norm": 0.4669535458087921, "learning_rate": 6.772533218777725e-07, "loss": 0.502, "step": 3616 }, { "epoch": 0.6168144611186903, "grad_norm": 0.4774930775165558, "learning_rate": 6.767306089086519e-07, "loss": 0.5081, "step": 3617 }, { "epoch": 0.6169849931787176, "grad_norm": 0.6294277906417847, "learning_rate": 6.762079945550231e-07, "loss": 0.5053, "step": 3618 }, { "epoch": 0.6171555252387448, "grad_norm": 0.4758334755897522, "learning_rate": 6.756854789763124e-07, "loss": 0.5065, "step": 3619 }, { "epoch": 0.6173260572987722, "grad_norm": 0.545806884765625, "learning_rate": 6.75163062331918e-07, "loss": 0.4929, "step": 3620 }, { "epoch": 0.6174965893587995, "grad_norm": 0.539008617401123, "learning_rate": 6.74640744781205e-07, "loss": 0.4986, "step": 3621 }, { "epoch": 0.6176671214188267, "grad_norm": 0.47142142057418823, "learning_rate": 6.741185264835111e-07, "loss": 0.4932, "step": 3622 }, { "epoch": 0.617837653478854, "grad_norm": 0.7239853739738464, "learning_rate": 6.735964075981421e-07, "loss": 0.512, "step": 3623 }, { "epoch": 0.6180081855388813, "grad_norm": 0.5947273373603821, "learning_rate": 6.730743882843736e-07, "loss": 0.5102, "step": 3624 }, { "epoch": 0.6181787175989086, "grad_norm": 0.4315265715122223, "learning_rate": 6.725524687014515e-07, "loss": 0.4982, "step": 3625 }, { "epoch": 0.6183492496589359, "grad_norm": 0.6664741039276123, "learning_rate": 6.720306490085905e-07, "loss": 0.5013, "step": 3626 }, { "epoch": 0.6185197817189632, "grad_norm": 0.42937299609184265, "learning_rate": 6.715089293649753e-07, "loss": 0.5004, "step": 3627 }, { "epoch": 0.6186903137789904, "grad_norm": 0.5188443064689636, "learning_rate": 6.709873099297597e-07, "loss": 0.492, "step": 3628 }, { "epoch": 0.6188608458390177, "grad_norm": 0.6457641124725342, "learning_rate": 6.704657908620681e-07, "loss": 0.4971, "step": 3629 }, { "epoch": 0.619031377899045, "grad_norm": 0.39259135723114014, "learning_rate": 6.699443723209925e-07, "loss": 0.5043, "step": 3630 }, { "epoch": 0.6192019099590723, "grad_norm": 0.5514686703681946, "learning_rate": 6.694230544655956e-07, "loss": 0.504, "step": 3631 }, { "epoch": 0.6193724420190996, "grad_norm": 0.4360816180706024, "learning_rate": 6.689018374549087e-07, "loss": 0.5099, "step": 3632 }, { "epoch": 0.6195429740791268, "grad_norm": 0.576634407043457, "learning_rate": 6.683807214479324e-07, "loss": 0.4997, "step": 3633 }, { "epoch": 0.6197135061391542, "grad_norm": 0.6791767477989197, "learning_rate": 6.678597066036369e-07, "loss": 0.4935, "step": 3634 }, { "epoch": 0.6198840381991815, "grad_norm": 0.4800211787223816, "learning_rate": 6.67338793080961e-07, "loss": 0.4976, "step": 3635 }, { "epoch": 0.6200545702592087, "grad_norm": 0.5554141998291016, "learning_rate": 6.668179810388132e-07, "loss": 0.5091, "step": 3636 }, { "epoch": 0.620225102319236, "grad_norm": 0.4043811559677124, "learning_rate": 6.6629727063607e-07, "loss": 0.4953, "step": 3637 }, { "epoch": 0.6203956343792633, "grad_norm": 0.5499111413955688, "learning_rate": 6.657766620315784e-07, "loss": 0.5081, "step": 3638 }, { "epoch": 0.6205661664392906, "grad_norm": 0.6283267736434937, "learning_rate": 6.652561553841537e-07, "loss": 0.5007, "step": 3639 }, { "epoch": 0.6207366984993179, "grad_norm": 0.427605539560318, "learning_rate": 6.647357508525787e-07, "loss": 0.5107, "step": 3640 }, { "epoch": 0.6209072305593452, "grad_norm": 0.44812512397766113, "learning_rate": 6.642154485956077e-07, "loss": 0.5022, "step": 3641 }, { "epoch": 0.6210777626193724, "grad_norm": 0.535152792930603, "learning_rate": 6.636952487719613e-07, "loss": 0.5043, "step": 3642 }, { "epoch": 0.6212482946793997, "grad_norm": 0.4807773232460022, "learning_rate": 6.631751515403307e-07, "loss": 0.4924, "step": 3643 }, { "epoch": 0.621418826739427, "grad_norm": 0.4906376004219055, "learning_rate": 6.626551570593744e-07, "loss": 0.4986, "step": 3644 }, { "epoch": 0.6215893587994543, "grad_norm": 0.47907671332359314, "learning_rate": 6.621352654877207e-07, "loss": 0.4909, "step": 3645 }, { "epoch": 0.6217598908594816, "grad_norm": 0.4234660267829895, "learning_rate": 6.616154769839656e-07, "loss": 0.5065, "step": 3646 }, { "epoch": 0.6219304229195088, "grad_norm": 0.4363880753517151, "learning_rate": 6.610957917066745e-07, "loss": 0.5032, "step": 3647 }, { "epoch": 0.6221009549795361, "grad_norm": 0.4534679055213928, "learning_rate": 6.605762098143811e-07, "loss": 0.4969, "step": 3648 }, { "epoch": 0.6222714870395635, "grad_norm": 0.38198530673980713, "learning_rate": 6.60056731465586e-07, "loss": 0.5005, "step": 3649 }, { "epoch": 0.6224420190995907, "grad_norm": 0.3780840039253235, "learning_rate": 6.59537356818761e-07, "loss": 0.5052, "step": 3650 }, { "epoch": 0.622612551159618, "grad_norm": 0.42172762751579285, "learning_rate": 6.59018086032344e-07, "loss": 0.5041, "step": 3651 }, { "epoch": 0.6227830832196453, "grad_norm": 0.44250547885894775, "learning_rate": 6.584989192647425e-07, "loss": 0.4965, "step": 3652 }, { "epoch": 0.6229536152796725, "grad_norm": 0.4835226833820343, "learning_rate": 6.579798566743315e-07, "loss": 0.5082, "step": 3653 }, { "epoch": 0.6231241473396999, "grad_norm": 0.38454940915107727, "learning_rate": 6.574608984194546e-07, "loss": 0.5045, "step": 3654 }, { "epoch": 0.6232946793997272, "grad_norm": 0.5638027787208557, "learning_rate": 6.569420446584235e-07, "loss": 0.5024, "step": 3655 }, { "epoch": 0.6234652114597544, "grad_norm": 0.3625916838645935, "learning_rate": 6.564232955495176e-07, "loss": 0.5046, "step": 3656 }, { "epoch": 0.6236357435197817, "grad_norm": 0.4334798753261566, "learning_rate": 6.55904651250986e-07, "loss": 0.5027, "step": 3657 }, { "epoch": 0.623806275579809, "grad_norm": 0.40530920028686523, "learning_rate": 6.55386111921043e-07, "loss": 0.5066, "step": 3658 }, { "epoch": 0.6239768076398363, "grad_norm": 0.47581782937049866, "learning_rate": 6.548676777178738e-07, "loss": 0.5055, "step": 3659 }, { "epoch": 0.6241473396998636, "grad_norm": 0.42550137639045715, "learning_rate": 6.543493487996294e-07, "loss": 0.4991, "step": 3660 }, { "epoch": 0.6243178717598908, "grad_norm": 0.41812145709991455, "learning_rate": 6.538311253244301e-07, "loss": 0.5032, "step": 3661 }, { "epoch": 0.6244884038199181, "grad_norm": 0.5709978938102722, "learning_rate": 6.533130074503631e-07, "loss": 0.5026, "step": 3662 }, { "epoch": 0.6246589358799455, "grad_norm": 0.5457199215888977, "learning_rate": 6.527949953354836e-07, "loss": 0.503, "step": 3663 }, { "epoch": 0.6248294679399727, "grad_norm": 0.44440513849258423, "learning_rate": 6.522770891378151e-07, "loss": 0.5018, "step": 3664 }, { "epoch": 0.625, "grad_norm": 0.46256470680236816, "learning_rate": 6.517592890153478e-07, "loss": 0.5143, "step": 3665 }, { "epoch": 0.6251705320600273, "grad_norm": 0.39488309621810913, "learning_rate": 6.512415951260406e-07, "loss": 0.4868, "step": 3666 }, { "epoch": 0.6253410641200545, "grad_norm": 0.6106573939323425, "learning_rate": 6.507240076278193e-07, "loss": 0.4977, "step": 3667 }, { "epoch": 0.6255115961800819, "grad_norm": 0.37441346049308777, "learning_rate": 6.502065266785774e-07, "loss": 0.508, "step": 3668 }, { "epoch": 0.6256821282401092, "grad_norm": 0.5005650520324707, "learning_rate": 6.496891524361757e-07, "loss": 0.4986, "step": 3669 }, { "epoch": 0.6258526603001364, "grad_norm": 0.43681997060775757, "learning_rate": 6.491718850584434e-07, "loss": 0.5016, "step": 3670 }, { "epoch": 0.6260231923601637, "grad_norm": 0.5549169182777405, "learning_rate": 6.486547247031756e-07, "loss": 0.509, "step": 3671 }, { "epoch": 0.626193724420191, "grad_norm": 0.5520680546760559, "learning_rate": 6.481376715281358e-07, "loss": 0.5001, "step": 3672 }, { "epoch": 0.6263642564802183, "grad_norm": 0.545689046382904, "learning_rate": 6.476207256910547e-07, "loss": 0.5075, "step": 3673 }, { "epoch": 0.6265347885402456, "grad_norm": 0.4276546239852905, "learning_rate": 6.471038873496296e-07, "loss": 0.511, "step": 3674 }, { "epoch": 0.6267053206002728, "grad_norm": 0.3931770920753479, "learning_rate": 6.465871566615264e-07, "loss": 0.4926, "step": 3675 }, { "epoch": 0.6268758526603001, "grad_norm": 0.436468243598938, "learning_rate": 6.460705337843762e-07, "loss": 0.4976, "step": 3676 }, { "epoch": 0.6270463847203275, "grad_norm": 0.41146066784858704, "learning_rate": 6.45554018875779e-07, "loss": 0.5098, "step": 3677 }, { "epoch": 0.6272169167803547, "grad_norm": 0.433461457490921, "learning_rate": 6.450376120933009e-07, "loss": 0.5072, "step": 3678 }, { "epoch": 0.627387448840382, "grad_norm": 0.42208781838417053, "learning_rate": 6.445213135944752e-07, "loss": 0.5074, "step": 3679 }, { "epoch": 0.6275579809004093, "grad_norm": 0.3760968744754791, "learning_rate": 6.440051235368023e-07, "loss": 0.4948, "step": 3680 }, { "epoch": 0.6277285129604365, "grad_norm": 0.4705601632595062, "learning_rate": 6.434890420777492e-07, "loss": 0.4915, "step": 3681 }, { "epoch": 0.6278990450204639, "grad_norm": 0.5115656852722168, "learning_rate": 6.429730693747504e-07, "loss": 0.5061, "step": 3682 }, { "epoch": 0.6280695770804912, "grad_norm": 0.4798704981803894, "learning_rate": 6.424572055852062e-07, "loss": 0.4926, "step": 3683 }, { "epoch": 0.6282401091405184, "grad_norm": 0.46192893385887146, "learning_rate": 6.419414508664851e-07, "loss": 0.5057, "step": 3684 }, { "epoch": 0.6284106412005457, "grad_norm": 0.46332821249961853, "learning_rate": 6.414258053759211e-07, "loss": 0.5008, "step": 3685 }, { "epoch": 0.628581173260573, "grad_norm": 0.4708835780620575, "learning_rate": 6.409102692708154e-07, "loss": 0.5044, "step": 3686 }, { "epoch": 0.6287517053206003, "grad_norm": 0.4883918762207031, "learning_rate": 6.403948427084357e-07, "loss": 0.5019, "step": 3687 }, { "epoch": 0.6289222373806276, "grad_norm": 0.5046584010124207, "learning_rate": 6.39879525846016e-07, "loss": 0.4981, "step": 3688 }, { "epoch": 0.6290927694406548, "grad_norm": 0.5189499258995056, "learning_rate": 6.393643188407579e-07, "loss": 0.4936, "step": 3689 }, { "epoch": 0.6292633015006821, "grad_norm": 0.4646134376525879, "learning_rate": 6.388492218498277e-07, "loss": 0.5089, "step": 3690 }, { "epoch": 0.6294338335607094, "grad_norm": 0.4980888366699219, "learning_rate": 6.383342350303606e-07, "loss": 0.5057, "step": 3691 }, { "epoch": 0.6296043656207367, "grad_norm": 0.36041122674942017, "learning_rate": 6.378193585394553e-07, "loss": 0.4913, "step": 3692 }, { "epoch": 0.629774897680764, "grad_norm": 0.44873934984207153, "learning_rate": 6.373045925341795e-07, "loss": 0.5044, "step": 3693 }, { "epoch": 0.6299454297407913, "grad_norm": 0.4433237910270691, "learning_rate": 6.367899371715654e-07, "loss": 0.5004, "step": 3694 }, { "epoch": 0.6301159618008185, "grad_norm": 0.383121132850647, "learning_rate": 6.362753926086119e-07, "loss": 0.4989, "step": 3695 }, { "epoch": 0.6302864938608458, "grad_norm": 0.43507349491119385, "learning_rate": 6.357609590022849e-07, "loss": 0.4991, "step": 3696 }, { "epoch": 0.6304570259208732, "grad_norm": 0.34954577684402466, "learning_rate": 6.352466365095151e-07, "loss": 0.5079, "step": 3697 }, { "epoch": 0.6306275579809004, "grad_norm": 0.36680907011032104, "learning_rate": 6.347324252872007e-07, "loss": 0.5076, "step": 3698 }, { "epoch": 0.6307980900409277, "grad_norm": 0.35321205854415894, "learning_rate": 6.342183254922046e-07, "loss": 0.5053, "step": 3699 }, { "epoch": 0.630968622100955, "grad_norm": 0.41826412081718445, "learning_rate": 6.337043372813573e-07, "loss": 0.4961, "step": 3700 }, { "epoch": 0.6311391541609823, "grad_norm": 0.3832581341266632, "learning_rate": 6.331904608114535e-07, "loss": 0.5133, "step": 3701 }, { "epoch": 0.6313096862210096, "grad_norm": 0.34321901202201843, "learning_rate": 6.326766962392545e-07, "loss": 0.496, "step": 3702 }, { "epoch": 0.6314802182810368, "grad_norm": 0.40280675888061523, "learning_rate": 6.321630437214885e-07, "loss": 0.5077, "step": 3703 }, { "epoch": 0.6316507503410641, "grad_norm": 0.4523974657058716, "learning_rate": 6.316495034148481e-07, "loss": 0.5223, "step": 3704 }, { "epoch": 0.6318212824010914, "grad_norm": 0.4422217011451721, "learning_rate": 6.311360754759924e-07, "loss": 0.5198, "step": 3705 }, { "epoch": 0.6319918144611187, "grad_norm": 0.4404323399066925, "learning_rate": 6.306227600615459e-07, "loss": 0.5003, "step": 3706 }, { "epoch": 0.632162346521146, "grad_norm": 0.4646601378917694, "learning_rate": 6.301095573280989e-07, "loss": 0.507, "step": 3707 }, { "epoch": 0.6323328785811733, "grad_norm": 0.5132860541343689, "learning_rate": 6.29596467432207e-07, "loss": 0.5073, "step": 3708 }, { "epoch": 0.6325034106412005, "grad_norm": 0.5008794069290161, "learning_rate": 6.290834905303927e-07, "loss": 0.488, "step": 3709 }, { "epoch": 0.6326739427012278, "grad_norm": 0.4249769449234009, "learning_rate": 6.285706267791424e-07, "loss": 0.4868, "step": 3710 }, { "epoch": 0.6328444747612552, "grad_norm": 0.4569952189922333, "learning_rate": 6.28057876334908e-07, "loss": 0.518, "step": 3711 }, { "epoch": 0.6330150068212824, "grad_norm": 0.45532703399658203, "learning_rate": 6.275452393541085e-07, "loss": 0.5047, "step": 3712 }, { "epoch": 0.6331855388813097, "grad_norm": 0.448268860578537, "learning_rate": 6.270327159931267e-07, "loss": 0.494, "step": 3713 }, { "epoch": 0.633356070941337, "grad_norm": 0.4130880534648895, "learning_rate": 6.265203064083117e-07, "loss": 0.4947, "step": 3714 }, { "epoch": 0.6335266030013642, "grad_norm": 0.455447793006897, "learning_rate": 6.260080107559768e-07, "loss": 0.5032, "step": 3715 }, { "epoch": 0.6336971350613916, "grad_norm": 0.3961569368839264, "learning_rate": 6.254958291924018e-07, "loss": 0.5032, "step": 3716 }, { "epoch": 0.6338676671214188, "grad_norm": 0.42894527316093445, "learning_rate": 6.249837618738311e-07, "loss": 0.5112, "step": 3717 }, { "epoch": 0.6340381991814461, "grad_norm": 0.47216030955314636, "learning_rate": 6.244718089564735e-07, "loss": 0.4972, "step": 3718 }, { "epoch": 0.6342087312414734, "grad_norm": 0.49208471179008484, "learning_rate": 6.23959970596505e-07, "loss": 0.4937, "step": 3719 }, { "epoch": 0.6343792633015006, "grad_norm": 0.4241389334201813, "learning_rate": 6.23448246950064e-07, "loss": 0.5003, "step": 3720 }, { "epoch": 0.634549795361528, "grad_norm": 0.3937288522720337, "learning_rate": 6.229366381732561e-07, "loss": 0.5108, "step": 3721 }, { "epoch": 0.6347203274215553, "grad_norm": 0.3928370177745819, "learning_rate": 6.224251444221503e-07, "loss": 0.5171, "step": 3722 }, { "epoch": 0.6348908594815825, "grad_norm": 0.42944595217704773, "learning_rate": 6.219137658527819e-07, "loss": 0.5072, "step": 3723 }, { "epoch": 0.6350613915416098, "grad_norm": 0.5143080949783325, "learning_rate": 6.2140250262115e-07, "loss": 0.4947, "step": 3724 }, { "epoch": 0.6352319236016372, "grad_norm": 0.339793860912323, "learning_rate": 6.208913548832188e-07, "loss": 0.5139, "step": 3725 }, { "epoch": 0.6354024556616644, "grad_norm": 0.4634465277194977, "learning_rate": 6.203803227949174e-07, "loss": 0.5131, "step": 3726 }, { "epoch": 0.6355729877216917, "grad_norm": 0.5924431085586548, "learning_rate": 6.198694065121394e-07, "loss": 0.4931, "step": 3727 }, { "epoch": 0.635743519781719, "grad_norm": 0.4708423912525177, "learning_rate": 6.19358606190744e-07, "loss": 0.4975, "step": 3728 }, { "epoch": 0.6359140518417462, "grad_norm": 0.400854229927063, "learning_rate": 6.188479219865529e-07, "loss": 0.5068, "step": 3729 }, { "epoch": 0.6360845839017736, "grad_norm": 0.49634453654289246, "learning_rate": 6.18337354055355e-07, "loss": 0.5124, "step": 3730 }, { "epoch": 0.6362551159618008, "grad_norm": 0.41015246510505676, "learning_rate": 6.178269025529018e-07, "loss": 0.4981, "step": 3731 }, { "epoch": 0.6364256480218281, "grad_norm": 0.42830201983451843, "learning_rate": 6.173165676349103e-07, "loss": 0.5096, "step": 3732 }, { "epoch": 0.6365961800818554, "grad_norm": 0.5014932751655579, "learning_rate": 6.168063494570615e-07, "loss": 0.4874, "step": 3733 }, { "epoch": 0.6367667121418826, "grad_norm": 0.5043054819107056, "learning_rate": 6.162962481750005e-07, "loss": 0.5184, "step": 3734 }, { "epoch": 0.63693724420191, "grad_norm": 0.4094924330711365, "learning_rate": 6.157862639443376e-07, "loss": 0.5177, "step": 3735 }, { "epoch": 0.6371077762619373, "grad_norm": 0.43844693899154663, "learning_rate": 6.152763969206461e-07, "loss": 0.5061, "step": 3736 }, { "epoch": 0.6372783083219645, "grad_norm": 0.48379039764404297, "learning_rate": 6.147666472594658e-07, "loss": 0.4945, "step": 3737 }, { "epoch": 0.6374488403819918, "grad_norm": 0.4411104619503021, "learning_rate": 6.142570151162978e-07, "loss": 0.4976, "step": 3738 }, { "epoch": 0.637619372442019, "grad_norm": 0.45324018597602844, "learning_rate": 6.137475006466097e-07, "loss": 0.5014, "step": 3739 }, { "epoch": 0.6377899045020464, "grad_norm": 0.4081723093986511, "learning_rate": 6.132381040058318e-07, "loss": 0.5051, "step": 3740 }, { "epoch": 0.6379604365620737, "grad_norm": 0.5187481045722961, "learning_rate": 6.127288253493591e-07, "loss": 0.5075, "step": 3741 }, { "epoch": 0.638130968622101, "grad_norm": 0.4198801815509796, "learning_rate": 6.122196648325507e-07, "loss": 0.501, "step": 3742 }, { "epoch": 0.6383015006821282, "grad_norm": 0.40012413263320923, "learning_rate": 6.117106226107291e-07, "loss": 0.4998, "step": 3743 }, { "epoch": 0.6384720327421555, "grad_norm": 0.45775869488716125, "learning_rate": 6.112016988391811e-07, "loss": 0.5314, "step": 3744 }, { "epoch": 0.6386425648021828, "grad_norm": 0.4114890694618225, "learning_rate": 6.106928936731572e-07, "loss": 0.5002, "step": 3745 }, { "epoch": 0.6388130968622101, "grad_norm": 0.44415518641471863, "learning_rate": 6.101842072678725e-07, "loss": 0.5067, "step": 3746 }, { "epoch": 0.6389836289222374, "grad_norm": 0.43056219816207886, "learning_rate": 6.096756397785041e-07, "loss": 0.5057, "step": 3747 }, { "epoch": 0.6391541609822646, "grad_norm": 0.3749973475933075, "learning_rate": 6.091671913601949e-07, "loss": 0.5136, "step": 3748 }, { "epoch": 0.639324693042292, "grad_norm": 0.46487438678741455, "learning_rate": 6.086588621680499e-07, "loss": 0.5059, "step": 3749 }, { "epoch": 0.6394952251023193, "grad_norm": 0.39657068252563477, "learning_rate": 6.081506523571385e-07, "loss": 0.5162, "step": 3750 }, { "epoch": 0.6396657571623465, "grad_norm": 0.45455026626586914, "learning_rate": 6.076425620824936e-07, "loss": 0.4889, "step": 3751 }, { "epoch": 0.6398362892223738, "grad_norm": 0.46647676825523376, "learning_rate": 6.071345914991114e-07, "loss": 0.5121, "step": 3752 }, { "epoch": 0.640006821282401, "grad_norm": 0.43015286326408386, "learning_rate": 6.06626740761952e-07, "loss": 0.5095, "step": 3753 }, { "epoch": 0.6401773533424284, "grad_norm": 0.4729073643684387, "learning_rate": 6.061190100259378e-07, "loss": 0.498, "step": 3754 }, { "epoch": 0.6403478854024557, "grad_norm": 0.44330206513404846, "learning_rate": 6.056113994459567e-07, "loss": 0.5053, "step": 3755 }, { "epoch": 0.640518417462483, "grad_norm": 0.4980153441429138, "learning_rate": 6.051039091768583e-07, "loss": 0.5196, "step": 3756 }, { "epoch": 0.6406889495225102, "grad_norm": 0.5406501889228821, "learning_rate": 6.045965393734551e-07, "loss": 0.5112, "step": 3757 }, { "epoch": 0.6408594815825375, "grad_norm": 0.4644082486629486, "learning_rate": 6.040892901905247e-07, "loss": 0.512, "step": 3758 }, { "epoch": 0.6410300136425648, "grad_norm": 0.5462424159049988, "learning_rate": 6.03582161782806e-07, "loss": 0.4952, "step": 3759 }, { "epoch": 0.6412005457025921, "grad_norm": 0.35903412103652954, "learning_rate": 6.030751543050029e-07, "loss": 0.5066, "step": 3760 }, { "epoch": 0.6413710777626194, "grad_norm": 0.39143240451812744, "learning_rate": 6.025682679117804e-07, "loss": 0.5019, "step": 3761 }, { "epoch": 0.6415416098226466, "grad_norm": 0.44252634048461914, "learning_rate": 6.020615027577684e-07, "loss": 0.5188, "step": 3762 }, { "epoch": 0.6417121418826739, "grad_norm": 0.4334603250026703, "learning_rate": 6.015548589975582e-07, "loss": 0.5007, "step": 3763 }, { "epoch": 0.6418826739427013, "grad_norm": 0.5139437913894653, "learning_rate": 6.010483367857057e-07, "loss": 0.4994, "step": 3764 }, { "epoch": 0.6420532060027285, "grad_norm": 0.5231024622917175, "learning_rate": 6.005419362767287e-07, "loss": 0.4997, "step": 3765 }, { "epoch": 0.6422237380627558, "grad_norm": 0.5182217359542847, "learning_rate": 6.000356576251075e-07, "loss": 0.5038, "step": 3766 }, { "epoch": 0.642394270122783, "grad_norm": 0.5182235240936279, "learning_rate": 5.995295009852862e-07, "loss": 0.5166, "step": 3767 }, { "epoch": 0.6425648021828103, "grad_norm": 0.45456960797309875, "learning_rate": 5.990234665116713e-07, "loss": 0.513, "step": 3768 }, { "epoch": 0.6427353342428377, "grad_norm": 0.5825948119163513, "learning_rate": 5.985175543586322e-07, "loss": 0.5027, "step": 3769 }, { "epoch": 0.642905866302865, "grad_norm": 0.4596474766731262, "learning_rate": 5.980117646805002e-07, "loss": 0.501, "step": 3770 }, { "epoch": 0.6430763983628922, "grad_norm": 0.5294000506401062, "learning_rate": 5.975060976315703e-07, "loss": 0.5101, "step": 3771 }, { "epoch": 0.6432469304229195, "grad_norm": 0.4442268908023834, "learning_rate": 5.970005533660997e-07, "loss": 0.5208, "step": 3772 }, { "epoch": 0.6434174624829468, "grad_norm": 0.510656476020813, "learning_rate": 5.964951320383072e-07, "loss": 0.5096, "step": 3773 }, { "epoch": 0.6435879945429741, "grad_norm": 0.5467284321784973, "learning_rate": 5.959898338023765e-07, "loss": 0.5131, "step": 3774 }, { "epoch": 0.6437585266030014, "grad_norm": 0.5428006052970886, "learning_rate": 5.954846588124506e-07, "loss": 0.5052, "step": 3775 }, { "epoch": 0.6439290586630286, "grad_norm": 0.5226187109947205, "learning_rate": 5.949796072226375e-07, "loss": 0.5006, "step": 3776 }, { "epoch": 0.6440995907230559, "grad_norm": 0.45977094769477844, "learning_rate": 5.944746791870063e-07, "loss": 0.5037, "step": 3777 }, { "epoch": 0.6442701227830833, "grad_norm": 0.43148553371429443, "learning_rate": 5.939698748595887e-07, "loss": 0.5105, "step": 3778 }, { "epoch": 0.6444406548431105, "grad_norm": 0.44732269644737244, "learning_rate": 5.934651943943786e-07, "loss": 0.4943, "step": 3779 }, { "epoch": 0.6446111869031378, "grad_norm": 0.4386376142501831, "learning_rate": 5.92960637945332e-07, "loss": 0.5176, "step": 3780 }, { "epoch": 0.644781718963165, "grad_norm": 0.4881812036037445, "learning_rate": 5.924562056663677e-07, "loss": 0.5126, "step": 3781 }, { "epoch": 0.6449522510231923, "grad_norm": 0.42850565910339355, "learning_rate": 5.919518977113653e-07, "loss": 0.4969, "step": 3782 }, { "epoch": 0.6451227830832197, "grad_norm": 0.5664390325546265, "learning_rate": 5.914477142341683e-07, "loss": 0.4965, "step": 3783 }, { "epoch": 0.645293315143247, "grad_norm": 0.45635077357292175, "learning_rate": 5.909436553885806e-07, "loss": 0.5197, "step": 3784 }, { "epoch": 0.6454638472032742, "grad_norm": 0.5621646046638489, "learning_rate": 5.90439721328369e-07, "loss": 0.4968, "step": 3785 }, { "epoch": 0.6456343792633015, "grad_norm": 0.4447271227836609, "learning_rate": 5.899359122072619e-07, "loss": 0.5149, "step": 3786 }, { "epoch": 0.6458049113233287, "grad_norm": 0.5080307722091675, "learning_rate": 5.894322281789495e-07, "loss": 0.5152, "step": 3787 }, { "epoch": 0.6459754433833561, "grad_norm": 0.5570019483566284, "learning_rate": 5.889286693970844e-07, "loss": 0.5046, "step": 3788 }, { "epoch": 0.6461459754433834, "grad_norm": 0.394611120223999, "learning_rate": 5.884252360152801e-07, "loss": 0.5162, "step": 3789 }, { "epoch": 0.6463165075034106, "grad_norm": 0.5731703639030457, "learning_rate": 5.879219281871125e-07, "loss": 0.5103, "step": 3790 }, { "epoch": 0.6464870395634379, "grad_norm": 0.48665788769721985, "learning_rate": 5.874187460661188e-07, "loss": 0.4975, "step": 3791 }, { "epoch": 0.6466575716234653, "grad_norm": 0.503473162651062, "learning_rate": 5.869156898057984e-07, "loss": 0.5027, "step": 3792 }, { "epoch": 0.6468281036834925, "grad_norm": 0.5446816682815552, "learning_rate": 5.864127595596119e-07, "loss": 0.5025, "step": 3793 }, { "epoch": 0.6469986357435198, "grad_norm": 0.4279031753540039, "learning_rate": 5.859099554809818e-07, "loss": 0.5001, "step": 3794 }, { "epoch": 0.647169167803547, "grad_norm": 0.6133118271827698, "learning_rate": 5.854072777232915e-07, "loss": 0.5042, "step": 3795 }, { "epoch": 0.6473396998635743, "grad_norm": 0.5323469638824463, "learning_rate": 5.84904726439886e-07, "loss": 0.505, "step": 3796 }, { "epoch": 0.6475102319236017, "grad_norm": 0.3823695778846741, "learning_rate": 5.844023017840725e-07, "loss": 0.4983, "step": 3797 }, { "epoch": 0.647680763983629, "grad_norm": 0.5067130923271179, "learning_rate": 5.839000039091185e-07, "loss": 0.5117, "step": 3798 }, { "epoch": 0.6478512960436562, "grad_norm": 0.4036625027656555, "learning_rate": 5.833978329682535e-07, "loss": 0.4973, "step": 3799 }, { "epoch": 0.6480218281036835, "grad_norm": 0.5435891151428223, "learning_rate": 5.828957891146682e-07, "loss": 0.5048, "step": 3800 }, { "epoch": 0.6481923601637107, "grad_norm": 0.3958360254764557, "learning_rate": 5.823938725015149e-07, "loss": 0.5119, "step": 3801 }, { "epoch": 0.6483628922237381, "grad_norm": 0.533546507358551, "learning_rate": 5.818920832819051e-07, "loss": 0.5019, "step": 3802 }, { "epoch": 0.6485334242837654, "grad_norm": 0.43487948179244995, "learning_rate": 5.813904216089149e-07, "loss": 0.5022, "step": 3803 }, { "epoch": 0.6487039563437926, "grad_norm": 0.4595380425453186, "learning_rate": 5.808888876355785e-07, "loss": 0.5076, "step": 3804 }, { "epoch": 0.6488744884038199, "grad_norm": 0.45527392625808716, "learning_rate": 5.803874815148917e-07, "loss": 0.503, "step": 3805 }, { "epoch": 0.6490450204638472, "grad_norm": 0.36405813694000244, "learning_rate": 5.798862033998132e-07, "loss": 0.5049, "step": 3806 }, { "epoch": 0.6492155525238745, "grad_norm": 0.5357425212860107, "learning_rate": 5.7938505344326e-07, "loss": 0.5053, "step": 3807 }, { "epoch": 0.6493860845839018, "grad_norm": 0.44985252618789673, "learning_rate": 5.788840317981118e-07, "loss": 0.5127, "step": 3808 }, { "epoch": 0.649556616643929, "grad_norm": 0.4584481418132782, "learning_rate": 5.783831386172083e-07, "loss": 0.5033, "step": 3809 }, { "epoch": 0.6497271487039563, "grad_norm": 0.6204929351806641, "learning_rate": 5.77882374053351e-07, "loss": 0.503, "step": 3810 }, { "epoch": 0.6498976807639836, "grad_norm": 0.42705973982810974, "learning_rate": 5.773817382593008e-07, "loss": 0.5033, "step": 3811 }, { "epoch": 0.650068212824011, "grad_norm": 0.3911517858505249, "learning_rate": 5.768812313877801e-07, "loss": 0.5098, "step": 3812 }, { "epoch": 0.6502387448840382, "grad_norm": 0.4088066816329956, "learning_rate": 5.763808535914724e-07, "loss": 0.494, "step": 3813 }, { "epoch": 0.6504092769440655, "grad_norm": 0.4033955931663513, "learning_rate": 5.758806050230199e-07, "loss": 0.5019, "step": 3814 }, { "epoch": 0.6505798090040927, "grad_norm": 0.4054994285106659, "learning_rate": 5.753804858350289e-07, "loss": 0.5012, "step": 3815 }, { "epoch": 0.6507503410641201, "grad_norm": 0.4501291811466217, "learning_rate": 5.748804961800623e-07, "loss": 0.507, "step": 3816 }, { "epoch": 0.6509208731241474, "grad_norm": 0.40860098600387573, "learning_rate": 5.743806362106461e-07, "loss": 0.5033, "step": 3817 }, { "epoch": 0.6510914051841746, "grad_norm": 0.4299365282058716, "learning_rate": 5.738809060792661e-07, "loss": 0.5023, "step": 3818 }, { "epoch": 0.6512619372442019, "grad_norm": 0.44453543424606323, "learning_rate": 5.733813059383677e-07, "loss": 0.5102, "step": 3819 }, { "epoch": 0.6514324693042292, "grad_norm": 0.40756890177726746, "learning_rate": 5.728818359403575e-07, "loss": 0.5032, "step": 3820 }, { "epoch": 0.6516030013642565, "grad_norm": 0.42828136682510376, "learning_rate": 5.723824962376022e-07, "loss": 0.5113, "step": 3821 }, { "epoch": 0.6517735334242838, "grad_norm": 0.4323231279850006, "learning_rate": 5.718832869824292e-07, "loss": 0.4994, "step": 3822 }, { "epoch": 0.651944065484311, "grad_norm": 0.49335092306137085, "learning_rate": 5.713842083271244e-07, "loss": 0.5107, "step": 3823 }, { "epoch": 0.6521145975443383, "grad_norm": 0.37755998969078064, "learning_rate": 5.708852604239367e-07, "loss": 0.5079, "step": 3824 }, { "epoch": 0.6522851296043656, "grad_norm": 0.43253105878829956, "learning_rate": 5.703864434250722e-07, "loss": 0.5041, "step": 3825 }, { "epoch": 0.652455661664393, "grad_norm": 0.49581003189086914, "learning_rate": 5.698877574826989e-07, "loss": 0.5075, "step": 3826 }, { "epoch": 0.6526261937244202, "grad_norm": 0.401373028755188, "learning_rate": 5.693892027489445e-07, "loss": 0.4968, "step": 3827 }, { "epoch": 0.6527967257844475, "grad_norm": 0.45689138770103455, "learning_rate": 5.688907793758959e-07, "loss": 0.509, "step": 3828 }, { "epoch": 0.6529672578444747, "grad_norm": 0.44455838203430176, "learning_rate": 5.683924875156007e-07, "loss": 0.5013, "step": 3829 }, { "epoch": 0.653137789904502, "grad_norm": 0.48018378019332886, "learning_rate": 5.678943273200662e-07, "loss": 0.5237, "step": 3830 }, { "epoch": 0.6533083219645294, "grad_norm": 0.43504878878593445, "learning_rate": 5.673962989412599e-07, "loss": 0.5118, "step": 3831 }, { "epoch": 0.6534788540245566, "grad_norm": 0.3849477767944336, "learning_rate": 5.668984025311077e-07, "loss": 0.5087, "step": 3832 }, { "epoch": 0.6536493860845839, "grad_norm": 0.45680201053619385, "learning_rate": 5.664006382414975e-07, "loss": 0.519, "step": 3833 }, { "epoch": 0.6538199181446112, "grad_norm": 0.4410555064678192, "learning_rate": 5.659030062242746e-07, "loss": 0.5174, "step": 3834 }, { "epoch": 0.6539904502046384, "grad_norm": 0.5044737458229065, "learning_rate": 5.654055066312455e-07, "loss": 0.5181, "step": 3835 }, { "epoch": 0.6541609822646658, "grad_norm": 0.4555228650569916, "learning_rate": 5.649081396141758e-07, "loss": 0.5098, "step": 3836 }, { "epoch": 0.654331514324693, "grad_norm": 0.4320026934146881, "learning_rate": 5.644109053247902e-07, "loss": 0.5083, "step": 3837 }, { "epoch": 0.6545020463847203, "grad_norm": 0.6127005815505981, "learning_rate": 5.639138039147737e-07, "loss": 0.5072, "step": 3838 }, { "epoch": 0.6546725784447476, "grad_norm": 0.5029910206794739, "learning_rate": 5.634168355357701e-07, "loss": 0.5104, "step": 3839 }, { "epoch": 0.654843110504775, "grad_norm": 0.4032624363899231, "learning_rate": 5.629200003393838e-07, "loss": 0.52, "step": 3840 }, { "epoch": 0.6550136425648022, "grad_norm": 0.4314282536506653, "learning_rate": 5.62423298477176e-07, "loss": 0.5172, "step": 3841 }, { "epoch": 0.6551841746248295, "grad_norm": 0.37860020995140076, "learning_rate": 5.619267301006711e-07, "loss": 0.5046, "step": 3842 }, { "epoch": 0.6553547066848567, "grad_norm": 0.4099591076374054, "learning_rate": 5.61430295361349e-07, "loss": 0.5122, "step": 3843 }, { "epoch": 0.655525238744884, "grad_norm": 0.38307932019233704, "learning_rate": 5.60933994410651e-07, "loss": 0.5057, "step": 3844 }, { "epoch": 0.6556957708049114, "grad_norm": 0.44933193922042847, "learning_rate": 5.604378273999773e-07, "loss": 0.5192, "step": 3845 }, { "epoch": 0.6558663028649386, "grad_norm": 0.3983853757381439, "learning_rate": 5.599417944806862e-07, "loss": 0.51, "step": 3846 }, { "epoch": 0.6560368349249659, "grad_norm": 0.43134453892707825, "learning_rate": 5.594458958040962e-07, "loss": 0.5024, "step": 3847 }, { "epoch": 0.6562073669849932, "grad_norm": 0.4693518877029419, "learning_rate": 5.58950131521485e-07, "loss": 0.5059, "step": 3848 }, { "epoch": 0.6563778990450204, "grad_norm": 0.48148661851882935, "learning_rate": 5.584545017840886e-07, "loss": 0.5057, "step": 3849 }, { "epoch": 0.6565484311050478, "grad_norm": 0.41671866178512573, "learning_rate": 5.579590067431016e-07, "loss": 0.5003, "step": 3850 }, { "epoch": 0.656718963165075, "grad_norm": 0.5516775846481323, "learning_rate": 5.574636465496783e-07, "loss": 0.5102, "step": 3851 }, { "epoch": 0.6568894952251023, "grad_norm": 0.3486853837966919, "learning_rate": 5.56968421354932e-07, "loss": 0.5124, "step": 3852 }, { "epoch": 0.6570600272851296, "grad_norm": 0.5256639719009399, "learning_rate": 5.564733313099344e-07, "loss": 0.5155, "step": 3853 }, { "epoch": 0.6572305593451568, "grad_norm": 0.45007023215293884, "learning_rate": 5.559783765657161e-07, "loss": 0.5015, "step": 3854 }, { "epoch": 0.6574010914051842, "grad_norm": 0.41359055042266846, "learning_rate": 5.554835572732658e-07, "loss": 0.5019, "step": 3855 }, { "epoch": 0.6575716234652115, "grad_norm": 0.4674064815044403, "learning_rate": 5.549888735835319e-07, "loss": 0.5111, "step": 3856 }, { "epoch": 0.6577421555252387, "grad_norm": 0.41691353917121887, "learning_rate": 5.544943256474208e-07, "loss": 0.4975, "step": 3857 }, { "epoch": 0.657912687585266, "grad_norm": 0.522437334060669, "learning_rate": 5.539999136157978e-07, "loss": 0.5235, "step": 3858 }, { "epoch": 0.6580832196452933, "grad_norm": 0.4822806119918823, "learning_rate": 5.535056376394869e-07, "loss": 0.5114, "step": 3859 }, { "epoch": 0.6582537517053206, "grad_norm": 0.4718954265117645, "learning_rate": 5.530114978692695e-07, "loss": 0.5035, "step": 3860 }, { "epoch": 0.6584242837653479, "grad_norm": 0.5045837759971619, "learning_rate": 5.525174944558867e-07, "loss": 0.5044, "step": 3861 }, { "epoch": 0.6585948158253752, "grad_norm": 0.4522147476673126, "learning_rate": 5.520236275500375e-07, "loss": 0.5004, "step": 3862 }, { "epoch": 0.6587653478854024, "grad_norm": 0.45062634348869324, "learning_rate": 5.515298973023797e-07, "loss": 0.5086, "step": 3863 }, { "epoch": 0.6589358799454298, "grad_norm": 0.5331596732139587, "learning_rate": 5.510363038635283e-07, "loss": 0.5131, "step": 3864 }, { "epoch": 0.659106412005457, "grad_norm": 0.6558175086975098, "learning_rate": 5.505428473840576e-07, "loss": 0.5163, "step": 3865 }, { "epoch": 0.6592769440654843, "grad_norm": 0.5517013669013977, "learning_rate": 5.500495280145003e-07, "loss": 0.5153, "step": 3866 }, { "epoch": 0.6594474761255116, "grad_norm": 0.5525789856910706, "learning_rate": 5.495563459053455e-07, "loss": 0.5121, "step": 3867 }, { "epoch": 0.6596180081855388, "grad_norm": 0.6826182007789612, "learning_rate": 5.490633012070433e-07, "loss": 0.5121, "step": 3868 }, { "epoch": 0.6597885402455662, "grad_norm": 0.393273264169693, "learning_rate": 5.485703940699989e-07, "loss": 0.4985, "step": 3869 }, { "epoch": 0.6599590723055935, "grad_norm": 0.5517546534538269, "learning_rate": 5.480776246445776e-07, "loss": 0.4929, "step": 3870 }, { "epoch": 0.6601296043656207, "grad_norm": 0.494389146566391, "learning_rate": 5.475849930811022e-07, "loss": 0.5149, "step": 3871 }, { "epoch": 0.660300136425648, "grad_norm": 0.4777924418449402, "learning_rate": 5.47092499529853e-07, "loss": 0.4982, "step": 3872 }, { "epoch": 0.6604706684856753, "grad_norm": 0.44324955344200134, "learning_rate": 5.466001441410682e-07, "loss": 0.506, "step": 3873 }, { "epoch": 0.6606412005457026, "grad_norm": 0.4134279191493988, "learning_rate": 5.461079270649445e-07, "loss": 0.4977, "step": 3874 }, { "epoch": 0.6608117326057299, "grad_norm": 0.4815727174282074, "learning_rate": 5.456158484516361e-07, "loss": 0.4951, "step": 3875 }, { "epoch": 0.6609822646657572, "grad_norm": 0.4558385908603668, "learning_rate": 5.451239084512538e-07, "loss": 0.4937, "step": 3876 }, { "epoch": 0.6611527967257844, "grad_norm": 0.43415579199790955, "learning_rate": 5.446321072138689e-07, "loss": 0.503, "step": 3877 }, { "epoch": 0.6613233287858117, "grad_norm": 0.4558171033859253, "learning_rate": 5.441404448895074e-07, "loss": 0.5016, "step": 3878 }, { "epoch": 0.661493860845839, "grad_norm": 0.4061969220638275, "learning_rate": 5.436489216281544e-07, "loss": 0.4943, "step": 3879 }, { "epoch": 0.6616643929058663, "grad_norm": 0.547091007232666, "learning_rate": 5.431575375797527e-07, "loss": 0.4971, "step": 3880 }, { "epoch": 0.6618349249658936, "grad_norm": 0.6131861209869385, "learning_rate": 5.426662928942024e-07, "loss": 0.5102, "step": 3881 }, { "epoch": 0.6620054570259208, "grad_norm": 0.4211820363998413, "learning_rate": 5.421751877213604e-07, "loss": 0.5098, "step": 3882 }, { "epoch": 0.6621759890859482, "grad_norm": 0.5724109411239624, "learning_rate": 5.416842222110419e-07, "loss": 0.5023, "step": 3883 }, { "epoch": 0.6623465211459755, "grad_norm": 0.4997083842754364, "learning_rate": 5.411933965130197e-07, "loss": 0.4978, "step": 3884 }, { "epoch": 0.6625170532060027, "grad_norm": 0.4442778527736664, "learning_rate": 5.40702710777022e-07, "loss": 0.5093, "step": 3885 }, { "epoch": 0.66268758526603, "grad_norm": 0.531562864780426, "learning_rate": 5.402121651527377e-07, "loss": 0.5012, "step": 3886 }, { "epoch": 0.6628581173260573, "grad_norm": 0.623119056224823, "learning_rate": 5.397217597898097e-07, "loss": 0.5025, "step": 3887 }, { "epoch": 0.6630286493860846, "grad_norm": 0.6248323321342468, "learning_rate": 5.392314948378397e-07, "loss": 0.4945, "step": 3888 }, { "epoch": 0.6631991814461119, "grad_norm": 0.40609654784202576, "learning_rate": 5.387413704463871e-07, "loss": 0.5072, "step": 3889 }, { "epoch": 0.6633697135061392, "grad_norm": 0.398261696100235, "learning_rate": 5.382513867649664e-07, "loss": 0.5041, "step": 3890 }, { "epoch": 0.6635402455661664, "grad_norm": 0.5225828289985657, "learning_rate": 5.377615439430508e-07, "loss": 0.5133, "step": 3891 }, { "epoch": 0.6637107776261937, "grad_norm": 0.3824782073497772, "learning_rate": 5.372718421300704e-07, "loss": 0.5098, "step": 3892 }, { "epoch": 0.663881309686221, "grad_norm": 0.5820587873458862, "learning_rate": 5.367822814754125e-07, "loss": 0.5074, "step": 3893 }, { "epoch": 0.6640518417462483, "grad_norm": 0.5361365079879761, "learning_rate": 5.362928621284194e-07, "loss": 0.5059, "step": 3894 }, { "epoch": 0.6642223738062756, "grad_norm": 0.49799999594688416, "learning_rate": 5.358035842383933e-07, "loss": 0.5044, "step": 3895 }, { "epoch": 0.6643929058663028, "grad_norm": 0.6610023975372314, "learning_rate": 5.353144479545907e-07, "loss": 0.5098, "step": 3896 }, { "epoch": 0.6645634379263301, "grad_norm": 0.4974028766155243, "learning_rate": 5.348254534262263e-07, "loss": 0.4858, "step": 3897 }, { "epoch": 0.6647339699863575, "grad_norm": 0.5856749415397644, "learning_rate": 5.343366008024713e-07, "loss": 0.4981, "step": 3898 }, { "epoch": 0.6649045020463847, "grad_norm": 0.5684167742729187, "learning_rate": 5.338478902324529e-07, "loss": 0.5152, "step": 3899 }, { "epoch": 0.665075034106412, "grad_norm": 0.5611979365348816, "learning_rate": 5.333593218652558e-07, "loss": 0.5063, "step": 3900 }, { "epoch": 0.6652455661664393, "grad_norm": 0.4313353896141052, "learning_rate": 5.328708958499212e-07, "loss": 0.5143, "step": 3901 }, { "epoch": 0.6654160982264665, "grad_norm": 0.4924471378326416, "learning_rate": 5.323826123354469e-07, "loss": 0.4946, "step": 3902 }, { "epoch": 0.6655866302864939, "grad_norm": 0.4859728217124939, "learning_rate": 5.318944714707861e-07, "loss": 0.5012, "step": 3903 }, { "epoch": 0.6657571623465212, "grad_norm": 0.40626779198646545, "learning_rate": 5.314064734048509e-07, "loss": 0.4989, "step": 3904 }, { "epoch": 0.6659276944065484, "grad_norm": 0.5151337385177612, "learning_rate": 5.309186182865077e-07, "loss": 0.5104, "step": 3905 }, { "epoch": 0.6660982264665757, "grad_norm": 0.3631163537502289, "learning_rate": 5.304309062645789e-07, "loss": 0.4974, "step": 3906 }, { "epoch": 0.666268758526603, "grad_norm": 0.4548703730106354, "learning_rate": 5.299433374878462e-07, "loss": 0.5019, "step": 3907 }, { "epoch": 0.6664392905866303, "grad_norm": 0.4133833944797516, "learning_rate": 5.294559121050443e-07, "loss": 0.5155, "step": 3908 }, { "epoch": 0.6666098226466576, "grad_norm": 0.41636455059051514, "learning_rate": 5.289686302648662e-07, "loss": 0.5054, "step": 3909 }, { "epoch": 0.6667803547066848, "grad_norm": 0.5298854112625122, "learning_rate": 5.284814921159603e-07, "loss": 0.5002, "step": 3910 }, { "epoch": 0.6669508867667121, "grad_norm": 0.481473833322525, "learning_rate": 5.279944978069319e-07, "loss": 0.491, "step": 3911 }, { "epoch": 0.6671214188267395, "grad_norm": 0.4647562801837921, "learning_rate": 5.275076474863409e-07, "loss": 0.522, "step": 3912 }, { "epoch": 0.6672919508867667, "grad_norm": 0.4734087586402893, "learning_rate": 5.270209413027048e-07, "loss": 0.5105, "step": 3913 }, { "epoch": 0.667462482946794, "grad_norm": 0.4895179271697998, "learning_rate": 5.265343794044965e-07, "loss": 0.5043, "step": 3914 }, { "epoch": 0.6676330150068213, "grad_norm": 0.5346952080726624, "learning_rate": 5.260479619401451e-07, "loss": 0.4984, "step": 3915 }, { "epoch": 0.6678035470668485, "grad_norm": 0.47937634587287903, "learning_rate": 5.255616890580357e-07, "loss": 0.4965, "step": 3916 }, { "epoch": 0.6679740791268759, "grad_norm": 0.46289533376693726, "learning_rate": 5.250755609065083e-07, "loss": 0.5058, "step": 3917 }, { "epoch": 0.6681446111869032, "grad_norm": 0.6581227779388428, "learning_rate": 5.245895776338602e-07, "loss": 0.4989, "step": 3918 }, { "epoch": 0.6683151432469304, "grad_norm": 0.573941171169281, "learning_rate": 5.241037393883436e-07, "loss": 0.4946, "step": 3919 }, { "epoch": 0.6684856753069577, "grad_norm": 0.5698941946029663, "learning_rate": 5.236180463181673e-07, "loss": 0.4941, "step": 3920 }, { "epoch": 0.668656207366985, "grad_norm": 0.47878286242485046, "learning_rate": 5.231324985714942e-07, "loss": 0.511, "step": 3921 }, { "epoch": 0.6688267394270123, "grad_norm": 0.4517112672328949, "learning_rate": 5.226470962964445e-07, "loss": 0.4999, "step": 3922 }, { "epoch": 0.6689972714870396, "grad_norm": 0.45299017429351807, "learning_rate": 5.221618396410933e-07, "loss": 0.4928, "step": 3923 }, { "epoch": 0.6691678035470668, "grad_norm": 0.5403193831443787, "learning_rate": 5.216767287534714e-07, "loss": 0.5172, "step": 3924 }, { "epoch": 0.6693383356070941, "grad_norm": 0.46739518642425537, "learning_rate": 5.211917637815655e-07, "loss": 0.5099, "step": 3925 }, { "epoch": 0.6695088676671214, "grad_norm": 0.38346195220947266, "learning_rate": 5.207069448733166e-07, "loss": 0.5008, "step": 3926 }, { "epoch": 0.6696793997271487, "grad_norm": 0.5461332201957703, "learning_rate": 5.202222721766226e-07, "loss": 0.5074, "step": 3927 }, { "epoch": 0.669849931787176, "grad_norm": 0.3497057855129242, "learning_rate": 5.197377458393363e-07, "loss": 0.5023, "step": 3928 }, { "epoch": 0.6700204638472033, "grad_norm": 0.48500290513038635, "learning_rate": 5.192533660092647e-07, "loss": 0.5051, "step": 3929 }, { "epoch": 0.6701909959072305, "grad_norm": 0.4321812689304352, "learning_rate": 5.18769132834172e-07, "loss": 0.5069, "step": 3930 }, { "epoch": 0.6703615279672579, "grad_norm": 0.380292683839798, "learning_rate": 5.182850464617766e-07, "loss": 0.5117, "step": 3931 }, { "epoch": 0.6705320600272852, "grad_norm": 0.535532534122467, "learning_rate": 5.17801107039752e-07, "loss": 0.5078, "step": 3932 }, { "epoch": 0.6707025920873124, "grad_norm": 0.5220469832420349, "learning_rate": 5.173173147157276e-07, "loss": 0.5089, "step": 3933 }, { "epoch": 0.6708731241473397, "grad_norm": 0.42341408133506775, "learning_rate": 5.168336696372876e-07, "loss": 0.4918, "step": 3934 }, { "epoch": 0.671043656207367, "grad_norm": 0.37881559133529663, "learning_rate": 5.163501719519704e-07, "loss": 0.4889, "step": 3935 }, { "epoch": 0.6712141882673943, "grad_norm": 0.42093196511268616, "learning_rate": 5.158668218072706e-07, "loss": 0.5068, "step": 3936 }, { "epoch": 0.6713847203274216, "grad_norm": 0.41691750288009644, "learning_rate": 5.153836193506379e-07, "loss": 0.504, "step": 3937 }, { "epoch": 0.6715552523874488, "grad_norm": 0.4368269741535187, "learning_rate": 5.149005647294755e-07, "loss": 0.5031, "step": 3938 }, { "epoch": 0.6717257844474761, "grad_norm": 0.385061115026474, "learning_rate": 5.144176580911431e-07, "loss": 0.5033, "step": 3939 }, { "epoch": 0.6718963165075034, "grad_norm": 0.40677857398986816, "learning_rate": 5.139348995829544e-07, "loss": 0.5092, "step": 3940 }, { "epoch": 0.6720668485675307, "grad_norm": 0.3767397701740265, "learning_rate": 5.13452289352178e-07, "loss": 0.515, "step": 3941 }, { "epoch": 0.672237380627558, "grad_norm": 0.36541303992271423, "learning_rate": 5.129698275460376e-07, "loss": 0.5199, "step": 3942 }, { "epoch": 0.6724079126875853, "grad_norm": 0.35933759808540344, "learning_rate": 5.124875143117117e-07, "loss": 0.5014, "step": 3943 }, { "epoch": 0.6725784447476125, "grad_norm": 0.5467775464057922, "learning_rate": 5.120053497963325e-07, "loss": 0.5118, "step": 3944 }, { "epoch": 0.6727489768076398, "grad_norm": 0.45338207483291626, "learning_rate": 5.115233341469878e-07, "loss": 0.5086, "step": 3945 }, { "epoch": 0.6729195088676672, "grad_norm": 0.42042842507362366, "learning_rate": 5.1104146751072e-07, "loss": 0.5013, "step": 3946 }, { "epoch": 0.6730900409276944, "grad_norm": 0.5868545770645142, "learning_rate": 5.105597500345249e-07, "loss": 0.5007, "step": 3947 }, { "epoch": 0.6732605729877217, "grad_norm": 0.41611066460609436, "learning_rate": 5.100781818653548e-07, "loss": 0.5186, "step": 3948 }, { "epoch": 0.673431105047749, "grad_norm": 0.5460180640220642, "learning_rate": 5.095967631501144e-07, "loss": 0.5149, "step": 3949 }, { "epoch": 0.6736016371077762, "grad_norm": 0.5588499307632446, "learning_rate": 5.09115494035664e-07, "loss": 0.5001, "step": 3950 }, { "epoch": 0.6737721691678036, "grad_norm": 0.3679949641227722, "learning_rate": 5.086343746688184e-07, "loss": 0.5091, "step": 3951 }, { "epoch": 0.6739427012278308, "grad_norm": 0.4741192162036896, "learning_rate": 5.081534051963455e-07, "loss": 0.5053, "step": 3952 }, { "epoch": 0.6741132332878581, "grad_norm": 0.4042551517486572, "learning_rate": 5.076725857649685e-07, "loss": 0.4951, "step": 3953 }, { "epoch": 0.6742837653478854, "grad_norm": 0.5772173404693604, "learning_rate": 5.071919165213648e-07, "loss": 0.5071, "step": 3954 }, { "epoch": 0.6744542974079127, "grad_norm": 0.5814014077186584, "learning_rate": 5.067113976121659e-07, "loss": 0.5097, "step": 3955 }, { "epoch": 0.67462482946794, "grad_norm": 0.4528033137321472, "learning_rate": 5.062310291839563e-07, "loss": 0.4971, "step": 3956 }, { "epoch": 0.6747953615279673, "grad_norm": 0.5089178085327148, "learning_rate": 5.057508113832773e-07, "loss": 0.5093, "step": 3957 }, { "epoch": 0.6749658935879945, "grad_norm": 0.591621994972229, "learning_rate": 5.052707443566212e-07, "loss": 0.5052, "step": 3958 }, { "epoch": 0.6751364256480218, "grad_norm": 0.6256417036056519, "learning_rate": 5.047908282504361e-07, "loss": 0.5047, "step": 3959 }, { "epoch": 0.6753069577080492, "grad_norm": 0.48071393370628357, "learning_rate": 5.043110632111239e-07, "loss": 0.5114, "step": 3960 }, { "epoch": 0.6754774897680764, "grad_norm": 0.4122591018676758, "learning_rate": 5.038314493850397e-07, "loss": 0.5142, "step": 3961 }, { "epoch": 0.6756480218281037, "grad_norm": 0.5708420276641846, "learning_rate": 5.033519869184928e-07, "loss": 0.4986, "step": 3962 }, { "epoch": 0.675818553888131, "grad_norm": 0.5030431747436523, "learning_rate": 5.028726759577467e-07, "loss": 0.4956, "step": 3963 }, { "epoch": 0.6759890859481582, "grad_norm": 0.46762701869010925, "learning_rate": 5.023935166490189e-07, "loss": 0.5037, "step": 3964 }, { "epoch": 0.6761596180081856, "grad_norm": 0.46051761507987976, "learning_rate": 5.01914509138479e-07, "loss": 0.5174, "step": 3965 }, { "epoch": 0.6763301500682128, "grad_norm": 0.4886033535003662, "learning_rate": 5.014356535722527e-07, "loss": 0.5029, "step": 3966 }, { "epoch": 0.6765006821282401, "grad_norm": 0.440492182970047, "learning_rate": 5.009569500964176e-07, "loss": 0.4995, "step": 3967 }, { "epoch": 0.6766712141882674, "grad_norm": 0.31643933057785034, "learning_rate": 5.004783988570046e-07, "loss": 0.5057, "step": 3968 }, { "epoch": 0.6768417462482946, "grad_norm": 0.4844537377357483, "learning_rate": 5.000000000000003e-07, "loss": 0.5024, "step": 3969 }, { "epoch": 0.677012278308322, "grad_norm": 0.45205482840538025, "learning_rate": 4.995217536713425e-07, "loss": 0.4922, "step": 3970 }, { "epoch": 0.6771828103683493, "grad_norm": 0.40974223613739014, "learning_rate": 4.990436600169238e-07, "loss": 0.4809, "step": 3971 }, { "epoch": 0.6773533424283765, "grad_norm": 0.45243552327156067, "learning_rate": 4.985657191825898e-07, "loss": 0.4977, "step": 3972 }, { "epoch": 0.6775238744884038, "grad_norm": 0.4544006288051605, "learning_rate": 4.980879313141401e-07, "loss": 0.5197, "step": 3973 }, { "epoch": 0.677694406548431, "grad_norm": 0.375340074300766, "learning_rate": 4.976102965573257e-07, "loss": 0.5101, "step": 3974 }, { "epoch": 0.6778649386084584, "grad_norm": 0.5214848518371582, "learning_rate": 4.97132815057854e-07, "loss": 0.5032, "step": 3975 }, { "epoch": 0.6780354706684857, "grad_norm": 0.501172661781311, "learning_rate": 4.966554869613832e-07, "loss": 0.5026, "step": 3976 }, { "epoch": 0.678206002728513, "grad_norm": 0.5273376703262329, "learning_rate": 4.961783124135245e-07, "loss": 0.5062, "step": 3977 }, { "epoch": 0.6783765347885402, "grad_norm": 0.5391064286231995, "learning_rate": 4.957012915598446e-07, "loss": 0.5048, "step": 3978 }, { "epoch": 0.6785470668485676, "grad_norm": 0.4569733440876007, "learning_rate": 4.952244245458612e-07, "loss": 0.4924, "step": 3979 }, { "epoch": 0.6787175989085948, "grad_norm": 0.4716772139072418, "learning_rate": 4.947477115170457e-07, "loss": 0.5129, "step": 3980 }, { "epoch": 0.6788881309686221, "grad_norm": 0.5155799388885498, "learning_rate": 4.94271152618823e-07, "loss": 0.4999, "step": 3981 }, { "epoch": 0.6790586630286494, "grad_norm": 0.44789519906044006, "learning_rate": 4.937947479965706e-07, "loss": 0.4979, "step": 3982 }, { "epoch": 0.6792291950886766, "grad_norm": 0.5172362327575684, "learning_rate": 4.933184977956184e-07, "loss": 0.4973, "step": 3983 }, { "epoch": 0.679399727148704, "grad_norm": 0.6166170835494995, "learning_rate": 4.928424021612499e-07, "loss": 0.5038, "step": 3984 }, { "epoch": 0.6795702592087313, "grad_norm": 0.40173619985580444, "learning_rate": 4.92366461238702e-07, "loss": 0.5035, "step": 3985 }, { "epoch": 0.6797407912687585, "grad_norm": 0.4869058430194855, "learning_rate": 4.918906751731621e-07, "loss": 0.5071, "step": 3986 }, { "epoch": 0.6799113233287858, "grad_norm": 0.5133165717124939, "learning_rate": 4.914150441097737e-07, "loss": 0.5077, "step": 3987 }, { "epoch": 0.680081855388813, "grad_norm": 0.5123385787010193, "learning_rate": 4.909395681936298e-07, "loss": 0.5151, "step": 3988 }, { "epoch": 0.6802523874488404, "grad_norm": 0.4512058198451996, "learning_rate": 4.904642475697783e-07, "loss": 0.5105, "step": 3989 }, { "epoch": 0.6804229195088677, "grad_norm": 0.47699469327926636, "learning_rate": 4.899890823832192e-07, "loss": 0.503, "step": 3990 }, { "epoch": 0.680593451568895, "grad_norm": 0.5350492596626282, "learning_rate": 4.895140727789038e-07, "loss": 0.4987, "step": 3991 }, { "epoch": 0.6807639836289222, "grad_norm": 0.3770885765552521, "learning_rate": 4.890392189017376e-07, "loss": 0.5131, "step": 3992 }, { "epoch": 0.6809345156889495, "grad_norm": 0.5495342016220093, "learning_rate": 4.885645208965779e-07, "loss": 0.4944, "step": 3993 }, { "epoch": 0.6811050477489768, "grad_norm": 0.5257266759872437, "learning_rate": 4.880899789082349e-07, "loss": 0.5086, "step": 3994 }, { "epoch": 0.6812755798090041, "grad_norm": 0.41709569096565247, "learning_rate": 4.876155930814696e-07, "loss": 0.5007, "step": 3995 }, { "epoch": 0.6814461118690314, "grad_norm": 0.5326833724975586, "learning_rate": 4.87141363560998e-07, "loss": 0.4997, "step": 3996 }, { "epoch": 0.6816166439290586, "grad_norm": 0.4406546354293823, "learning_rate": 4.866672904914863e-07, "loss": 0.5021, "step": 3997 }, { "epoch": 0.681787175989086, "grad_norm": 0.4062040150165558, "learning_rate": 4.861933740175535e-07, "loss": 0.5065, "step": 3998 }, { "epoch": 0.6819577080491133, "grad_norm": 0.46950456500053406, "learning_rate": 4.857196142837716e-07, "loss": 0.4975, "step": 3999 }, { "epoch": 0.6821282401091405, "grad_norm": 0.3387594223022461, "learning_rate": 4.852460114346635e-07, "loss": 0.5033, "step": 4000 }, { "epoch": 0.6822987721691678, "grad_norm": 0.44981011748313904, "learning_rate": 4.847725656147052e-07, "loss": 0.5117, "step": 4001 }, { "epoch": 0.682469304229195, "grad_norm": 0.3584601581096649, "learning_rate": 4.842992769683243e-07, "loss": 0.5091, "step": 4002 }, { "epoch": 0.6826398362892224, "grad_norm": 0.38688042759895325, "learning_rate": 4.838261456399012e-07, "loss": 0.5131, "step": 4003 }, { "epoch": 0.6828103683492497, "grad_norm": 0.43309056758880615, "learning_rate": 4.833531717737673e-07, "loss": 0.5038, "step": 4004 }, { "epoch": 0.682980900409277, "grad_norm": 0.3956446647644043, "learning_rate": 4.82880355514207e-07, "loss": 0.5068, "step": 4005 }, { "epoch": 0.6831514324693042, "grad_norm": 0.3466695547103882, "learning_rate": 4.824076970054554e-07, "loss": 0.4962, "step": 4006 }, { "epoch": 0.6833219645293315, "grad_norm": 0.34976479411125183, "learning_rate": 4.819351963917004e-07, "loss": 0.4899, "step": 4007 }, { "epoch": 0.6834924965893588, "grad_norm": 0.4025235176086426, "learning_rate": 4.814628538170819e-07, "loss": 0.506, "step": 4008 }, { "epoch": 0.6836630286493861, "grad_norm": 0.36366453766822815, "learning_rate": 4.809906694256903e-07, "loss": 0.5015, "step": 4009 }, { "epoch": 0.6838335607094134, "grad_norm": 0.36603251099586487, "learning_rate": 4.805186433615691e-07, "loss": 0.5039, "step": 4010 }, { "epoch": 0.6840040927694406, "grad_norm": 0.4938323199748993, "learning_rate": 4.800467757687131e-07, "loss": 0.4977, "step": 4011 }, { "epoch": 0.6841746248294679, "grad_norm": 0.5089431405067444, "learning_rate": 4.795750667910684e-07, "loss": 0.4969, "step": 4012 }, { "epoch": 0.6843451568894953, "grad_norm": 0.3810231685638428, "learning_rate": 4.791035165725332e-07, "loss": 0.5231, "step": 4013 }, { "epoch": 0.6845156889495225, "grad_norm": 0.3925723433494568, "learning_rate": 4.786321252569574e-07, "loss": 0.5087, "step": 4014 }, { "epoch": 0.6846862210095498, "grad_norm": 0.4524107873439789, "learning_rate": 4.781608929881412e-07, "loss": 0.4986, "step": 4015 }, { "epoch": 0.684856753069577, "grad_norm": 0.3928070664405823, "learning_rate": 4.776898199098377e-07, "loss": 0.5033, "step": 4016 }, { "epoch": 0.6850272851296043, "grad_norm": 0.47441133856773376, "learning_rate": 4.772189061657511e-07, "loss": 0.5035, "step": 4017 }, { "epoch": 0.6851978171896317, "grad_norm": 0.5430171489715576, "learning_rate": 4.767481518995362e-07, "loss": 0.5076, "step": 4018 }, { "epoch": 0.685368349249659, "grad_norm": 0.4817226231098175, "learning_rate": 4.762775572548e-07, "loss": 0.5049, "step": 4019 }, { "epoch": 0.6855388813096862, "grad_norm": 0.5431926846504211, "learning_rate": 4.7580712237510067e-07, "loss": 0.4965, "step": 4020 }, { "epoch": 0.6857094133697135, "grad_norm": 0.47494205832481384, "learning_rate": 4.753368474039474e-07, "loss": 0.5144, "step": 4021 }, { "epoch": 0.6858799454297408, "grad_norm": 0.3338436782360077, "learning_rate": 4.7486673248480113e-07, "loss": 0.503, "step": 4022 }, { "epoch": 0.6860504774897681, "grad_norm": 0.5947521328926086, "learning_rate": 4.743967777610728e-07, "loss": 0.5073, "step": 4023 }, { "epoch": 0.6862210095497954, "grad_norm": 0.5065069794654846, "learning_rate": 4.739269833761257e-07, "loss": 0.4901, "step": 4024 }, { "epoch": 0.6863915416098226, "grad_norm": 0.594238817691803, "learning_rate": 4.7345734947327354e-07, "loss": 0.4983, "step": 4025 }, { "epoch": 0.6865620736698499, "grad_norm": 0.4637637734413147, "learning_rate": 4.7298787619578195e-07, "loss": 0.5051, "step": 4026 }, { "epoch": 0.6867326057298773, "grad_norm": 0.4788523316383362, "learning_rate": 4.725185636868659e-07, "loss": 0.5207, "step": 4027 }, { "epoch": 0.6869031377899045, "grad_norm": 1.1171875, "learning_rate": 4.7204941208969283e-07, "loss": 0.5016, "step": 4028 }, { "epoch": 0.6870736698499318, "grad_norm": 0.4261132776737213, "learning_rate": 4.7158042154738094e-07, "loss": 0.4909, "step": 4029 }, { "epoch": 0.687244201909959, "grad_norm": 0.5414057970046997, "learning_rate": 4.711115922029978e-07, "loss": 0.507, "step": 4030 }, { "epoch": 0.6874147339699863, "grad_norm": 0.5545263290405273, "learning_rate": 4.706429241995643e-07, "loss": 0.507, "step": 4031 }, { "epoch": 0.6875852660300137, "grad_norm": 0.41530779004096985, "learning_rate": 4.7017441768004984e-07, "loss": 0.4946, "step": 4032 }, { "epoch": 0.687755798090041, "grad_norm": 0.4105913043022156, "learning_rate": 4.697060727873757e-07, "loss": 0.5042, "step": 4033 }, { "epoch": 0.6879263301500682, "grad_norm": 0.4197026491165161, "learning_rate": 4.692378896644138e-07, "loss": 0.5025, "step": 4034 }, { "epoch": 0.6880968622100955, "grad_norm": 0.418058842420578, "learning_rate": 4.6876986845398666e-07, "loss": 0.4983, "step": 4035 }, { "epoch": 0.6882673942701227, "grad_norm": 0.5100137591362, "learning_rate": 4.683020092988663e-07, "loss": 0.5035, "step": 4036 }, { "epoch": 0.6884379263301501, "grad_norm": 0.44037339091300964, "learning_rate": 4.6783431234177785e-07, "loss": 0.4955, "step": 4037 }, { "epoch": 0.6886084583901774, "grad_norm": 0.42577651143074036, "learning_rate": 4.6736677772539445e-07, "loss": 0.5116, "step": 4038 }, { "epoch": 0.6887789904502046, "grad_norm": 0.39582791924476624, "learning_rate": 4.668994055923401e-07, "loss": 0.4927, "step": 4039 }, { "epoch": 0.6889495225102319, "grad_norm": 0.5501611828804016, "learning_rate": 4.6643219608519124e-07, "loss": 0.4979, "step": 4040 }, { "epoch": 0.6891200545702592, "grad_norm": 0.5625162124633789, "learning_rate": 4.659651493464721e-07, "loss": 0.499, "step": 4041 }, { "epoch": 0.6892905866302865, "grad_norm": 0.42893165349960327, "learning_rate": 4.6549826551865875e-07, "loss": 0.4943, "step": 4042 }, { "epoch": 0.6894611186903138, "grad_norm": 0.47238093614578247, "learning_rate": 4.650315447441774e-07, "loss": 0.4977, "step": 4043 }, { "epoch": 0.689631650750341, "grad_norm": 0.38662639260292053, "learning_rate": 4.645649871654046e-07, "loss": 0.5065, "step": 4044 }, { "epoch": 0.6898021828103683, "grad_norm": 0.4396331012248993, "learning_rate": 4.64098592924666e-07, "loss": 0.4983, "step": 4045 }, { "epoch": 0.6899727148703957, "grad_norm": 0.4275635778903961, "learning_rate": 4.636323621642389e-07, "loss": 0.5036, "step": 4046 }, { "epoch": 0.690143246930423, "grad_norm": 0.4336490333080292, "learning_rate": 4.631662950263503e-07, "loss": 0.4962, "step": 4047 }, { "epoch": 0.6903137789904502, "grad_norm": 0.40953293442726135, "learning_rate": 4.627003916531761e-07, "loss": 0.5083, "step": 4048 }, { "epoch": 0.6904843110504775, "grad_norm": 0.44607603549957275, "learning_rate": 4.6223465218684463e-07, "loss": 0.5046, "step": 4049 }, { "epoch": 0.6906548431105047, "grad_norm": 0.3914839029312134, "learning_rate": 4.617690767694319e-07, "loss": 0.4991, "step": 4050 }, { "epoch": 0.6908253751705321, "grad_norm": 0.35671597719192505, "learning_rate": 4.6130366554296494e-07, "loss": 0.5078, "step": 4051 }, { "epoch": 0.6909959072305594, "grad_norm": 0.5149934887886047, "learning_rate": 4.608384186494206e-07, "loss": 0.4957, "step": 4052 }, { "epoch": 0.6911664392905866, "grad_norm": 0.3881840109825134, "learning_rate": 4.6037333623072613e-07, "loss": 0.4949, "step": 4053 }, { "epoch": 0.6913369713506139, "grad_norm": 0.4083363115787506, "learning_rate": 4.5990841842875714e-07, "loss": 0.4928, "step": 4054 }, { "epoch": 0.6915075034106412, "grad_norm": 0.4419262707233429, "learning_rate": 4.5944366538534027e-07, "loss": 0.4883, "step": 4055 }, { "epoch": 0.6916780354706685, "grad_norm": 0.43516024947166443, "learning_rate": 4.5897907724225194e-07, "loss": 0.4854, "step": 4056 }, { "epoch": 0.6918485675306958, "grad_norm": 0.6483615636825562, "learning_rate": 4.585146541412168e-07, "loss": 0.5063, "step": 4057 }, { "epoch": 0.692019099590723, "grad_norm": 0.49583396315574646, "learning_rate": 4.5805039622391174e-07, "loss": 0.4937, "step": 4058 }, { "epoch": 0.6921896316507503, "grad_norm": 0.4556744694709778, "learning_rate": 4.575863036319605e-07, "loss": 0.5005, "step": 4059 }, { "epoch": 0.6923601637107776, "grad_norm": 0.5007831454277039, "learning_rate": 4.5712237650693804e-07, "loss": 0.5054, "step": 4060 }, { "epoch": 0.692530695770805, "grad_norm": 0.41077107191085815, "learning_rate": 4.5665861499036894e-07, "loss": 0.5005, "step": 4061 }, { "epoch": 0.6927012278308322, "grad_norm": 0.48699361085891724, "learning_rate": 4.561950192237259e-07, "loss": 0.4917, "step": 4062 }, { "epoch": 0.6928717598908595, "grad_norm": 0.5592864751815796, "learning_rate": 4.5573158934843236e-07, "loss": 0.5045, "step": 4063 }, { "epoch": 0.6930422919508867, "grad_norm": 0.5952345132827759, "learning_rate": 4.5526832550586056e-07, "loss": 0.5055, "step": 4064 }, { "epoch": 0.693212824010914, "grad_norm": 0.5739447474479675, "learning_rate": 4.5480522783733276e-07, "loss": 0.4986, "step": 4065 }, { "epoch": 0.6933833560709414, "grad_norm": 0.4101806581020355, "learning_rate": 4.543422964841188e-07, "loss": 0.506, "step": 4066 }, { "epoch": 0.6935538881309686, "grad_norm": 0.561511754989624, "learning_rate": 4.538795315874405e-07, "loss": 0.4895, "step": 4067 }, { "epoch": 0.6937244201909959, "grad_norm": 0.539454996585846, "learning_rate": 4.5341693328846627e-07, "loss": 0.5154, "step": 4068 }, { "epoch": 0.6938949522510232, "grad_norm": 0.4128101170063019, "learning_rate": 4.5295450172831515e-07, "loss": 0.502, "step": 4069 }, { "epoch": 0.6940654843110505, "grad_norm": 0.4938734173774719, "learning_rate": 4.524922370480555e-07, "loss": 0.4923, "step": 4070 }, { "epoch": 0.6942360163710778, "grad_norm": 0.4720302224159241, "learning_rate": 4.5203013938870324e-07, "loss": 0.5023, "step": 4071 }, { "epoch": 0.694406548431105, "grad_norm": 0.42051199078559875, "learning_rate": 4.51568208891225e-07, "loss": 0.5067, "step": 4072 }, { "epoch": 0.6945770804911323, "grad_norm": 0.5297553539276123, "learning_rate": 4.511064456965358e-07, "loss": 0.4957, "step": 4073 }, { "epoch": 0.6947476125511596, "grad_norm": 0.5808680653572083, "learning_rate": 4.5064484994549966e-07, "loss": 0.5094, "step": 4074 }, { "epoch": 0.694918144611187, "grad_norm": 0.5174522399902344, "learning_rate": 4.5018342177892853e-07, "loss": 0.5117, "step": 4075 }, { "epoch": 0.6950886766712142, "grad_norm": 0.41268858313560486, "learning_rate": 4.4972216133758565e-07, "loss": 0.5047, "step": 4076 }, { "epoch": 0.6952592087312415, "grad_norm": 0.5427170395851135, "learning_rate": 4.4926106876218047e-07, "loss": 0.5143, "step": 4077 }, { "epoch": 0.6954297407912687, "grad_norm": 0.545264720916748, "learning_rate": 4.4880014419337274e-07, "loss": 0.5045, "step": 4078 }, { "epoch": 0.695600272851296, "grad_norm": 0.4440051317214966, "learning_rate": 4.483393877717709e-07, "loss": 0.4965, "step": 4079 }, { "epoch": 0.6957708049113234, "grad_norm": 0.6596631407737732, "learning_rate": 4.4787879963793116e-07, "loss": 0.5055, "step": 4080 }, { "epoch": 0.6959413369713506, "grad_norm": 0.6201470494270325, "learning_rate": 4.474183799323593e-07, "loss": 0.5144, "step": 4081 }, { "epoch": 0.6961118690313779, "grad_norm": 0.5427371263504028, "learning_rate": 4.4695812879550943e-07, "loss": 0.5019, "step": 4082 }, { "epoch": 0.6962824010914052, "grad_norm": 0.6451879739761353, "learning_rate": 4.4649804636778466e-07, "loss": 0.4977, "step": 4083 }, { "epoch": 0.6964529331514324, "grad_norm": 0.47948503494262695, "learning_rate": 4.460381327895354e-07, "loss": 0.5071, "step": 4084 }, { "epoch": 0.6966234652114598, "grad_norm": 0.7414005398750305, "learning_rate": 4.455783882010617e-07, "loss": 0.4989, "step": 4085 }, { "epoch": 0.696793997271487, "grad_norm": 0.4640277326107025, "learning_rate": 4.45118812742612e-07, "loss": 0.4982, "step": 4086 }, { "epoch": 0.6969645293315143, "grad_norm": 0.6825625896453857, "learning_rate": 4.4465940655438246e-07, "loss": 0.52, "step": 4087 }, { "epoch": 0.6971350613915416, "grad_norm": 0.7004467248916626, "learning_rate": 4.442001697765187e-07, "loss": 0.4982, "step": 4088 }, { "epoch": 0.697305593451569, "grad_norm": 0.40038001537323, "learning_rate": 4.437411025491131e-07, "loss": 0.498, "step": 4089 }, { "epoch": 0.6974761255115962, "grad_norm": 0.5949633121490479, "learning_rate": 4.432822050122077e-07, "loss": 0.4982, "step": 4090 }, { "epoch": 0.6976466575716235, "grad_norm": 0.4295502007007599, "learning_rate": 4.4282347730579203e-07, "loss": 0.51, "step": 4091 }, { "epoch": 0.6978171896316507, "grad_norm": 0.44073379039764404, "learning_rate": 4.4236491956980425e-07, "loss": 0.5196, "step": 4092 }, { "epoch": 0.697987721691678, "grad_norm": 0.5889855623245239, "learning_rate": 4.419065319441306e-07, "loss": 0.496, "step": 4093 }, { "epoch": 0.6981582537517054, "grad_norm": 0.39880111813545227, "learning_rate": 4.4144831456860485e-07, "loss": 0.5035, "step": 4094 }, { "epoch": 0.6983287858117326, "grad_norm": 0.5149140954017639, "learning_rate": 4.409902675830095e-07, "loss": 0.5002, "step": 4095 }, { "epoch": 0.6984993178717599, "grad_norm": 0.41448941826820374, "learning_rate": 4.4053239112707483e-07, "loss": 0.5047, "step": 4096 }, { "epoch": 0.6986698499317872, "grad_norm": 0.5223191976547241, "learning_rate": 4.400746853404795e-07, "loss": 0.504, "step": 4097 }, { "epoch": 0.6988403819918144, "grad_norm": 0.3906937837600708, "learning_rate": 4.3961715036284906e-07, "loss": 0.4993, "step": 4098 }, { "epoch": 0.6990109140518418, "grad_norm": 0.5009825825691223, "learning_rate": 4.391597863337579e-07, "loss": 0.5064, "step": 4099 }, { "epoch": 0.699181446111869, "grad_norm": 0.4940502643585205, "learning_rate": 4.3870259339272827e-07, "loss": 0.4939, "step": 4100 }, { "epoch": 0.6993519781718963, "grad_norm": 0.3717692792415619, "learning_rate": 4.3824557167922913e-07, "loss": 0.4929, "step": 4101 }, { "epoch": 0.6995225102319236, "grad_norm": 0.4262048900127411, "learning_rate": 4.3778872133267915e-07, "loss": 0.4985, "step": 4102 }, { "epoch": 0.6996930422919508, "grad_norm": 0.3647048771381378, "learning_rate": 4.373320424924427e-07, "loss": 0.4954, "step": 4103 }, { "epoch": 0.6998635743519782, "grad_norm": 0.5506351590156555, "learning_rate": 4.368755352978329e-07, "loss": 0.5134, "step": 4104 }, { "epoch": 0.7000341064120055, "grad_norm": 0.4715368449687958, "learning_rate": 4.364191998881105e-07, "loss": 0.5025, "step": 4105 }, { "epoch": 0.7002046384720327, "grad_norm": 0.46583038568496704, "learning_rate": 4.3596303640248395e-07, "loss": 0.5079, "step": 4106 }, { "epoch": 0.70037517053206, "grad_norm": 0.475370854139328, "learning_rate": 4.3550704498010837e-07, "loss": 0.4936, "step": 4107 }, { "epoch": 0.7005457025920873, "grad_norm": 0.46047744154930115, "learning_rate": 4.350512257600871e-07, "loss": 0.493, "step": 4108 }, { "epoch": 0.7007162346521146, "grad_norm": 0.5731782913208008, "learning_rate": 4.3459557888147144e-07, "loss": 0.5037, "step": 4109 }, { "epoch": 0.7008867667121419, "grad_norm": 0.4613938331604004, "learning_rate": 4.341401044832583e-07, "loss": 0.4878, "step": 4110 }, { "epoch": 0.7010572987721692, "grad_norm": 0.43121498823165894, "learning_rate": 4.336848027043948e-07, "loss": 0.5007, "step": 4111 }, { "epoch": 0.7012278308321964, "grad_norm": 0.44641441106796265, "learning_rate": 4.332296736837725e-07, "loss": 0.4976, "step": 4112 }, { "epoch": 0.7013983628922238, "grad_norm": 0.39351579546928406, "learning_rate": 4.327747175602321e-07, "loss": 0.4968, "step": 4113 }, { "epoch": 0.701568894952251, "grad_norm": 0.4338296949863434, "learning_rate": 4.3231993447256097e-07, "loss": 0.5026, "step": 4114 }, { "epoch": 0.7017394270122783, "grad_norm": 0.47230708599090576, "learning_rate": 4.3186532455949395e-07, "loss": 0.5041, "step": 4115 }, { "epoch": 0.7019099590723056, "grad_norm": 0.49445465207099915, "learning_rate": 4.314108879597122e-07, "loss": 0.4906, "step": 4116 }, { "epoch": 0.7020804911323328, "grad_norm": 0.36397585272789, "learning_rate": 4.309566248118451e-07, "loss": 0.495, "step": 4117 }, { "epoch": 0.7022510231923602, "grad_norm": 0.4672987759113312, "learning_rate": 4.30502535254469e-07, "loss": 0.4991, "step": 4118 }, { "epoch": 0.7024215552523875, "grad_norm": 0.4790498614311218, "learning_rate": 4.300486194261058e-07, "loss": 0.5088, "step": 4119 }, { "epoch": 0.7025920873124147, "grad_norm": 0.49927717447280884, "learning_rate": 4.29594877465227e-07, "loss": 0.4977, "step": 4120 }, { "epoch": 0.702762619372442, "grad_norm": 0.5749465823173523, "learning_rate": 4.291413095102487e-07, "loss": 0.4968, "step": 4121 }, { "epoch": 0.7029331514324693, "grad_norm": 0.43878257274627686, "learning_rate": 4.28687915699535e-07, "loss": 0.4952, "step": 4122 }, { "epoch": 0.7031036834924966, "grad_norm": 0.5072469711303711, "learning_rate": 4.282346961713972e-07, "loss": 0.5085, "step": 4123 }, { "epoch": 0.7032742155525239, "grad_norm": 0.5195170044898987, "learning_rate": 4.27781651064092e-07, "loss": 0.505, "step": 4124 }, { "epoch": 0.7034447476125512, "grad_norm": 0.42486515641212463, "learning_rate": 4.273287805158245e-07, "loss": 0.51, "step": 4125 }, { "epoch": 0.7036152796725784, "grad_norm": 0.4187718331813812, "learning_rate": 4.2687608466474585e-07, "loss": 0.5034, "step": 4126 }, { "epoch": 0.7037858117326057, "grad_norm": 0.48323482275009155, "learning_rate": 4.264235636489542e-07, "loss": 0.4961, "step": 4127 }, { "epoch": 0.703956343792633, "grad_norm": 0.5653338432312012, "learning_rate": 4.2597121760649313e-07, "loss": 0.5055, "step": 4128 }, { "epoch": 0.7041268758526603, "grad_norm": 0.4599945545196533, "learning_rate": 4.255190466753552e-07, "loss": 0.5071, "step": 4129 }, { "epoch": 0.7042974079126876, "grad_norm": 0.5306625366210938, "learning_rate": 4.2506705099347703e-07, "loss": 0.5123, "step": 4130 }, { "epoch": 0.7044679399727148, "grad_norm": 0.5877465605735779, "learning_rate": 4.246152306987435e-07, "loss": 0.5015, "step": 4131 }, { "epoch": 0.7046384720327421, "grad_norm": 0.5958652496337891, "learning_rate": 4.241635859289855e-07, "loss": 0.4952, "step": 4132 }, { "epoch": 0.7048090040927695, "grad_norm": 0.5003058910369873, "learning_rate": 4.237121168219798e-07, "loss": 0.5076, "step": 4133 }, { "epoch": 0.7049795361527967, "grad_norm": 0.45541825890541077, "learning_rate": 4.232608235154503e-07, "loss": 0.4911, "step": 4134 }, { "epoch": 0.705150068212824, "grad_norm": 0.5043857097625732, "learning_rate": 4.228097061470672e-07, "loss": 0.4892, "step": 4135 }, { "epoch": 0.7053206002728513, "grad_norm": 0.4577189087867737, "learning_rate": 4.223587648544471e-07, "loss": 0.4944, "step": 4136 }, { "epoch": 0.7054911323328786, "grad_norm": 0.4259335398674011, "learning_rate": 4.2190799977515155e-07, "loss": 0.4903, "step": 4137 }, { "epoch": 0.7056616643929059, "grad_norm": 0.5069748759269714, "learning_rate": 4.2145741104669094e-07, "loss": 0.4957, "step": 4138 }, { "epoch": 0.7058321964529332, "grad_norm": 0.47444620728492737, "learning_rate": 4.210069988065197e-07, "loss": 0.5005, "step": 4139 }, { "epoch": 0.7060027285129604, "grad_norm": 0.5006582140922546, "learning_rate": 4.2055676319203837e-07, "loss": 0.5068, "step": 4140 }, { "epoch": 0.7061732605729877, "grad_norm": 0.5743463039398193, "learning_rate": 4.201067043405957e-07, "loss": 0.5007, "step": 4141 }, { "epoch": 0.706343792633015, "grad_norm": 0.591344952583313, "learning_rate": 4.1965682238948417e-07, "loss": 0.4859, "step": 4142 }, { "epoch": 0.7065143246930423, "grad_norm": 0.43809980154037476, "learning_rate": 4.192071174759436e-07, "loss": 0.4964, "step": 4143 }, { "epoch": 0.7066848567530696, "grad_norm": 0.6915422081947327, "learning_rate": 4.1875758973715955e-07, "loss": 0.4972, "step": 4144 }, { "epoch": 0.7068553888130968, "grad_norm": 0.33378657698631287, "learning_rate": 4.183082393102637e-07, "loss": 0.4904, "step": 4145 }, { "epoch": 0.7070259208731241, "grad_norm": 0.7669427990913391, "learning_rate": 4.1785906633233233e-07, "loss": 0.5102, "step": 4146 }, { "epoch": 0.7071964529331515, "grad_norm": 0.6579416990280151, "learning_rate": 4.1741007094039024e-07, "loss": 0.4931, "step": 4147 }, { "epoch": 0.7073669849931787, "grad_norm": 0.5717936754226685, "learning_rate": 4.169612532714053e-07, "loss": 0.5032, "step": 4148 }, { "epoch": 0.707537517053206, "grad_norm": 0.6794208884239197, "learning_rate": 4.165126134622927e-07, "loss": 0.5005, "step": 4149 }, { "epoch": 0.7077080491132333, "grad_norm": 0.40618714690208435, "learning_rate": 4.1606415164991346e-07, "loss": 0.5082, "step": 4150 }, { "epoch": 0.7078785811732605, "grad_norm": 0.5979039072990417, "learning_rate": 4.1561586797107304e-07, "loss": 0.5037, "step": 4151 }, { "epoch": 0.7080491132332879, "grad_norm": 0.47896888852119446, "learning_rate": 4.151677625625238e-07, "loss": 0.5001, "step": 4152 }, { "epoch": 0.7082196452933152, "grad_norm": 0.44115006923675537, "learning_rate": 4.147198355609632e-07, "loss": 0.5108, "step": 4153 }, { "epoch": 0.7083901773533424, "grad_norm": 0.48424452543258667, "learning_rate": 4.1427208710303496e-07, "loss": 0.5033, "step": 4154 }, { "epoch": 0.7085607094133697, "grad_norm": 0.5310095548629761, "learning_rate": 4.1382451732532673e-07, "loss": 0.4922, "step": 4155 }, { "epoch": 0.708731241473397, "grad_norm": 0.44579505920410156, "learning_rate": 4.1337712636437326e-07, "loss": 0.4978, "step": 4156 }, { "epoch": 0.7089017735334243, "grad_norm": 0.6123139262199402, "learning_rate": 4.129299143566539e-07, "loss": 0.5046, "step": 4157 }, { "epoch": 0.7090723055934516, "grad_norm": 0.436917781829834, "learning_rate": 4.12482881438594e-07, "loss": 0.5029, "step": 4158 }, { "epoch": 0.7092428376534788, "grad_norm": 0.46517184376716614, "learning_rate": 4.1203602774656406e-07, "loss": 0.5099, "step": 4159 }, { "epoch": 0.7094133697135061, "grad_norm": 0.5581578016281128, "learning_rate": 4.1158935341687915e-07, "loss": 0.5083, "step": 4160 }, { "epoch": 0.7095839017735335, "grad_norm": 0.5356834530830383, "learning_rate": 4.1114285858580053e-07, "loss": 0.4991, "step": 4161 }, { "epoch": 0.7097544338335607, "grad_norm": 0.439238578081131, "learning_rate": 4.1069654338953494e-07, "loss": 0.5099, "step": 4162 }, { "epoch": 0.709924965893588, "grad_norm": 0.5053290128707886, "learning_rate": 4.1025040796423304e-07, "loss": 0.5099, "step": 4163 }, { "epoch": 0.7100954979536153, "grad_norm": 0.5811809301376343, "learning_rate": 4.098044524459918e-07, "loss": 0.5103, "step": 4164 }, { "epoch": 0.7102660300136425, "grad_norm": 0.47019311785697937, "learning_rate": 4.093586769708529e-07, "loss": 0.4947, "step": 4165 }, { "epoch": 0.7104365620736699, "grad_norm": 0.4350194036960602, "learning_rate": 4.089130816748032e-07, "loss": 0.5095, "step": 4166 }, { "epoch": 0.7106070941336972, "grad_norm": 0.5497310161590576, "learning_rate": 4.084676666937745e-07, "loss": 0.4981, "step": 4167 }, { "epoch": 0.7107776261937244, "grad_norm": 0.47565320134162903, "learning_rate": 4.080224321636438e-07, "loss": 0.5051, "step": 4168 }, { "epoch": 0.7109481582537517, "grad_norm": 0.43900391459465027, "learning_rate": 4.075773782202325e-07, "loss": 0.4887, "step": 4169 }, { "epoch": 0.711118690313779, "grad_norm": 0.44005173444747925, "learning_rate": 4.071325049993074e-07, "loss": 0.5046, "step": 4170 }, { "epoch": 0.7112892223738063, "grad_norm": 0.5328361392021179, "learning_rate": 4.0668781263658047e-07, "loss": 0.5037, "step": 4171 }, { "epoch": 0.7114597544338336, "grad_norm": 0.5043607354164124, "learning_rate": 4.0624330126770744e-07, "loss": 0.4974, "step": 4172 }, { "epoch": 0.7116302864938608, "grad_norm": 0.46430233120918274, "learning_rate": 4.057989710282897e-07, "loss": 0.5009, "step": 4173 }, { "epoch": 0.7118008185538881, "grad_norm": 0.4336589276790619, "learning_rate": 4.0535482205387324e-07, "loss": 0.4889, "step": 4174 }, { "epoch": 0.7119713506139154, "grad_norm": 0.5993257164955139, "learning_rate": 4.0491085447994874e-07, "loss": 0.4962, "step": 4175 }, { "epoch": 0.7121418826739427, "grad_norm": 0.6065536737442017, "learning_rate": 4.044670684419513e-07, "loss": 0.5099, "step": 4176 }, { "epoch": 0.71231241473397, "grad_norm": 0.42699581384658813, "learning_rate": 4.040234640752614e-07, "loss": 0.5028, "step": 4177 }, { "epoch": 0.7124829467939973, "grad_norm": 0.4277159869670868, "learning_rate": 4.035800415152027e-07, "loss": 0.5008, "step": 4178 }, { "epoch": 0.7126534788540245, "grad_norm": 0.4397408366203308, "learning_rate": 4.0313680089704456e-07, "loss": 0.502, "step": 4179 }, { "epoch": 0.7128240109140518, "grad_norm": 0.35132086277008057, "learning_rate": 4.0269374235600084e-07, "loss": 0.507, "step": 4180 }, { "epoch": 0.7129945429740792, "grad_norm": 0.44684553146362305, "learning_rate": 4.0225086602722857e-07, "loss": 0.4998, "step": 4181 }, { "epoch": 0.7131650750341064, "grad_norm": 0.39760348200798035, "learning_rate": 4.018081720458314e-07, "loss": 0.4939, "step": 4182 }, { "epoch": 0.7133356070941337, "grad_norm": 0.400193989276886, "learning_rate": 4.0136566054685514e-07, "loss": 0.5054, "step": 4183 }, { "epoch": 0.713506139154161, "grad_norm": 0.3968668580055237, "learning_rate": 4.009233316652911e-07, "loss": 0.4991, "step": 4184 }, { "epoch": 0.7136766712141883, "grad_norm": 0.3965734839439392, "learning_rate": 4.004811855360749e-07, "loss": 0.5012, "step": 4185 }, { "epoch": 0.7138472032742156, "grad_norm": 0.44471368193626404, "learning_rate": 4.0003922229408636e-07, "loss": 0.4973, "step": 4186 }, { "epoch": 0.7140177353342428, "grad_norm": 0.39058804512023926, "learning_rate": 3.9959744207414857e-07, "loss": 0.4928, "step": 4187 }, { "epoch": 0.7141882673942701, "grad_norm": 0.39435192942619324, "learning_rate": 3.9915584501103e-07, "loss": 0.5077, "step": 4188 }, { "epoch": 0.7143587994542974, "grad_norm": 0.49913209676742554, "learning_rate": 3.987144312394432e-07, "loss": 0.5066, "step": 4189 }, { "epoch": 0.7145293315143247, "grad_norm": 0.40681523084640503, "learning_rate": 3.982732008940432e-07, "loss": 0.5044, "step": 4190 }, { "epoch": 0.714699863574352, "grad_norm": 0.46381300687789917, "learning_rate": 3.9783215410943183e-07, "loss": 0.5074, "step": 4191 }, { "epoch": 0.7148703956343793, "grad_norm": 0.4717521071434021, "learning_rate": 3.973912910201521e-07, "loss": 0.5003, "step": 4192 }, { "epoch": 0.7150409276944065, "grad_norm": 0.5403792262077332, "learning_rate": 3.9695061176069277e-07, "loss": 0.5082, "step": 4193 }, { "epoch": 0.7152114597544338, "grad_norm": 0.3403872549533844, "learning_rate": 3.9651011646548626e-07, "loss": 0.4907, "step": 4194 }, { "epoch": 0.7153819918144612, "grad_norm": 0.5781459212303162, "learning_rate": 3.96069805268908e-07, "loss": 0.5053, "step": 4195 }, { "epoch": 0.7155525238744884, "grad_norm": 0.4355068802833557, "learning_rate": 3.9562967830527803e-07, "loss": 0.4954, "step": 4196 }, { "epoch": 0.7157230559345157, "grad_norm": 0.47292977571487427, "learning_rate": 3.9518973570886025e-07, "loss": 0.5048, "step": 4197 }, { "epoch": 0.715893587994543, "grad_norm": 0.6361278891563416, "learning_rate": 3.947499776138624e-07, "loss": 0.5122, "step": 4198 }, { "epoch": 0.7160641200545702, "grad_norm": 0.5250843167304993, "learning_rate": 3.9431040415443446e-07, "loss": 0.5035, "step": 4199 }, { "epoch": 0.7162346521145976, "grad_norm": 0.5073175430297852, "learning_rate": 3.938710154646727e-07, "loss": 0.5028, "step": 4200 }, { "epoch": 0.7164051841746248, "grad_norm": 0.5830045342445374, "learning_rate": 3.934318116786149e-07, "loss": 0.5096, "step": 4201 }, { "epoch": 0.7165757162346521, "grad_norm": 0.6108426451683044, "learning_rate": 3.9299279293024254e-07, "loss": 0.5094, "step": 4202 }, { "epoch": 0.7167462482946794, "grad_norm": 0.3694036900997162, "learning_rate": 3.9255395935348243e-07, "loss": 0.4977, "step": 4203 }, { "epoch": 0.7169167803547067, "grad_norm": 0.5815227031707764, "learning_rate": 3.921153110822027e-07, "loss": 0.5081, "step": 4204 }, { "epoch": 0.717087312414734, "grad_norm": 0.43358278274536133, "learning_rate": 3.9167684825021636e-07, "loss": 0.5, "step": 4205 }, { "epoch": 0.7172578444747613, "grad_norm": 0.38220107555389404, "learning_rate": 3.9123857099127943e-07, "loss": 0.4998, "step": 4206 }, { "epoch": 0.7174283765347885, "grad_norm": 0.4499671757221222, "learning_rate": 3.908004794390915e-07, "loss": 0.5066, "step": 4207 }, { "epoch": 0.7175989085948158, "grad_norm": 0.4196017384529114, "learning_rate": 3.903625737272945e-07, "loss": 0.4987, "step": 4208 }, { "epoch": 0.7177694406548432, "grad_norm": 0.4220138490200043, "learning_rate": 3.899248539894757e-07, "loss": 0.5036, "step": 4209 }, { "epoch": 0.7179399727148704, "grad_norm": 0.44207295775413513, "learning_rate": 3.8948732035916393e-07, "loss": 0.5113, "step": 4210 }, { "epoch": 0.7181105047748977, "grad_norm": 0.3891289234161377, "learning_rate": 3.89049972969831e-07, "loss": 0.5048, "step": 4211 }, { "epoch": 0.718281036834925, "grad_norm": 0.41989684104919434, "learning_rate": 3.886128119548939e-07, "loss": 0.5011, "step": 4212 }, { "epoch": 0.7184515688949522, "grad_norm": 0.466969758272171, "learning_rate": 3.881758374477106e-07, "loss": 0.51, "step": 4213 }, { "epoch": 0.7186221009549796, "grad_norm": 0.39666444063186646, "learning_rate": 3.877390495815835e-07, "loss": 0.5074, "step": 4214 }, { "epoch": 0.7187926330150068, "grad_norm": 0.3840790390968323, "learning_rate": 3.8730244848975765e-07, "loss": 0.502, "step": 4215 }, { "epoch": 0.7189631650750341, "grad_norm": 0.36528825759887695, "learning_rate": 3.868660343054214e-07, "loss": 0.511, "step": 4216 }, { "epoch": 0.7191336971350614, "grad_norm": 0.4255644381046295, "learning_rate": 3.8642980716170514e-07, "loss": 0.5096, "step": 4217 }, { "epoch": 0.7193042291950886, "grad_norm": 0.36999639868736267, "learning_rate": 3.859937671916833e-07, "loss": 0.5065, "step": 4218 }, { "epoch": 0.719474761255116, "grad_norm": 0.4373064935207367, "learning_rate": 3.8555791452837303e-07, "loss": 0.5037, "step": 4219 }, { "epoch": 0.7196452933151433, "grad_norm": 0.34947025775909424, "learning_rate": 3.8512224930473313e-07, "loss": 0.5005, "step": 4220 }, { "epoch": 0.7198158253751705, "grad_norm": 0.40553170442581177, "learning_rate": 3.846867716536676e-07, "loss": 0.5087, "step": 4221 }, { "epoch": 0.7199863574351978, "grad_norm": 0.45594432950019836, "learning_rate": 3.8425148170802064e-07, "loss": 0.5041, "step": 4222 }, { "epoch": 0.720156889495225, "grad_norm": 0.4455185532569885, "learning_rate": 3.83816379600581e-07, "loss": 0.4992, "step": 4223 }, { "epoch": 0.7203274215552524, "grad_norm": 0.4536997377872467, "learning_rate": 3.833814654640791e-07, "loss": 0.4979, "step": 4224 }, { "epoch": 0.7204979536152797, "grad_norm": 0.42399755120277405, "learning_rate": 3.82946739431189e-07, "loss": 0.4926, "step": 4225 }, { "epoch": 0.720668485675307, "grad_norm": 0.4298648238182068, "learning_rate": 3.825122016345261e-07, "loss": 0.5038, "step": 4226 }, { "epoch": 0.7208390177353342, "grad_norm": 0.416885107755661, "learning_rate": 3.8207785220664944e-07, "loss": 0.4946, "step": 4227 }, { "epoch": 0.7210095497953616, "grad_norm": 0.4566577076911926, "learning_rate": 3.8164369128006034e-07, "loss": 0.4859, "step": 4228 }, { "epoch": 0.7211800818553888, "grad_norm": 0.4706067144870758, "learning_rate": 3.8120971898720165e-07, "loss": 0.4922, "step": 4229 }, { "epoch": 0.7213506139154161, "grad_norm": 0.3943629562854767, "learning_rate": 3.807759354604609e-07, "loss": 0.506, "step": 4230 }, { "epoch": 0.7215211459754434, "grad_norm": 0.4761364459991455, "learning_rate": 3.803423408321655e-07, "loss": 0.4932, "step": 4231 }, { "epoch": 0.7216916780354706, "grad_norm": 0.37824156880378723, "learning_rate": 3.7990893523458685e-07, "loss": 0.5017, "step": 4232 }, { "epoch": 0.721862210095498, "grad_norm": 0.5847135782241821, "learning_rate": 3.7947571879993864e-07, "loss": 0.4983, "step": 4233 }, { "epoch": 0.7220327421555253, "grad_norm": 0.4698280394077301, "learning_rate": 3.790426916603756e-07, "loss": 0.4898, "step": 4234 }, { "epoch": 0.7222032742155525, "grad_norm": 0.5401884913444519, "learning_rate": 3.786098539479959e-07, "loss": 0.5024, "step": 4235 }, { "epoch": 0.7223738062755798, "grad_norm": 0.5750718116760254, "learning_rate": 3.7817720579483967e-07, "loss": 0.5022, "step": 4236 }, { "epoch": 0.722544338335607, "grad_norm": 0.4136122167110443, "learning_rate": 3.7774474733288904e-07, "loss": 0.5019, "step": 4237 }, { "epoch": 0.7227148703956344, "grad_norm": 0.605913519859314, "learning_rate": 3.7731247869406856e-07, "loss": 0.4899, "step": 4238 }, { "epoch": 0.7228854024556617, "grad_norm": 0.32366135716438293, "learning_rate": 3.7688040001024475e-07, "loss": 0.491, "step": 4239 }, { "epoch": 0.723055934515689, "grad_norm": 0.6166736483573914, "learning_rate": 3.764485114132256e-07, "loss": 0.4998, "step": 4240 }, { "epoch": 0.7232264665757162, "grad_norm": 0.3734305799007416, "learning_rate": 3.7601681303476195e-07, "loss": 0.4989, "step": 4241 }, { "epoch": 0.7233969986357435, "grad_norm": 0.5646785497665405, "learning_rate": 3.7558530500654646e-07, "loss": 0.4991, "step": 4242 }, { "epoch": 0.7235675306957708, "grad_norm": 0.5683301687240601, "learning_rate": 3.7515398746021294e-07, "loss": 0.5114, "step": 4243 }, { "epoch": 0.7237380627557981, "grad_norm": 0.5351408123970032, "learning_rate": 3.747228605273381e-07, "loss": 0.496, "step": 4244 }, { "epoch": 0.7239085948158254, "grad_norm": 0.4870230555534363, "learning_rate": 3.742919243394402e-07, "loss": 0.4964, "step": 4245 }, { "epoch": 0.7240791268758526, "grad_norm": 0.4613642990589142, "learning_rate": 3.7386117902797896e-07, "loss": 0.4969, "step": 4246 }, { "epoch": 0.7242496589358799, "grad_norm": 0.48472538590431213, "learning_rate": 3.7343062472435623e-07, "loss": 0.509, "step": 4247 }, { "epoch": 0.7244201909959073, "grad_norm": 0.45103564858436584, "learning_rate": 3.730002615599158e-07, "loss": 0.4988, "step": 4248 }, { "epoch": 0.7245907230559345, "grad_norm": 0.49282583594322205, "learning_rate": 3.7257008966594224e-07, "loss": 0.4984, "step": 4249 }, { "epoch": 0.7247612551159618, "grad_norm": 0.39775902032852173, "learning_rate": 3.7214010917366263e-07, "loss": 0.501, "step": 4250 }, { "epoch": 0.724931787175989, "grad_norm": 0.47374874353408813, "learning_rate": 3.7171032021424574e-07, "loss": 0.5019, "step": 4251 }, { "epoch": 0.7251023192360164, "grad_norm": 0.53701251745224, "learning_rate": 3.7128072291880083e-07, "loss": 0.4915, "step": 4252 }, { "epoch": 0.7252728512960437, "grad_norm": 0.5349600911140442, "learning_rate": 3.708513174183799e-07, "loss": 0.5009, "step": 4253 }, { "epoch": 0.725443383356071, "grad_norm": 0.44183728098869324, "learning_rate": 3.7042210384397596e-07, "loss": 0.4969, "step": 4254 }, { "epoch": 0.7256139154160982, "grad_norm": 0.4504479169845581, "learning_rate": 3.6999308232652337e-07, "loss": 0.499, "step": 4255 }, { "epoch": 0.7257844474761255, "grad_norm": 0.5018069744110107, "learning_rate": 3.695642529968985e-07, "loss": 0.5044, "step": 4256 }, { "epoch": 0.7259549795361528, "grad_norm": 0.3822757601737976, "learning_rate": 3.691356159859178e-07, "loss": 0.508, "step": 4257 }, { "epoch": 0.7261255115961801, "grad_norm": 0.4669684171676636, "learning_rate": 3.6870717142434025e-07, "loss": 0.4972, "step": 4258 }, { "epoch": 0.7262960436562074, "grad_norm": 0.4292471706867218, "learning_rate": 3.682789194428657e-07, "loss": 0.5001, "step": 4259 }, { "epoch": 0.7264665757162346, "grad_norm": 0.3445581793785095, "learning_rate": 3.6785086017213557e-07, "loss": 0.4917, "step": 4260 }, { "epoch": 0.7266371077762619, "grad_norm": 0.5252216458320618, "learning_rate": 3.6742299374273174e-07, "loss": 0.4981, "step": 4261 }, { "epoch": 0.7268076398362893, "grad_norm": 0.4755789041519165, "learning_rate": 3.6699532028517795e-07, "loss": 0.4996, "step": 4262 }, { "epoch": 0.7269781718963165, "grad_norm": 0.4374355673789978, "learning_rate": 3.6656783992993887e-07, "loss": 0.4956, "step": 4263 }, { "epoch": 0.7271487039563438, "grad_norm": 0.4354765713214874, "learning_rate": 3.6614055280742014e-07, "loss": 0.4954, "step": 4264 }, { "epoch": 0.727319236016371, "grad_norm": 0.43414583802223206, "learning_rate": 3.6571345904796897e-07, "loss": 0.5044, "step": 4265 }, { "epoch": 0.7274897680763983, "grad_norm": 0.46638140082359314, "learning_rate": 3.652865587818725e-07, "loss": 0.5041, "step": 4266 }, { "epoch": 0.7276603001364257, "grad_norm": 0.38081035017967224, "learning_rate": 3.6485985213935983e-07, "loss": 0.505, "step": 4267 }, { "epoch": 0.727830832196453, "grad_norm": 0.45600566267967224, "learning_rate": 3.6443333925060066e-07, "loss": 0.4945, "step": 4268 }, { "epoch": 0.7280013642564802, "grad_norm": 0.518722653388977, "learning_rate": 3.64007020245706e-07, "loss": 0.4937, "step": 4269 }, { "epoch": 0.7281718963165075, "grad_norm": 0.33963751792907715, "learning_rate": 3.635808952547263e-07, "loss": 0.4961, "step": 4270 }, { "epoch": 0.7283424283765347, "grad_norm": 0.446537584066391, "learning_rate": 3.631549644076552e-07, "loss": 0.4942, "step": 4271 }, { "epoch": 0.7285129604365621, "grad_norm": 0.4185526967048645, "learning_rate": 3.6272922783442504e-07, "loss": 0.501, "step": 4272 }, { "epoch": 0.7286834924965894, "grad_norm": 0.3797702193260193, "learning_rate": 3.6230368566490883e-07, "loss": 0.5059, "step": 4273 }, { "epoch": 0.7288540245566166, "grad_norm": 0.43924352526664734, "learning_rate": 3.618783380289226e-07, "loss": 0.4932, "step": 4274 }, { "epoch": 0.7290245566166439, "grad_norm": 0.348660945892334, "learning_rate": 3.6145318505622034e-07, "loss": 0.5097, "step": 4275 }, { "epoch": 0.7291950886766713, "grad_norm": 0.5583161115646362, "learning_rate": 3.6102822687649833e-07, "loss": 0.5093, "step": 4276 }, { "epoch": 0.7293656207366985, "grad_norm": 0.4120829105377197, "learning_rate": 3.6060346361939263e-07, "loss": 0.5087, "step": 4277 }, { "epoch": 0.7295361527967258, "grad_norm": 0.45520544052124023, "learning_rate": 3.6017889541448075e-07, "loss": 0.4949, "step": 4278 }, { "epoch": 0.729706684856753, "grad_norm": 0.525784969329834, "learning_rate": 3.597545223912792e-07, "loss": 0.495, "step": 4279 }, { "epoch": 0.7298772169167803, "grad_norm": 0.3586786091327667, "learning_rate": 3.5933034467924616e-07, "loss": 0.5182, "step": 4280 }, { "epoch": 0.7300477489768077, "grad_norm": 0.5064526796340942, "learning_rate": 3.5890636240778026e-07, "loss": 0.4949, "step": 4281 }, { "epoch": 0.730218281036835, "grad_norm": 0.4644537568092346, "learning_rate": 3.5848257570621905e-07, "loss": 0.5062, "step": 4282 }, { "epoch": 0.7303888130968622, "grad_norm": 0.34113574028015137, "learning_rate": 3.5805898470384307e-07, "loss": 0.5025, "step": 4283 }, { "epoch": 0.7305593451568895, "grad_norm": 0.5026168823242188, "learning_rate": 3.5763558952987025e-07, "loss": 0.5077, "step": 4284 }, { "epoch": 0.7307298772169167, "grad_norm": 0.459414005279541, "learning_rate": 3.572123903134607e-07, "loss": 0.5073, "step": 4285 }, { "epoch": 0.7309004092769441, "grad_norm": 0.44578710198402405, "learning_rate": 3.5678938718371415e-07, "loss": 0.512, "step": 4286 }, { "epoch": 0.7310709413369714, "grad_norm": 0.38436904549598694, "learning_rate": 3.5636658026967077e-07, "loss": 0.516, "step": 4287 }, { "epoch": 0.7312414733969986, "grad_norm": 0.47648823261260986, "learning_rate": 3.5594396970031e-07, "loss": 0.5065, "step": 4288 }, { "epoch": 0.7314120054570259, "grad_norm": 0.4478094279766083, "learning_rate": 3.555215556045524e-07, "loss": 0.5094, "step": 4289 }, { "epoch": 0.7315825375170532, "grad_norm": 0.4391409754753113, "learning_rate": 3.5509933811125855e-07, "loss": 0.5072, "step": 4290 }, { "epoch": 0.7317530695770805, "grad_norm": 0.5401695966720581, "learning_rate": 3.5467731734922767e-07, "loss": 0.5016, "step": 4291 }, { "epoch": 0.7319236016371078, "grad_norm": 0.39789271354675293, "learning_rate": 3.542554934472014e-07, "loss": 0.5166, "step": 4292 }, { "epoch": 0.732094133697135, "grad_norm": 0.7003616094589233, "learning_rate": 3.538338665338589e-07, "loss": 0.5065, "step": 4293 }, { "epoch": 0.7322646657571623, "grad_norm": 0.4756523668766022, "learning_rate": 3.534124367378207e-07, "loss": 0.5016, "step": 4294 }, { "epoch": 0.7324351978171897, "grad_norm": 0.3915746212005615, "learning_rate": 3.5299120418764703e-07, "loss": 0.4972, "step": 4295 }, { "epoch": 0.732605729877217, "grad_norm": 0.4228286147117615, "learning_rate": 3.525701690118371e-07, "loss": 0.5064, "step": 4296 }, { "epoch": 0.7327762619372442, "grad_norm": 0.45134690403938293, "learning_rate": 3.521493313388307e-07, "loss": 0.5139, "step": 4297 }, { "epoch": 0.7329467939972715, "grad_norm": 0.5544208288192749, "learning_rate": 3.517286912970074e-07, "loss": 0.4994, "step": 4298 }, { "epoch": 0.7331173260572987, "grad_norm": 0.5639966130256653, "learning_rate": 3.5130824901468643e-07, "loss": 0.4939, "step": 4299 }, { "epoch": 0.7332878581173261, "grad_norm": 0.5455499887466431, "learning_rate": 3.508880046201255e-07, "loss": 0.5041, "step": 4300 }, { "epoch": 0.7334583901773534, "grad_norm": 0.583670437335968, "learning_rate": 3.504679582415245e-07, "loss": 0.5016, "step": 4301 }, { "epoch": 0.7336289222373806, "grad_norm": 0.45605432987213135, "learning_rate": 3.500481100070201e-07, "loss": 0.5119, "step": 4302 }, { "epoch": 0.7337994542974079, "grad_norm": 0.7099131345748901, "learning_rate": 3.496284600446904e-07, "loss": 0.4941, "step": 4303 }, { "epoch": 0.7339699863574352, "grad_norm": 0.6572691202163696, "learning_rate": 3.492090084825528e-07, "loss": 0.4841, "step": 4304 }, { "epoch": 0.7341405184174625, "grad_norm": 0.570122480392456, "learning_rate": 3.4878975544856287e-07, "loss": 0.506, "step": 4305 }, { "epoch": 0.7343110504774898, "grad_norm": 0.5914926528930664, "learning_rate": 3.4837070107061713e-07, "loss": 0.4924, "step": 4306 }, { "epoch": 0.734481582537517, "grad_norm": 0.6704850792884827, "learning_rate": 3.4795184547655075e-07, "loss": 0.5031, "step": 4307 }, { "epoch": 0.7346521145975443, "grad_norm": 0.5630759596824646, "learning_rate": 3.475331887941388e-07, "loss": 0.5025, "step": 4308 }, { "epoch": 0.7348226466575716, "grad_norm": 0.49667057394981384, "learning_rate": 3.471147311510944e-07, "loss": 0.4918, "step": 4309 }, { "epoch": 0.734993178717599, "grad_norm": 0.6504514813423157, "learning_rate": 3.4669647267507225e-07, "loss": 0.4995, "step": 4310 }, { "epoch": 0.7351637107776262, "grad_norm": 0.5181241035461426, "learning_rate": 3.462784134936637e-07, "loss": 0.503, "step": 4311 }, { "epoch": 0.7353342428376535, "grad_norm": 0.5790863633155823, "learning_rate": 3.458605537344008e-07, "loss": 0.491, "step": 4312 }, { "epoch": 0.7355047748976807, "grad_norm": 0.5233603715896606, "learning_rate": 3.4544289352475503e-07, "loss": 0.499, "step": 4313 }, { "epoch": 0.735675306957708, "grad_norm": 0.5413016080856323, "learning_rate": 3.450254329921357e-07, "loss": 0.5026, "step": 4314 }, { "epoch": 0.7358458390177354, "grad_norm": 0.6021640300750732, "learning_rate": 3.446081722638921e-07, "loss": 0.5074, "step": 4315 }, { "epoch": 0.7360163710777626, "grad_norm": 0.5931169986724854, "learning_rate": 3.441911114673126e-07, "loss": 0.4925, "step": 4316 }, { "epoch": 0.7361869031377899, "grad_norm": 0.46309608221054077, "learning_rate": 3.437742507296247e-07, "loss": 0.5053, "step": 4317 }, { "epoch": 0.7363574351978172, "grad_norm": 0.5755923986434937, "learning_rate": 3.433575901779938e-07, "loss": 0.4917, "step": 4318 }, { "epoch": 0.7365279672578445, "grad_norm": 0.4366355538368225, "learning_rate": 3.4294112993952534e-07, "loss": 0.5065, "step": 4319 }, { "epoch": 0.7366984993178718, "grad_norm": 0.589140772819519, "learning_rate": 3.4252487014126335e-07, "loss": 0.4995, "step": 4320 }, { "epoch": 0.736869031377899, "grad_norm": 0.5909987092018127, "learning_rate": 3.421088109101907e-07, "loss": 0.4967, "step": 4321 }, { "epoch": 0.7370395634379263, "grad_norm": 0.46883001923561096, "learning_rate": 3.4169295237322934e-07, "loss": 0.498, "step": 4322 }, { "epoch": 0.7372100954979536, "grad_norm": 0.41802477836608887, "learning_rate": 3.4127729465723894e-07, "loss": 0.4955, "step": 4323 }, { "epoch": 0.737380627557981, "grad_norm": 0.46541598439216614, "learning_rate": 3.408618378890192e-07, "loss": 0.5041, "step": 4324 }, { "epoch": 0.7375511596180082, "grad_norm": 0.534308135509491, "learning_rate": 3.4044658219530785e-07, "loss": 0.5052, "step": 4325 }, { "epoch": 0.7377216916780355, "grad_norm": 0.4054575562477112, "learning_rate": 3.4003152770278127e-07, "loss": 0.498, "step": 4326 }, { "epoch": 0.7378922237380627, "grad_norm": 0.5337191820144653, "learning_rate": 3.3961667453805526e-07, "loss": 0.4885, "step": 4327 }, { "epoch": 0.73806275579809, "grad_norm": 0.37807995080947876, "learning_rate": 3.392020228276828e-07, "loss": 0.4994, "step": 4328 }, { "epoch": 0.7382332878581174, "grad_norm": 0.5176721811294556, "learning_rate": 3.3878757269815633e-07, "loss": 0.502, "step": 4329 }, { "epoch": 0.7384038199181446, "grad_norm": 0.4743328094482422, "learning_rate": 3.3837332427590676e-07, "loss": 0.4955, "step": 4330 }, { "epoch": 0.7385743519781719, "grad_norm": 0.423225998878479, "learning_rate": 3.379592776873038e-07, "loss": 0.4904, "step": 4331 }, { "epoch": 0.7387448840381992, "grad_norm": 0.5088225603103638, "learning_rate": 3.375454330586542e-07, "loss": 0.4896, "step": 4332 }, { "epoch": 0.7389154160982264, "grad_norm": 0.3379969298839569, "learning_rate": 3.371317905162046e-07, "loss": 0.5009, "step": 4333 }, { "epoch": 0.7390859481582538, "grad_norm": 0.445017009973526, "learning_rate": 3.367183501861395e-07, "loss": 0.4962, "step": 4334 }, { "epoch": 0.739256480218281, "grad_norm": 0.31372275948524475, "learning_rate": 3.3630511219458097e-07, "loss": 0.5007, "step": 4335 }, { "epoch": 0.7394270122783083, "grad_norm": 0.4493860900402069, "learning_rate": 3.3589207666759117e-07, "loss": 0.5125, "step": 4336 }, { "epoch": 0.7395975443383356, "grad_norm": 0.4906957149505615, "learning_rate": 3.354792437311684e-07, "loss": 0.5034, "step": 4337 }, { "epoch": 0.7397680763983628, "grad_norm": 0.4305751621723175, "learning_rate": 3.350666135112504e-07, "loss": 0.5023, "step": 4338 }, { "epoch": 0.7399386084583902, "grad_norm": 0.43462157249450684, "learning_rate": 3.346541861337127e-07, "loss": 0.5047, "step": 4339 }, { "epoch": 0.7401091405184175, "grad_norm": 0.4501499831676483, "learning_rate": 3.3424196172436953e-07, "loss": 0.5042, "step": 4340 }, { "epoch": 0.7402796725784447, "grad_norm": 0.4740625321865082, "learning_rate": 3.3382994040897203e-07, "loss": 0.5118, "step": 4341 }, { "epoch": 0.740450204638472, "grad_norm": 0.4675706923007965, "learning_rate": 3.334181223132103e-07, "loss": 0.5029, "step": 4342 }, { "epoch": 0.7406207366984994, "grad_norm": 0.3829689025878906, "learning_rate": 3.330065075627126e-07, "loss": 0.5014, "step": 4343 }, { "epoch": 0.7407912687585266, "grad_norm": 0.4442577362060547, "learning_rate": 3.325950962830437e-07, "loss": 0.4958, "step": 4344 }, { "epoch": 0.7409618008185539, "grad_norm": 0.47646093368530273, "learning_rate": 3.3218388859970877e-07, "loss": 0.5022, "step": 4345 }, { "epoch": 0.7411323328785812, "grad_norm": 0.3984088599681854, "learning_rate": 3.317728846381484e-07, "loss": 0.4956, "step": 4346 }, { "epoch": 0.7413028649386084, "grad_norm": 0.5109058022499084, "learning_rate": 3.3136208452374256e-07, "loss": 0.497, "step": 4347 }, { "epoch": 0.7414733969986358, "grad_norm": 0.41433700919151306, "learning_rate": 3.309514883818084e-07, "loss": 0.5066, "step": 4348 }, { "epoch": 0.741643929058663, "grad_norm": 0.4546804428100586, "learning_rate": 3.305410963376015e-07, "loss": 0.5003, "step": 4349 }, { "epoch": 0.7418144611186903, "grad_norm": 0.43137767910957336, "learning_rate": 3.301309085163139e-07, "loss": 0.4969, "step": 4350 }, { "epoch": 0.7419849931787176, "grad_norm": 0.44788989424705505, "learning_rate": 3.2972092504307664e-07, "loss": 0.5019, "step": 4351 }, { "epoch": 0.7421555252387448, "grad_norm": 0.5021728277206421, "learning_rate": 3.29311146042958e-07, "loss": 0.4981, "step": 4352 }, { "epoch": 0.7423260572987722, "grad_norm": 0.3996676504611969, "learning_rate": 3.289015716409632e-07, "loss": 0.4967, "step": 4353 }, { "epoch": 0.7424965893587995, "grad_norm": 0.5152726769447327, "learning_rate": 3.284922019620366e-07, "loss": 0.496, "step": 4354 }, { "epoch": 0.7426671214188267, "grad_norm": 0.5459908246994019, "learning_rate": 3.280830371310583e-07, "loss": 0.5029, "step": 4355 }, { "epoch": 0.742837653478854, "grad_norm": 0.3832405209541321, "learning_rate": 3.276740772728472e-07, "loss": 0.492, "step": 4356 }, { "epoch": 0.7430081855388813, "grad_norm": 0.44288480281829834, "learning_rate": 3.272653225121592e-07, "loss": 0.4985, "step": 4357 }, { "epoch": 0.7431787175989086, "grad_norm": 0.491085022687912, "learning_rate": 3.268567729736879e-07, "loss": 0.5004, "step": 4358 }, { "epoch": 0.7433492496589359, "grad_norm": 0.4536385238170624, "learning_rate": 3.2644842878206343e-07, "loss": 0.4981, "step": 4359 }, { "epoch": 0.7435197817189632, "grad_norm": 0.4160607159137726, "learning_rate": 3.260402900618544e-07, "loss": 0.4974, "step": 4360 }, { "epoch": 0.7436903137789904, "grad_norm": 0.4935556948184967, "learning_rate": 3.256323569375665e-07, "loss": 0.5009, "step": 4361 }, { "epoch": 0.7438608458390177, "grad_norm": 0.46699386835098267, "learning_rate": 3.2522462953364135e-07, "loss": 0.4939, "step": 4362 }, { "epoch": 0.744031377899045, "grad_norm": 0.45156747102737427, "learning_rate": 3.2481710797446043e-07, "loss": 0.4978, "step": 4363 }, { "epoch": 0.7442019099590723, "grad_norm": 0.455791711807251, "learning_rate": 3.244097923843398e-07, "loss": 0.4988, "step": 4364 }, { "epoch": 0.7443724420190996, "grad_norm": 0.4738144278526306, "learning_rate": 3.2400268288753423e-07, "loss": 0.5138, "step": 4365 }, { "epoch": 0.7445429740791268, "grad_norm": 0.4773343801498413, "learning_rate": 3.2359577960823557e-07, "loss": 0.5, "step": 4366 }, { "epoch": 0.7447135061391542, "grad_norm": 0.4243999421596527, "learning_rate": 3.231890826705715e-07, "loss": 0.4993, "step": 4367 }, { "epoch": 0.7448840381991815, "grad_norm": 0.3990984559059143, "learning_rate": 3.227825921986082e-07, "loss": 0.4981, "step": 4368 }, { "epoch": 0.7450545702592087, "grad_norm": 0.48051950335502625, "learning_rate": 3.223763083163482e-07, "loss": 0.4986, "step": 4369 }, { "epoch": 0.745225102319236, "grad_norm": 0.4449363350868225, "learning_rate": 3.2197023114773145e-07, "loss": 0.5012, "step": 4370 }, { "epoch": 0.7453956343792633, "grad_norm": 0.4994031488895416, "learning_rate": 3.215643608166336e-07, "loss": 0.4919, "step": 4371 }, { "epoch": 0.7455661664392906, "grad_norm": 0.37948834896087646, "learning_rate": 3.2115869744686937e-07, "loss": 0.489, "step": 4372 }, { "epoch": 0.7457366984993179, "grad_norm": 0.45996761322021484, "learning_rate": 3.2075324116218835e-07, "loss": 0.5035, "step": 4373 }, { "epoch": 0.7459072305593452, "grad_norm": 0.4702564775943756, "learning_rate": 3.2034799208627724e-07, "loss": 0.4908, "step": 4374 }, { "epoch": 0.7460777626193724, "grad_norm": 0.5308040976524353, "learning_rate": 3.1994295034276104e-07, "loss": 0.5055, "step": 4375 }, { "epoch": 0.7462482946793997, "grad_norm": 0.4410310387611389, "learning_rate": 3.195381160551997e-07, "loss": 0.4966, "step": 4376 }, { "epoch": 0.746418826739427, "grad_norm": 0.3720689117908478, "learning_rate": 3.191334893470908e-07, "loss": 0.5018, "step": 4377 }, { "epoch": 0.7465893587994543, "grad_norm": 0.5346623063087463, "learning_rate": 3.1872907034186854e-07, "loss": 0.4962, "step": 4378 }, { "epoch": 0.7467598908594816, "grad_norm": 0.6136433482170105, "learning_rate": 3.183248591629038e-07, "loss": 0.4885, "step": 4379 }, { "epoch": 0.7469304229195088, "grad_norm": 0.3642314672470093, "learning_rate": 3.179208559335031e-07, "loss": 0.5014, "step": 4380 }, { "epoch": 0.7471009549795361, "grad_norm": 0.5996096730232239, "learning_rate": 3.175170607769116e-07, "loss": 0.5006, "step": 4381 }, { "epoch": 0.7472714870395635, "grad_norm": 0.5387561321258545, "learning_rate": 3.17113473816309e-07, "loss": 0.5005, "step": 4382 }, { "epoch": 0.7474420190995907, "grad_norm": 0.4318586587905884, "learning_rate": 3.1671009517481157e-07, "loss": 0.5033, "step": 4383 }, { "epoch": 0.747612551159618, "grad_norm": 0.5770013332366943, "learning_rate": 3.16306924975474e-07, "loss": 0.4947, "step": 4384 }, { "epoch": 0.7477830832196453, "grad_norm": 0.43154647946357727, "learning_rate": 3.159039633412851e-07, "loss": 0.4985, "step": 4385 }, { "epoch": 0.7479536152796725, "grad_norm": 0.4739813208580017, "learning_rate": 3.1550121039517124e-07, "loss": 0.4918, "step": 4386 }, { "epoch": 0.7481241473396999, "grad_norm": 0.40933310985565186, "learning_rate": 3.15098666259995e-07, "loss": 0.498, "step": 4387 }, { "epoch": 0.7482946793997272, "grad_norm": 0.477927565574646, "learning_rate": 3.146963310585555e-07, "loss": 0.4966, "step": 4388 }, { "epoch": 0.7484652114597544, "grad_norm": 0.44899311661720276, "learning_rate": 3.14294204913587e-07, "loss": 0.5076, "step": 4389 }, { "epoch": 0.7486357435197817, "grad_norm": 0.5270994305610657, "learning_rate": 3.138922879477613e-07, "loss": 0.5012, "step": 4390 }, { "epoch": 0.748806275579809, "grad_norm": 0.5749548077583313, "learning_rate": 3.1349058028368565e-07, "loss": 0.4931, "step": 4391 }, { "epoch": 0.7489768076398363, "grad_norm": 0.5705610513687134, "learning_rate": 3.130890820439038e-07, "loss": 0.4904, "step": 4392 }, { "epoch": 0.7491473396998636, "grad_norm": 0.40459010004997253, "learning_rate": 3.1268779335089575e-07, "loss": 0.4945, "step": 4393 }, { "epoch": 0.7493178717598908, "grad_norm": 0.5784091949462891, "learning_rate": 3.1228671432707663e-07, "loss": 0.5037, "step": 4394 }, { "epoch": 0.7494884038199181, "grad_norm": 0.5292043685913086, "learning_rate": 3.118858450947987e-07, "loss": 0.497, "step": 4395 }, { "epoch": 0.7496589358799455, "grad_norm": 0.47194743156433105, "learning_rate": 3.114851857763497e-07, "loss": 0.5048, "step": 4396 }, { "epoch": 0.7498294679399727, "grad_norm": 0.4495631456375122, "learning_rate": 3.110847364939538e-07, "loss": 0.498, "step": 4397 }, { "epoch": 0.75, "grad_norm": 0.5370325446128845, "learning_rate": 3.1068449736977015e-07, "loss": 0.4877, "step": 4398 }, { "epoch": 0.7501705320600273, "grad_norm": 0.6217759847640991, "learning_rate": 3.102844685258946e-07, "loss": 0.4871, "step": 4399 }, { "epoch": 0.7503410641200545, "grad_norm": 0.40713444352149963, "learning_rate": 3.0988465008435855e-07, "loss": 0.5077, "step": 4400 }, { "epoch": 0.7505115961800819, "grad_norm": 0.548819363117218, "learning_rate": 3.094850421671295e-07, "loss": 0.4979, "step": 4401 }, { "epoch": 0.7506821282401092, "grad_norm": 0.4234558939933777, "learning_rate": 3.0908564489611083e-07, "loss": 0.4944, "step": 4402 }, { "epoch": 0.7508526603001364, "grad_norm": 0.49774685502052307, "learning_rate": 3.086864583931405e-07, "loss": 0.4984, "step": 4403 }, { "epoch": 0.7510231923601637, "grad_norm": 0.5342044830322266, "learning_rate": 3.0828748277999356e-07, "loss": 0.5056, "step": 4404 }, { "epoch": 0.751193724420191, "grad_norm": 0.4261223077774048, "learning_rate": 3.078887181783803e-07, "loss": 0.493, "step": 4405 }, { "epoch": 0.7513642564802183, "grad_norm": 0.5373156070709229, "learning_rate": 3.074901647099461e-07, "loss": 0.4956, "step": 4406 }, { "epoch": 0.7515347885402456, "grad_norm": 0.45177051424980164, "learning_rate": 3.070918224962726e-07, "loss": 0.5044, "step": 4407 }, { "epoch": 0.7517053206002728, "grad_norm": 0.5984803438186646, "learning_rate": 3.0669369165887675e-07, "loss": 0.5015, "step": 4408 }, { "epoch": 0.7518758526603001, "grad_norm": 0.4983421862125397, "learning_rate": 3.062957723192111e-07, "loss": 0.5058, "step": 4409 }, { "epoch": 0.7520463847203275, "grad_norm": 0.49384385347366333, "learning_rate": 3.058980645986634e-07, "loss": 0.5034, "step": 4410 }, { "epoch": 0.7522169167803547, "grad_norm": 0.6117486357688904, "learning_rate": 3.0550056861855764e-07, "loss": 0.4932, "step": 4411 }, { "epoch": 0.752387448840382, "grad_norm": 0.4372576177120209, "learning_rate": 3.0510328450015173e-07, "loss": 0.4963, "step": 4412 }, { "epoch": 0.7525579809004093, "grad_norm": 0.4864978492259979, "learning_rate": 3.047062123646404e-07, "loss": 0.4874, "step": 4413 }, { "epoch": 0.7527285129604365, "grad_norm": 0.45403429865837097, "learning_rate": 3.043093523331533e-07, "loss": 0.508, "step": 4414 }, { "epoch": 0.7528990450204639, "grad_norm": 0.47508782148361206, "learning_rate": 3.0391270452675437e-07, "loss": 0.4959, "step": 4415 }, { "epoch": 0.7530695770804912, "grad_norm": 0.40006983280181885, "learning_rate": 3.0351626906644505e-07, "loss": 0.493, "step": 4416 }, { "epoch": 0.7532401091405184, "grad_norm": 0.5417851209640503, "learning_rate": 3.031200460731595e-07, "loss": 0.4966, "step": 4417 }, { "epoch": 0.7534106412005457, "grad_norm": 0.507758617401123, "learning_rate": 3.0272403566776856e-07, "loss": 0.4955, "step": 4418 }, { "epoch": 0.753581173260573, "grad_norm": 0.3795713186264038, "learning_rate": 3.0232823797107795e-07, "loss": 0.5046, "step": 4419 }, { "epoch": 0.7537517053206003, "grad_norm": 0.4745026230812073, "learning_rate": 3.019326531038287e-07, "loss": 0.5004, "step": 4420 }, { "epoch": 0.7539222373806276, "grad_norm": 0.485241174697876, "learning_rate": 3.015372811866958e-07, "loss": 0.4875, "step": 4421 }, { "epoch": 0.7540927694406548, "grad_norm": 0.6154580116271973, "learning_rate": 3.0114212234029065e-07, "loss": 0.4991, "step": 4422 }, { "epoch": 0.7542633015006821, "grad_norm": 1.3015596866607666, "learning_rate": 3.007471766851593e-07, "loss": 0.4945, "step": 4423 }, { "epoch": 0.7544338335607094, "grad_norm": 0.4312533438205719, "learning_rate": 3.003524443417817e-07, "loss": 0.4958, "step": 4424 }, { "epoch": 0.7546043656207367, "grad_norm": 0.5360495448112488, "learning_rate": 2.999579254305748e-07, "loss": 0.4995, "step": 4425 }, { "epoch": 0.754774897680764, "grad_norm": 0.382801353931427, "learning_rate": 2.9956362007188824e-07, "loss": 0.4952, "step": 4426 }, { "epoch": 0.7549454297407913, "grad_norm": 0.4729914665222168, "learning_rate": 2.9916952838600793e-07, "loss": 0.5043, "step": 4427 }, { "epoch": 0.7551159618008185, "grad_norm": 0.4124811887741089, "learning_rate": 2.987756504931543e-07, "loss": 0.4922, "step": 4428 }, { "epoch": 0.7552864938608458, "grad_norm": 0.4720993936061859, "learning_rate": 2.9838198651348207e-07, "loss": 0.5099, "step": 4429 }, { "epoch": 0.7554570259208732, "grad_norm": 0.48451411724090576, "learning_rate": 2.9798853656708127e-07, "loss": 0.4908, "step": 4430 }, { "epoch": 0.7556275579809004, "grad_norm": 0.6371461153030396, "learning_rate": 2.9759530077397637e-07, "loss": 0.5009, "step": 4431 }, { "epoch": 0.7557980900409277, "grad_norm": 0.48880821466445923, "learning_rate": 2.9720227925412693e-07, "loss": 0.498, "step": 4432 }, { "epoch": 0.755968622100955, "grad_norm": 0.4787331819534302, "learning_rate": 2.9680947212742596e-07, "loss": 0.5109, "step": 4433 }, { "epoch": 0.7561391541609823, "grad_norm": 0.5188673734664917, "learning_rate": 2.9641687951370305e-07, "loss": 0.4982, "step": 4434 }, { "epoch": 0.7563096862210096, "grad_norm": 0.411820650100708, "learning_rate": 2.9602450153272035e-07, "loss": 0.4994, "step": 4435 }, { "epoch": 0.7564802182810368, "grad_norm": 0.5018237233161926, "learning_rate": 2.9563233830417564e-07, "loss": 0.5034, "step": 4436 }, { "epoch": 0.7566507503410641, "grad_norm": 0.42866119742393494, "learning_rate": 2.9524038994770114e-07, "loss": 0.5043, "step": 4437 }, { "epoch": 0.7568212824010914, "grad_norm": 0.44395798444747925, "learning_rate": 2.948486565828629e-07, "loss": 0.5068, "step": 4438 }, { "epoch": 0.7569918144611187, "grad_norm": 0.4456336498260498, "learning_rate": 2.9445713832916203e-07, "loss": 0.5021, "step": 4439 }, { "epoch": 0.757162346521146, "grad_norm": 0.3952493369579315, "learning_rate": 2.9406583530603376e-07, "loss": 0.5005, "step": 4440 }, { "epoch": 0.7573328785811733, "grad_norm": 0.5085216760635376, "learning_rate": 2.93674747632848e-07, "loss": 0.5109, "step": 4441 }, { "epoch": 0.7575034106412005, "grad_norm": 0.38747841119766235, "learning_rate": 2.932838754289078e-07, "loss": 0.4886, "step": 4442 }, { "epoch": 0.7576739427012278, "grad_norm": 0.43506336212158203, "learning_rate": 2.928932188134526e-07, "loss": 0.5031, "step": 4443 }, { "epoch": 0.7578444747612552, "grad_norm": 0.38857099413871765, "learning_rate": 2.925027779056541e-07, "loss": 0.4937, "step": 4444 }, { "epoch": 0.7580150068212824, "grad_norm": 0.5130706429481506, "learning_rate": 2.9211255282461837e-07, "loss": 0.4845, "step": 4445 }, { "epoch": 0.7581855388813097, "grad_norm": 0.5911456942558289, "learning_rate": 2.917225436893874e-07, "loss": 0.4997, "step": 4446 }, { "epoch": 0.758356070941337, "grad_norm": 0.4976506531238556, "learning_rate": 2.913327506189351e-07, "loss": 0.4941, "step": 4447 }, { "epoch": 0.7585266030013642, "grad_norm": 0.4999774992465973, "learning_rate": 2.909431737321709e-07, "loss": 0.5136, "step": 4448 }, { "epoch": 0.7586971350613916, "grad_norm": 0.38214829564094543, "learning_rate": 2.9055381314793767e-07, "loss": 0.4967, "step": 4449 }, { "epoch": 0.7588676671214188, "grad_norm": 0.6079405546188354, "learning_rate": 2.9016466898501286e-07, "loss": 0.4927, "step": 4450 }, { "epoch": 0.7590381991814461, "grad_norm": 0.4319635331630707, "learning_rate": 2.8977574136210696e-07, "loss": 0.4958, "step": 4451 }, { "epoch": 0.7592087312414734, "grad_norm": 0.5009601712226868, "learning_rate": 2.893870303978651e-07, "loss": 0.4968, "step": 4452 }, { "epoch": 0.7593792633015006, "grad_norm": 0.5611957311630249, "learning_rate": 2.889985362108666e-07, "loss": 0.4899, "step": 4453 }, { "epoch": 0.759549795361528, "grad_norm": 0.391587495803833, "learning_rate": 2.8861025891962317e-07, "loss": 0.4914, "step": 4454 }, { "epoch": 0.7597203274215553, "grad_norm": 0.5809473991394043, "learning_rate": 2.8822219864258275e-07, "loss": 0.4972, "step": 4455 }, { "epoch": 0.7598908594815825, "grad_norm": 0.5915910601615906, "learning_rate": 2.878343554981247e-07, "loss": 0.5051, "step": 4456 }, { "epoch": 0.7600613915416098, "grad_norm": 0.47726011276245117, "learning_rate": 2.8744672960456346e-07, "loss": 0.4919, "step": 4457 }, { "epoch": 0.7602319236016372, "grad_norm": 0.4725464880466461, "learning_rate": 2.87059321080147e-07, "loss": 0.4915, "step": 4458 }, { "epoch": 0.7604024556616644, "grad_norm": 0.508224606513977, "learning_rate": 2.866721300430572e-07, "loss": 0.498, "step": 4459 }, { "epoch": 0.7605729877216917, "grad_norm": 0.4273339509963989, "learning_rate": 2.8628515661140856e-07, "loss": 0.4995, "step": 4460 }, { "epoch": 0.760743519781719, "grad_norm": 0.3837960660457611, "learning_rate": 2.858984009032503e-07, "loss": 0.4937, "step": 4461 }, { "epoch": 0.7609140518417462, "grad_norm": 0.44394344091415405, "learning_rate": 2.855118630365652e-07, "loss": 0.4942, "step": 4462 }, { "epoch": 0.7610845839017736, "grad_norm": 0.5421721935272217, "learning_rate": 2.851255431292681e-07, "loss": 0.4907, "step": 4463 }, { "epoch": 0.7612551159618008, "grad_norm": 0.5385862588882446, "learning_rate": 2.8473944129920987e-07, "loss": 0.4976, "step": 4464 }, { "epoch": 0.7614256480218281, "grad_norm": 0.4513562023639679, "learning_rate": 2.843535576641725e-07, "loss": 0.4957, "step": 4465 }, { "epoch": 0.7615961800818554, "grad_norm": 0.6170771718025208, "learning_rate": 2.839678923418726e-07, "loss": 0.498, "step": 4466 }, { "epoch": 0.7617667121418826, "grad_norm": 0.3476124703884125, "learning_rate": 2.835824454499604e-07, "loss": 0.4915, "step": 4467 }, { "epoch": 0.76193724420191, "grad_norm": 0.6450405716896057, "learning_rate": 2.8319721710601847e-07, "loss": 0.4961, "step": 4468 }, { "epoch": 0.7621077762619373, "grad_norm": 0.4782792925834656, "learning_rate": 2.8281220742756337e-07, "loss": 0.4848, "step": 4469 }, { "epoch": 0.7622783083219645, "grad_norm": 0.49631768465042114, "learning_rate": 2.8242741653204513e-07, "loss": 0.4936, "step": 4470 }, { "epoch": 0.7624488403819918, "grad_norm": 0.4693368375301361, "learning_rate": 2.8204284453684683e-07, "loss": 0.5045, "step": 4471 }, { "epoch": 0.762619372442019, "grad_norm": 0.4708783030509949, "learning_rate": 2.816584915592842e-07, "loss": 0.4899, "step": 4472 }, { "epoch": 0.7627899045020464, "grad_norm": 0.5898051261901855, "learning_rate": 2.812743577166075e-07, "loss": 0.5111, "step": 4473 }, { "epoch": 0.7629604365620737, "grad_norm": 0.4492618143558502, "learning_rate": 2.808904431259986e-07, "loss": 0.4966, "step": 4474 }, { "epoch": 0.763130968622101, "grad_norm": 0.5589696764945984, "learning_rate": 2.8050674790457356e-07, "loss": 0.5017, "step": 4475 }, { "epoch": 0.7633015006821282, "grad_norm": 0.5096504092216492, "learning_rate": 2.801232721693814e-07, "loss": 0.5044, "step": 4476 }, { "epoch": 0.7634720327421555, "grad_norm": 0.4462493658065796, "learning_rate": 2.7974001603740343e-07, "loss": 0.5001, "step": 4477 }, { "epoch": 0.7636425648021828, "grad_norm": 0.45079052448272705, "learning_rate": 2.793569796255548e-07, "loss": 0.5068, "step": 4478 }, { "epoch": 0.7638130968622101, "grad_norm": 0.44240808486938477, "learning_rate": 2.789741630506833e-07, "loss": 0.4968, "step": 4479 }, { "epoch": 0.7639836289222374, "grad_norm": 0.4910196363925934, "learning_rate": 2.7859156642956955e-07, "loss": 0.4889, "step": 4480 }, { "epoch": 0.7641541609822646, "grad_norm": 0.332023024559021, "learning_rate": 2.7820918987892747e-07, "loss": 0.4937, "step": 4481 }, { "epoch": 0.764324693042292, "grad_norm": 0.5296370387077332, "learning_rate": 2.7782703351540364e-07, "loss": 0.4893, "step": 4482 }, { "epoch": 0.7644952251023193, "grad_norm": 0.4386923313140869, "learning_rate": 2.774450974555769e-07, "loss": 0.502, "step": 4483 }, { "epoch": 0.7646657571623465, "grad_norm": 0.430686354637146, "learning_rate": 2.7706338181595986e-07, "loss": 0.5086, "step": 4484 }, { "epoch": 0.7648362892223738, "grad_norm": 0.5587331652641296, "learning_rate": 2.766818867129976e-07, "loss": 0.5095, "step": 4485 }, { "epoch": 0.765006821282401, "grad_norm": 0.5019792914390564, "learning_rate": 2.7630061226306715e-07, "loss": 0.4989, "step": 4486 }, { "epoch": 0.7651773533424284, "grad_norm": 0.4718862771987915, "learning_rate": 2.7591955858247913e-07, "loss": 0.4937, "step": 4487 }, { "epoch": 0.7653478854024557, "grad_norm": 0.5303227305412292, "learning_rate": 2.7553872578747644e-07, "loss": 0.4935, "step": 4488 }, { "epoch": 0.765518417462483, "grad_norm": 0.5563722252845764, "learning_rate": 2.751581139942349e-07, "loss": 0.5052, "step": 4489 }, { "epoch": 0.7656889495225102, "grad_norm": 0.44941800832748413, "learning_rate": 2.747777233188628e-07, "loss": 0.4982, "step": 4490 }, { "epoch": 0.7658594815825375, "grad_norm": 0.4935671091079712, "learning_rate": 2.7439755387740023e-07, "loss": 0.4948, "step": 4491 }, { "epoch": 0.7660300136425648, "grad_norm": 0.3984651565551758, "learning_rate": 2.7401760578582085e-07, "loss": 0.497, "step": 4492 }, { "epoch": 0.7662005457025921, "grad_norm": 0.6637492775917053, "learning_rate": 2.736378791600304e-07, "loss": 0.5156, "step": 4493 }, { "epoch": 0.7663710777626194, "grad_norm": 0.6302986145019531, "learning_rate": 2.732583741158671e-07, "loss": 0.4999, "step": 4494 }, { "epoch": 0.7665416098226466, "grad_norm": 0.3748490512371063, "learning_rate": 2.728790907691012e-07, "loss": 0.5024, "step": 4495 }, { "epoch": 0.7667121418826739, "grad_norm": 0.6701140999794006, "learning_rate": 2.7250002923543576e-07, "loss": 0.4969, "step": 4496 }, { "epoch": 0.7668826739427013, "grad_norm": 0.6671623587608337, "learning_rate": 2.7212118963050597e-07, "loss": 0.4965, "step": 4497 }, { "epoch": 0.7670532060027285, "grad_norm": 0.38242241740226746, "learning_rate": 2.7174257206987946e-07, "loss": 0.5043, "step": 4498 }, { "epoch": 0.7672237380627558, "grad_norm": 0.6067281365394592, "learning_rate": 2.7136417666905647e-07, "loss": 0.4997, "step": 4499 }, { "epoch": 0.767394270122783, "grad_norm": 0.5838785171508789, "learning_rate": 2.709860035434683e-07, "loss": 0.5098, "step": 4500 }, { "epoch": 0.7675648021828103, "grad_norm": 0.4934817850589752, "learning_rate": 2.706080528084795e-07, "loss": 0.5003, "step": 4501 }, { "epoch": 0.7677353342428377, "grad_norm": 0.4918252229690552, "learning_rate": 2.702303245793865e-07, "loss": 0.4968, "step": 4502 }, { "epoch": 0.767905866302865, "grad_norm": 0.490156352519989, "learning_rate": 2.6985281897141815e-07, "loss": 0.4829, "step": 4503 }, { "epoch": 0.7680763983628922, "grad_norm": 0.5496761798858643, "learning_rate": 2.694755360997342e-07, "loss": 0.4857, "step": 4504 }, { "epoch": 0.7682469304229195, "grad_norm": 0.4437698423862457, "learning_rate": 2.690984760794284e-07, "loss": 0.5017, "step": 4505 }, { "epoch": 0.7684174624829468, "grad_norm": 0.5472577214241028, "learning_rate": 2.6872163902552497e-07, "loss": 0.4911, "step": 4506 }, { "epoch": 0.7685879945429741, "grad_norm": 0.38096439838409424, "learning_rate": 2.683450250529798e-07, "loss": 0.4991, "step": 4507 }, { "epoch": 0.7687585266030014, "grad_norm": 0.5786945819854736, "learning_rate": 2.679686342766829e-07, "loss": 0.5001, "step": 4508 }, { "epoch": 0.7689290586630286, "grad_norm": 0.38383108377456665, "learning_rate": 2.675924668114537e-07, "loss": 0.5044, "step": 4509 }, { "epoch": 0.7690995907230559, "grad_norm": 0.5291785001754761, "learning_rate": 2.6721652277204503e-07, "loss": 0.5038, "step": 4510 }, { "epoch": 0.7692701227830833, "grad_norm": 0.4897950291633606, "learning_rate": 2.6684080227314115e-07, "loss": 0.4946, "step": 4511 }, { "epoch": 0.7694406548431105, "grad_norm": 0.45776981115341187, "learning_rate": 2.6646530542935835e-07, "loss": 0.4994, "step": 4512 }, { "epoch": 0.7696111869031378, "grad_norm": 0.41584646701812744, "learning_rate": 2.660900323552436e-07, "loss": 0.5043, "step": 4513 }, { "epoch": 0.769781718963165, "grad_norm": 0.38639041781425476, "learning_rate": 2.6571498316527755e-07, "loss": 0.4968, "step": 4514 }, { "epoch": 0.7699522510231923, "grad_norm": 0.4452049434185028, "learning_rate": 2.6534015797387105e-07, "loss": 0.5143, "step": 4515 }, { "epoch": 0.7701227830832197, "grad_norm": 0.4047050178050995, "learning_rate": 2.6496555689536634e-07, "loss": 0.4849, "step": 4516 }, { "epoch": 0.770293315143247, "grad_norm": 0.46245694160461426, "learning_rate": 2.645911800440392e-07, "loss": 0.5078, "step": 4517 }, { "epoch": 0.7704638472032742, "grad_norm": 0.49868836998939514, "learning_rate": 2.6421702753409475e-07, "loss": 0.4908, "step": 4518 }, { "epoch": 0.7706343792633015, "grad_norm": 0.44043809175491333, "learning_rate": 2.6384309947967124e-07, "loss": 0.498, "step": 4519 }, { "epoch": 0.7708049113233287, "grad_norm": 0.5068042874336243, "learning_rate": 2.634693959948378e-07, "loss": 0.5017, "step": 4520 }, { "epoch": 0.7709754433833561, "grad_norm": 0.514275848865509, "learning_rate": 2.6309591719359566e-07, "loss": 0.4937, "step": 4521 }, { "epoch": 0.7711459754433834, "grad_norm": 0.36614111065864563, "learning_rate": 2.627226631898761e-07, "loss": 0.5046, "step": 4522 }, { "epoch": 0.7713165075034106, "grad_norm": 0.4957779347896576, "learning_rate": 2.623496340975434e-07, "loss": 0.4925, "step": 4523 }, { "epoch": 0.7714870395634379, "grad_norm": 0.45070260763168335, "learning_rate": 2.6197683003039256e-07, "loss": 0.4907, "step": 4524 }, { "epoch": 0.7716575716234653, "grad_norm": 0.4274146854877472, "learning_rate": 2.616042511021492e-07, "loss": 0.4984, "step": 4525 }, { "epoch": 0.7718281036834925, "grad_norm": 0.3768433630466461, "learning_rate": 2.6123189742647226e-07, "loss": 0.5014, "step": 4526 }, { "epoch": 0.7719986357435198, "grad_norm": 0.37700000405311584, "learning_rate": 2.608597691169499e-07, "loss": 0.505, "step": 4527 }, { "epoch": 0.772169167803547, "grad_norm": 0.36473509669303894, "learning_rate": 2.6048786628710247e-07, "loss": 0.49, "step": 4528 }, { "epoch": 0.7723396998635743, "grad_norm": 0.4933687448501587, "learning_rate": 2.6011618905038175e-07, "loss": 0.4993, "step": 4529 }, { "epoch": 0.7725102319236017, "grad_norm": 0.46770721673965454, "learning_rate": 2.5974473752016987e-07, "loss": 0.4912, "step": 4530 }, { "epoch": 0.772680763983629, "grad_norm": 0.3749657869338989, "learning_rate": 2.593735118097808e-07, "loss": 0.4921, "step": 4531 }, { "epoch": 0.7728512960436562, "grad_norm": 0.4584238827228546, "learning_rate": 2.5900251203245945e-07, "loss": 0.5103, "step": 4532 }, { "epoch": 0.7730218281036835, "grad_norm": 0.4406527578830719, "learning_rate": 2.5863173830138214e-07, "loss": 0.4925, "step": 4533 }, { "epoch": 0.7731923601637107, "grad_norm": 0.43562811613082886, "learning_rate": 2.5826119072965495e-07, "loss": 0.5035, "step": 4534 }, { "epoch": 0.7733628922237381, "grad_norm": 0.5225963592529297, "learning_rate": 2.5789086943031705e-07, "loss": 0.4919, "step": 4535 }, { "epoch": 0.7735334242837654, "grad_norm": 0.4101422131061554, "learning_rate": 2.575207745163366e-07, "loss": 0.493, "step": 4536 }, { "epoch": 0.7737039563437926, "grad_norm": 0.43135133385658264, "learning_rate": 2.571509061006138e-07, "loss": 0.5007, "step": 4537 }, { "epoch": 0.7738744884038199, "grad_norm": 0.4580155909061432, "learning_rate": 2.5678126429597987e-07, "loss": 0.4936, "step": 4538 }, { "epoch": 0.7740450204638472, "grad_norm": 0.4478892683982849, "learning_rate": 2.564118492151957e-07, "loss": 0.4903, "step": 4539 }, { "epoch": 0.7742155525238745, "grad_norm": 0.4683667719364166, "learning_rate": 2.560426609709543e-07, "loss": 0.5054, "step": 4540 }, { "epoch": 0.7743860845839018, "grad_norm": 0.5699395537376404, "learning_rate": 2.5567369967587907e-07, "loss": 0.4867, "step": 4541 }, { "epoch": 0.774556616643929, "grad_norm": 0.4768561124801636, "learning_rate": 2.553049654425243e-07, "loss": 0.5034, "step": 4542 }, { "epoch": 0.7747271487039563, "grad_norm": 0.40361812710762024, "learning_rate": 2.549364583833739e-07, "loss": 0.4825, "step": 4543 }, { "epoch": 0.7748976807639836, "grad_norm": 0.45786723494529724, "learning_rate": 2.5456817861084474e-07, "loss": 0.5049, "step": 4544 }, { "epoch": 0.775068212824011, "grad_norm": 0.39945563673973083, "learning_rate": 2.542001262372821e-07, "loss": 0.5072, "step": 4545 }, { "epoch": 0.7752387448840382, "grad_norm": 0.4828221797943115, "learning_rate": 2.538323013749631e-07, "loss": 0.4982, "step": 4546 }, { "epoch": 0.7754092769440655, "grad_norm": 0.41438600420951843, "learning_rate": 2.534647041360955e-07, "loss": 0.4921, "step": 4547 }, { "epoch": 0.7755798090040927, "grad_norm": 0.376027911901474, "learning_rate": 2.5309733463281665e-07, "loss": 0.4951, "step": 4548 }, { "epoch": 0.7757503410641201, "grad_norm": 0.4809345602989197, "learning_rate": 2.5273019297719525e-07, "loss": 0.5037, "step": 4549 }, { "epoch": 0.7759208731241474, "grad_norm": 0.3895203173160553, "learning_rate": 2.523632792812306e-07, "loss": 0.4927, "step": 4550 }, { "epoch": 0.7760914051841746, "grad_norm": 0.42986854910850525, "learning_rate": 2.519965936568524e-07, "loss": 0.4939, "step": 4551 }, { "epoch": 0.7762619372442019, "grad_norm": 0.4543752670288086, "learning_rate": 2.516301362159195e-07, "loss": 0.4909, "step": 4552 }, { "epoch": 0.7764324693042292, "grad_norm": 0.49290981888771057, "learning_rate": 2.5126390707022356e-07, "loss": 0.4931, "step": 4553 }, { "epoch": 0.7766030013642565, "grad_norm": 0.47044581174850464, "learning_rate": 2.5089790633148426e-07, "loss": 0.4873, "step": 4554 }, { "epoch": 0.7767735334242838, "grad_norm": 0.40951424837112427, "learning_rate": 2.5053213411135293e-07, "loss": 0.4887, "step": 4555 }, { "epoch": 0.776944065484311, "grad_norm": 0.45414459705352783, "learning_rate": 2.50166590521411e-07, "loss": 0.4901, "step": 4556 }, { "epoch": 0.7771145975443383, "grad_norm": 0.4336784780025482, "learning_rate": 2.498012756731695e-07, "loss": 0.4989, "step": 4557 }, { "epoch": 0.7772851296043656, "grad_norm": 0.5325224995613098, "learning_rate": 2.494361896780705e-07, "loss": 0.4842, "step": 4558 }, { "epoch": 0.777455661664393, "grad_norm": 0.5549018979072571, "learning_rate": 2.4907133264748586e-07, "loss": 0.4966, "step": 4559 }, { "epoch": 0.7776261937244202, "grad_norm": 0.40138453245162964, "learning_rate": 2.4870670469271785e-07, "loss": 0.4968, "step": 4560 }, { "epoch": 0.7777967257844475, "grad_norm": 0.4988255202770233, "learning_rate": 2.483423059249983e-07, "loss": 0.4895, "step": 4561 }, { "epoch": 0.7779672578444747, "grad_norm": 0.4912653863430023, "learning_rate": 2.4797813645548973e-07, "loss": 0.4998, "step": 4562 }, { "epoch": 0.778137789904502, "grad_norm": 0.40639805793762207, "learning_rate": 2.476141963952844e-07, "loss": 0.4951, "step": 4563 }, { "epoch": 0.7783083219645294, "grad_norm": 0.5302715301513672, "learning_rate": 2.472504858554046e-07, "loss": 0.4897, "step": 4564 }, { "epoch": 0.7784788540245566, "grad_norm": 0.5033547282218933, "learning_rate": 2.468870049468032e-07, "loss": 0.5007, "step": 4565 }, { "epoch": 0.7786493860845839, "grad_norm": 0.4595785439014435, "learning_rate": 2.465237537803617e-07, "loss": 0.4971, "step": 4566 }, { "epoch": 0.7788199181446112, "grad_norm": 0.506290853023529, "learning_rate": 2.4616073246689254e-07, "loss": 0.4946, "step": 4567 }, { "epoch": 0.7789904502046384, "grad_norm": 0.5239617228507996, "learning_rate": 2.4579794111713797e-07, "loss": 0.4873, "step": 4568 }, { "epoch": 0.7791609822646658, "grad_norm": 0.3967903256416321, "learning_rate": 2.4543537984176983e-07, "loss": 0.5091, "step": 4569 }, { "epoch": 0.779331514324693, "grad_norm": 0.4829061031341553, "learning_rate": 2.4507304875139017e-07, "loss": 0.5017, "step": 4570 }, { "epoch": 0.7795020463847203, "grad_norm": 0.5171961784362793, "learning_rate": 2.4471094795653e-07, "loss": 0.4972, "step": 4571 }, { "epoch": 0.7796725784447476, "grad_norm": 0.4271591305732727, "learning_rate": 2.443490775676507e-07, "loss": 0.5031, "step": 4572 }, { "epoch": 0.779843110504775, "grad_norm": 0.4533305764198303, "learning_rate": 2.4398743769514345e-07, "loss": 0.5033, "step": 4573 }, { "epoch": 0.7800136425648022, "grad_norm": 0.45902299880981445, "learning_rate": 2.436260284493292e-07, "loss": 0.5008, "step": 4574 }, { "epoch": 0.7801841746248295, "grad_norm": 0.4595572054386139, "learning_rate": 2.4326484994045754e-07, "loss": 0.489, "step": 4575 }, { "epoch": 0.7803547066848567, "grad_norm": 0.5287113785743713, "learning_rate": 2.429039022787088e-07, "loss": 0.4937, "step": 4576 }, { "epoch": 0.780525238744884, "grad_norm": 0.48877352476119995, "learning_rate": 2.425431855741926e-07, "loss": 0.4846, "step": 4577 }, { "epoch": 0.7806957708049114, "grad_norm": 0.5545276403427124, "learning_rate": 2.4218269993694736e-07, "loss": 0.5099, "step": 4578 }, { "epoch": 0.7808663028649386, "grad_norm": 0.5091724395751953, "learning_rate": 2.4182244547694255e-07, "loss": 0.4957, "step": 4579 }, { "epoch": 0.7810368349249659, "grad_norm": 0.4598775804042816, "learning_rate": 2.414624223040756e-07, "loss": 0.4944, "step": 4580 }, { "epoch": 0.7812073669849932, "grad_norm": 0.5534014701843262, "learning_rate": 2.41102630528174e-07, "loss": 0.5014, "step": 4581 }, { "epoch": 0.7813778990450204, "grad_norm": 0.44594284892082214, "learning_rate": 2.407430702589948e-07, "loss": 0.5019, "step": 4582 }, { "epoch": 0.7815484311050478, "grad_norm": 0.461640328168869, "learning_rate": 2.403837416062246e-07, "loss": 0.4904, "step": 4583 }, { "epoch": 0.781718963165075, "grad_norm": 0.5966527462005615, "learning_rate": 2.400246446794783e-07, "loss": 0.5012, "step": 4584 }, { "epoch": 0.7818894952251023, "grad_norm": 0.3989943563938141, "learning_rate": 2.396657795883013e-07, "loss": 0.49, "step": 4585 }, { "epoch": 0.7820600272851296, "grad_norm": 0.548056423664093, "learning_rate": 2.3930714644216784e-07, "loss": 0.4977, "step": 4586 }, { "epoch": 0.7822305593451568, "grad_norm": 0.47558221220970154, "learning_rate": 2.3894874535048064e-07, "loss": 0.4935, "step": 4587 }, { "epoch": 0.7824010914051842, "grad_norm": 0.4634142220020294, "learning_rate": 2.3859057642257345e-07, "loss": 0.4945, "step": 4588 }, { "epoch": 0.7825716234652115, "grad_norm": 0.5071300864219666, "learning_rate": 2.382326397677073e-07, "loss": 0.4945, "step": 4589 }, { "epoch": 0.7827421555252387, "grad_norm": 0.3968707025051117, "learning_rate": 2.3787493549507348e-07, "loss": 0.4949, "step": 4590 }, { "epoch": 0.782912687585266, "grad_norm": 0.5555402636528015, "learning_rate": 2.3751746371379195e-07, "loss": 0.5007, "step": 4591 }, { "epoch": 0.7830832196452933, "grad_norm": 0.4424824118614197, "learning_rate": 2.371602245329121e-07, "loss": 0.48, "step": 4592 }, { "epoch": 0.7832537517053206, "grad_norm": 0.5057586431503296, "learning_rate": 2.3680321806141185e-07, "loss": 0.5071, "step": 4593 }, { "epoch": 0.7834242837653479, "grad_norm": 0.44602543115615845, "learning_rate": 2.3644644440819843e-07, "loss": 0.4994, "step": 4594 }, { "epoch": 0.7835948158253752, "grad_norm": 0.4586191475391388, "learning_rate": 2.3608990368210852e-07, "loss": 0.5036, "step": 4595 }, { "epoch": 0.7837653478854024, "grad_norm": 0.5694007873535156, "learning_rate": 2.3573359599190618e-07, "loss": 0.4943, "step": 4596 }, { "epoch": 0.7839358799454298, "grad_norm": 0.4275916516780853, "learning_rate": 2.353775214462868e-07, "loss": 0.504, "step": 4597 }, { "epoch": 0.784106412005457, "grad_norm": 0.46295440196990967, "learning_rate": 2.350216801538723e-07, "loss": 0.5034, "step": 4598 }, { "epoch": 0.7842769440654843, "grad_norm": 0.49615952372550964, "learning_rate": 2.3466607222321482e-07, "loss": 0.5159, "step": 4599 }, { "epoch": 0.7844474761255116, "grad_norm": 0.48973459005355835, "learning_rate": 2.3431069776279515e-07, "loss": 0.5121, "step": 4600 }, { "epoch": 0.7846180081855388, "grad_norm": 0.47051751613616943, "learning_rate": 2.3395555688102214e-07, "loss": 0.5045, "step": 4601 }, { "epoch": 0.7847885402455662, "grad_norm": 0.4109767973423004, "learning_rate": 2.3360064968623422e-07, "loss": 0.5067, "step": 4602 }, { "epoch": 0.7849590723055935, "grad_norm": 0.4106697738170624, "learning_rate": 2.33245976286698e-07, "loss": 0.5038, "step": 4603 }, { "epoch": 0.7851296043656207, "grad_norm": 0.4463314116001129, "learning_rate": 2.3289153679060952e-07, "loss": 0.507, "step": 4604 }, { "epoch": 0.785300136425648, "grad_norm": 0.5195944905281067, "learning_rate": 2.3253733130609192e-07, "loss": 0.5078, "step": 4605 }, { "epoch": 0.7854706684856753, "grad_norm": 0.4397755563259125, "learning_rate": 2.3218335994119912e-07, "loss": 0.5056, "step": 4606 }, { "epoch": 0.7856412005457026, "grad_norm": 0.409441739320755, "learning_rate": 2.3182962280391165e-07, "loss": 0.5062, "step": 4607 }, { "epoch": 0.7858117326057299, "grad_norm": 0.5190573930740356, "learning_rate": 2.3147612000213962e-07, "loss": 0.5072, "step": 4608 }, { "epoch": 0.7859822646657572, "grad_norm": 0.5061396360397339, "learning_rate": 2.3112285164372173e-07, "loss": 0.4996, "step": 4609 }, { "epoch": 0.7861527967257844, "grad_norm": 0.43007734417915344, "learning_rate": 2.307698178364243e-07, "loss": 0.5052, "step": 4610 }, { "epoch": 0.7863233287858117, "grad_norm": 0.5201791524887085, "learning_rate": 2.3041701868794288e-07, "loss": 0.499, "step": 4611 }, { "epoch": 0.786493860845839, "grad_norm": 0.4248791038990021, "learning_rate": 2.300644543059012e-07, "loss": 0.502, "step": 4612 }, { "epoch": 0.7866643929058663, "grad_norm": 0.40481671690940857, "learning_rate": 2.297121247978518e-07, "loss": 0.4925, "step": 4613 }, { "epoch": 0.7868349249658936, "grad_norm": 0.4447843134403229, "learning_rate": 2.293600302712742e-07, "loss": 0.5085, "step": 4614 }, { "epoch": 0.7870054570259208, "grad_norm": 0.6058522462844849, "learning_rate": 2.2900817083357832e-07, "loss": 0.4921, "step": 4615 }, { "epoch": 0.7871759890859482, "grad_norm": 0.46241334080696106, "learning_rate": 2.286565465921007e-07, "loss": 0.4988, "step": 4616 }, { "epoch": 0.7873465211459755, "grad_norm": 0.5033152103424072, "learning_rate": 2.2830515765410623e-07, "loss": 0.4892, "step": 4617 }, { "epoch": 0.7875170532060027, "grad_norm": 0.4851181209087372, "learning_rate": 2.2795400412678928e-07, "loss": 0.5072, "step": 4618 }, { "epoch": 0.78768758526603, "grad_norm": 0.5065212249755859, "learning_rate": 2.2760308611727102e-07, "loss": 0.4882, "step": 4619 }, { "epoch": 0.7878581173260573, "grad_norm": 0.6156156659126282, "learning_rate": 2.272524037326015e-07, "loss": 0.5022, "step": 4620 }, { "epoch": 0.7880286493860846, "grad_norm": 0.4571601152420044, "learning_rate": 2.2690195707975882e-07, "loss": 0.4996, "step": 4621 }, { "epoch": 0.7881991814461119, "grad_norm": 0.47723454236984253, "learning_rate": 2.2655174626564915e-07, "loss": 0.5001, "step": 4622 }, { "epoch": 0.7883697135061392, "grad_norm": 0.5316779613494873, "learning_rate": 2.2620177139710632e-07, "loss": 0.503, "step": 4623 }, { "epoch": 0.7885402455661664, "grad_norm": 0.5502619743347168, "learning_rate": 2.2585203258089264e-07, "loss": 0.4914, "step": 4624 }, { "epoch": 0.7887107776261937, "grad_norm": 0.5683772563934326, "learning_rate": 2.255025299236984e-07, "loss": 0.5019, "step": 4625 }, { "epoch": 0.788881309686221, "grad_norm": 0.45791706442832947, "learning_rate": 2.2515326353214152e-07, "loss": 0.4842, "step": 4626 }, { "epoch": 0.7890518417462483, "grad_norm": 0.5102027654647827, "learning_rate": 2.248042335127685e-07, "loss": 0.4976, "step": 4627 }, { "epoch": 0.7892223738062756, "grad_norm": 0.40297356247901917, "learning_rate": 2.244554399720528e-07, "loss": 0.4915, "step": 4628 }, { "epoch": 0.7893929058663028, "grad_norm": 0.4550130367279053, "learning_rate": 2.2410688301639618e-07, "loss": 0.4972, "step": 4629 }, { "epoch": 0.7895634379263301, "grad_norm": 0.5347708463668823, "learning_rate": 2.237585627521286e-07, "loss": 0.4883, "step": 4630 }, { "epoch": 0.7897339699863575, "grad_norm": 0.42419931292533875, "learning_rate": 2.2341047928550765e-07, "loss": 0.5032, "step": 4631 }, { "epoch": 0.7899045020463847, "grad_norm": 0.5129116177558899, "learning_rate": 2.230626327227179e-07, "loss": 0.5012, "step": 4632 }, { "epoch": 0.790075034106412, "grad_norm": 0.5253115296363831, "learning_rate": 2.2271502316987272e-07, "loss": 0.4891, "step": 4633 }, { "epoch": 0.7902455661664393, "grad_norm": 0.38880401849746704, "learning_rate": 2.223676507330125e-07, "loss": 0.4922, "step": 4634 }, { "epoch": 0.7904160982264665, "grad_norm": 0.5668837428092957, "learning_rate": 2.220205155181057e-07, "loss": 0.4936, "step": 4635 }, { "epoch": 0.7905866302864939, "grad_norm": 0.41947200894355774, "learning_rate": 2.2167361763104846e-07, "loss": 0.4942, "step": 4636 }, { "epoch": 0.7907571623465212, "grad_norm": 0.5962218642234802, "learning_rate": 2.2132695717766374e-07, "loss": 0.4875, "step": 4637 }, { "epoch": 0.7909276944065484, "grad_norm": 0.4495481550693512, "learning_rate": 2.2098053426370294e-07, "loss": 0.4971, "step": 4638 }, { "epoch": 0.7910982264665757, "grad_norm": 0.5138792395591736, "learning_rate": 2.2063434899484494e-07, "loss": 0.5022, "step": 4639 }, { "epoch": 0.791268758526603, "grad_norm": 0.5427955389022827, "learning_rate": 2.2028840147669529e-07, "loss": 0.503, "step": 4640 }, { "epoch": 0.7914392905866303, "grad_norm": 0.4466486871242523, "learning_rate": 2.19942691814788e-07, "loss": 0.4928, "step": 4641 }, { "epoch": 0.7916098226466576, "grad_norm": 0.6267170906066895, "learning_rate": 2.1959722011458398e-07, "loss": 0.4923, "step": 4642 }, { "epoch": 0.7917803547066848, "grad_norm": 0.4920784533023834, "learning_rate": 2.1925198648147175e-07, "loss": 0.4912, "step": 4643 }, { "epoch": 0.7919508867667121, "grad_norm": 0.5684638619422913, "learning_rate": 2.189069910207671e-07, "loss": 0.4943, "step": 4644 }, { "epoch": 0.7921214188267395, "grad_norm": 0.5882368683815002, "learning_rate": 2.185622338377135e-07, "loss": 0.4884, "step": 4645 }, { "epoch": 0.7922919508867667, "grad_norm": 0.5498755574226379, "learning_rate": 2.1821771503748092e-07, "loss": 0.5037, "step": 4646 }, { "epoch": 0.792462482946794, "grad_norm": 0.5423489212989807, "learning_rate": 2.1787343472516732e-07, "loss": 0.4927, "step": 4647 }, { "epoch": 0.7926330150068213, "grad_norm": 0.6042127013206482, "learning_rate": 2.1752939300579803e-07, "loss": 0.4992, "step": 4648 }, { "epoch": 0.7928035470668485, "grad_norm": 0.6443652510643005, "learning_rate": 2.1718558998432483e-07, "loss": 0.4856, "step": 4649 }, { "epoch": 0.7929740791268759, "grad_norm": 0.39260923862457275, "learning_rate": 2.168420257656272e-07, "loss": 0.5008, "step": 4650 }, { "epoch": 0.7931446111869032, "grad_norm": 0.5619345903396606, "learning_rate": 2.1649870045451185e-07, "loss": 0.4923, "step": 4651 }, { "epoch": 0.7933151432469304, "grad_norm": 0.3600766360759735, "learning_rate": 2.1615561415571252e-07, "loss": 0.4897, "step": 4652 }, { "epoch": 0.7934856753069577, "grad_norm": 0.4772145450115204, "learning_rate": 2.1581276697388976e-07, "loss": 0.4913, "step": 4653 }, { "epoch": 0.793656207366985, "grad_norm": 0.5267781019210815, "learning_rate": 2.1547015901363191e-07, "loss": 0.4922, "step": 4654 }, { "epoch": 0.7938267394270123, "grad_norm": 0.38906392455101013, "learning_rate": 2.151277903794532e-07, "loss": 0.5007, "step": 4655 }, { "epoch": 0.7939972714870396, "grad_norm": 0.4681243300437927, "learning_rate": 2.147856611757957e-07, "loss": 0.4967, "step": 4656 }, { "epoch": 0.7941678035470668, "grad_norm": 0.44217199087142944, "learning_rate": 2.1444377150702847e-07, "loss": 0.5008, "step": 4657 }, { "epoch": 0.7943383356070941, "grad_norm": 0.43402767181396484, "learning_rate": 2.1410212147744638e-07, "loss": 0.5007, "step": 4658 }, { "epoch": 0.7945088676671214, "grad_norm": 0.43505313992500305, "learning_rate": 2.1376071119127342e-07, "loss": 0.5071, "step": 4659 }, { "epoch": 0.7946793997271487, "grad_norm": 0.5335257053375244, "learning_rate": 2.1341954075265793e-07, "loss": 0.4924, "step": 4660 }, { "epoch": 0.794849931787176, "grad_norm": 0.5619838833808899, "learning_rate": 2.130786102656767e-07, "loss": 0.5025, "step": 4661 }, { "epoch": 0.7950204638472033, "grad_norm": 0.4406771659851074, "learning_rate": 2.1273791983433302e-07, "loss": 0.4848, "step": 4662 }, { "epoch": 0.7951909959072305, "grad_norm": 0.609472393989563, "learning_rate": 2.1239746956255638e-07, "loss": 0.5002, "step": 4663 }, { "epoch": 0.7953615279672579, "grad_norm": 0.4150250554084778, "learning_rate": 2.1205725955420354e-07, "loss": 0.49, "step": 4664 }, { "epoch": 0.7955320600272852, "grad_norm": 0.5046654343605042, "learning_rate": 2.1171728991305798e-07, "loss": 0.4919, "step": 4665 }, { "epoch": 0.7957025920873124, "grad_norm": 0.4161365032196045, "learning_rate": 2.1137756074282992e-07, "loss": 0.5001, "step": 4666 }, { "epoch": 0.7958731241473397, "grad_norm": 0.47114861011505127, "learning_rate": 2.1103807214715512e-07, "loss": 0.4944, "step": 4667 }, { "epoch": 0.796043656207367, "grad_norm": 0.5760286450386047, "learning_rate": 2.106988242295981e-07, "loss": 0.4911, "step": 4668 }, { "epoch": 0.7962141882673943, "grad_norm": 0.43809956312179565, "learning_rate": 2.1035981709364784e-07, "loss": 0.4979, "step": 4669 }, { "epoch": 0.7963847203274216, "grad_norm": 0.5407879948616028, "learning_rate": 2.1002105084272086e-07, "loss": 0.4978, "step": 4670 }, { "epoch": 0.7965552523874488, "grad_norm": 0.40855517983436584, "learning_rate": 2.0968252558016056e-07, "loss": 0.4943, "step": 4671 }, { "epoch": 0.7967257844474761, "grad_norm": 0.5361022353172302, "learning_rate": 2.093442414092356e-07, "loss": 0.507, "step": 4672 }, { "epoch": 0.7968963165075034, "grad_norm": 0.46625766158103943, "learning_rate": 2.090061984331423e-07, "loss": 0.498, "step": 4673 }, { "epoch": 0.7970668485675307, "grad_norm": 0.44276198744773865, "learning_rate": 2.0866839675500274e-07, "loss": 0.5048, "step": 4674 }, { "epoch": 0.797237380627558, "grad_norm": 0.5449864268302917, "learning_rate": 2.0833083647786603e-07, "loss": 0.4888, "step": 4675 }, { "epoch": 0.7974079126875853, "grad_norm": 0.41112643480300903, "learning_rate": 2.0799351770470613e-07, "loss": 0.4944, "step": 4676 }, { "epoch": 0.7975784447476125, "grad_norm": 0.5226261019706726, "learning_rate": 2.0765644053842583e-07, "loss": 0.4907, "step": 4677 }, { "epoch": 0.7977489768076398, "grad_norm": 0.4252602159976959, "learning_rate": 2.0731960508185192e-07, "loss": 0.4966, "step": 4678 }, { "epoch": 0.7979195088676672, "grad_norm": 0.5162569284439087, "learning_rate": 2.0698301143773793e-07, "loss": 0.5034, "step": 4679 }, { "epoch": 0.7980900409276944, "grad_norm": 0.48317229747772217, "learning_rate": 2.0664665970876497e-07, "loss": 0.4978, "step": 4680 }, { "epoch": 0.7982605729877217, "grad_norm": 0.38275066018104553, "learning_rate": 2.0631054999753877e-07, "loss": 0.5002, "step": 4681 }, { "epoch": 0.798431105047749, "grad_norm": 0.5556075572967529, "learning_rate": 2.0597468240659194e-07, "loss": 0.4898, "step": 4682 }, { "epoch": 0.7986016371077762, "grad_norm": 0.47922417521476746, "learning_rate": 2.0563905703838317e-07, "loss": 0.5095, "step": 4683 }, { "epoch": 0.7987721691678036, "grad_norm": 0.5476397275924683, "learning_rate": 2.0530367399529762e-07, "loss": 0.5018, "step": 4684 }, { "epoch": 0.7989427012278308, "grad_norm": 0.43253302574157715, "learning_rate": 2.0496853337964508e-07, "loss": 0.4987, "step": 4685 }, { "epoch": 0.7991132332878581, "grad_norm": 0.5218864679336548, "learning_rate": 2.0463363529366379e-07, "loss": 0.4994, "step": 4686 }, { "epoch": 0.7992837653478854, "grad_norm": 0.5182597637176514, "learning_rate": 2.0429897983951587e-07, "loss": 0.5008, "step": 4687 }, { "epoch": 0.7994542974079127, "grad_norm": 0.3908626139163971, "learning_rate": 2.0396456711928983e-07, "loss": 0.4998, "step": 4688 }, { "epoch": 0.79962482946794, "grad_norm": 0.4519367814064026, "learning_rate": 2.0363039723500158e-07, "loss": 0.4982, "step": 4689 }, { "epoch": 0.7997953615279673, "grad_norm": 0.3876539468765259, "learning_rate": 2.0329647028859101e-07, "loss": 0.5014, "step": 4690 }, { "epoch": 0.7999658935879945, "grad_norm": 0.501013457775116, "learning_rate": 2.0296278638192502e-07, "loss": 0.5081, "step": 4691 }, { "epoch": 0.8001364256480218, "grad_norm": 0.44867897033691406, "learning_rate": 2.0262934561679617e-07, "loss": 0.4989, "step": 4692 }, { "epoch": 0.8003069577080492, "grad_norm": 0.3659909963607788, "learning_rate": 2.0229614809492305e-07, "loss": 0.5088, "step": 4693 }, { "epoch": 0.8004774897680764, "grad_norm": 0.4918709993362427, "learning_rate": 2.019631939179493e-07, "loss": 0.5039, "step": 4694 }, { "epoch": 0.8006480218281037, "grad_norm": 0.40458929538726807, "learning_rate": 2.0163048318744494e-07, "loss": 0.5017, "step": 4695 }, { "epoch": 0.800818553888131, "grad_norm": 0.4595339894294739, "learning_rate": 2.0129801600490608e-07, "loss": 0.5111, "step": 4696 }, { "epoch": 0.8009890859481582, "grad_norm": 0.5194839835166931, "learning_rate": 2.0096579247175321e-07, "loss": 0.5088, "step": 4697 }, { "epoch": 0.8011596180081856, "grad_norm": 0.3969508111476898, "learning_rate": 2.0063381268933426e-07, "loss": 0.4982, "step": 4698 }, { "epoch": 0.8013301500682128, "grad_norm": 0.42648452520370483, "learning_rate": 2.0030207675892125e-07, "loss": 0.5058, "step": 4699 }, { "epoch": 0.8015006821282401, "grad_norm": 0.40289372205734253, "learning_rate": 1.9997058478171264e-07, "loss": 0.4778, "step": 4700 }, { "epoch": 0.8016712141882674, "grad_norm": 0.5085612535476685, "learning_rate": 1.9963933685883255e-07, "loss": 0.4984, "step": 4701 }, { "epoch": 0.8018417462482946, "grad_norm": 0.47089236974716187, "learning_rate": 1.993083330913298e-07, "loss": 0.4953, "step": 4702 }, { "epoch": 0.802012278308322, "grad_norm": 0.370245099067688, "learning_rate": 1.9897757358017957e-07, "loss": 0.5118, "step": 4703 }, { "epoch": 0.8021828103683493, "grad_norm": 0.3889061510562897, "learning_rate": 1.9864705842628242e-07, "loss": 0.497, "step": 4704 }, { "epoch": 0.8023533424283765, "grad_norm": 0.3743768036365509, "learning_rate": 1.9831678773046428e-07, "loss": 0.4986, "step": 4705 }, { "epoch": 0.8025238744884038, "grad_norm": 0.4428059160709381, "learning_rate": 1.9798676159347577e-07, "loss": 0.4849, "step": 4706 }, { "epoch": 0.802694406548431, "grad_norm": 0.3861129581928253, "learning_rate": 1.9765698011599467e-07, "loss": 0.4995, "step": 4707 }, { "epoch": 0.8028649386084584, "grad_norm": 0.4313037395477295, "learning_rate": 1.9732744339862199e-07, "loss": 0.4981, "step": 4708 }, { "epoch": 0.8030354706684857, "grad_norm": 0.48329731822013855, "learning_rate": 1.9699815154188564e-07, "loss": 0.49, "step": 4709 }, { "epoch": 0.803206002728513, "grad_norm": 0.3620572090148926, "learning_rate": 1.9666910464623847e-07, "loss": 0.5051, "step": 4710 }, { "epoch": 0.8033765347885402, "grad_norm": 0.4337708652019501, "learning_rate": 1.9634030281205785e-07, "loss": 0.5011, "step": 4711 }, { "epoch": 0.8035470668485676, "grad_norm": 0.4114370346069336, "learning_rate": 1.9601174613964725e-07, "loss": 0.4985, "step": 4712 }, { "epoch": 0.8037175989085948, "grad_norm": 0.4311154782772064, "learning_rate": 1.9568343472923526e-07, "loss": 0.5064, "step": 4713 }, { "epoch": 0.8038881309686221, "grad_norm": 0.3959866464138031, "learning_rate": 1.953553686809753e-07, "loss": 0.5048, "step": 4714 }, { "epoch": 0.8040586630286494, "grad_norm": 0.5132696628570557, "learning_rate": 1.9502754809494609e-07, "loss": 0.5014, "step": 4715 }, { "epoch": 0.8042291950886766, "grad_norm": 0.42080479860305786, "learning_rate": 1.9469997307115188e-07, "loss": 0.5027, "step": 4716 }, { "epoch": 0.804399727148704, "grad_norm": 0.40302586555480957, "learning_rate": 1.9437264370952103e-07, "loss": 0.4899, "step": 4717 }, { "epoch": 0.8045702592087313, "grad_norm": 0.4599449634552002, "learning_rate": 1.9404556010990796e-07, "loss": 0.4973, "step": 4718 }, { "epoch": 0.8047407912687585, "grad_norm": 0.49274709820747375, "learning_rate": 1.937187223720917e-07, "loss": 0.5, "step": 4719 }, { "epoch": 0.8049113233287858, "grad_norm": 0.38500869274139404, "learning_rate": 1.933921305957759e-07, "loss": 0.4905, "step": 4720 }, { "epoch": 0.805081855388813, "grad_norm": 0.4719330668449402, "learning_rate": 1.9306578488058993e-07, "loss": 0.4908, "step": 4721 }, { "epoch": 0.8052523874488404, "grad_norm": 0.4110085666179657, "learning_rate": 1.9273968532608758e-07, "loss": 0.4877, "step": 4722 }, { "epoch": 0.8054229195088677, "grad_norm": 0.5232803821563721, "learning_rate": 1.9241383203174769e-07, "loss": 0.5018, "step": 4723 }, { "epoch": 0.805593451568895, "grad_norm": 0.5426048636436462, "learning_rate": 1.9208822509697425e-07, "loss": 0.4889, "step": 4724 }, { "epoch": 0.8057639836289222, "grad_norm": 0.4066981077194214, "learning_rate": 1.9176286462109573e-07, "loss": 0.4943, "step": 4725 }, { "epoch": 0.8059345156889495, "grad_norm": 0.6040549874305725, "learning_rate": 1.9143775070336524e-07, "loss": 0.4991, "step": 4726 }, { "epoch": 0.8061050477489768, "grad_norm": 0.6267606616020203, "learning_rate": 1.911128834429612e-07, "loss": 0.5015, "step": 4727 }, { "epoch": 0.8062755798090041, "grad_norm": 0.41096246242523193, "learning_rate": 1.907882629389868e-07, "loss": 0.4926, "step": 4728 }, { "epoch": 0.8064461118690314, "grad_norm": 0.5135898590087891, "learning_rate": 1.90463889290469e-07, "loss": 0.5014, "step": 4729 }, { "epoch": 0.8066166439290586, "grad_norm": 0.5312707424163818, "learning_rate": 1.901397625963606e-07, "loss": 0.4955, "step": 4730 }, { "epoch": 0.806787175989086, "grad_norm": 0.41822531819343567, "learning_rate": 1.8981588295553856e-07, "loss": 0.4996, "step": 4731 }, { "epoch": 0.8069577080491133, "grad_norm": 0.37353163957595825, "learning_rate": 1.8949225046680464e-07, "loss": 0.4973, "step": 4732 }, { "epoch": 0.8071282401091405, "grad_norm": 0.40797603130340576, "learning_rate": 1.8916886522888512e-07, "loss": 0.4994, "step": 4733 }, { "epoch": 0.8072987721691678, "grad_norm": 0.3444114923477173, "learning_rate": 1.8884572734043052e-07, "loss": 0.4912, "step": 4734 }, { "epoch": 0.807469304229195, "grad_norm": 0.4250591993331909, "learning_rate": 1.885228369000164e-07, "loss": 0.4878, "step": 4735 }, { "epoch": 0.8076398362892224, "grad_norm": 0.4311605393886566, "learning_rate": 1.8820019400614265e-07, "loss": 0.4929, "step": 4736 }, { "epoch": 0.8078103683492497, "grad_norm": 0.41141265630722046, "learning_rate": 1.878777987572339e-07, "loss": 0.5018, "step": 4737 }, { "epoch": 0.807980900409277, "grad_norm": 0.3856441378593445, "learning_rate": 1.8755565125163845e-07, "loss": 0.484, "step": 4738 }, { "epoch": 0.8081514324693042, "grad_norm": 0.4327915906906128, "learning_rate": 1.872337515876298e-07, "loss": 0.4971, "step": 4739 }, { "epoch": 0.8083219645293315, "grad_norm": 0.5755069851875305, "learning_rate": 1.8691209986340598e-07, "loss": 0.4959, "step": 4740 }, { "epoch": 0.8084924965893588, "grad_norm": 0.37612563371658325, "learning_rate": 1.8659069617708806e-07, "loss": 0.4853, "step": 4741 }, { "epoch": 0.8086630286493861, "grad_norm": 0.3841988444328308, "learning_rate": 1.8626954062672355e-07, "loss": 0.4953, "step": 4742 }, { "epoch": 0.8088335607094134, "grad_norm": 0.37716707587242126, "learning_rate": 1.8594863331028226e-07, "loss": 0.4875, "step": 4743 }, { "epoch": 0.8090040927694406, "grad_norm": 0.426252156496048, "learning_rate": 1.8562797432565927e-07, "loss": 0.4973, "step": 4744 }, { "epoch": 0.8091746248294679, "grad_norm": 0.4737507104873657, "learning_rate": 1.8530756377067394e-07, "loss": 0.4858, "step": 4745 }, { "epoch": 0.8093451568894953, "grad_norm": 0.4766058325767517, "learning_rate": 1.8498740174306986e-07, "loss": 0.4879, "step": 4746 }, { "epoch": 0.8095156889495225, "grad_norm": 0.3758303225040436, "learning_rate": 1.8466748834051375e-07, "loss": 0.4847, "step": 4747 }, { "epoch": 0.8096862210095498, "grad_norm": 0.48403874039649963, "learning_rate": 1.8434782366059845e-07, "loss": 0.488, "step": 4748 }, { "epoch": 0.809856753069577, "grad_norm": 0.4095841646194458, "learning_rate": 1.8402840780083932e-07, "loss": 0.4857, "step": 4749 }, { "epoch": 0.8100272851296043, "grad_norm": 0.53790283203125, "learning_rate": 1.837092408586756e-07, "loss": 0.4896, "step": 4750 }, { "epoch": 0.8101978171896317, "grad_norm": 0.5169858932495117, "learning_rate": 1.8339032293147253e-07, "loss": 0.4875, "step": 4751 }, { "epoch": 0.810368349249659, "grad_norm": 0.42378848791122437, "learning_rate": 1.830716541165173e-07, "loss": 0.4898, "step": 4752 }, { "epoch": 0.8105388813096862, "grad_norm": 0.49144625663757324, "learning_rate": 1.8275323451102225e-07, "loss": 0.5023, "step": 4753 }, { "epoch": 0.8107094133697135, "grad_norm": 0.41334912180900574, "learning_rate": 1.8243506421212333e-07, "loss": 0.485, "step": 4754 }, { "epoch": 0.8108799454297408, "grad_norm": 0.5738430023193359, "learning_rate": 1.8211714331688093e-07, "loss": 0.4949, "step": 4755 }, { "epoch": 0.8110504774897681, "grad_norm": 0.47940924763679504, "learning_rate": 1.8179947192227834e-07, "loss": 0.4962, "step": 4756 }, { "epoch": 0.8112210095497954, "grad_norm": 0.4434712827205658, "learning_rate": 1.8148205012522358e-07, "loss": 0.4871, "step": 4757 }, { "epoch": 0.8113915416098226, "grad_norm": 0.5584090948104858, "learning_rate": 1.8116487802254868e-07, "loss": 0.4895, "step": 4758 }, { "epoch": 0.8115620736698499, "grad_norm": 0.587884783744812, "learning_rate": 1.808479557110081e-07, "loss": 0.4979, "step": 4759 }, { "epoch": 0.8117326057298773, "grad_norm": 0.6662174463272095, "learning_rate": 1.8053128328728243e-07, "loss": 0.4949, "step": 4760 }, { "epoch": 0.8119031377899045, "grad_norm": 0.3959568440914154, "learning_rate": 1.802148608479737e-07, "loss": 0.4866, "step": 4761 }, { "epoch": 0.8120736698499318, "grad_norm": 0.5144326686859131, "learning_rate": 1.7989868848960913e-07, "loss": 0.5032, "step": 4762 }, { "epoch": 0.812244201909959, "grad_norm": 0.7496875524520874, "learning_rate": 1.7958276630863903e-07, "loss": 0.4984, "step": 4763 }, { "epoch": 0.8124147339699863, "grad_norm": 0.4983857572078705, "learning_rate": 1.79267094401438e-07, "loss": 0.4983, "step": 4764 }, { "epoch": 0.8125852660300137, "grad_norm": 0.46375688910484314, "learning_rate": 1.789516728643033e-07, "loss": 0.4959, "step": 4765 }, { "epoch": 0.812755798090041, "grad_norm": 0.4899948239326477, "learning_rate": 1.786365017934566e-07, "loss": 0.5009, "step": 4766 }, { "epoch": 0.8129263301500682, "grad_norm": 0.5301331281661987, "learning_rate": 1.783215812850433e-07, "loss": 0.4961, "step": 4767 }, { "epoch": 0.8130968622100955, "grad_norm": 0.48252782225608826, "learning_rate": 1.7800691143513105e-07, "loss": 0.5049, "step": 4768 }, { "epoch": 0.8132673942701227, "grad_norm": 0.4890320301055908, "learning_rate": 1.7769249233971322e-07, "loss": 0.4909, "step": 4769 }, { "epoch": 0.8134379263301501, "grad_norm": 0.4261488616466522, "learning_rate": 1.7737832409470458e-07, "loss": 0.4958, "step": 4770 }, { "epoch": 0.8136084583901774, "grad_norm": 0.5246241688728333, "learning_rate": 1.7706440679594463e-07, "loss": 0.5065, "step": 4771 }, { "epoch": 0.8137789904502046, "grad_norm": 0.43197351694107056, "learning_rate": 1.7675074053919606e-07, "loss": 0.494, "step": 4772 }, { "epoch": 0.8139495225102319, "grad_norm": 0.49455928802490234, "learning_rate": 1.7643732542014438e-07, "loss": 0.4949, "step": 4773 }, { "epoch": 0.8141200545702592, "grad_norm": 0.4988599419593811, "learning_rate": 1.7612416153439917e-07, "loss": 0.4993, "step": 4774 }, { "epoch": 0.8142905866302865, "grad_norm": 0.5352221131324768, "learning_rate": 1.7581124897749337e-07, "loss": 0.5002, "step": 4775 }, { "epoch": 0.8144611186903138, "grad_norm": 0.47420138120651245, "learning_rate": 1.7549858784488316e-07, "loss": 0.5019, "step": 4776 }, { "epoch": 0.814631650750341, "grad_norm": 0.5199934840202332, "learning_rate": 1.7518617823194708e-07, "loss": 0.5024, "step": 4777 }, { "epoch": 0.8148021828103683, "grad_norm": 0.4888570010662079, "learning_rate": 1.7487402023398894e-07, "loss": 0.5032, "step": 4778 }, { "epoch": 0.8149727148703957, "grad_norm": 0.6146841049194336, "learning_rate": 1.745621139462338e-07, "loss": 0.5072, "step": 4779 }, { "epoch": 0.815143246930423, "grad_norm": 0.6330520510673523, "learning_rate": 1.7425045946383102e-07, "loss": 0.4926, "step": 4780 }, { "epoch": 0.8153137789904502, "grad_norm": 0.5201252102851868, "learning_rate": 1.739390568818532e-07, "loss": 0.4999, "step": 4781 }, { "epoch": 0.8154843110504775, "grad_norm": 0.501669704914093, "learning_rate": 1.7362790629529514e-07, "loss": 0.4924, "step": 4782 }, { "epoch": 0.8156548431105047, "grad_norm": 0.5077195167541504, "learning_rate": 1.733170077990758e-07, "loss": 0.4992, "step": 4783 }, { "epoch": 0.8158253751705321, "grad_norm": 0.533851683139801, "learning_rate": 1.7300636148803674e-07, "loss": 0.483, "step": 4784 }, { "epoch": 0.8159959072305594, "grad_norm": 0.5307408571243286, "learning_rate": 1.7269596745694297e-07, "loss": 0.5017, "step": 4785 }, { "epoch": 0.8161664392905866, "grad_norm": 0.43948373198509216, "learning_rate": 1.723858258004816e-07, "loss": 0.4951, "step": 4786 }, { "epoch": 0.8163369713506139, "grad_norm": 0.5590949058532715, "learning_rate": 1.7207593661326429e-07, "loss": 0.5046, "step": 4787 }, { "epoch": 0.8165075034106412, "grad_norm": 0.425890177488327, "learning_rate": 1.717662999898241e-07, "loss": 0.4891, "step": 4788 }, { "epoch": 0.8166780354706685, "grad_norm": 0.6012468338012695, "learning_rate": 1.7145691602461812e-07, "loss": 0.497, "step": 4789 }, { "epoch": 0.8168485675306958, "grad_norm": 0.5476680397987366, "learning_rate": 1.711477848120261e-07, "loss": 0.5022, "step": 4790 }, { "epoch": 0.817019099590723, "grad_norm": 0.48487743735313416, "learning_rate": 1.7083890644635017e-07, "loss": 0.4901, "step": 4791 }, { "epoch": 0.8171896316507503, "grad_norm": 0.4986053705215454, "learning_rate": 1.7053028102181601e-07, "loss": 0.4852, "step": 4792 }, { "epoch": 0.8173601637107776, "grad_norm": 0.4170510470867157, "learning_rate": 1.7022190863257184e-07, "loss": 0.4859, "step": 4793 }, { "epoch": 0.817530695770805, "grad_norm": 0.45927199721336365, "learning_rate": 1.6991378937268888e-07, "loss": 0.4956, "step": 4794 }, { "epoch": 0.8177012278308322, "grad_norm": 0.44013354182243347, "learning_rate": 1.6960592333616056e-07, "loss": 0.4939, "step": 4795 }, { "epoch": 0.8178717598908595, "grad_norm": 0.5605259537696838, "learning_rate": 1.6929831061690373e-07, "loss": 0.4967, "step": 4796 }, { "epoch": 0.8180422919508867, "grad_norm": 0.46457862854003906, "learning_rate": 1.6899095130875777e-07, "loss": 0.4988, "step": 4797 }, { "epoch": 0.818212824010914, "grad_norm": 0.41826432943344116, "learning_rate": 1.6868384550548456e-07, "loss": 0.4995, "step": 4798 }, { "epoch": 0.8183833560709414, "grad_norm": 0.48303788900375366, "learning_rate": 1.683769933007691e-07, "loss": 0.4926, "step": 4799 }, { "epoch": 0.8185538881309686, "grad_norm": 0.4445941746234894, "learning_rate": 1.680703947882182e-07, "loss": 0.4831, "step": 4800 }, { "epoch": 0.8187244201909959, "grad_norm": 0.446552038192749, "learning_rate": 1.67764050061362e-07, "loss": 0.4966, "step": 4801 }, { "epoch": 0.8188949522510232, "grad_norm": 0.5134671330451965, "learning_rate": 1.6745795921365316e-07, "loss": 0.489, "step": 4802 }, { "epoch": 0.8190654843110505, "grad_norm": 0.4526304602622986, "learning_rate": 1.6715212233846656e-07, "loss": 0.4958, "step": 4803 }, { "epoch": 0.8192360163710778, "grad_norm": 0.45974987745285034, "learning_rate": 1.6684653952910014e-07, "loss": 0.4956, "step": 4804 }, { "epoch": 0.819406548431105, "grad_norm": 0.49569687247276306, "learning_rate": 1.6654121087877342e-07, "loss": 0.5015, "step": 4805 }, { "epoch": 0.8195770804911323, "grad_norm": 0.34860217571258545, "learning_rate": 1.662361364806292e-07, "loss": 0.4901, "step": 4806 }, { "epoch": 0.8197476125511596, "grad_norm": 0.4773261845111847, "learning_rate": 1.6593131642773254e-07, "loss": 0.5003, "step": 4807 }, { "epoch": 0.819918144611187, "grad_norm": 0.37786346673965454, "learning_rate": 1.6562675081307103e-07, "loss": 0.504, "step": 4808 }, { "epoch": 0.8200886766712142, "grad_norm": 0.41945719718933105, "learning_rate": 1.6532243972955398e-07, "loss": 0.5095, "step": 4809 }, { "epoch": 0.8202592087312415, "grad_norm": 0.5154603123664856, "learning_rate": 1.6501838327001376e-07, "loss": 0.4932, "step": 4810 }, { "epoch": 0.8204297407912687, "grad_norm": 0.4044354259967804, "learning_rate": 1.647145815272051e-07, "loss": 0.491, "step": 4811 }, { "epoch": 0.820600272851296, "grad_norm": 0.5035597681999207, "learning_rate": 1.6441103459380394e-07, "loss": 0.4882, "step": 4812 }, { "epoch": 0.8207708049113234, "grad_norm": 0.4555085301399231, "learning_rate": 1.6410774256241036e-07, "loss": 0.5002, "step": 4813 }, { "epoch": 0.8209413369713506, "grad_norm": 0.3546191453933716, "learning_rate": 1.6380470552554484e-07, "loss": 0.495, "step": 4814 }, { "epoch": 0.8211118690313779, "grad_norm": 0.4193180501461029, "learning_rate": 1.635019235756511e-07, "loss": 0.4985, "step": 4815 }, { "epoch": 0.8212824010914052, "grad_norm": 0.3919258713722229, "learning_rate": 1.631993968050949e-07, "loss": 0.4978, "step": 4816 }, { "epoch": 0.8214529331514324, "grad_norm": 0.43044885993003845, "learning_rate": 1.628971253061642e-07, "loss": 0.5079, "step": 4817 }, { "epoch": 0.8216234652114598, "grad_norm": 0.3775131404399872, "learning_rate": 1.6259510917106847e-07, "loss": 0.4984, "step": 4818 }, { "epoch": 0.821793997271487, "grad_norm": 0.4048754572868347, "learning_rate": 1.6229334849194e-07, "loss": 0.4976, "step": 4819 }, { "epoch": 0.8219645293315143, "grad_norm": 0.40105101466178894, "learning_rate": 1.619918433608332e-07, "loss": 0.4888, "step": 4820 }, { "epoch": 0.8221350613915416, "grad_norm": 0.42789819836616516, "learning_rate": 1.6169059386972342e-07, "loss": 0.4954, "step": 4821 }, { "epoch": 0.822305593451569, "grad_norm": 0.43447840213775635, "learning_rate": 1.6138960011050976e-07, "loss": 0.4977, "step": 4822 }, { "epoch": 0.8224761255115962, "grad_norm": 0.5571722388267517, "learning_rate": 1.6108886217501185e-07, "loss": 0.5014, "step": 4823 }, { "epoch": 0.8226466575716235, "grad_norm": 0.3704339563846588, "learning_rate": 1.607883801549719e-07, "loss": 0.5003, "step": 4824 }, { "epoch": 0.8228171896316507, "grad_norm": 0.3826393485069275, "learning_rate": 1.60488154142054e-07, "loss": 0.4887, "step": 4825 }, { "epoch": 0.822987721691678, "grad_norm": 0.44714003801345825, "learning_rate": 1.6018818422784433e-07, "loss": 0.502, "step": 4826 }, { "epoch": 0.8231582537517054, "grad_norm": 0.4311603307723999, "learning_rate": 1.598884705038504e-07, "loss": 0.4865, "step": 4827 }, { "epoch": 0.8233287858117326, "grad_norm": 0.40986600518226624, "learning_rate": 1.5958901306150177e-07, "loss": 0.4857, "step": 4828 }, { "epoch": 0.8234993178717599, "grad_norm": 0.37676551938056946, "learning_rate": 1.5928981199215044e-07, "loss": 0.4954, "step": 4829 }, { "epoch": 0.8236698499317872, "grad_norm": 0.3872268497943878, "learning_rate": 1.5899086738706906e-07, "loss": 0.4921, "step": 4830 }, { "epoch": 0.8238403819918144, "grad_norm": 0.3704686462879181, "learning_rate": 1.5869217933745342e-07, "loss": 0.4853, "step": 4831 }, { "epoch": 0.8240109140518418, "grad_norm": 0.4022778272628784, "learning_rate": 1.5839374793441966e-07, "loss": 0.4984, "step": 4832 }, { "epoch": 0.824181446111869, "grad_norm": 0.35553985834121704, "learning_rate": 1.580955732690065e-07, "loss": 0.4937, "step": 4833 }, { "epoch": 0.8243519781718963, "grad_norm": 0.40467149019241333, "learning_rate": 1.5779765543217451e-07, "loss": 0.4971, "step": 4834 }, { "epoch": 0.8245225102319236, "grad_norm": 0.3793901205062866, "learning_rate": 1.5749999451480482e-07, "loss": 0.4892, "step": 4835 }, { "epoch": 0.8246930422919508, "grad_norm": 0.4459507167339325, "learning_rate": 1.5720259060770128e-07, "loss": 0.4964, "step": 4836 }, { "epoch": 0.8248635743519782, "grad_norm": 0.4005849361419678, "learning_rate": 1.5690544380158892e-07, "loss": 0.4913, "step": 4837 }, { "epoch": 0.8250341064120055, "grad_norm": 0.4723176658153534, "learning_rate": 1.5660855418711452e-07, "loss": 0.4925, "step": 4838 }, { "epoch": 0.8252046384720327, "grad_norm": 0.5478523969650269, "learning_rate": 1.5631192185484556e-07, "loss": 0.494, "step": 4839 }, { "epoch": 0.82537517053206, "grad_norm": 0.4246988594532013, "learning_rate": 1.5601554689527278e-07, "loss": 0.4973, "step": 4840 }, { "epoch": 0.8255457025920873, "grad_norm": 0.48962587118148804, "learning_rate": 1.5571942939880647e-07, "loss": 0.4982, "step": 4841 }, { "epoch": 0.8257162346521146, "grad_norm": 0.5545381903648376, "learning_rate": 1.5542356945577964e-07, "loss": 0.4835, "step": 4842 }, { "epoch": 0.8258867667121419, "grad_norm": 0.524614155292511, "learning_rate": 1.5512796715644643e-07, "loss": 0.4924, "step": 4843 }, { "epoch": 0.8260572987721692, "grad_norm": 0.540654182434082, "learning_rate": 1.5483262259098198e-07, "loss": 0.4818, "step": 4844 }, { "epoch": 0.8262278308321964, "grad_norm": 0.4969771206378937, "learning_rate": 1.545375358494833e-07, "loss": 0.4965, "step": 4845 }, { "epoch": 0.8263983628922238, "grad_norm": 0.4746538996696472, "learning_rate": 1.5424270702196844e-07, "loss": 0.4932, "step": 4846 }, { "epoch": 0.826568894952251, "grad_norm": 0.6185543537139893, "learning_rate": 1.5394813619837735e-07, "loss": 0.491, "step": 4847 }, { "epoch": 0.8267394270122783, "grad_norm": 0.44103044271469116, "learning_rate": 1.5365382346857008e-07, "loss": 0.4888, "step": 4848 }, { "epoch": 0.8269099590723056, "grad_norm": 0.40380629897117615, "learning_rate": 1.5335976892232951e-07, "loss": 0.5005, "step": 4849 }, { "epoch": 0.8270804911323328, "grad_norm": 0.4558597505092621, "learning_rate": 1.5306597264935865e-07, "loss": 0.4799, "step": 4850 }, { "epoch": 0.8272510231923602, "grad_norm": 0.4192386567592621, "learning_rate": 1.5277243473928151e-07, "loss": 0.4889, "step": 4851 }, { "epoch": 0.8274215552523875, "grad_norm": 0.420249879360199, "learning_rate": 1.524791552816446e-07, "loss": 0.4884, "step": 4852 }, { "epoch": 0.8275920873124147, "grad_norm": 0.6081094741821289, "learning_rate": 1.5218613436591423e-07, "loss": 0.4998, "step": 4853 }, { "epoch": 0.827762619372442, "grad_norm": 0.3973240852355957, "learning_rate": 1.5189337208147848e-07, "loss": 0.4903, "step": 4854 }, { "epoch": 0.8279331514324693, "grad_norm": 0.5748963356018066, "learning_rate": 1.5160086851764667e-07, "loss": 0.4918, "step": 4855 }, { "epoch": 0.8281036834924966, "grad_norm": 0.656119167804718, "learning_rate": 1.51308623763649e-07, "loss": 0.4913, "step": 4856 }, { "epoch": 0.8282742155525239, "grad_norm": 0.3806758224964142, "learning_rate": 1.5101663790863597e-07, "loss": 0.5002, "step": 4857 }, { "epoch": 0.8284447476125512, "grad_norm": 0.5381845235824585, "learning_rate": 1.5072491104168097e-07, "loss": 0.496, "step": 4858 }, { "epoch": 0.8286152796725784, "grad_norm": 0.34899216890335083, "learning_rate": 1.5043344325177633e-07, "loss": 0.489, "step": 4859 }, { "epoch": 0.8287858117326057, "grad_norm": 0.6310024261474609, "learning_rate": 1.5014223462783651e-07, "loss": 0.4919, "step": 4860 }, { "epoch": 0.828956343792633, "grad_norm": 0.5121638774871826, "learning_rate": 1.4985128525869698e-07, "loss": 0.4954, "step": 4861 }, { "epoch": 0.8291268758526603, "grad_norm": 0.48706522583961487, "learning_rate": 1.495605952331133e-07, "loss": 0.4951, "step": 4862 }, { "epoch": 0.8292974079126876, "grad_norm": 0.5409660339355469, "learning_rate": 1.4927016463976263e-07, "loss": 0.4893, "step": 4863 }, { "epoch": 0.8294679399727148, "grad_norm": 0.5660123825073242, "learning_rate": 1.489799935672427e-07, "loss": 0.5016, "step": 4864 }, { "epoch": 0.8296384720327421, "grad_norm": 0.6146024465560913, "learning_rate": 1.4869008210407245e-07, "loss": 0.482, "step": 4865 }, { "epoch": 0.8298090040927695, "grad_norm": 0.4160356819629669, "learning_rate": 1.4840043033869079e-07, "loss": 0.4966, "step": 4866 }, { "epoch": 0.8299795361527967, "grad_norm": 0.600299060344696, "learning_rate": 1.4811103835945818e-07, "loss": 0.4952, "step": 4867 }, { "epoch": 0.830150068212824, "grad_norm": 0.3432973623275757, "learning_rate": 1.4782190625465565e-07, "loss": 0.4979, "step": 4868 }, { "epoch": 0.8303206002728513, "grad_norm": 0.5366365313529968, "learning_rate": 1.4753303411248478e-07, "loss": 0.4836, "step": 4869 }, { "epoch": 0.8304911323328786, "grad_norm": 0.4105454683303833, "learning_rate": 1.4724442202106826e-07, "loss": 0.5035, "step": 4870 }, { "epoch": 0.8306616643929059, "grad_norm": 0.5173560976982117, "learning_rate": 1.469560700684486e-07, "loss": 0.4944, "step": 4871 }, { "epoch": 0.8308321964529332, "grad_norm": 0.5018391013145447, "learning_rate": 1.4666797834258983e-07, "loss": 0.5, "step": 4872 }, { "epoch": 0.8310027285129604, "grad_norm": 0.5189551711082458, "learning_rate": 1.4638014693137638e-07, "loss": 0.4897, "step": 4873 }, { "epoch": 0.8311732605729877, "grad_norm": 0.54872727394104, "learning_rate": 1.4609257592261275e-07, "loss": 0.4917, "step": 4874 }, { "epoch": 0.831343792633015, "grad_norm": 0.4522056579589844, "learning_rate": 1.4580526540402464e-07, "loss": 0.5072, "step": 4875 }, { "epoch": 0.8315143246930423, "grad_norm": 0.4563232362270355, "learning_rate": 1.4551821546325807e-07, "loss": 0.4927, "step": 4876 }, { "epoch": 0.8316848567530696, "grad_norm": 0.4634324908256531, "learning_rate": 1.4523142618787945e-07, "loss": 0.4882, "step": 4877 }, { "epoch": 0.8318553888130968, "grad_norm": 0.5055513381958008, "learning_rate": 1.449448976653758e-07, "loss": 0.4947, "step": 4878 }, { "epoch": 0.8320259208731241, "grad_norm": 0.6747018098831177, "learning_rate": 1.4465862998315485e-07, "loss": 0.5035, "step": 4879 }, { "epoch": 0.8321964529331515, "grad_norm": 0.4617733657360077, "learning_rate": 1.4437262322854397e-07, "loss": 0.4965, "step": 4880 }, { "epoch": 0.8323669849931787, "grad_norm": 0.4791738986968994, "learning_rate": 1.440868774887916e-07, "loss": 0.5042, "step": 4881 }, { "epoch": 0.832537517053206, "grad_norm": 0.43876227736473083, "learning_rate": 1.4380139285106678e-07, "loss": 0.5, "step": 4882 }, { "epoch": 0.8327080491132333, "grad_norm": 0.4775140881538391, "learning_rate": 1.4351616940245794e-07, "loss": 0.4883, "step": 4883 }, { "epoch": 0.8328785811732605, "grad_norm": 0.538390576839447, "learning_rate": 1.4323120722997464e-07, "loss": 0.4958, "step": 4884 }, { "epoch": 0.8330491132332879, "grad_norm": 0.44281888008117676, "learning_rate": 1.4294650642054654e-07, "loss": 0.4876, "step": 4885 }, { "epoch": 0.8332196452933152, "grad_norm": 0.5450493693351746, "learning_rate": 1.426620670610234e-07, "loss": 0.5038, "step": 4886 }, { "epoch": 0.8333901773533424, "grad_norm": 0.4807684123516083, "learning_rate": 1.4237788923817553e-07, "loss": 0.4868, "step": 4887 }, { "epoch": 0.8335607094133697, "grad_norm": 0.5991215705871582, "learning_rate": 1.4209397303869345e-07, "loss": 0.5017, "step": 4888 }, { "epoch": 0.833731241473397, "grad_norm": 0.4470359981060028, "learning_rate": 1.4181031854918716e-07, "loss": 0.4803, "step": 4889 }, { "epoch": 0.8339017735334243, "grad_norm": 0.6047620177268982, "learning_rate": 1.4152692585618767e-07, "loss": 0.4971, "step": 4890 }, { "epoch": 0.8340723055934516, "grad_norm": 0.575122594833374, "learning_rate": 1.4124379504614605e-07, "loss": 0.5003, "step": 4891 }, { "epoch": 0.8342428376534788, "grad_norm": 0.38666024804115295, "learning_rate": 1.4096092620543258e-07, "loss": 0.5019, "step": 4892 }, { "epoch": 0.8344133697135061, "grad_norm": 0.6803966164588928, "learning_rate": 1.4067831942033907e-07, "loss": 0.5025, "step": 4893 }, { "epoch": 0.8345839017735335, "grad_norm": 0.5980350971221924, "learning_rate": 1.4039597477707601e-07, "loss": 0.491, "step": 4894 }, { "epoch": 0.8347544338335607, "grad_norm": 0.44976601004600525, "learning_rate": 1.4011389236177477e-07, "loss": 0.5025, "step": 4895 }, { "epoch": 0.834924965893588, "grad_norm": 0.5966007709503174, "learning_rate": 1.3983207226048656e-07, "loss": 0.4937, "step": 4896 }, { "epoch": 0.8350954979536153, "grad_norm": 0.3893631398677826, "learning_rate": 1.3955051455918258e-07, "loss": 0.4935, "step": 4897 }, { "epoch": 0.8352660300136425, "grad_norm": 0.5327594876289368, "learning_rate": 1.392692193437534e-07, "loss": 0.5043, "step": 4898 }, { "epoch": 0.8354365620736699, "grad_norm": 0.506881833076477, "learning_rate": 1.3898818670001035e-07, "loss": 0.4896, "step": 4899 }, { "epoch": 0.8356070941336972, "grad_norm": 0.4647798538208008, "learning_rate": 1.387074167136846e-07, "loss": 0.4965, "step": 4900 }, { "epoch": 0.8357776261937244, "grad_norm": 0.4127386212348938, "learning_rate": 1.3842690947042605e-07, "loss": 0.5101, "step": 4901 }, { "epoch": 0.8359481582537517, "grad_norm": 0.4460466802120209, "learning_rate": 1.3814666505580632e-07, "loss": 0.4976, "step": 4902 }, { "epoch": 0.836118690313779, "grad_norm": 0.38933101296424866, "learning_rate": 1.3786668355531519e-07, "loss": 0.4986, "step": 4903 }, { "epoch": 0.8362892223738063, "grad_norm": 0.4646453559398651, "learning_rate": 1.375869650543631e-07, "loss": 0.4998, "step": 4904 }, { "epoch": 0.8364597544338336, "grad_norm": 0.4589424729347229, "learning_rate": 1.3730750963828035e-07, "loss": 0.4937, "step": 4905 }, { "epoch": 0.8366302864938608, "grad_norm": 0.3910622298717499, "learning_rate": 1.3702831739231595e-07, "loss": 0.4952, "step": 4906 }, { "epoch": 0.8368008185538881, "grad_norm": 0.6233266592025757, "learning_rate": 1.3674938840163987e-07, "loss": 0.4961, "step": 4907 }, { "epoch": 0.8369713506139154, "grad_norm": 0.46709907054901123, "learning_rate": 1.3647072275134123e-07, "loss": 0.4956, "step": 4908 }, { "epoch": 0.8371418826739427, "grad_norm": 0.4565236270427704, "learning_rate": 1.3619232052642907e-07, "loss": 0.4948, "step": 4909 }, { "epoch": 0.83731241473397, "grad_norm": 0.42009490728378296, "learning_rate": 1.3591418181183103e-07, "loss": 0.4913, "step": 4910 }, { "epoch": 0.8374829467939973, "grad_norm": 0.5678643584251404, "learning_rate": 1.3563630669239627e-07, "loss": 0.4978, "step": 4911 }, { "epoch": 0.8376534788540245, "grad_norm": 0.49767374992370605, "learning_rate": 1.3535869525289182e-07, "loss": 0.4909, "step": 4912 }, { "epoch": 0.8378240109140518, "grad_norm": 0.46664154529571533, "learning_rate": 1.3508134757800462e-07, "loss": 0.4893, "step": 4913 }, { "epoch": 0.8379945429740792, "grad_norm": 0.4974210560321808, "learning_rate": 1.3480426375234236e-07, "loss": 0.5035, "step": 4914 }, { "epoch": 0.8381650750341064, "grad_norm": 0.4552690386772156, "learning_rate": 1.3452744386043048e-07, "loss": 0.4987, "step": 4915 }, { "epoch": 0.8383356070941337, "grad_norm": 0.39272937178611755, "learning_rate": 1.3425088798671505e-07, "loss": 0.5001, "step": 4916 }, { "epoch": 0.838506139154161, "grad_norm": 0.5397828817367554, "learning_rate": 1.339745962155613e-07, "loss": 0.4963, "step": 4917 }, { "epoch": 0.8386766712141883, "grad_norm": 0.42156437039375305, "learning_rate": 1.336985686312542e-07, "loss": 0.5069, "step": 4918 }, { "epoch": 0.8388472032742156, "grad_norm": 0.42294806241989136, "learning_rate": 1.3342280531799692e-07, "loss": 0.4903, "step": 4919 }, { "epoch": 0.8390177353342428, "grad_norm": 0.3916904926300049, "learning_rate": 1.3314730635991393e-07, "loss": 0.4952, "step": 4920 }, { "epoch": 0.8391882673942701, "grad_norm": 0.536283552646637, "learning_rate": 1.3287207184104757e-07, "loss": 0.5039, "step": 4921 }, { "epoch": 0.8393587994542974, "grad_norm": 0.386033296585083, "learning_rate": 1.3259710184535958e-07, "loss": 0.4926, "step": 4922 }, { "epoch": 0.8395293315143247, "grad_norm": 0.5002760887145996, "learning_rate": 1.3232239645673217e-07, "loss": 0.4967, "step": 4923 }, { "epoch": 0.839699863574352, "grad_norm": 0.43488916754722595, "learning_rate": 1.3204795575896546e-07, "loss": 0.5035, "step": 4924 }, { "epoch": 0.8398703956343793, "grad_norm": 0.45359697937965393, "learning_rate": 1.3177377983577966e-07, "loss": 0.4934, "step": 4925 }, { "epoch": 0.8400409276944065, "grad_norm": 0.3645251989364624, "learning_rate": 1.3149986877081401e-07, "loss": 0.4916, "step": 4926 }, { "epoch": 0.8402114597544338, "grad_norm": 0.3643120229244232, "learning_rate": 1.31226222647627e-07, "loss": 0.5052, "step": 4927 }, { "epoch": 0.8403819918144612, "grad_norm": 0.47196370363235474, "learning_rate": 1.309528415496958e-07, "loss": 0.4885, "step": 4928 }, { "epoch": 0.8405525238744884, "grad_norm": 0.6321125626564026, "learning_rate": 1.3067972556041752e-07, "loss": 0.4935, "step": 4929 }, { "epoch": 0.8407230559345157, "grad_norm": 0.5271437764167786, "learning_rate": 1.304068747631081e-07, "loss": 0.5041, "step": 4930 }, { "epoch": 0.840893587994543, "grad_norm": 0.4212459921836853, "learning_rate": 1.3013428924100182e-07, "loss": 0.4928, "step": 4931 }, { "epoch": 0.8410641200545702, "grad_norm": 0.649498462677002, "learning_rate": 1.298619690772537e-07, "loss": 0.4894, "step": 4932 }, { "epoch": 0.8412346521145976, "grad_norm": 0.44042500853538513, "learning_rate": 1.2958991435493618e-07, "loss": 0.4901, "step": 4933 }, { "epoch": 0.8414051841746248, "grad_norm": 0.5599384903907776, "learning_rate": 1.2931812515704147e-07, "loss": 0.5038, "step": 4934 }, { "epoch": 0.8415757162346521, "grad_norm": 0.644747793674469, "learning_rate": 1.2904660156648075e-07, "loss": 0.4948, "step": 4935 }, { "epoch": 0.8417462482946794, "grad_norm": 0.3929704427719116, "learning_rate": 1.2877534366608425e-07, "loss": 0.4891, "step": 4936 }, { "epoch": 0.8419167803547067, "grad_norm": 0.556605875492096, "learning_rate": 1.2850435153860075e-07, "loss": 0.4917, "step": 4937 }, { "epoch": 0.842087312414734, "grad_norm": 0.6048465371131897, "learning_rate": 1.2823362526669825e-07, "loss": 0.4906, "step": 4938 }, { "epoch": 0.8422578444747613, "grad_norm": 0.44814008474349976, "learning_rate": 1.2796316493296402e-07, "loss": 0.5011, "step": 4939 }, { "epoch": 0.8424283765347885, "grad_norm": 0.49208498001098633, "learning_rate": 1.2769297061990294e-07, "loss": 0.4888, "step": 4940 }, { "epoch": 0.8425989085948158, "grad_norm": 0.529579222202301, "learning_rate": 1.2742304240994053e-07, "loss": 0.5029, "step": 4941 }, { "epoch": 0.8427694406548432, "grad_norm": 0.3871929943561554, "learning_rate": 1.271533803854197e-07, "loss": 0.4873, "step": 4942 }, { "epoch": 0.8429399727148704, "grad_norm": 0.5285574793815613, "learning_rate": 1.2688398462860264e-07, "loss": 0.5017, "step": 4943 }, { "epoch": 0.8431105047748977, "grad_norm": 0.3910968601703644, "learning_rate": 1.2661485522167062e-07, "loss": 0.4916, "step": 4944 }, { "epoch": 0.843281036834925, "grad_norm": 0.43763166666030884, "learning_rate": 1.2634599224672297e-07, "loss": 0.4985, "step": 4945 }, { "epoch": 0.8434515688949522, "grad_norm": 0.4757555425167084, "learning_rate": 1.2607739578577838e-07, "loss": 0.4939, "step": 4946 }, { "epoch": 0.8436221009549796, "grad_norm": 0.3990534842014313, "learning_rate": 1.2580906592077403e-07, "loss": 0.5001, "step": 4947 }, { "epoch": 0.8437926330150068, "grad_norm": 0.5799641609191895, "learning_rate": 1.2554100273356553e-07, "loss": 0.494, "step": 4948 }, { "epoch": 0.8439631650750341, "grad_norm": 0.4546438455581665, "learning_rate": 1.2527320630592767e-07, "loss": 0.4943, "step": 4949 }, { "epoch": 0.8441336971350614, "grad_norm": 0.451763778924942, "learning_rate": 1.2500567671955345e-07, "loss": 0.491, "step": 4950 }, { "epoch": 0.8443042291950886, "grad_norm": 0.45567867159843445, "learning_rate": 1.2473841405605427e-07, "loss": 0.4855, "step": 4951 }, { "epoch": 0.844474761255116, "grad_norm": 0.4821739196777344, "learning_rate": 1.2447141839696065e-07, "loss": 0.4906, "step": 4952 }, { "epoch": 0.8446452933151433, "grad_norm": 0.4332144558429718, "learning_rate": 1.242046898237216e-07, "loss": 0.4902, "step": 4953 }, { "epoch": 0.8448158253751705, "grad_norm": 0.3891392946243286, "learning_rate": 1.23938228417704e-07, "loss": 0.4948, "step": 4954 }, { "epoch": 0.8449863574351978, "grad_norm": 0.4128452241420746, "learning_rate": 1.2367203426019382e-07, "loss": 0.4895, "step": 4955 }, { "epoch": 0.845156889495225, "grad_norm": 0.5201817750930786, "learning_rate": 1.2340610743239545e-07, "loss": 0.4834, "step": 4956 }, { "epoch": 0.8453274215552524, "grad_norm": 0.43655940890312195, "learning_rate": 1.2314044801543168e-07, "loss": 0.4901, "step": 4957 }, { "epoch": 0.8454979536152797, "grad_norm": 0.39609473943710327, "learning_rate": 1.2287505609034367e-07, "loss": 0.4887, "step": 4958 }, { "epoch": 0.845668485675307, "grad_norm": 0.43933528661727905, "learning_rate": 1.2260993173809122e-07, "loss": 0.4963, "step": 4959 }, { "epoch": 0.8458390177353342, "grad_norm": 0.32647085189819336, "learning_rate": 1.2234507503955184e-07, "loss": 0.4835, "step": 4960 }, { "epoch": 0.8460095497953616, "grad_norm": 0.40228471159935, "learning_rate": 1.2208048607552215e-07, "loss": 0.4915, "step": 4961 }, { "epoch": 0.8461800818553888, "grad_norm": 0.5278027057647705, "learning_rate": 1.2181616492671677e-07, "loss": 0.4981, "step": 4962 }, { "epoch": 0.8463506139154161, "grad_norm": 0.4137642979621887, "learning_rate": 1.2155211167376837e-07, "loss": 0.5049, "step": 4963 }, { "epoch": 0.8465211459754434, "grad_norm": 0.5432256460189819, "learning_rate": 1.2128832639722845e-07, "loss": 0.4998, "step": 4964 }, { "epoch": 0.8466916780354706, "grad_norm": 0.39955243468284607, "learning_rate": 1.2102480917756633e-07, "loss": 0.497, "step": 4965 }, { "epoch": 0.846862210095498, "grad_norm": 0.4583352506160736, "learning_rate": 1.207615600951697e-07, "loss": 0.4955, "step": 4966 }, { "epoch": 0.8470327421555253, "grad_norm": 0.43716612458229065, "learning_rate": 1.2049857923034471e-07, "loss": 0.5037, "step": 4967 }, { "epoch": 0.8472032742155525, "grad_norm": 0.4870067536830902, "learning_rate": 1.202358666633151e-07, "loss": 0.5039, "step": 4968 }, { "epoch": 0.8473738062755798, "grad_norm": 0.46189653873443604, "learning_rate": 1.1997342247422318e-07, "loss": 0.4964, "step": 4969 }, { "epoch": 0.847544338335607, "grad_norm": 0.45632949471473694, "learning_rate": 1.197112467431294e-07, "loss": 0.4977, "step": 4970 }, { "epoch": 0.8477148703956344, "grad_norm": 0.4396877884864807, "learning_rate": 1.1944933955001225e-07, "loss": 0.4983, "step": 4971 }, { "epoch": 0.8478854024556617, "grad_norm": 0.4021702706813812, "learning_rate": 1.1918770097476818e-07, "loss": 0.4906, "step": 4972 }, { "epoch": 0.848055934515689, "grad_norm": 0.47800371050834656, "learning_rate": 1.1892633109721164e-07, "loss": 0.5015, "step": 4973 }, { "epoch": 0.8482264665757162, "grad_norm": 0.5028043389320374, "learning_rate": 1.1866522999707554e-07, "loss": 0.4934, "step": 4974 }, { "epoch": 0.8483969986357435, "grad_norm": 0.4028972089290619, "learning_rate": 1.1840439775401026e-07, "loss": 0.5008, "step": 4975 }, { "epoch": 0.8485675306957708, "grad_norm": 0.5216171145439148, "learning_rate": 1.1814383444758494e-07, "loss": 0.4799, "step": 4976 }, { "epoch": 0.8487380627557981, "grad_norm": 0.48240017890930176, "learning_rate": 1.1788354015728544e-07, "loss": 0.4938, "step": 4977 }, { "epoch": 0.8489085948158254, "grad_norm": 0.5118283033370972, "learning_rate": 1.1762351496251668e-07, "loss": 0.495, "step": 4978 }, { "epoch": 0.8490791268758526, "grad_norm": 0.4451368451118469, "learning_rate": 1.1736375894260111e-07, "loss": 0.4988, "step": 4979 }, { "epoch": 0.8492496589358799, "grad_norm": 0.5162202715873718, "learning_rate": 1.1710427217677912e-07, "loss": 0.4815, "step": 4980 }, { "epoch": 0.8494201909959073, "grad_norm": 0.5497837662696838, "learning_rate": 1.1684505474420828e-07, "loss": 0.4941, "step": 4981 }, { "epoch": 0.8495907230559345, "grad_norm": 0.44187721610069275, "learning_rate": 1.1658610672396544e-07, "loss": 0.4935, "step": 4982 }, { "epoch": 0.8497612551159618, "grad_norm": 0.5414014458656311, "learning_rate": 1.1632742819504406e-07, "loss": 0.4931, "step": 4983 }, { "epoch": 0.849931787175989, "grad_norm": 0.5177868604660034, "learning_rate": 1.1606901923635529e-07, "loss": 0.4961, "step": 4984 }, { "epoch": 0.8501023192360164, "grad_norm": 0.5770236849784851, "learning_rate": 1.1581087992672935e-07, "loss": 0.4944, "step": 4985 }, { "epoch": 0.8502728512960437, "grad_norm": 0.6375470757484436, "learning_rate": 1.1555301034491285e-07, "loss": 0.4998, "step": 4986 }, { "epoch": 0.850443383356071, "grad_norm": 0.3887059986591339, "learning_rate": 1.1529541056957073e-07, "loss": 0.4911, "step": 4987 }, { "epoch": 0.8506139154160982, "grad_norm": 0.565872848033905, "learning_rate": 1.1503808067928558e-07, "loss": 0.4981, "step": 4988 }, { "epoch": 0.8507844474761255, "grad_norm": 0.3640064001083374, "learning_rate": 1.1478102075255772e-07, "loss": 0.4947, "step": 4989 }, { "epoch": 0.8509549795361528, "grad_norm": 0.5178152918815613, "learning_rate": 1.1452423086780463e-07, "loss": 0.4861, "step": 4990 }, { "epoch": 0.8511255115961801, "grad_norm": 0.5512751936912537, "learning_rate": 1.14267711103362e-07, "loss": 0.5017, "step": 4991 }, { "epoch": 0.8512960436562074, "grad_norm": 0.4500180184841156, "learning_rate": 1.1401146153748311e-07, "loss": 0.4899, "step": 4992 }, { "epoch": 0.8514665757162346, "grad_norm": 0.4403330385684967, "learning_rate": 1.1375548224833811e-07, "loss": 0.5108, "step": 4993 }, { "epoch": 0.8516371077762619, "grad_norm": 0.36600425839424133, "learning_rate": 1.1349977331401575e-07, "loss": 0.5064, "step": 4994 }, { "epoch": 0.8518076398362893, "grad_norm": 0.3654356002807617, "learning_rate": 1.1324433481252141e-07, "loss": 0.5049, "step": 4995 }, { "epoch": 0.8519781718963165, "grad_norm": 0.36991676688194275, "learning_rate": 1.129891668217783e-07, "loss": 0.4946, "step": 4996 }, { "epoch": 0.8521487039563438, "grad_norm": 0.40469640493392944, "learning_rate": 1.127342694196273e-07, "loss": 0.4958, "step": 4997 }, { "epoch": 0.852319236016371, "grad_norm": 0.4282923936843872, "learning_rate": 1.1247964268382661e-07, "loss": 0.4859, "step": 4998 }, { "epoch": 0.8524897680763983, "grad_norm": 0.46982812881469727, "learning_rate": 1.1222528669205159e-07, "loss": 0.5041, "step": 4999 }, { "epoch": 0.8526603001364257, "grad_norm": 0.36918675899505615, "learning_rate": 1.1197120152189523e-07, "loss": 0.4915, "step": 5000 }, { "epoch": 0.852830832196453, "grad_norm": 0.49565747380256653, "learning_rate": 1.1171738725086835e-07, "loss": 0.492, "step": 5001 }, { "epoch": 0.8530013642564802, "grad_norm": 0.41457846760749817, "learning_rate": 1.1146384395639788e-07, "loss": 0.4898, "step": 5002 }, { "epoch": 0.8531718963165075, "grad_norm": 0.45303431153297424, "learning_rate": 1.1121057171582984e-07, "loss": 0.495, "step": 5003 }, { "epoch": 0.8533424283765347, "grad_norm": 0.46331140398979187, "learning_rate": 1.1095757060642588e-07, "loss": 0.5022, "step": 5004 }, { "epoch": 0.8535129604365621, "grad_norm": 0.518911600112915, "learning_rate": 1.1070484070536614e-07, "loss": 0.4841, "step": 5005 }, { "epoch": 0.8536834924965894, "grad_norm": 0.5267069339752197, "learning_rate": 1.1045238208974741e-07, "loss": 0.4957, "step": 5006 }, { "epoch": 0.8538540245566166, "grad_norm": 0.46358224749565125, "learning_rate": 1.1020019483658385e-07, "loss": 0.4886, "step": 5007 }, { "epoch": 0.8540245566166439, "grad_norm": 0.45485424995422363, "learning_rate": 1.0994827902280682e-07, "loss": 0.4955, "step": 5008 }, { "epoch": 0.8541950886766713, "grad_norm": 0.5479266047477722, "learning_rate": 1.0969663472526494e-07, "loss": 0.498, "step": 5009 }, { "epoch": 0.8543656207366985, "grad_norm": 0.5160921216011047, "learning_rate": 1.0944526202072426e-07, "loss": 0.4912, "step": 5010 }, { "epoch": 0.8545361527967258, "grad_norm": 0.39966824650764465, "learning_rate": 1.0919416098586712e-07, "loss": 0.491, "step": 5011 }, { "epoch": 0.854706684856753, "grad_norm": 0.45781365036964417, "learning_rate": 1.0894333169729433e-07, "loss": 0.4895, "step": 5012 }, { "epoch": 0.8548772169167803, "grad_norm": 0.36039382219314575, "learning_rate": 1.0869277423152247e-07, "loss": 0.4888, "step": 5013 }, { "epoch": 0.8550477489768077, "grad_norm": 0.5843361616134644, "learning_rate": 1.0844248866498598e-07, "loss": 0.49, "step": 5014 }, { "epoch": 0.855218281036835, "grad_norm": 0.5285351276397705, "learning_rate": 1.081924750740363e-07, "loss": 0.4888, "step": 5015 }, { "epoch": 0.8553888130968622, "grad_norm": 0.44772446155548096, "learning_rate": 1.0794273353494146e-07, "loss": 0.4812, "step": 5016 }, { "epoch": 0.8555593451568895, "grad_norm": 0.6695955991744995, "learning_rate": 1.0769326412388682e-07, "loss": 0.4976, "step": 5017 }, { "epoch": 0.8557298772169167, "grad_norm": 0.539442777633667, "learning_rate": 1.0744406691697474e-07, "loss": 0.4828, "step": 5018 }, { "epoch": 0.8559004092769441, "grad_norm": 0.5010582208633423, "learning_rate": 1.0719514199022475e-07, "loss": 0.4885, "step": 5019 }, { "epoch": 0.8560709413369714, "grad_norm": 0.4613291621208191, "learning_rate": 1.0694648941957243e-07, "loss": 0.4891, "step": 5020 }, { "epoch": 0.8562414733969986, "grad_norm": 0.4389791786670685, "learning_rate": 1.0669810928087178e-07, "loss": 0.4909, "step": 5021 }, { "epoch": 0.8564120054570259, "grad_norm": 0.5779827833175659, "learning_rate": 1.0645000164989218e-07, "loss": 0.4858, "step": 5022 }, { "epoch": 0.8565825375170532, "grad_norm": 0.46866342425346375, "learning_rate": 1.0620216660232075e-07, "loss": 0.4909, "step": 5023 }, { "epoch": 0.8567530695770805, "grad_norm": 0.4902592897415161, "learning_rate": 1.0595460421376126e-07, "loss": 0.4887, "step": 5024 }, { "epoch": 0.8569236016371078, "grad_norm": 0.5032135248184204, "learning_rate": 1.0570731455973416e-07, "loss": 0.501, "step": 5025 }, { "epoch": 0.857094133697135, "grad_norm": 0.4387756884098053, "learning_rate": 1.054602977156768e-07, "loss": 0.4862, "step": 5026 }, { "epoch": 0.8572646657571623, "grad_norm": 0.40801355242729187, "learning_rate": 1.0521355375694332e-07, "loss": 0.5032, "step": 5027 }, { "epoch": 0.8574351978171897, "grad_norm": 0.4306230843067169, "learning_rate": 1.0496708275880497e-07, "loss": 0.4903, "step": 5028 }, { "epoch": 0.857605729877217, "grad_norm": 0.3730475604534149, "learning_rate": 1.0472088479644883e-07, "loss": 0.4958, "step": 5029 }, { "epoch": 0.8577762619372442, "grad_norm": 0.42054077982902527, "learning_rate": 1.0447495994497959e-07, "loss": 0.4945, "step": 5030 }, { "epoch": 0.8579467939972715, "grad_norm": 0.4239777624607086, "learning_rate": 1.0422930827941803e-07, "loss": 0.4994, "step": 5031 }, { "epoch": 0.8581173260572987, "grad_norm": 0.39427489042282104, "learning_rate": 1.0398392987470208e-07, "loss": 0.5098, "step": 5032 }, { "epoch": 0.8582878581173261, "grad_norm": 0.43317997455596924, "learning_rate": 1.0373882480568621e-07, "loss": 0.5067, "step": 5033 }, { "epoch": 0.8584583901773534, "grad_norm": 0.4470864534378052, "learning_rate": 1.0349399314714082e-07, "loss": 0.4955, "step": 5034 }, { "epoch": 0.8586289222373806, "grad_norm": 0.3927958905696869, "learning_rate": 1.0324943497375384e-07, "loss": 0.4984, "step": 5035 }, { "epoch": 0.8587994542974079, "grad_norm": 0.42295411229133606, "learning_rate": 1.0300515036012927e-07, "loss": 0.5007, "step": 5036 }, { "epoch": 0.8589699863574352, "grad_norm": 0.5283581614494324, "learning_rate": 1.027611393807877e-07, "loss": 0.4972, "step": 5037 }, { "epoch": 0.8591405184174625, "grad_norm": 0.48814842104911804, "learning_rate": 1.025174021101666e-07, "loss": 0.4959, "step": 5038 }, { "epoch": 0.8593110504774898, "grad_norm": 0.4385189414024353, "learning_rate": 1.0227393862261937e-07, "loss": 0.5083, "step": 5039 }, { "epoch": 0.859481582537517, "grad_norm": 0.5434243083000183, "learning_rate": 1.0203074899241617e-07, "loss": 0.5133, "step": 5040 }, { "epoch": 0.8596521145975443, "grad_norm": 0.48706600069999695, "learning_rate": 1.0178783329374366e-07, "loss": 0.5069, "step": 5041 }, { "epoch": 0.8598226466575716, "grad_norm": 0.5134983658790588, "learning_rate": 1.0154519160070521e-07, "loss": 0.4947, "step": 5042 }, { "epoch": 0.859993178717599, "grad_norm": 0.5847577452659607, "learning_rate": 1.0130282398731984e-07, "loss": 0.4919, "step": 5043 }, { "epoch": 0.8601637107776262, "grad_norm": 0.5677647590637207, "learning_rate": 1.0106073052752364e-07, "loss": 0.5007, "step": 5044 }, { "epoch": 0.8603342428376535, "grad_norm": 0.4906907081604004, "learning_rate": 1.0081891129516898e-07, "loss": 0.4939, "step": 5045 }, { "epoch": 0.8605047748976807, "grad_norm": 0.4433136284351349, "learning_rate": 1.0057736636402382e-07, "loss": 0.503, "step": 5046 }, { "epoch": 0.860675306957708, "grad_norm": 0.550963819026947, "learning_rate": 1.0033609580777393e-07, "loss": 0.5154, "step": 5047 }, { "epoch": 0.8608458390177354, "grad_norm": 0.5232732892036438, "learning_rate": 1.0009509970001986e-07, "loss": 0.5014, "step": 5048 }, { "epoch": 0.8610163710777626, "grad_norm": 0.4386729896068573, "learning_rate": 9.985437811427935e-08, "loss": 0.4983, "step": 5049 }, { "epoch": 0.8611869031377899, "grad_norm": 0.5767355561256409, "learning_rate": 9.961393112398599e-08, "loss": 0.5067, "step": 5050 }, { "epoch": 0.8613574351978172, "grad_norm": 0.41584306955337524, "learning_rate": 9.937375880249013e-08, "loss": 0.5022, "step": 5051 }, { "epoch": 0.8615279672578445, "grad_norm": 0.5685270428657532, "learning_rate": 9.913386122305746e-08, "loss": 0.4941, "step": 5052 }, { "epoch": 0.8616984993178718, "grad_norm": 0.5781306624412537, "learning_rate": 9.889423845887048e-08, "loss": 0.4879, "step": 5053 }, { "epoch": 0.861869031377899, "grad_norm": 0.4216209948062897, "learning_rate": 9.865489058302791e-08, "loss": 0.5053, "step": 5054 }, { "epoch": 0.8620395634379263, "grad_norm": 0.6022716760635376, "learning_rate": 9.841581766854403e-08, "loss": 0.4927, "step": 5055 }, { "epoch": 0.8622100954979536, "grad_norm": 0.47441956400871277, "learning_rate": 9.817701978835026e-08, "loss": 0.4861, "step": 5056 }, { "epoch": 0.862380627557981, "grad_norm": 0.4886791408061981, "learning_rate": 9.79384970152928e-08, "loss": 0.5001, "step": 5057 }, { "epoch": 0.8625511596180082, "grad_norm": 0.5147485733032227, "learning_rate": 9.770024942213508e-08, "loss": 0.4978, "step": 5058 }, { "epoch": 0.8627216916780355, "grad_norm": 0.37711355090141296, "learning_rate": 9.746227708155588e-08, "loss": 0.4812, "step": 5059 }, { "epoch": 0.8628922237380627, "grad_norm": 0.6241587400436401, "learning_rate": 9.722458006615066e-08, "loss": 0.4887, "step": 5060 }, { "epoch": 0.86306275579809, "grad_norm": 0.43906447291374207, "learning_rate": 9.698715844842988e-08, "loss": 0.4887, "step": 5061 }, { "epoch": 0.8632332878581174, "grad_norm": 0.456028014421463, "learning_rate": 9.675001230082094e-08, "loss": 0.4856, "step": 5062 }, { "epoch": 0.8634038199181446, "grad_norm": 0.5518314838409424, "learning_rate": 9.651314169566695e-08, "loss": 0.4823, "step": 5063 }, { "epoch": 0.8635743519781719, "grad_norm": 0.4024985134601593, "learning_rate": 9.627654670522647e-08, "loss": 0.4892, "step": 5064 }, { "epoch": 0.8637448840381992, "grad_norm": 0.575171709060669, "learning_rate": 9.604022740167497e-08, "loss": 0.5066, "step": 5065 }, { "epoch": 0.8639154160982264, "grad_norm": 0.491485595703125, "learning_rate": 9.580418385710278e-08, "loss": 0.4864, "step": 5066 }, { "epoch": 0.8640859481582538, "grad_norm": 0.3869340419769287, "learning_rate": 9.556841614351664e-08, "loss": 0.4914, "step": 5067 }, { "epoch": 0.864256480218281, "grad_norm": 0.5501559376716614, "learning_rate": 9.533292433283915e-08, "loss": 0.4869, "step": 5068 }, { "epoch": 0.8644270122783083, "grad_norm": 0.3556576669216156, "learning_rate": 9.509770849690882e-08, "loss": 0.494, "step": 5069 }, { "epoch": 0.8645975443383356, "grad_norm": 0.6158584356307983, "learning_rate": 9.486276870747946e-08, "loss": 0.4884, "step": 5070 }, { "epoch": 0.8647680763983628, "grad_norm": 0.41104334592819214, "learning_rate": 9.462810503622122e-08, "loss": 0.4886, "step": 5071 }, { "epoch": 0.8649386084583902, "grad_norm": 0.4305412173271179, "learning_rate": 9.439371755472005e-08, "loss": 0.4923, "step": 5072 }, { "epoch": 0.8651091405184175, "grad_norm": 0.3920281231403351, "learning_rate": 9.415960633447674e-08, "loss": 0.4819, "step": 5073 }, { "epoch": 0.8652796725784447, "grad_norm": 0.4484941065311432, "learning_rate": 9.39257714469093e-08, "loss": 0.4918, "step": 5074 }, { "epoch": 0.865450204638472, "grad_norm": 0.41160324215888977, "learning_rate": 9.369221296335008e-08, "loss": 0.4872, "step": 5075 }, { "epoch": 0.8656207366984994, "grad_norm": 0.3788316249847412, "learning_rate": 9.345893095504778e-08, "loss": 0.4862, "step": 5076 }, { "epoch": 0.8657912687585266, "grad_norm": 0.46384909749031067, "learning_rate": 9.322592549316701e-08, "loss": 0.4842, "step": 5077 }, { "epoch": 0.8659618008185539, "grad_norm": 0.47224605083465576, "learning_rate": 9.299319664878704e-08, "loss": 0.4951, "step": 5078 }, { "epoch": 0.8661323328785812, "grad_norm": 0.3998444676399231, "learning_rate": 9.276074449290363e-08, "loss": 0.4919, "step": 5079 }, { "epoch": 0.8663028649386084, "grad_norm": 0.37172403931617737, "learning_rate": 9.252856909642794e-08, "loss": 0.4787, "step": 5080 }, { "epoch": 0.8664733969986358, "grad_norm": 0.4312663972377777, "learning_rate": 9.229667053018666e-08, "loss": 0.4894, "step": 5081 }, { "epoch": 0.866643929058663, "grad_norm": 0.41867002844810486, "learning_rate": 9.206504886492163e-08, "loss": 0.4868, "step": 5082 }, { "epoch": 0.8668144611186903, "grad_norm": 0.5481159090995789, "learning_rate": 9.183370417129124e-08, "loss": 0.4927, "step": 5083 }, { "epoch": 0.8669849931787176, "grad_norm": 0.46602848172187805, "learning_rate": 9.160263651986834e-08, "loss": 0.4848, "step": 5084 }, { "epoch": 0.8671555252387448, "grad_norm": 0.5236701369285583, "learning_rate": 9.137184598114136e-08, "loss": 0.4913, "step": 5085 }, { "epoch": 0.8673260572987722, "grad_norm": 0.6084462404251099, "learning_rate": 9.114133262551518e-08, "loss": 0.4849, "step": 5086 }, { "epoch": 0.8674965893587995, "grad_norm": 0.4827587604522705, "learning_rate": 9.091109652330888e-08, "loss": 0.49, "step": 5087 }, { "epoch": 0.8676671214188267, "grad_norm": 0.4759979248046875, "learning_rate": 9.068113774475785e-08, "loss": 0.4912, "step": 5088 }, { "epoch": 0.867837653478854, "grad_norm": 0.6387996077537537, "learning_rate": 9.04514563600123e-08, "loss": 0.4889, "step": 5089 }, { "epoch": 0.8680081855388813, "grad_norm": 0.4683969020843506, "learning_rate": 9.022205243913852e-08, "loss": 0.4834, "step": 5090 }, { "epoch": 0.8681787175989086, "grad_norm": 0.4864799678325653, "learning_rate": 8.999292605211697e-08, "loss": 0.4772, "step": 5091 }, { "epoch": 0.8683492496589359, "grad_norm": 0.40657493472099304, "learning_rate": 8.976407726884496e-08, "loss": 0.4879, "step": 5092 }, { "epoch": 0.8685197817189632, "grad_norm": 0.5255517363548279, "learning_rate": 8.953550615913398e-08, "loss": 0.4851, "step": 5093 }, { "epoch": 0.8686903137789904, "grad_norm": 0.48127636313438416, "learning_rate": 8.930721279271093e-08, "loss": 0.4762, "step": 5094 }, { "epoch": 0.8688608458390177, "grad_norm": 0.3790895938873291, "learning_rate": 8.907919723921889e-08, "loss": 0.4927, "step": 5095 }, { "epoch": 0.869031377899045, "grad_norm": 0.5422013998031616, "learning_rate": 8.885145956821496e-08, "loss": 0.4902, "step": 5096 }, { "epoch": 0.8692019099590723, "grad_norm": 0.4123353362083435, "learning_rate": 8.862399984917214e-08, "loss": 0.4844, "step": 5097 }, { "epoch": 0.8693724420190996, "grad_norm": 0.43875542283058167, "learning_rate": 8.839681815147872e-08, "loss": 0.4767, "step": 5098 }, { "epoch": 0.8695429740791268, "grad_norm": 0.5209375023841858, "learning_rate": 8.816991454443803e-08, "loss": 0.4841, "step": 5099 }, { "epoch": 0.8697135061391542, "grad_norm": 0.5397298336029053, "learning_rate": 8.794328909726825e-08, "loss": 0.4743, "step": 5100 }, { "epoch": 0.8698840381991815, "grad_norm": 0.4797486662864685, "learning_rate": 8.771694187910307e-08, "loss": 0.4876, "step": 5101 }, { "epoch": 0.8700545702592087, "grad_norm": 0.40398508310317993, "learning_rate": 8.74908729589914e-08, "loss": 0.4942, "step": 5102 }, { "epoch": 0.870225102319236, "grad_norm": 0.527980625629425, "learning_rate": 8.726508240589693e-08, "loss": 0.4855, "step": 5103 }, { "epoch": 0.8703956343792633, "grad_norm": 0.5567640662193298, "learning_rate": 8.703957028869881e-08, "loss": 0.4873, "step": 5104 }, { "epoch": 0.8705661664392906, "grad_norm": 0.4136599004268646, "learning_rate": 8.681433667619066e-08, "loss": 0.4807, "step": 5105 }, { "epoch": 0.8707366984993179, "grad_norm": 0.5601804852485657, "learning_rate": 8.658938163708175e-08, "loss": 0.4908, "step": 5106 }, { "epoch": 0.8709072305593452, "grad_norm": 0.35288894176483154, "learning_rate": 8.636470523999597e-08, "loss": 0.4844, "step": 5107 }, { "epoch": 0.8710777626193724, "grad_norm": 0.5323822498321533, "learning_rate": 8.614030755347269e-08, "loss": 0.4885, "step": 5108 }, { "epoch": 0.8712482946793997, "grad_norm": 0.4504302144050598, "learning_rate": 8.591618864596541e-08, "loss": 0.4865, "step": 5109 }, { "epoch": 0.871418826739427, "grad_norm": 0.46580883860588074, "learning_rate": 8.569234858584342e-08, "loss": 0.4929, "step": 5110 }, { "epoch": 0.8715893587994543, "grad_norm": 0.6324315667152405, "learning_rate": 8.546878744139065e-08, "loss": 0.4887, "step": 5111 }, { "epoch": 0.8717598908594816, "grad_norm": 0.4285943806171417, "learning_rate": 8.524550528080578e-08, "loss": 0.4835, "step": 5112 }, { "epoch": 0.8719304229195088, "grad_norm": 0.5929010510444641, "learning_rate": 8.502250217220276e-08, "loss": 0.493, "step": 5113 }, { "epoch": 0.8721009549795361, "grad_norm": 0.5795062184333801, "learning_rate": 8.479977818360986e-08, "loss": 0.4873, "step": 5114 }, { "epoch": 0.8722714870395635, "grad_norm": 0.3946116268634796, "learning_rate": 8.457733338297071e-08, "loss": 0.4955, "step": 5115 }, { "epoch": 0.8724420190995907, "grad_norm": 0.5786437392234802, "learning_rate": 8.43551678381437e-08, "loss": 0.4814, "step": 5116 }, { "epoch": 0.872612551159618, "grad_norm": 0.3585083484649658, "learning_rate": 8.41332816169016e-08, "loss": 0.4901, "step": 5117 }, { "epoch": 0.8727830832196453, "grad_norm": 0.5978417992591858, "learning_rate": 8.391167478693241e-08, "loss": 0.4959, "step": 5118 }, { "epoch": 0.8729536152796725, "grad_norm": 0.373609721660614, "learning_rate": 8.369034741583893e-08, "loss": 0.4852, "step": 5119 }, { "epoch": 0.8731241473396999, "grad_norm": 0.5995633006095886, "learning_rate": 8.346929957113848e-08, "loss": 0.4938, "step": 5120 }, { "epoch": 0.8732946793997272, "grad_norm": 0.4977155029773712, "learning_rate": 8.324853132026301e-08, "loss": 0.4851, "step": 5121 }, { "epoch": 0.8734652114597544, "grad_norm": 0.5150020122528076, "learning_rate": 8.302804273055976e-08, "loss": 0.4951, "step": 5122 }, { "epoch": 0.8736357435197817, "grad_norm": 0.6290849447250366, "learning_rate": 8.280783386928982e-08, "loss": 0.4773, "step": 5123 }, { "epoch": 0.873806275579809, "grad_norm": 0.4567163288593292, "learning_rate": 8.258790480362956e-08, "loss": 0.4962, "step": 5124 }, { "epoch": 0.8739768076398363, "grad_norm": 0.44084906578063965, "learning_rate": 8.236825560066997e-08, "loss": 0.4961, "step": 5125 }, { "epoch": 0.8741473396998636, "grad_norm": 0.5285959839820862, "learning_rate": 8.214888632741603e-08, "loss": 0.4841, "step": 5126 }, { "epoch": 0.8743178717598908, "grad_norm": 0.4977264106273651, "learning_rate": 8.192979705078853e-08, "loss": 0.4911, "step": 5127 }, { "epoch": 0.8744884038199181, "grad_norm": 0.5132419466972351, "learning_rate": 8.171098783762133e-08, "loss": 0.4838, "step": 5128 }, { "epoch": 0.8746589358799455, "grad_norm": 0.6812865734100342, "learning_rate": 8.149245875466417e-08, "loss": 0.4882, "step": 5129 }, { "epoch": 0.8748294679399727, "grad_norm": 0.5702094435691833, "learning_rate": 8.12742098685807e-08, "loss": 0.4923, "step": 5130 }, { "epoch": 0.875, "grad_norm": 0.7046767473220825, "learning_rate": 8.105624124594925e-08, "loss": 0.4935, "step": 5131 }, { "epoch": 0.8751705320600273, "grad_norm": 0.7282161712646484, "learning_rate": 8.083855295326238e-08, "loss": 0.4899, "step": 5132 }, { "epoch": 0.8753410641200545, "grad_norm": 0.39676764607429504, "learning_rate": 8.062114505692743e-08, "loss": 0.4838, "step": 5133 }, { "epoch": 0.8755115961800819, "grad_norm": 0.5209677815437317, "learning_rate": 8.040401762326655e-08, "loss": 0.4847, "step": 5134 }, { "epoch": 0.8756821282401092, "grad_norm": 0.475814551115036, "learning_rate": 8.018717071851512e-08, "loss": 0.4919, "step": 5135 }, { "epoch": 0.8758526603001364, "grad_norm": 0.5101640820503235, "learning_rate": 7.997060440882454e-08, "loss": 0.4844, "step": 5136 }, { "epoch": 0.8760231923601637, "grad_norm": 0.610802948474884, "learning_rate": 7.975431876025931e-08, "loss": 0.4884, "step": 5137 }, { "epoch": 0.876193724420191, "grad_norm": 0.5693475604057312, "learning_rate": 7.953831383879896e-08, "loss": 0.4847, "step": 5138 }, { "epoch": 0.8763642564802183, "grad_norm": 0.505389392375946, "learning_rate": 7.932258971033747e-08, "loss": 0.4864, "step": 5139 }, { "epoch": 0.8765347885402456, "grad_norm": 0.39394593238830566, "learning_rate": 7.910714644068252e-08, "loss": 0.4977, "step": 5140 }, { "epoch": 0.8767053206002728, "grad_norm": 0.5716357231140137, "learning_rate": 7.889198409555666e-08, "loss": 0.4942, "step": 5141 }, { "epoch": 0.8768758526603001, "grad_norm": 0.4407377541065216, "learning_rate": 7.86771027405967e-08, "loss": 0.4931, "step": 5142 }, { "epoch": 0.8770463847203275, "grad_norm": 0.528765857219696, "learning_rate": 7.84625024413538e-08, "loss": 0.4771, "step": 5143 }, { "epoch": 0.8772169167803547, "grad_norm": 0.4909074902534485, "learning_rate": 7.824818326329265e-08, "loss": 0.494, "step": 5144 }, { "epoch": 0.877387448840382, "grad_norm": 0.4644756317138672, "learning_rate": 7.803414527179344e-08, "loss": 0.4946, "step": 5145 }, { "epoch": 0.8775579809004093, "grad_norm": 0.4189339280128479, "learning_rate": 7.782038853214936e-08, "loss": 0.4944, "step": 5146 }, { "epoch": 0.8777285129604365, "grad_norm": 0.47703874111175537, "learning_rate": 7.760691310956848e-08, "loss": 0.4969, "step": 5147 }, { "epoch": 0.8778990450204639, "grad_norm": 0.5297402739524841, "learning_rate": 7.739371906917326e-08, "loss": 0.498, "step": 5148 }, { "epoch": 0.8780695770804912, "grad_norm": 0.4273715317249298, "learning_rate": 7.718080647599946e-08, "loss": 0.4918, "step": 5149 }, { "epoch": 0.8782401091405184, "grad_norm": 0.4729132056236267, "learning_rate": 7.696817539499769e-08, "loss": 0.4946, "step": 5150 }, { "epoch": 0.8784106412005457, "grad_norm": 0.44749221205711365, "learning_rate": 7.675582589103249e-08, "loss": 0.4997, "step": 5151 }, { "epoch": 0.878581173260573, "grad_norm": 0.531842827796936, "learning_rate": 7.654375802888271e-08, "loss": 0.4927, "step": 5152 }, { "epoch": 0.8787517053206003, "grad_norm": 0.5416556596755981, "learning_rate": 7.633197187324049e-08, "loss": 0.4951, "step": 5153 }, { "epoch": 0.8789222373806276, "grad_norm": 0.43462786078453064, "learning_rate": 7.612046748871327e-08, "loss": 0.4975, "step": 5154 }, { "epoch": 0.8790927694406548, "grad_norm": 0.535729706287384, "learning_rate": 7.590924493982167e-08, "loss": 0.4922, "step": 5155 }, { "epoch": 0.8792633015006821, "grad_norm": 0.4104560613632202, "learning_rate": 7.569830429100013e-08, "loss": 0.5074, "step": 5156 }, { "epoch": 0.8794338335607094, "grad_norm": 0.5912339687347412, "learning_rate": 7.548764560659816e-08, "loss": 0.5, "step": 5157 }, { "epoch": 0.8796043656207367, "grad_norm": 0.48691239953041077, "learning_rate": 7.527726895087818e-08, "loss": 0.5054, "step": 5158 }, { "epoch": 0.879774897680764, "grad_norm": 0.3652518093585968, "learning_rate": 7.506717438801703e-08, "loss": 0.4919, "step": 5159 }, { "epoch": 0.8799454297407913, "grad_norm": 0.6726717948913574, "learning_rate": 7.485736198210559e-08, "loss": 0.4965, "step": 5160 }, { "epoch": 0.8801159618008185, "grad_norm": 0.34279900789260864, "learning_rate": 7.464783179714864e-08, "loss": 0.4835, "step": 5161 }, { "epoch": 0.8802864938608458, "grad_norm": 0.5112735629081726, "learning_rate": 7.443858389706449e-08, "loss": 0.4858, "step": 5162 }, { "epoch": 0.8804570259208732, "grad_norm": 0.4501814842224121, "learning_rate": 7.422961834568566e-08, "loss": 0.4892, "step": 5163 }, { "epoch": 0.8806275579809004, "grad_norm": 0.44029852747917175, "learning_rate": 7.40209352067588e-08, "loss": 0.4844, "step": 5164 }, { "epoch": 0.8807980900409277, "grad_norm": 0.5076018571853638, "learning_rate": 7.381253454394344e-08, "loss": 0.4811, "step": 5165 }, { "epoch": 0.880968622100955, "grad_norm": 0.42398062348365784, "learning_rate": 7.360441642081451e-08, "loss": 0.4827, "step": 5166 }, { "epoch": 0.8811391541609823, "grad_norm": 0.515562117099762, "learning_rate": 7.3396580900859e-08, "loss": 0.4884, "step": 5167 }, { "epoch": 0.8813096862210096, "grad_norm": 0.47236382961273193, "learning_rate": 7.318902804747897e-08, "loss": 0.4856, "step": 5168 }, { "epoch": 0.8814802182810368, "grad_norm": 0.4556432366371155, "learning_rate": 7.298175792398977e-08, "loss": 0.4945, "step": 5169 }, { "epoch": 0.8816507503410641, "grad_norm": 0.5327446460723877, "learning_rate": 7.27747705936206e-08, "loss": 0.482, "step": 5170 }, { "epoch": 0.8818212824010914, "grad_norm": 0.3763869106769562, "learning_rate": 7.256806611951396e-08, "loss": 0.4998, "step": 5171 }, { "epoch": 0.8819918144611187, "grad_norm": 0.5114072561264038, "learning_rate": 7.236164456472673e-08, "loss": 0.4874, "step": 5172 }, { "epoch": 0.882162346521146, "grad_norm": 0.4112900197505951, "learning_rate": 7.215550599222932e-08, "loss": 0.5014, "step": 5173 }, { "epoch": 0.8823328785811733, "grad_norm": 0.43765711784362793, "learning_rate": 7.194965046490521e-08, "loss": 0.4978, "step": 5174 }, { "epoch": 0.8825034106412005, "grad_norm": 0.6492908596992493, "learning_rate": 7.174407804555261e-08, "loss": 0.5022, "step": 5175 }, { "epoch": 0.8826739427012278, "grad_norm": 0.4558906555175781, "learning_rate": 7.153878879688226e-08, "loss": 0.4933, "step": 5176 }, { "epoch": 0.8828444747612552, "grad_norm": 0.6890559196472168, "learning_rate": 7.133378278151926e-08, "loss": 0.5027, "step": 5177 }, { "epoch": 0.8830150068212824, "grad_norm": 0.48971065878868103, "learning_rate": 7.112906006200216e-08, "loss": 0.4882, "step": 5178 }, { "epoch": 0.8831855388813097, "grad_norm": 0.6021869778633118, "learning_rate": 7.092462070078266e-08, "loss": 0.4962, "step": 5179 }, { "epoch": 0.883356070941337, "grad_norm": 0.6577697396278381, "learning_rate": 7.072046476022654e-08, "loss": 0.4882, "step": 5180 }, { "epoch": 0.8835266030013642, "grad_norm": 0.4322252571582794, "learning_rate": 7.051659230261299e-08, "loss": 0.5037, "step": 5181 }, { "epoch": 0.8836971350613916, "grad_norm": 0.6661120057106018, "learning_rate": 7.031300339013481e-08, "loss": 0.4843, "step": 5182 }, { "epoch": 0.8838676671214188, "grad_norm": 0.6816942095756531, "learning_rate": 7.010969808489754e-08, "loss": 0.5055, "step": 5183 }, { "epoch": 0.8840381991814461, "grad_norm": 0.6046971678733826, "learning_rate": 6.990667644892169e-08, "loss": 0.488, "step": 5184 }, { "epoch": 0.8842087312414734, "grad_norm": 0.6977609395980835, "learning_rate": 6.970393854413971e-08, "loss": 0.5025, "step": 5185 }, { "epoch": 0.8843792633015006, "grad_norm": 0.5585513710975647, "learning_rate": 6.950148443239846e-08, "loss": 0.4875, "step": 5186 }, { "epoch": 0.884549795361528, "grad_norm": 0.41966915130615234, "learning_rate": 6.929931417545788e-08, "loss": 0.5154, "step": 5187 }, { "epoch": 0.8847203274215553, "grad_norm": 0.5780752897262573, "learning_rate": 6.90974278349913e-08, "loss": 0.5023, "step": 5188 }, { "epoch": 0.8848908594815825, "grad_norm": 0.49180328845977783, "learning_rate": 6.889582547258545e-08, "loss": 0.4864, "step": 5189 }, { "epoch": 0.8850613915416098, "grad_norm": 0.5427616834640503, "learning_rate": 6.869450714974058e-08, "loss": 0.5007, "step": 5190 }, { "epoch": 0.8852319236016372, "grad_norm": 0.4825659692287445, "learning_rate": 6.849347292787013e-08, "loss": 0.4923, "step": 5191 }, { "epoch": 0.8854024556616644, "grad_norm": 0.47590872645378113, "learning_rate": 6.829272286830103e-08, "loss": 0.4864, "step": 5192 }, { "epoch": 0.8855729877216917, "grad_norm": 0.43711164593696594, "learning_rate": 6.809225703227352e-08, "loss": 0.488, "step": 5193 }, { "epoch": 0.885743519781719, "grad_norm": 0.5710449814796448, "learning_rate": 6.789207548094068e-08, "loss": 0.5006, "step": 5194 }, { "epoch": 0.8859140518417462, "grad_norm": 0.5110660195350647, "learning_rate": 6.769217827536957e-08, "loss": 0.4982, "step": 5195 }, { "epoch": 0.8860845839017736, "grad_norm": 0.3535400629043579, "learning_rate": 6.749256547654004e-08, "loss": 0.4929, "step": 5196 }, { "epoch": 0.8862551159618008, "grad_norm": 0.46066713333129883, "learning_rate": 6.729323714534531e-08, "loss": 0.4912, "step": 5197 }, { "epoch": 0.8864256480218281, "grad_norm": 0.3608464002609253, "learning_rate": 6.709419334259172e-08, "loss": 0.5013, "step": 5198 }, { "epoch": 0.8865961800818554, "grad_norm": 0.48876771330833435, "learning_rate": 6.689543412899914e-08, "loss": 0.5096, "step": 5199 }, { "epoch": 0.8867667121418826, "grad_norm": 0.466252863407135, "learning_rate": 6.669695956520018e-08, "loss": 0.496, "step": 5200 }, { "epoch": 0.88693724420191, "grad_norm": 0.45235931873321533, "learning_rate": 6.649876971174119e-08, "loss": 0.4862, "step": 5201 }, { "epoch": 0.8871077762619373, "grad_norm": 0.4634920656681061, "learning_rate": 6.630086462908081e-08, "loss": 0.5018, "step": 5202 }, { "epoch": 0.8872783083219645, "grad_norm": 0.4214763045310974, "learning_rate": 6.610324437759153e-08, "loss": 0.5075, "step": 5203 }, { "epoch": 0.8874488403819918, "grad_norm": 0.5170018672943115, "learning_rate": 6.59059090175589e-08, "loss": 0.4944, "step": 5204 }, { "epoch": 0.887619372442019, "grad_norm": 0.4873010516166687, "learning_rate": 6.57088586091813e-08, "loss": 0.4892, "step": 5205 }, { "epoch": 0.8877899045020464, "grad_norm": 0.5427590608596802, "learning_rate": 6.551209321257014e-08, "loss": 0.502, "step": 5206 }, { "epoch": 0.8879604365620737, "grad_norm": 0.4779849648475647, "learning_rate": 6.531561288775002e-08, "loss": 0.5112, "step": 5207 }, { "epoch": 0.888130968622101, "grad_norm": 0.5059120655059814, "learning_rate": 6.51194176946588e-08, "loss": 0.4968, "step": 5208 }, { "epoch": 0.8883015006821282, "grad_norm": 0.48147422075271606, "learning_rate": 6.492350769314693e-08, "loss": 0.5051, "step": 5209 }, { "epoch": 0.8884720327421555, "grad_norm": 0.36521458625793457, "learning_rate": 6.472788294297828e-08, "loss": 0.4892, "step": 5210 }, { "epoch": 0.8886425648021828, "grad_norm": 0.5547593832015991, "learning_rate": 6.453254350382921e-08, "loss": 0.5029, "step": 5211 }, { "epoch": 0.8888130968622101, "grad_norm": 0.48255598545074463, "learning_rate": 6.43374894352895e-08, "loss": 0.4751, "step": 5212 }, { "epoch": 0.8889836289222374, "grad_norm": 0.49428290128707886, "learning_rate": 6.414272079686157e-08, "loss": 0.4943, "step": 5213 }, { "epoch": 0.8891541609822646, "grad_norm": 0.46080899238586426, "learning_rate": 6.394823764796121e-08, "loss": 0.4904, "step": 5214 }, { "epoch": 0.889324693042292, "grad_norm": 0.4548596739768982, "learning_rate": 6.37540400479163e-08, "loss": 0.4857, "step": 5215 }, { "epoch": 0.8894952251023193, "grad_norm": 0.5355672836303711, "learning_rate": 6.356012805596868e-08, "loss": 0.4906, "step": 5216 }, { "epoch": 0.8896657571623465, "grad_norm": 0.38427338004112244, "learning_rate": 6.336650173127224e-08, "loss": 0.5018, "step": 5217 }, { "epoch": 0.8898362892223738, "grad_norm": 0.5887435078620911, "learning_rate": 6.317316113289375e-08, "loss": 0.4988, "step": 5218 }, { "epoch": 0.890006821282401, "grad_norm": 0.4572054445743561, "learning_rate": 6.298010631981345e-08, "loss": 0.4782, "step": 5219 }, { "epoch": 0.8901773533424284, "grad_norm": 0.5091980695724487, "learning_rate": 6.27873373509238e-08, "loss": 0.489, "step": 5220 }, { "epoch": 0.8903478854024557, "grad_norm": 0.6113678812980652, "learning_rate": 6.259485428503021e-08, "loss": 0.4883, "step": 5221 }, { "epoch": 0.890518417462483, "grad_norm": 0.4003571569919586, "learning_rate": 6.240265718085104e-08, "loss": 0.4926, "step": 5222 }, { "epoch": 0.8906889495225102, "grad_norm": 0.5379875898361206, "learning_rate": 6.221074609701739e-08, "loss": 0.4876, "step": 5223 }, { "epoch": 0.8908594815825375, "grad_norm": 0.4984959363937378, "learning_rate": 6.201912109207265e-08, "loss": 0.4949, "step": 5224 }, { "epoch": 0.8910300136425648, "grad_norm": 0.4263933598995209, "learning_rate": 6.182778222447384e-08, "loss": 0.494, "step": 5225 }, { "epoch": 0.8912005457025921, "grad_norm": 0.510601282119751, "learning_rate": 6.163672955258982e-08, "loss": 0.489, "step": 5226 }, { "epoch": 0.8913710777626194, "grad_norm": 0.4412578046321869, "learning_rate": 6.144596313470242e-08, "loss": 0.4941, "step": 5227 }, { "epoch": 0.8915416098226466, "grad_norm": 0.4267738461494446, "learning_rate": 6.125548302900653e-08, "loss": 0.4844, "step": 5228 }, { "epoch": 0.8917121418826739, "grad_norm": 0.516291618347168, "learning_rate": 6.106528929360911e-08, "loss": 0.4975, "step": 5229 }, { "epoch": 0.8918826739427013, "grad_norm": 0.5590835213661194, "learning_rate": 6.08753819865301e-08, "loss": 0.4855, "step": 5230 }, { "epoch": 0.8920532060027285, "grad_norm": 0.4871748983860016, "learning_rate": 6.068576116570192e-08, "loss": 0.484, "step": 5231 }, { "epoch": 0.8922237380627558, "grad_norm": 0.4421897530555725, "learning_rate": 6.049642688897006e-08, "loss": 0.4999, "step": 5232 }, { "epoch": 0.892394270122783, "grad_norm": 0.5166879296302795, "learning_rate": 6.030737921409169e-08, "loss": 0.5015, "step": 5233 }, { "epoch": 0.8925648021828103, "grad_norm": 0.408371239900589, "learning_rate": 6.011861819873733e-08, "loss": 0.4876, "step": 5234 }, { "epoch": 0.8927353342428377, "grad_norm": 0.6659005284309387, "learning_rate": 5.993014390048991e-08, "loss": 0.4929, "step": 5235 }, { "epoch": 0.892905866302865, "grad_norm": 0.4018940329551697, "learning_rate": 5.974195637684422e-08, "loss": 0.4928, "step": 5236 }, { "epoch": 0.8930763983628922, "grad_norm": 0.5677812695503235, "learning_rate": 5.95540556852089e-08, "loss": 0.4862, "step": 5237 }, { "epoch": 0.8932469304229195, "grad_norm": 0.466183066368103, "learning_rate": 5.936644188290386e-08, "loss": 0.4923, "step": 5238 }, { "epoch": 0.8934174624829468, "grad_norm": 0.573516309261322, "learning_rate": 5.9179115027161895e-08, "loss": 0.4921, "step": 5239 }, { "epoch": 0.8935879945429741, "grad_norm": 0.6025112867355347, "learning_rate": 5.899207517512873e-08, "loss": 0.4986, "step": 5240 }, { "epoch": 0.8937585266030014, "grad_norm": 0.46804147958755493, "learning_rate": 5.880532238386161e-08, "loss": 0.4945, "step": 5241 }, { "epoch": 0.8939290586630286, "grad_norm": 0.5839747190475464, "learning_rate": 5.8618856710330966e-08, "loss": 0.4805, "step": 5242 }, { "epoch": 0.8940995907230559, "grad_norm": 0.4837527573108673, "learning_rate": 5.8432678211419405e-08, "loss": 0.48, "step": 5243 }, { "epoch": 0.8942701227830833, "grad_norm": 0.5325782895088196, "learning_rate": 5.8246786943921946e-08, "loss": 0.4922, "step": 5244 }, { "epoch": 0.8944406548431105, "grad_norm": 0.41280415654182434, "learning_rate": 5.8061182964545777e-08, "loss": 0.4801, "step": 5245 }, { "epoch": 0.8946111869031378, "grad_norm": 0.47790399193763733, "learning_rate": 5.787586632991105e-08, "loss": 0.4817, "step": 5246 }, { "epoch": 0.894781718963165, "grad_norm": 0.4562673568725586, "learning_rate": 5.769083709654933e-08, "loss": 0.491, "step": 5247 }, { "epoch": 0.8949522510231923, "grad_norm": 0.3849164843559265, "learning_rate": 5.750609532090535e-08, "loss": 0.4871, "step": 5248 }, { "epoch": 0.8951227830832197, "grad_norm": 0.46881672739982605, "learning_rate": 5.732164105933591e-08, "loss": 0.4902, "step": 5249 }, { "epoch": 0.895293315143247, "grad_norm": 0.3761109709739685, "learning_rate": 5.713747436810968e-08, "loss": 0.4891, "step": 5250 }, { "epoch": 0.8954638472032742, "grad_norm": 0.4707009792327881, "learning_rate": 5.695359530340816e-08, "loss": 0.4927, "step": 5251 }, { "epoch": 0.8956343792633015, "grad_norm": 0.4072950482368469, "learning_rate": 5.677000392132482e-08, "loss": 0.5016, "step": 5252 }, { "epoch": 0.8958049113233287, "grad_norm": 0.4647950530052185, "learning_rate": 5.658670027786562e-08, "loss": 0.4899, "step": 5253 }, { "epoch": 0.8959754433833561, "grad_norm": 0.4397040903568268, "learning_rate": 5.6403684428948064e-08, "loss": 0.4834, "step": 5254 }, { "epoch": 0.8961459754433834, "grad_norm": 0.5452993512153625, "learning_rate": 5.622095643040305e-08, "loss": 0.4924, "step": 5255 }, { "epoch": 0.8963165075034106, "grad_norm": 0.44161882996559143, "learning_rate": 5.603851633797242e-08, "loss": 0.4938, "step": 5256 }, { "epoch": 0.8964870395634379, "grad_norm": 0.3644738793373108, "learning_rate": 5.5856364207311e-08, "loss": 0.4908, "step": 5257 }, { "epoch": 0.8966575716234653, "grad_norm": 0.47635167837142944, "learning_rate": 5.567450009398568e-08, "loss": 0.492, "step": 5258 }, { "epoch": 0.8968281036834925, "grad_norm": 0.3558651804924011, "learning_rate": 5.549292405347495e-08, "loss": 0.4875, "step": 5259 }, { "epoch": 0.8969986357435198, "grad_norm": 0.4246607720851898, "learning_rate": 5.531163614116986e-08, "loss": 0.5027, "step": 5260 }, { "epoch": 0.897169167803547, "grad_norm": 0.6894022822380066, "learning_rate": 5.513063641237371e-08, "loss": 0.4962, "step": 5261 }, { "epoch": 0.8973396998635743, "grad_norm": 0.39976760745048523, "learning_rate": 5.494992492230167e-08, "loss": 0.4825, "step": 5262 }, { "epoch": 0.8975102319236017, "grad_norm": 0.3872816264629364, "learning_rate": 5.476950172608065e-08, "loss": 0.4938, "step": 5263 }, { "epoch": 0.897680763983629, "grad_norm": 0.46357059478759766, "learning_rate": 5.45893668787505e-08, "loss": 0.5052, "step": 5264 }, { "epoch": 0.8978512960436562, "grad_norm": 0.5831755995750427, "learning_rate": 5.4409520435262155e-08, "loss": 0.4941, "step": 5265 }, { "epoch": 0.8980218281036835, "grad_norm": 0.43296298384666443, "learning_rate": 5.422996245047896e-08, "loss": 0.4876, "step": 5266 }, { "epoch": 0.8981923601637107, "grad_norm": 0.5589202046394348, "learning_rate": 5.405069297917665e-08, "loss": 0.4982, "step": 5267 }, { "epoch": 0.8983628922237381, "grad_norm": 0.4898163378238678, "learning_rate": 5.387171207604214e-08, "loss": 0.4842, "step": 5268 }, { "epoch": 0.8985334242837654, "grad_norm": 0.4723389446735382, "learning_rate": 5.369301979567499e-08, "loss": 0.5015, "step": 5269 }, { "epoch": 0.8987039563437926, "grad_norm": 0.4845086634159088, "learning_rate": 5.351461619258636e-08, "loss": 0.4801, "step": 5270 }, { "epoch": 0.8988744884038199, "grad_norm": 0.6618307828903198, "learning_rate": 5.3336501321199724e-08, "loss": 0.4971, "step": 5271 }, { "epoch": 0.8990450204638472, "grad_norm": 0.5553351640701294, "learning_rate": 5.315867523584984e-08, "loss": 0.5004, "step": 5272 }, { "epoch": 0.8992155525238745, "grad_norm": 0.41072601079940796, "learning_rate": 5.298113799078398e-08, "loss": 0.4875, "step": 5273 }, { "epoch": 0.8993860845839018, "grad_norm": 0.6561560034751892, "learning_rate": 5.280388964016093e-08, "loss": 0.4925, "step": 5274 }, { "epoch": 0.899556616643929, "grad_norm": 0.3833219110965729, "learning_rate": 5.262693023805166e-08, "loss": 0.4921, "step": 5275 }, { "epoch": 0.8997271487039563, "grad_norm": 0.662147045135498, "learning_rate": 5.245025983843888e-08, "loss": 0.4981, "step": 5276 }, { "epoch": 0.8998976807639836, "grad_norm": 0.45154041051864624, "learning_rate": 5.2273878495216704e-08, "loss": 0.4806, "step": 5277 }, { "epoch": 0.900068212824011, "grad_norm": 0.592162549495697, "learning_rate": 5.2097786262191755e-08, "loss": 0.4834, "step": 5278 }, { "epoch": 0.9002387448840382, "grad_norm": 0.5438944101333618, "learning_rate": 5.1921983193082065e-08, "loss": 0.4987, "step": 5279 }, { "epoch": 0.9004092769440655, "grad_norm": 0.43703946471214294, "learning_rate": 5.174646934151751e-08, "loss": 0.4925, "step": 5280 }, { "epoch": 0.9005798090040927, "grad_norm": 0.5259987711906433, "learning_rate": 5.157124476103992e-08, "loss": 0.4815, "step": 5281 }, { "epoch": 0.9007503410641201, "grad_norm": 0.4846348166465759, "learning_rate": 5.1396309505102444e-08, "loss": 0.4932, "step": 5282 }, { "epoch": 0.9009208731241474, "grad_norm": 0.5075209140777588, "learning_rate": 5.122166362707049e-08, "loss": 0.4941, "step": 5283 }, { "epoch": 0.9010914051841746, "grad_norm": 0.4234059154987335, "learning_rate": 5.104730718022078e-08, "loss": 0.4882, "step": 5284 }, { "epoch": 0.9012619372442019, "grad_norm": 0.49569621682167053, "learning_rate": 5.08732402177421e-08, "loss": 0.4891, "step": 5285 }, { "epoch": 0.9014324693042292, "grad_norm": 0.43381068110466003, "learning_rate": 5.069946279273464e-08, "loss": 0.4928, "step": 5286 }, { "epoch": 0.9016030013642565, "grad_norm": 0.4606774151325226, "learning_rate": 5.0525974958210326e-08, "loss": 0.4774, "step": 5287 }, { "epoch": 0.9017735334242838, "grad_norm": 0.5518508553504944, "learning_rate": 5.035277676709294e-08, "loss": 0.4845, "step": 5288 }, { "epoch": 0.901944065484311, "grad_norm": 0.5774950981140137, "learning_rate": 5.017986827221733e-08, "loss": 0.483, "step": 5289 }, { "epoch": 0.9021145975443383, "grad_norm": 0.5312044024467468, "learning_rate": 5.000724952633096e-08, "loss": 0.4885, "step": 5290 }, { "epoch": 0.9022851296043656, "grad_norm": 0.5509528517723083, "learning_rate": 4.9834920582091944e-08, "loss": 0.4931, "step": 5291 }, { "epoch": 0.902455661664393, "grad_norm": 0.6059635877609253, "learning_rate": 4.9662881492070554e-08, "loss": 0.4882, "step": 5292 }, { "epoch": 0.9026261937244202, "grad_norm": 0.4796038568019867, "learning_rate": 4.949113230874847e-08, "loss": 0.4827, "step": 5293 }, { "epoch": 0.9027967257844475, "grad_norm": 0.5281833410263062, "learning_rate": 4.9319673084518905e-08, "loss": 0.4876, "step": 5294 }, { "epoch": 0.9029672578444747, "grad_norm": 0.4335166811943054, "learning_rate": 4.914850387168657e-08, "loss": 0.4947, "step": 5295 }, { "epoch": 0.903137789904502, "grad_norm": 0.5843914151191711, "learning_rate": 4.8977624722467815e-08, "loss": 0.4753, "step": 5296 }, { "epoch": 0.9033083219645294, "grad_norm": 0.525813102722168, "learning_rate": 4.8807035688990485e-08, "loss": 0.4935, "step": 5297 }, { "epoch": 0.9034788540245566, "grad_norm": 0.45676755905151367, "learning_rate": 4.863673682329373e-08, "loss": 0.4844, "step": 5298 }, { "epoch": 0.9036493860845839, "grad_norm": 0.5244921445846558, "learning_rate": 4.8466728177328894e-08, "loss": 0.481, "step": 5299 }, { "epoch": 0.9038199181446112, "grad_norm": 0.5705152153968811, "learning_rate": 4.82970098029576e-08, "loss": 0.4914, "step": 5300 }, { "epoch": 0.9039904502046384, "grad_norm": 0.5858019590377808, "learning_rate": 4.8127581751953983e-08, "loss": 0.4892, "step": 5301 }, { "epoch": 0.9041609822646658, "grad_norm": 0.7156437039375305, "learning_rate": 4.7958444076002924e-08, "loss": 0.4949, "step": 5302 }, { "epoch": 0.904331514324693, "grad_norm": 0.5970281362533569, "learning_rate": 4.778959682670148e-08, "loss": 0.4838, "step": 5303 }, { "epoch": 0.9045020463847203, "grad_norm": 0.6081550717353821, "learning_rate": 4.7621040055557016e-08, "loss": 0.4877, "step": 5304 }, { "epoch": 0.9046725784447476, "grad_norm": 0.504256546497345, "learning_rate": 4.745277381398939e-08, "loss": 0.4919, "step": 5305 }, { "epoch": 0.904843110504775, "grad_norm": 0.5745243430137634, "learning_rate": 4.7284798153329095e-08, "loss": 0.4852, "step": 5306 }, { "epoch": 0.9050136425648022, "grad_norm": 0.44068363308906555, "learning_rate": 4.711711312481815e-08, "loss": 0.4892, "step": 5307 }, { "epoch": 0.9051841746248295, "grad_norm": 0.6510539054870605, "learning_rate": 4.694971877961041e-08, "loss": 0.4994, "step": 5308 }, { "epoch": 0.9053547066848567, "grad_norm": 0.43108850717544556, "learning_rate": 4.6782615168770146e-08, "loss": 0.4963, "step": 5309 }, { "epoch": 0.905525238744884, "grad_norm": 0.4596211910247803, "learning_rate": 4.661580234327379e-08, "loss": 0.4858, "step": 5310 }, { "epoch": 0.9056957708049114, "grad_norm": 0.4737480878829956, "learning_rate": 4.6449280354008643e-08, "loss": 0.4991, "step": 5311 }, { "epoch": 0.9058663028649386, "grad_norm": 0.43607211112976074, "learning_rate": 4.628304925177319e-08, "loss": 0.4853, "step": 5312 }, { "epoch": 0.9060368349249659, "grad_norm": 0.4824850559234619, "learning_rate": 4.61171090872774e-08, "loss": 0.4836, "step": 5313 }, { "epoch": 0.9062073669849932, "grad_norm": 0.5185177326202393, "learning_rate": 4.595145991114247e-08, "loss": 0.48, "step": 5314 }, { "epoch": 0.9063778990450204, "grad_norm": 0.49160775542259216, "learning_rate": 4.5786101773900863e-08, "loss": 0.4958, "step": 5315 }, { "epoch": 0.9065484311050478, "grad_norm": 0.5173426866531372, "learning_rate": 4.5621034725995995e-08, "loss": 0.504, "step": 5316 }, { "epoch": 0.906718963165075, "grad_norm": 0.44077467918395996, "learning_rate": 4.545625881778293e-08, "loss": 0.4898, "step": 5317 }, { "epoch": 0.9068894952251023, "grad_norm": 0.4675387442111969, "learning_rate": 4.529177409952746e-08, "loss": 0.4906, "step": 5318 }, { "epoch": 0.9070600272851296, "grad_norm": 0.6204301118850708, "learning_rate": 4.512758062140688e-08, "loss": 0.4969, "step": 5319 }, { "epoch": 0.9072305593451568, "grad_norm": 0.37933647632598877, "learning_rate": 4.4963678433509465e-08, "loss": 0.4906, "step": 5320 }, { "epoch": 0.9074010914051842, "grad_norm": 0.466796338558197, "learning_rate": 4.480006758583455e-08, "loss": 0.4983, "step": 5321 }, { "epoch": 0.9075716234652115, "grad_norm": 0.38458162546157837, "learning_rate": 4.463674812829278e-08, "loss": 0.5064, "step": 5322 }, { "epoch": 0.9077421555252387, "grad_norm": 0.4396178424358368, "learning_rate": 4.4473720110705944e-08, "loss": 0.4964, "step": 5323 }, { "epoch": 0.907912687585266, "grad_norm": 0.386715829372406, "learning_rate": 4.4310983582806945e-08, "loss": 0.4997, "step": 5324 }, { "epoch": 0.9080832196452933, "grad_norm": 0.39901241660118103, "learning_rate": 4.414853859423918e-08, "loss": 0.4905, "step": 5325 }, { "epoch": 0.9082537517053206, "grad_norm": 0.45329898595809937, "learning_rate": 4.3986385194558116e-08, "loss": 0.491, "step": 5326 }, { "epoch": 0.9084242837653479, "grad_norm": 0.44330501556396484, "learning_rate": 4.382452343322952e-08, "loss": 0.4949, "step": 5327 }, { "epoch": 0.9085948158253752, "grad_norm": 0.3553907573223114, "learning_rate": 4.366295335963011e-08, "loss": 0.5047, "step": 5328 }, { "epoch": 0.9087653478854024, "grad_norm": 0.5292105078697205, "learning_rate": 4.3501675023048475e-08, "loss": 0.4955, "step": 5329 }, { "epoch": 0.9089358799454298, "grad_norm": 0.4915977120399475, "learning_rate": 4.3340688472683246e-08, "loss": 0.51, "step": 5330 }, { "epoch": 0.909106412005457, "grad_norm": 0.3689645528793335, "learning_rate": 4.3179993757644595e-08, "loss": 0.4959, "step": 5331 }, { "epoch": 0.9092769440654843, "grad_norm": 0.4896616041660309, "learning_rate": 4.3019590926953403e-08, "loss": 0.5031, "step": 5332 }, { "epoch": 0.9094474761255116, "grad_norm": 0.44271454215049744, "learning_rate": 4.2859480029541996e-08, "loss": 0.4881, "step": 5333 }, { "epoch": 0.9096180081855388, "grad_norm": 0.44713589549064636, "learning_rate": 4.269966111425273e-08, "loss": 0.496, "step": 5334 }, { "epoch": 0.9097885402455662, "grad_norm": 0.4862429201602936, "learning_rate": 4.2540134229839716e-08, "loss": 0.4892, "step": 5335 }, { "epoch": 0.9099590723055935, "grad_norm": 0.5278210043907166, "learning_rate": 4.2380899424967817e-08, "loss": 0.4856, "step": 5336 }, { "epoch": 0.9101296043656207, "grad_norm": 0.475392609834671, "learning_rate": 4.222195674821239e-08, "loss": 0.4995, "step": 5337 }, { "epoch": 0.910300136425648, "grad_norm": 0.5768274664878845, "learning_rate": 4.2063306248060324e-08, "loss": 0.4927, "step": 5338 }, { "epoch": 0.9104706684856753, "grad_norm": 0.6023024916648865, "learning_rate": 4.190494797290868e-08, "loss": 0.4996, "step": 5339 }, { "epoch": 0.9106412005457026, "grad_norm": 0.47170257568359375, "learning_rate": 4.17468819710658e-08, "loss": 0.4957, "step": 5340 }, { "epoch": 0.9108117326057299, "grad_norm": 0.5494353771209717, "learning_rate": 4.158910829075092e-08, "loss": 0.4954, "step": 5341 }, { "epoch": 0.9109822646657572, "grad_norm": 0.44590601325035095, "learning_rate": 4.1431626980093846e-08, "loss": 0.485, "step": 5342 }, { "epoch": 0.9111527967257844, "grad_norm": 0.5497511625289917, "learning_rate": 4.1274438087135275e-08, "loss": 0.4951, "step": 5343 }, { "epoch": 0.9113233287858117, "grad_norm": 0.44941020011901855, "learning_rate": 4.1117541659826735e-08, "loss": 0.4925, "step": 5344 }, { "epoch": 0.911493860845839, "grad_norm": 0.4923752248287201, "learning_rate": 4.096093774603061e-08, "loss": 0.4912, "step": 5345 }, { "epoch": 0.9116643929058663, "grad_norm": 0.5564268231391907, "learning_rate": 4.08046263935199e-08, "loss": 0.4865, "step": 5346 }, { "epoch": 0.9118349249658936, "grad_norm": 0.5061978101730347, "learning_rate": 4.064860764997858e-08, "loss": 0.4947, "step": 5347 }, { "epoch": 0.9120054570259208, "grad_norm": 0.5099136829376221, "learning_rate": 4.0492881563001024e-08, "loss": 0.4826, "step": 5348 }, { "epoch": 0.9121759890859482, "grad_norm": 0.4287917912006378, "learning_rate": 4.033744818009244e-08, "loss": 0.4931, "step": 5349 }, { "epoch": 0.9123465211459755, "grad_norm": 0.590749979019165, "learning_rate": 4.0182307548669255e-08, "loss": 0.4881, "step": 5350 }, { "epoch": 0.9125170532060027, "grad_norm": 0.5917319059371948, "learning_rate": 4.0027459716057595e-08, "loss": 0.4967, "step": 5351 }, { "epoch": 0.91268758526603, "grad_norm": 0.6138724684715271, "learning_rate": 3.9872904729495127e-08, "loss": 0.5035, "step": 5352 }, { "epoch": 0.9128581173260573, "grad_norm": 0.4331624507904053, "learning_rate": 3.97186426361299e-08, "loss": 0.4889, "step": 5353 }, { "epoch": 0.9130286493860846, "grad_norm": 0.45480212569236755, "learning_rate": 3.9564673483020626e-08, "loss": 0.4847, "step": 5354 }, { "epoch": 0.9131991814461119, "grad_norm": 0.4731946587562561, "learning_rate": 3.9410997317136385e-08, "loss": 0.4912, "step": 5355 }, { "epoch": 0.9133697135061392, "grad_norm": 0.4428921341896057, "learning_rate": 3.9257614185357566e-08, "loss": 0.497, "step": 5356 }, { "epoch": 0.9135402455661664, "grad_norm": 0.45451268553733826, "learning_rate": 3.910452413447419e-08, "loss": 0.4849, "step": 5357 }, { "epoch": 0.9137107776261937, "grad_norm": 0.37964358925819397, "learning_rate": 3.8951727211187786e-08, "loss": 0.4889, "step": 5358 }, { "epoch": 0.913881309686221, "grad_norm": 0.46263939142227173, "learning_rate": 3.879922346210996e-08, "loss": 0.4952, "step": 5359 }, { "epoch": 0.9140518417462483, "grad_norm": 0.40812090039253235, "learning_rate": 3.8647012933762946e-08, "loss": 0.4991, "step": 5360 }, { "epoch": 0.9142223738062756, "grad_norm": 0.4596121311187744, "learning_rate": 3.849509567257959e-08, "loss": 0.484, "step": 5361 }, { "epoch": 0.9143929058663028, "grad_norm": 0.4200727939605713, "learning_rate": 3.834347172490327e-08, "loss": 0.4897, "step": 5362 }, { "epoch": 0.9145634379263301, "grad_norm": 0.46064093708992004, "learning_rate": 3.8192141136987996e-08, "loss": 0.4864, "step": 5363 }, { "epoch": 0.9147339699863575, "grad_norm": 0.402382493019104, "learning_rate": 3.804110395499805e-08, "loss": 0.4951, "step": 5364 }, { "epoch": 0.9149045020463847, "grad_norm": 0.4919821619987488, "learning_rate": 3.7890360225008584e-08, "loss": 0.4926, "step": 5365 }, { "epoch": 0.915075034106412, "grad_norm": 0.4360262453556061, "learning_rate": 3.773990999300459e-08, "loss": 0.4935, "step": 5366 }, { "epoch": 0.9152455661664393, "grad_norm": 0.44577082991600037, "learning_rate": 3.758975330488213e-08, "loss": 0.4864, "step": 5367 }, { "epoch": 0.9154160982264665, "grad_norm": 0.4504692852497101, "learning_rate": 3.743989020644767e-08, "loss": 0.493, "step": 5368 }, { "epoch": 0.9155866302864939, "grad_norm": 0.4033011496067047, "learning_rate": 3.729032074341743e-08, "loss": 0.4782, "step": 5369 }, { "epoch": 0.9157571623465212, "grad_norm": 0.5430464148521423, "learning_rate": 3.7141044961419243e-08, "loss": 0.4903, "step": 5370 }, { "epoch": 0.9159276944065484, "grad_norm": 0.5598674416542053, "learning_rate": 3.6992062905990223e-08, "loss": 0.4853, "step": 5371 }, { "epoch": 0.9160982264665757, "grad_norm": 0.37662777304649353, "learning_rate": 3.684337462257848e-08, "loss": 0.4944, "step": 5372 }, { "epoch": 0.916268758526603, "grad_norm": 0.40339040756225586, "learning_rate": 3.6694980156542494e-08, "loss": 0.4894, "step": 5373 }, { "epoch": 0.9164392905866303, "grad_norm": 0.35441353917121887, "learning_rate": 3.654687955315073e-08, "loss": 0.4928, "step": 5374 }, { "epoch": 0.9166098226466576, "grad_norm": 0.3989948630332947, "learning_rate": 3.639907285758249e-08, "loss": 0.4968, "step": 5375 }, { "epoch": 0.9167803547066848, "grad_norm": 0.4780336022377014, "learning_rate": 3.6251560114927036e-08, "loss": 0.478, "step": 5376 }, { "epoch": 0.9169508867667121, "grad_norm": 0.5224922895431519, "learning_rate": 3.6104341370184265e-08, "loss": 0.4922, "step": 5377 }, { "epoch": 0.9171214188267395, "grad_norm": 0.7130451798439026, "learning_rate": 3.5957416668264026e-08, "loss": 0.5005, "step": 5378 }, { "epoch": 0.9172919508867667, "grad_norm": 0.5847972631454468, "learning_rate": 3.581078605398703e-08, "loss": 0.4771, "step": 5379 }, { "epoch": 0.917462482946794, "grad_norm": 0.4442712366580963, "learning_rate": 3.566444957208359e-08, "loss": 0.4856, "step": 5380 }, { "epoch": 0.9176330150068213, "grad_norm": 0.4108210802078247, "learning_rate": 3.5518407267194804e-08, "loss": 0.4919, "step": 5381 }, { "epoch": 0.9178035470668485, "grad_norm": 0.49916204810142517, "learning_rate": 3.5372659183871896e-08, "loss": 0.485, "step": 5382 }, { "epoch": 0.9179740791268759, "grad_norm": 0.44206488132476807, "learning_rate": 3.522720536657609e-08, "loss": 0.4894, "step": 5383 }, { "epoch": 0.9181446111869032, "grad_norm": 0.43081364035606384, "learning_rate": 3.5082045859679117e-08, "loss": 0.4866, "step": 5384 }, { "epoch": 0.9183151432469304, "grad_norm": 0.5318077802658081, "learning_rate": 3.4937180707462996e-08, "loss": 0.484, "step": 5385 }, { "epoch": 0.9184856753069577, "grad_norm": 0.40832993388175964, "learning_rate": 3.479260995411982e-08, "loss": 0.4851, "step": 5386 }, { "epoch": 0.918656207366985, "grad_norm": 0.4995136260986328, "learning_rate": 3.4648333643751536e-08, "loss": 0.4958, "step": 5387 }, { "epoch": 0.9188267394270123, "grad_norm": 0.39905282855033875, "learning_rate": 3.450435182037104e-08, "loss": 0.4831, "step": 5388 }, { "epoch": 0.9189972714870396, "grad_norm": 0.46222537755966187, "learning_rate": 3.436066452790077e-08, "loss": 0.493, "step": 5389 }, { "epoch": 0.9191678035470668, "grad_norm": 0.4872012138366699, "learning_rate": 3.421727181017321e-08, "loss": 0.4909, "step": 5390 }, { "epoch": 0.9193383356070941, "grad_norm": 0.4322300851345062, "learning_rate": 3.40741737109318e-08, "loss": 0.4999, "step": 5391 }, { "epoch": 0.9195088676671214, "grad_norm": 0.4919182062149048, "learning_rate": 3.393137027382909e-08, "loss": 0.4779, "step": 5392 }, { "epoch": 0.9196793997271487, "grad_norm": 0.4974134862422943, "learning_rate": 3.3788861542428555e-08, "loss": 0.5065, "step": 5393 }, { "epoch": 0.919849931787176, "grad_norm": 0.48541364073753357, "learning_rate": 3.3646647560203305e-08, "loss": 0.4852, "step": 5394 }, { "epoch": 0.9200204638472033, "grad_norm": 0.5268510580062866, "learning_rate": 3.350472837053676e-08, "loss": 0.4939, "step": 5395 }, { "epoch": 0.9201909959072305, "grad_norm": 0.36918506026268005, "learning_rate": 3.336310401672194e-08, "loss": 0.474, "step": 5396 }, { "epoch": 0.9203615279672579, "grad_norm": 0.5224368572235107, "learning_rate": 3.322177454196285e-08, "loss": 0.4965, "step": 5397 }, { "epoch": 0.9205320600272852, "grad_norm": 0.35501906275749207, "learning_rate": 3.308073998937278e-08, "loss": 0.4875, "step": 5398 }, { "epoch": 0.9207025920873124, "grad_norm": 0.5201204419136047, "learning_rate": 3.294000040197509e-08, "loss": 0.5005, "step": 5399 }, { "epoch": 0.9208731241473397, "grad_norm": 0.4988919496536255, "learning_rate": 3.279955582270356e-08, "loss": 0.4832, "step": 5400 }, { "epoch": 0.921043656207367, "grad_norm": 0.44986864924430847, "learning_rate": 3.2659406294401566e-08, "loss": 0.4951, "step": 5401 }, { "epoch": 0.9212141882673943, "grad_norm": 0.4283299148082733, "learning_rate": 3.251955185982281e-08, "loss": 0.4831, "step": 5402 }, { "epoch": 0.9213847203274216, "grad_norm": 0.4074150621891022, "learning_rate": 3.2379992561630715e-08, "loss": 0.4884, "step": 5403 }, { "epoch": 0.9215552523874488, "grad_norm": 0.43735846877098083, "learning_rate": 3.2240728442399005e-08, "loss": 0.4847, "step": 5404 }, { "epoch": 0.9217257844474761, "grad_norm": 0.5105264186859131, "learning_rate": 3.2101759544610816e-08, "loss": 0.4921, "step": 5405 }, { "epoch": 0.9218963165075034, "grad_norm": 0.44802650809288025, "learning_rate": 3.1963085910659664e-08, "loss": 0.4778, "step": 5406 }, { "epoch": 0.9220668485675307, "grad_norm": 0.4990009367465973, "learning_rate": 3.182470758284895e-08, "loss": 0.4913, "step": 5407 }, { "epoch": 0.922237380627558, "grad_norm": 0.46665534377098083, "learning_rate": 3.168662460339167e-08, "loss": 0.4913, "step": 5408 }, { "epoch": 0.9224079126875853, "grad_norm": 0.403708279132843, "learning_rate": 3.1548837014411363e-08, "loss": 0.5045, "step": 5409 }, { "epoch": 0.9225784447476125, "grad_norm": 0.47914642095565796, "learning_rate": 3.141134485794073e-08, "loss": 0.5021, "step": 5410 }, { "epoch": 0.9227489768076398, "grad_norm": 0.6098238229751587, "learning_rate": 3.127414817592278e-08, "loss": 0.4976, "step": 5411 }, { "epoch": 0.9229195088676672, "grad_norm": 0.4115535318851471, "learning_rate": 3.113724701021026e-08, "loss": 0.4945, "step": 5412 }, { "epoch": 0.9230900409276944, "grad_norm": 0.3769288659095764, "learning_rate": 3.100064140256587e-08, "loss": 0.481, "step": 5413 }, { "epoch": 0.9232605729877217, "grad_norm": 0.33954548835754395, "learning_rate": 3.0864331394661834e-08, "loss": 0.49, "step": 5414 }, { "epoch": 0.923431105047749, "grad_norm": 0.48619434237480164, "learning_rate": 3.072831702808066e-08, "loss": 0.49, "step": 5415 }, { "epoch": 0.9236016371077762, "grad_norm": 0.5546624660491943, "learning_rate": 3.05925983443145e-08, "loss": 0.4811, "step": 5416 }, { "epoch": 0.9237721691678036, "grad_norm": 0.3814515769481659, "learning_rate": 3.0457175384764894e-08, "loss": 0.4973, "step": 5417 }, { "epoch": 0.9239427012278308, "grad_norm": 0.35709336400032043, "learning_rate": 3.0322048190744025e-08, "loss": 0.4823, "step": 5418 }, { "epoch": 0.9241132332878581, "grad_norm": 0.3805638551712036, "learning_rate": 3.018721680347281e-08, "loss": 0.4933, "step": 5419 }, { "epoch": 0.9242837653478854, "grad_norm": 0.3947684168815613, "learning_rate": 3.0052681264082895e-08, "loss": 0.4966, "step": 5420 }, { "epoch": 0.9244542974079127, "grad_norm": 0.44564321637153625, "learning_rate": 2.991844161361513e-08, "loss": 0.4768, "step": 5421 }, { "epoch": 0.92462482946794, "grad_norm": 0.3913787305355072, "learning_rate": 2.978449789301996e-08, "loss": 0.4991, "step": 5422 }, { "epoch": 0.9247953615279673, "grad_norm": 0.38056641817092896, "learning_rate": 2.9650850143158164e-08, "loss": 0.4913, "step": 5423 }, { "epoch": 0.9249658935879945, "grad_norm": 0.32223859429359436, "learning_rate": 2.9517498404799675e-08, "loss": 0.4829, "step": 5424 }, { "epoch": 0.9251364256480218, "grad_norm": 0.4325050711631775, "learning_rate": 2.93844427186244e-08, "loss": 0.4866, "step": 5425 }, { "epoch": 0.9253069577080492, "grad_norm": 0.3830873966217041, "learning_rate": 2.9251683125221865e-08, "loss": 0.4943, "step": 5426 }, { "epoch": 0.9254774897680764, "grad_norm": 0.35291770100593567, "learning_rate": 2.911921966509135e-08, "loss": 0.4878, "step": 5427 }, { "epoch": 0.9256480218281037, "grad_norm": 0.3755296766757965, "learning_rate": 2.8987052378641525e-08, "loss": 0.4909, "step": 5428 }, { "epoch": 0.925818553888131, "grad_norm": 0.4500507116317749, "learning_rate": 2.8855181306191142e-08, "loss": 0.487, "step": 5429 }, { "epoch": 0.9259890859481582, "grad_norm": 0.47844499349594116, "learning_rate": 2.872360648796824e-08, "loss": 0.5013, "step": 5430 }, { "epoch": 0.9261596180081856, "grad_norm": 0.4715981185436249, "learning_rate": 2.859232796411049e-08, "loss": 0.4878, "step": 5431 }, { "epoch": 0.9263301500682128, "grad_norm": 0.3902131915092468, "learning_rate": 2.846134577466542e-08, "loss": 0.4883, "step": 5432 }, { "epoch": 0.9265006821282401, "grad_norm": 0.5492172241210938, "learning_rate": 2.833065995958995e-08, "loss": 0.4916, "step": 5433 }, { "epoch": 0.9266712141882674, "grad_norm": 0.418430894613266, "learning_rate": 2.8200270558750744e-08, "loss": 0.4951, "step": 5434 }, { "epoch": 0.9268417462482946, "grad_norm": 0.46424612402915955, "learning_rate": 2.807017761192399e-08, "loss": 0.4952, "step": 5435 }, { "epoch": 0.927012278308322, "grad_norm": 0.4414074122905731, "learning_rate": 2.7940381158795384e-08, "loss": 0.4913, "step": 5436 }, { "epoch": 0.9271828103683493, "grad_norm": 0.4416884183883667, "learning_rate": 2.7810881238960033e-08, "loss": 0.4761, "step": 5437 }, { "epoch": 0.9273533424283765, "grad_norm": 0.5253694653511047, "learning_rate": 2.768167789192289e-08, "loss": 0.5034, "step": 5438 }, { "epoch": 0.9275238744884038, "grad_norm": 0.5157954692840576, "learning_rate": 2.7552771157098425e-08, "loss": 0.5028, "step": 5439 }, { "epoch": 0.927694406548431, "grad_norm": 0.5215798616409302, "learning_rate": 2.7424161073810186e-08, "loss": 0.4945, "step": 5440 }, { "epoch": 0.9278649386084584, "grad_norm": 0.44901686906814575, "learning_rate": 2.7295847681291902e-08, "loss": 0.4958, "step": 5441 }, { "epoch": 0.9280354706684857, "grad_norm": 0.5716381072998047, "learning_rate": 2.7167831018686144e-08, "loss": 0.4907, "step": 5442 }, { "epoch": 0.928206002728513, "grad_norm": 0.54997318983078, "learning_rate": 2.7040111125045345e-08, "loss": 0.4853, "step": 5443 }, { "epoch": 0.9283765347885402, "grad_norm": 0.5040403008460999, "learning_rate": 2.691268803933145e-08, "loss": 0.5013, "step": 5444 }, { "epoch": 0.9285470668485676, "grad_norm": 0.648098886013031, "learning_rate": 2.6785561800415475e-08, "loss": 0.5016, "step": 5445 }, { "epoch": 0.9287175989085948, "grad_norm": 0.4393656253814697, "learning_rate": 2.6658732447078285e-08, "loss": 0.5034, "step": 5446 }, { "epoch": 0.9288881309686221, "grad_norm": 0.5123600959777832, "learning_rate": 2.6532200018010048e-08, "loss": 0.4998, "step": 5447 }, { "epoch": 0.9290586630286494, "grad_norm": 0.5143542289733887, "learning_rate": 2.6405964551810327e-08, "loss": 0.5152, "step": 5448 }, { "epoch": 0.9292291950886766, "grad_norm": 0.5461297631263733, "learning_rate": 2.628002608698799e-08, "loss": 0.4902, "step": 5449 }, { "epoch": 0.929399727148704, "grad_norm": 0.588545560836792, "learning_rate": 2.615438466196141e-08, "loss": 0.4989, "step": 5450 }, { "epoch": 0.9295702592087313, "grad_norm": 0.4249182641506195, "learning_rate": 2.6029040315058488e-08, "loss": 0.4883, "step": 5451 }, { "epoch": 0.9297407912687585, "grad_norm": 0.5955637693405151, "learning_rate": 2.590399308451608e-08, "loss": 0.4951, "step": 5452 }, { "epoch": 0.9299113233287858, "grad_norm": 0.4577358365058899, "learning_rate": 2.5779243008481116e-08, "loss": 0.4933, "step": 5453 }, { "epoch": 0.930081855388813, "grad_norm": 0.435455322265625, "learning_rate": 2.5654790125009155e-08, "loss": 0.5086, "step": 5454 }, { "epoch": 0.9302523874488404, "grad_norm": 0.5645721554756165, "learning_rate": 2.5530634472065277e-08, "loss": 0.4977, "step": 5455 }, { "epoch": 0.9304229195088677, "grad_norm": 0.39590975642204285, "learning_rate": 2.540677608752429e-08, "loss": 0.4892, "step": 5456 }, { "epoch": 0.930593451568895, "grad_norm": 0.48490989208221436, "learning_rate": 2.528321500916986e-08, "loss": 0.4997, "step": 5457 }, { "epoch": 0.9307639836289222, "grad_norm": 0.4811006188392639, "learning_rate": 2.515995127469495e-08, "loss": 0.4881, "step": 5458 }, { "epoch": 0.9309345156889495, "grad_norm": 0.43111667037010193, "learning_rate": 2.503698492170237e-08, "loss": 0.5023, "step": 5459 }, { "epoch": 0.9311050477489768, "grad_norm": 0.535325288772583, "learning_rate": 2.4914315987703664e-08, "loss": 0.4918, "step": 5460 }, { "epoch": 0.9312755798090041, "grad_norm": 0.376253604888916, "learning_rate": 2.479194451011957e-08, "loss": 0.4995, "step": 5461 }, { "epoch": 0.9314461118690314, "grad_norm": 0.3538033962249756, "learning_rate": 2.4669870526280784e-08, "loss": 0.4944, "step": 5462 }, { "epoch": 0.9316166439290586, "grad_norm": 0.41819334030151367, "learning_rate": 2.45480940734264e-08, "loss": 0.508, "step": 5463 }, { "epoch": 0.931787175989086, "grad_norm": 0.4020731449127197, "learning_rate": 2.4426615188705263e-08, "loss": 0.5062, "step": 5464 }, { "epoch": 0.9319577080491133, "grad_norm": 0.5282530784606934, "learning_rate": 2.4305433909175394e-08, "loss": 0.501, "step": 5465 }, { "epoch": 0.9321282401091405, "grad_norm": 0.5189552307128906, "learning_rate": 2.418455027180389e-08, "loss": 0.4845, "step": 5466 }, { "epoch": 0.9322987721691678, "grad_norm": 0.40900495648384094, "learning_rate": 2.406396431346714e-08, "loss": 0.4929, "step": 5467 }, { "epoch": 0.932469304229195, "grad_norm": 0.43187782168388367, "learning_rate": 2.3943676070950608e-08, "loss": 0.4953, "step": 5468 }, { "epoch": 0.9326398362892224, "grad_norm": 0.43702763319015503, "learning_rate": 2.3823685580949277e-08, "loss": 0.5043, "step": 5469 }, { "epoch": 0.9328103683492497, "grad_norm": 0.5212153792381287, "learning_rate": 2.3703992880066642e-08, "loss": 0.4942, "step": 5470 }, { "epoch": 0.932980900409277, "grad_norm": 0.3882196247577667, "learning_rate": 2.3584598004816163e-08, "loss": 0.4867, "step": 5471 }, { "epoch": 0.9331514324693042, "grad_norm": 0.48326700925827026, "learning_rate": 2.346550099161982e-08, "loss": 0.4998, "step": 5472 }, { "epoch": 0.9333219645293315, "grad_norm": 0.46829846501350403, "learning_rate": 2.334670187680899e-08, "loss": 0.4851, "step": 5473 }, { "epoch": 0.9334924965893588, "grad_norm": 0.4301968216896057, "learning_rate": 2.3228200696624128e-08, "loss": 0.4866, "step": 5474 }, { "epoch": 0.9336630286493861, "grad_norm": 0.41791969537734985, "learning_rate": 2.3109997487214988e-08, "loss": 0.4993, "step": 5475 }, { "epoch": 0.9338335607094134, "grad_norm": 0.3805132210254669, "learning_rate": 2.2992092284639947e-08, "loss": 0.5012, "step": 5476 }, { "epoch": 0.9340040927694406, "grad_norm": 0.41024842858314514, "learning_rate": 2.287448512486701e-08, "loss": 0.4874, "step": 5477 }, { "epoch": 0.9341746248294679, "grad_norm": 0.49618643522262573, "learning_rate": 2.2757176043772922e-08, "loss": 0.5013, "step": 5478 }, { "epoch": 0.9343451568894953, "grad_norm": 0.5039137601852417, "learning_rate": 2.2640165077143618e-08, "loss": 0.4814, "step": 5479 }, { "epoch": 0.9345156889495225, "grad_norm": 0.3791121542453766, "learning_rate": 2.252345226067421e-08, "loss": 0.5065, "step": 5480 }, { "epoch": 0.9346862210095498, "grad_norm": 0.45290419459342957, "learning_rate": 2.240703762996843e-08, "loss": 0.4925, "step": 5481 }, { "epoch": 0.934856753069577, "grad_norm": 0.6099880337715149, "learning_rate": 2.2290921220539664e-08, "loss": 0.5044, "step": 5482 }, { "epoch": 0.9350272851296043, "grad_norm": 0.48982471227645874, "learning_rate": 2.217510306780979e-08, "loss": 0.4776, "step": 5483 }, { "epoch": 0.9351978171896317, "grad_norm": 0.37271150946617126, "learning_rate": 2.2059583207109994e-08, "loss": 0.5078, "step": 5484 }, { "epoch": 0.935368349249659, "grad_norm": 0.5908680558204651, "learning_rate": 2.1944361673680306e-08, "loss": 0.4831, "step": 5485 }, { "epoch": 0.9355388813096862, "grad_norm": 0.5035811066627502, "learning_rate": 2.1829438502669943e-08, "loss": 0.5011, "step": 5486 }, { "epoch": 0.9357094133697135, "grad_norm": 0.4935630261898041, "learning_rate": 2.1714813729136975e-08, "loss": 0.4832, "step": 5487 }, { "epoch": 0.9358799454297408, "grad_norm": 0.5124121904373169, "learning_rate": 2.160048738804821e-08, "loss": 0.4844, "step": 5488 }, { "epoch": 0.9360504774897681, "grad_norm": 0.4626336991786957, "learning_rate": 2.148645951428008e-08, "loss": 0.4827, "step": 5489 }, { "epoch": 0.9362210095497954, "grad_norm": 0.5183104872703552, "learning_rate": 2.1372730142617206e-08, "loss": 0.4942, "step": 5490 }, { "epoch": 0.9363915416098226, "grad_norm": 0.39121490716934204, "learning_rate": 2.1259299307753723e-08, "loss": 0.4876, "step": 5491 }, { "epoch": 0.9365620736698499, "grad_norm": 0.5651541948318481, "learning_rate": 2.11461670442924e-08, "loss": 0.4866, "step": 5492 }, { "epoch": 0.9367326057298773, "grad_norm": 0.46623361110687256, "learning_rate": 2.103333338674485e-08, "loss": 0.488, "step": 5493 }, { "epoch": 0.9369031377899045, "grad_norm": 0.4703865945339203, "learning_rate": 2.092079836953176e-08, "loss": 0.4953, "step": 5494 }, { "epoch": 0.9370736698499318, "grad_norm": 0.7352892160415649, "learning_rate": 2.0808562026982894e-08, "loss": 0.4873, "step": 5495 }, { "epoch": 0.937244201909959, "grad_norm": 0.4713911712169647, "learning_rate": 2.0696624393336643e-08, "loss": 0.4798, "step": 5496 }, { "epoch": 0.9374147339699863, "grad_norm": 0.5269001722335815, "learning_rate": 2.0584985502740018e-08, "loss": 0.483, "step": 5497 }, { "epoch": 0.9375852660300137, "grad_norm": 0.41727718710899353, "learning_rate": 2.0473645389249678e-08, "loss": 0.5097, "step": 5498 }, { "epoch": 0.937755798090041, "grad_norm": 0.5754560232162476, "learning_rate": 2.036260408683033e-08, "loss": 0.4885, "step": 5499 }, { "epoch": 0.9379263301500682, "grad_norm": 0.39966753125190735, "learning_rate": 2.0251861629356108e-08, "loss": 0.493, "step": 5500 }, { "epoch": 0.9380968622100955, "grad_norm": 0.45334547758102417, "learning_rate": 2.0141418050609543e-08, "loss": 0.5074, "step": 5501 }, { "epoch": 0.9382673942701227, "grad_norm": 0.49507802724838257, "learning_rate": 2.0031273384282348e-08, "loss": 0.4989, "step": 5502 }, { "epoch": 0.9384379263301501, "grad_norm": 0.4019278585910797, "learning_rate": 1.992142766397465e-08, "loss": 0.4993, "step": 5503 }, { "epoch": 0.9386084583901774, "grad_norm": 0.4670289158821106, "learning_rate": 1.981188092319586e-08, "loss": 0.4845, "step": 5504 }, { "epoch": 0.9387789904502046, "grad_norm": 0.4508693516254425, "learning_rate": 1.970263319536392e-08, "loss": 0.4897, "step": 5505 }, { "epoch": 0.9389495225102319, "grad_norm": 0.48310673236846924, "learning_rate": 1.9593684513805502e-08, "loss": 0.4843, "step": 5506 }, { "epoch": 0.9391200545702592, "grad_norm": 0.40250974893569946, "learning_rate": 1.9485034911756018e-08, "loss": 0.4924, "step": 5507 }, { "epoch": 0.9392905866302865, "grad_norm": 0.45123982429504395, "learning_rate": 1.937668442235996e-08, "loss": 0.4964, "step": 5508 }, { "epoch": 0.9394611186903138, "grad_norm": 0.40856313705444336, "learning_rate": 1.926863307867033e-08, "loss": 0.4964, "step": 5509 }, { "epoch": 0.939631650750341, "grad_norm": 0.49330079555511475, "learning_rate": 1.9160880913648985e-08, "loss": 0.4835, "step": 5510 }, { "epoch": 0.9398021828103683, "grad_norm": 0.5441690683364868, "learning_rate": 1.9053427960166184e-08, "loss": 0.4791, "step": 5511 }, { "epoch": 0.9399727148703957, "grad_norm": 0.5150030255317688, "learning_rate": 1.8946274251001374e-08, "loss": 0.49, "step": 5512 }, { "epoch": 0.940143246930423, "grad_norm": 0.5744228363037109, "learning_rate": 1.883941981884241e-08, "loss": 0.4852, "step": 5513 }, { "epoch": 0.9403137789904502, "grad_norm": 0.5330885648727417, "learning_rate": 1.8732864696286103e-08, "loss": 0.4864, "step": 5514 }, { "epoch": 0.9404843110504775, "grad_norm": 0.42424067854881287, "learning_rate": 1.8626608915837675e-08, "loss": 0.4934, "step": 5515 }, { "epoch": 0.9406548431105047, "grad_norm": 0.49726104736328125, "learning_rate": 1.85206525099112e-08, "loss": 0.4923, "step": 5516 }, { "epoch": 0.9408253751705321, "grad_norm": 0.4287932515144348, "learning_rate": 1.8414995510829372e-08, "loss": 0.4911, "step": 5517 }, { "epoch": 0.9409959072305594, "grad_norm": 0.5417968034744263, "learning_rate": 1.8309637950823636e-08, "loss": 0.4865, "step": 5518 }, { "epoch": 0.9411664392905866, "grad_norm": 0.6594215631484985, "learning_rate": 1.8204579862033945e-08, "loss": 0.4968, "step": 5519 }, { "epoch": 0.9413369713506139, "grad_norm": 0.4392363727092743, "learning_rate": 1.809982127650889e-08, "loss": 0.494, "step": 5520 }, { "epoch": 0.9415075034106412, "grad_norm": 0.4899759888648987, "learning_rate": 1.7995362226206015e-08, "loss": 0.4819, "step": 5521 }, { "epoch": 0.9416780354706685, "grad_norm": 0.5993614792823792, "learning_rate": 1.7891202742991165e-08, "loss": 0.4918, "step": 5522 }, { "epoch": 0.9418485675306958, "grad_norm": 0.4386999309062958, "learning_rate": 1.778734285863859e-08, "loss": 0.487, "step": 5523 }, { "epoch": 0.942019099590723, "grad_norm": 0.5626959204673767, "learning_rate": 1.7683782604831947e-08, "loss": 0.5061, "step": 5524 }, { "epoch": 0.9421896316507503, "grad_norm": 0.479331910610199, "learning_rate": 1.7580522013162628e-08, "loss": 0.485, "step": 5525 }, { "epoch": 0.9423601637107776, "grad_norm": 0.4703061282634735, "learning_rate": 1.7477561115130993e-08, "loss": 0.5045, "step": 5526 }, { "epoch": 0.942530695770805, "grad_norm": 0.38216519355773926, "learning_rate": 1.737489994214614e-08, "loss": 0.4831, "step": 5527 }, { "epoch": 0.9427012278308322, "grad_norm": 0.4150906503200531, "learning_rate": 1.7272538525525462e-08, "loss": 0.4905, "step": 5528 }, { "epoch": 0.9428717598908595, "grad_norm": 0.49322372674942017, "learning_rate": 1.717047689649487e-08, "loss": 0.4852, "step": 5529 }, { "epoch": 0.9430422919508867, "grad_norm": 0.4253634810447693, "learning_rate": 1.7068715086189127e-08, "loss": 0.489, "step": 5530 }, { "epoch": 0.943212824010914, "grad_norm": 0.5283239483833313, "learning_rate": 1.6967253125651286e-08, "loss": 0.488, "step": 5531 }, { "epoch": 0.9433833560709414, "grad_norm": 0.4244921803474426, "learning_rate": 1.6866091045832923e-08, "loss": 0.4779, "step": 5532 }, { "epoch": 0.9435538881309686, "grad_norm": 0.4265737235546112, "learning_rate": 1.6765228877594354e-08, "loss": 0.4911, "step": 5533 }, { "epoch": 0.9437244201909959, "grad_norm": 0.5310192704200745, "learning_rate": 1.6664666651704076e-08, "loss": 0.489, "step": 5534 }, { "epoch": 0.9438949522510232, "grad_norm": 0.5723373293876648, "learning_rate": 1.656440439883944e-08, "loss": 0.4993, "step": 5535 }, { "epoch": 0.9440654843110505, "grad_norm": 0.5719337463378906, "learning_rate": 1.6464442149585982e-08, "loss": 0.493, "step": 5536 }, { "epoch": 0.9442360163710778, "grad_norm": 0.44443824887275696, "learning_rate": 1.6364779934438082e-08, "loss": 0.4906, "step": 5537 }, { "epoch": 0.944406548431105, "grad_norm": 0.5066612958908081, "learning_rate": 1.6265417783798087e-08, "loss": 0.4847, "step": 5538 }, { "epoch": 0.9445770804911323, "grad_norm": 0.6740800142288208, "learning_rate": 1.6166355727977204e-08, "loss": 0.4975, "step": 5539 }, { "epoch": 0.9447476125511596, "grad_norm": 0.525609016418457, "learning_rate": 1.6067593797195025e-08, "loss": 0.4924, "step": 5540 }, { "epoch": 0.944918144611187, "grad_norm": 0.4890042841434479, "learning_rate": 1.596913202157935e-08, "loss": 0.4958, "step": 5541 }, { "epoch": 0.9450886766712142, "grad_norm": 0.4876374900341034, "learning_rate": 1.5870970431166813e-08, "loss": 0.4896, "step": 5542 }, { "epoch": 0.9452592087312415, "grad_norm": 0.5408183932304382, "learning_rate": 1.577310905590224e-08, "loss": 0.4887, "step": 5543 }, { "epoch": 0.9454297407912687, "grad_norm": 0.4260612428188324, "learning_rate": 1.5675547925638858e-08, "loss": 0.4791, "step": 5544 }, { "epoch": 0.945600272851296, "grad_norm": 0.5564858317375183, "learning_rate": 1.557828707013831e-08, "loss": 0.4874, "step": 5545 }, { "epoch": 0.9457708049113234, "grad_norm": 0.4741833508014679, "learning_rate": 1.548132651907064e-08, "loss": 0.4874, "step": 5546 }, { "epoch": 0.9459413369713506, "grad_norm": 0.43672487139701843, "learning_rate": 1.5384666302014406e-08, "loss": 0.497, "step": 5547 }, { "epoch": 0.9461118690313779, "grad_norm": 0.6418878436088562, "learning_rate": 1.528830644845647e-08, "loss": 0.4819, "step": 5548 }, { "epoch": 0.9462824010914052, "grad_norm": 0.49941152334213257, "learning_rate": 1.5192246987791983e-08, "loss": 0.4807, "step": 5549 }, { "epoch": 0.9464529331514324, "grad_norm": 0.5414541959762573, "learning_rate": 1.5096487949324502e-08, "loss": 0.4909, "step": 5550 }, { "epoch": 0.9466234652114598, "grad_norm": 0.5298616886138916, "learning_rate": 1.5001029362265995e-08, "loss": 0.4868, "step": 5551 }, { "epoch": 0.946793997271487, "grad_norm": 0.4212076663970947, "learning_rate": 1.490587125573684e-08, "loss": 0.4856, "step": 5552 }, { "epoch": 0.9469645293315143, "grad_norm": 0.567182719707489, "learning_rate": 1.4811013658765474e-08, "loss": 0.4972, "step": 5553 }, { "epoch": 0.9471350613915416, "grad_norm": 0.43124932050704956, "learning_rate": 1.4716456600288976e-08, "loss": 0.4999, "step": 5554 }, { "epoch": 0.947305593451569, "grad_norm": 0.47852280735969543, "learning_rate": 1.4622200109152496e-08, "loss": 0.4828, "step": 5555 }, { "epoch": 0.9474761255115962, "grad_norm": 0.4701859652996063, "learning_rate": 1.4528244214109812e-08, "loss": 0.4817, "step": 5556 }, { "epoch": 0.9476466575716235, "grad_norm": 0.3365345597267151, "learning_rate": 1.4434588943822548e-08, "loss": 0.4907, "step": 5557 }, { "epoch": 0.9478171896316507, "grad_norm": 0.6257134079933167, "learning_rate": 1.4341234326861077e-08, "loss": 0.488, "step": 5558 }, { "epoch": 0.947987721691678, "grad_norm": 0.47926685214042664, "learning_rate": 1.4248180391703616e-08, "loss": 0.4877, "step": 5559 }, { "epoch": 0.9481582537517054, "grad_norm": 0.4044884443283081, "learning_rate": 1.4155427166737124e-08, "loss": 0.4856, "step": 5560 }, { "epoch": 0.9483287858117326, "grad_norm": 0.47886356711387634, "learning_rate": 1.4062974680256637e-08, "loss": 0.4903, "step": 5561 }, { "epoch": 0.9484993178717599, "grad_norm": 0.5004807710647583, "learning_rate": 1.3970822960465036e-08, "loss": 0.4896, "step": 5562 }, { "epoch": 0.9486698499317872, "grad_norm": 0.5693298578262329, "learning_rate": 1.387897203547417e-08, "loss": 0.4882, "step": 5563 }, { "epoch": 0.9488403819918144, "grad_norm": 0.4116656482219696, "learning_rate": 1.3787421933303736e-08, "loss": 0.5013, "step": 5564 }, { "epoch": 0.9490109140518418, "grad_norm": 0.5718091130256653, "learning_rate": 1.3696172681881505e-08, "loss": 0.4925, "step": 5565 }, { "epoch": 0.949181446111869, "grad_norm": 0.45327019691467285, "learning_rate": 1.3605224309043875e-08, "loss": 0.4907, "step": 5566 }, { "epoch": 0.9493519781718963, "grad_norm": 0.4574928283691406, "learning_rate": 1.3514576842535212e-08, "loss": 0.4959, "step": 5567 }, { "epoch": 0.9495225102319236, "grad_norm": 0.5410921573638916, "learning_rate": 1.3424230310007948e-08, "loss": 0.4839, "step": 5568 }, { "epoch": 0.9496930422919508, "grad_norm": 0.4273688495159149, "learning_rate": 1.333418473902315e-08, "loss": 0.4821, "step": 5569 }, { "epoch": 0.9498635743519782, "grad_norm": 0.4793260097503662, "learning_rate": 1.3244440157049733e-08, "loss": 0.4956, "step": 5570 }, { "epoch": 0.9500341064120055, "grad_norm": 0.3881284296512604, "learning_rate": 1.3154996591464909e-08, "loss": 0.4829, "step": 5571 }, { "epoch": 0.9502046384720327, "grad_norm": 0.5769071578979492, "learning_rate": 1.3065854069553964e-08, "loss": 0.5037, "step": 5572 }, { "epoch": 0.95037517053206, "grad_norm": 0.3778176009654999, "learning_rate": 1.2977012618510477e-08, "loss": 0.491, "step": 5573 }, { "epoch": 0.9505457025920873, "grad_norm": 0.5648781061172485, "learning_rate": 1.2888472265436104e-08, "loss": 0.4947, "step": 5574 }, { "epoch": 0.9507162346521146, "grad_norm": 0.46096527576446533, "learning_rate": 1.2800233037340571e-08, "loss": 0.4882, "step": 5575 }, { "epoch": 0.9508867667121419, "grad_norm": 0.4529668390750885, "learning_rate": 1.2712294961142125e-08, "loss": 0.4867, "step": 5576 }, { "epoch": 0.9510572987721692, "grad_norm": 0.5278765559196472, "learning_rate": 1.262465806366664e-08, "loss": 0.4915, "step": 5577 }, { "epoch": 0.9512278308321964, "grad_norm": 0.4194834232330322, "learning_rate": 1.2537322371648287e-08, "loss": 0.497, "step": 5578 }, { "epoch": 0.9513983628922238, "grad_norm": 0.49713462591171265, "learning_rate": 1.2450287911729643e-08, "loss": 0.4888, "step": 5579 }, { "epoch": 0.951568894952251, "grad_norm": 0.45388472080230713, "learning_rate": 1.2363554710460913e-08, "loss": 0.4957, "step": 5580 }, { "epoch": 0.9517394270122783, "grad_norm": 0.4333135485649109, "learning_rate": 1.2277122794300822e-08, "loss": 0.4949, "step": 5581 }, { "epoch": 0.9519099590723056, "grad_norm": 0.47753965854644775, "learning_rate": 1.2190992189615837e-08, "loss": 0.4972, "step": 5582 }, { "epoch": 0.9520804911323328, "grad_norm": 0.519422173500061, "learning_rate": 1.2105162922680825e-08, "loss": 0.4914, "step": 5583 }, { "epoch": 0.9522510231923602, "grad_norm": 0.5419857501983643, "learning_rate": 1.2019635019678512e-08, "loss": 0.4928, "step": 5584 }, { "epoch": 0.9524215552523875, "grad_norm": 0.4927580952644348, "learning_rate": 1.1934408506699803e-08, "loss": 0.5068, "step": 5585 }, { "epoch": 0.9525920873124147, "grad_norm": 0.422260046005249, "learning_rate": 1.1849483409743457e-08, "loss": 0.4972, "step": 5586 }, { "epoch": 0.952762619372442, "grad_norm": 0.6205607652664185, "learning_rate": 1.176485975471664e-08, "loss": 0.4918, "step": 5587 }, { "epoch": 0.9529331514324693, "grad_norm": 0.40875399112701416, "learning_rate": 1.1680537567434259e-08, "loss": 0.491, "step": 5588 }, { "epoch": 0.9531036834924966, "grad_norm": 0.5891082882881165, "learning_rate": 1.1596516873619401e-08, "loss": 0.5017, "step": 5589 }, { "epoch": 0.9532742155525239, "grad_norm": 0.42861342430114746, "learning_rate": 1.1512797698903122e-08, "loss": 0.4878, "step": 5590 }, { "epoch": 0.9534447476125512, "grad_norm": 0.5009891390800476, "learning_rate": 1.1429380068824548e-08, "loss": 0.5049, "step": 5591 }, { "epoch": 0.9536152796725784, "grad_norm": 0.5420736074447632, "learning_rate": 1.1346264008830766e-08, "loss": 0.5004, "step": 5592 }, { "epoch": 0.9537858117326057, "grad_norm": 0.48001834750175476, "learning_rate": 1.1263449544276939e-08, "loss": 0.4903, "step": 5593 }, { "epoch": 0.953956343792633, "grad_norm": 0.5677177309989929, "learning_rate": 1.1180936700425972e-08, "loss": 0.4994, "step": 5594 }, { "epoch": 0.9541268758526603, "grad_norm": 0.49556687474250793, "learning_rate": 1.1098725502449172e-08, "loss": 0.5101, "step": 5595 }, { "epoch": 0.9542974079126876, "grad_norm": 0.6599041819572449, "learning_rate": 1.1016815975425589e-08, "loss": 0.4784, "step": 5596 }, { "epoch": 0.9544679399727148, "grad_norm": 0.5260385870933533, "learning_rate": 1.0935208144342125e-08, "loss": 0.5008, "step": 5597 }, { "epoch": 0.9546384720327421, "grad_norm": 0.4529414176940918, "learning_rate": 1.0853902034093979e-08, "loss": 0.4881, "step": 5598 }, { "epoch": 0.9548090040927695, "grad_norm": 0.49681493639945984, "learning_rate": 1.0772897669484085e-08, "loss": 0.4971, "step": 5599 }, { "epoch": 0.9549795361527967, "grad_norm": 0.39829450845718384, "learning_rate": 1.0692195075223234e-08, "loss": 0.4947, "step": 5600 }, { "epoch": 0.955150068212824, "grad_norm": 0.48838427662849426, "learning_rate": 1.0611794275930399e-08, "loss": 0.4948, "step": 5601 }, { "epoch": 0.9553206002728513, "grad_norm": 0.46820709109306335, "learning_rate": 1.0531695296132517e-08, "loss": 0.5054, "step": 5602 }, { "epoch": 0.9554911323328786, "grad_norm": 0.5562225580215454, "learning_rate": 1.045189816026393e-08, "loss": 0.4957, "step": 5603 }, { "epoch": 0.9556616643929059, "grad_norm": 0.45520704984664917, "learning_rate": 1.0372402892667722e-08, "loss": 0.5046, "step": 5604 }, { "epoch": 0.9558321964529332, "grad_norm": 0.4145662784576416, "learning_rate": 1.0293209517594162e-08, "loss": 0.4999, "step": 5605 }, { "epoch": 0.9560027285129604, "grad_norm": 0.4943574070930481, "learning_rate": 1.0214318059201922e-08, "loss": 0.4992, "step": 5606 }, { "epoch": 0.9561732605729877, "grad_norm": 0.5607316493988037, "learning_rate": 1.01357285415572e-08, "loss": 0.4953, "step": 5607 }, { "epoch": 0.956343792633015, "grad_norm": 0.40864089131355286, "learning_rate": 1.0057440988634372e-08, "loss": 0.4924, "step": 5608 }, { "epoch": 0.9565143246930423, "grad_norm": 0.43824154138565063, "learning_rate": 9.979455424315443e-09, "loss": 0.5078, "step": 5609 }, { "epoch": 0.9566848567530696, "grad_norm": 0.489147812128067, "learning_rate": 9.901771872390498e-09, "loss": 0.4924, "step": 5610 }, { "epoch": 0.9568553888130968, "grad_norm": 0.4240574836730957, "learning_rate": 9.824390356557579e-09, "loss": 0.508, "step": 5611 }, { "epoch": 0.9570259208731241, "grad_norm": 0.5207276344299316, "learning_rate": 9.747310900422136e-09, "loss": 0.5032, "step": 5612 }, { "epoch": 0.9571964529331515, "grad_norm": 0.4289999008178711, "learning_rate": 9.670533527498138e-09, "loss": 0.4841, "step": 5613 }, { "epoch": 0.9573669849931787, "grad_norm": 0.43601903319358826, "learning_rate": 9.594058261206741e-09, "loss": 0.4991, "step": 5614 }, { "epoch": 0.957537517053206, "grad_norm": 0.6186116337776184, "learning_rate": 9.517885124877281e-09, "loss": 0.4986, "step": 5615 }, { "epoch": 0.9577080491132333, "grad_norm": 0.43979814648628235, "learning_rate": 9.442014141747058e-09, "loss": 0.5052, "step": 5616 }, { "epoch": 0.9578785811732605, "grad_norm": 0.523296058177948, "learning_rate": 9.366445334960784e-09, "loss": 0.487, "step": 5617 }, { "epoch": 0.9580491132332879, "grad_norm": 0.4721669554710388, "learning_rate": 9.29117872757146e-09, "loss": 0.5055, "step": 5618 }, { "epoch": 0.9582196452933152, "grad_norm": 0.5010939836502075, "learning_rate": 9.216214342539388e-09, "loss": 0.4958, "step": 5619 }, { "epoch": 0.9583901773533424, "grad_norm": 0.5684243440628052, "learning_rate": 9.141552202733274e-09, "loss": 0.4935, "step": 5620 }, { "epoch": 0.9585607094133697, "grad_norm": 0.4389380216598511, "learning_rate": 9.067192330929009e-09, "loss": 0.5028, "step": 5621 }, { "epoch": 0.958731241473397, "grad_norm": 0.5720691680908203, "learning_rate": 8.993134749810896e-09, "loss": 0.497, "step": 5622 }, { "epoch": 0.9589017735334243, "grad_norm": 0.5201283693313599, "learning_rate": 8.919379481970415e-09, "loss": 0.4966, "step": 5623 }, { "epoch": 0.9590723055934516, "grad_norm": 0.44963565468788147, "learning_rate": 8.845926549907125e-09, "loss": 0.498, "step": 5624 }, { "epoch": 0.9592428376534788, "grad_norm": 0.4307955801486969, "learning_rate": 8.772775976028547e-09, "loss": 0.4891, "step": 5625 }, { "epoch": 0.9594133697135061, "grad_norm": 0.5067347288131714, "learning_rate": 8.699927782649609e-09, "loss": 0.4942, "step": 5626 }, { "epoch": 0.9595839017735335, "grad_norm": 0.49882620573043823, "learning_rate": 8.62738199199309e-09, "loss": 0.495, "step": 5627 }, { "epoch": 0.9597544338335607, "grad_norm": 0.38423335552215576, "learning_rate": 8.555138626189619e-09, "loss": 0.4995, "step": 5628 }, { "epoch": 0.959924965893588, "grad_norm": 0.6150292754173279, "learning_rate": 8.483197707277569e-09, "loss": 0.4817, "step": 5629 }, { "epoch": 0.9600954979536153, "grad_norm": 0.4514417052268982, "learning_rate": 8.41155925720283e-09, "loss": 0.4958, "step": 5630 }, { "epoch": 0.9602660300136425, "grad_norm": 0.6224324107170105, "learning_rate": 8.340223297819473e-09, "loss": 0.493, "step": 5631 }, { "epoch": 0.9604365620736699, "grad_norm": 0.45860835909843445, "learning_rate": 8.269189850888871e-09, "loss": 0.5014, "step": 5632 }, { "epoch": 0.9606070941336972, "grad_norm": 0.501257061958313, "learning_rate": 8.198458938080023e-09, "loss": 0.4912, "step": 5633 }, { "epoch": 0.9607776261937244, "grad_norm": 0.48550844192504883, "learning_rate": 8.128030580970338e-09, "loss": 0.496, "step": 5634 }, { "epoch": 0.9609481582537517, "grad_norm": 0.4650779664516449, "learning_rate": 8.05790480104407e-09, "loss": 0.4942, "step": 5635 }, { "epoch": 0.961118690313779, "grad_norm": 0.48873841762542725, "learning_rate": 7.988081619693666e-09, "loss": 0.4983, "step": 5636 }, { "epoch": 0.9612892223738063, "grad_norm": 0.40933525562286377, "learning_rate": 7.918561058219199e-09, "loss": 0.5015, "step": 5637 }, { "epoch": 0.9614597544338336, "grad_norm": 0.5009204149246216, "learning_rate": 7.84934313782837e-09, "loss": 0.4954, "step": 5638 }, { "epoch": 0.9616302864938608, "grad_norm": 0.4214461147785187, "learning_rate": 7.780427879636621e-09, "loss": 0.4884, "step": 5639 }, { "epoch": 0.9618008185538881, "grad_norm": 0.5430739521980286, "learning_rate": 7.711815304666805e-09, "loss": 0.4928, "step": 5640 }, { "epoch": 0.9619713506139154, "grad_norm": 0.45583394169807434, "learning_rate": 7.643505433849953e-09, "loss": 0.4893, "step": 5641 }, { "epoch": 0.9621418826739427, "grad_norm": 0.4132305383682251, "learning_rate": 7.575498288024175e-09, "loss": 0.5066, "step": 5642 }, { "epoch": 0.96231241473397, "grad_norm": 0.5108238458633423, "learning_rate": 7.507793887935656e-09, "loss": 0.4903, "step": 5643 }, { "epoch": 0.9624829467939973, "grad_norm": 0.47365233302116394, "learning_rate": 7.440392254237983e-09, "loss": 0.5016, "step": 5644 }, { "epoch": 0.9626534788540245, "grad_norm": 0.5538928508758545, "learning_rate": 7.3732934074926e-09, "loss": 0.4934, "step": 5645 }, { "epoch": 0.9628240109140518, "grad_norm": 0.401213675737381, "learning_rate": 7.306497368168353e-09, "loss": 0.4908, "step": 5646 }, { "epoch": 0.9629945429740792, "grad_norm": 0.48938992619514465, "learning_rate": 7.240004156641945e-09, "loss": 0.5084, "step": 5647 }, { "epoch": 0.9631650750341064, "grad_norm": 0.5778220295906067, "learning_rate": 7.173813793197371e-09, "loss": 0.4898, "step": 5648 }, { "epoch": 0.9633356070941337, "grad_norm": 0.44132354855537415, "learning_rate": 7.1079262980265914e-09, "loss": 0.4949, "step": 5649 }, { "epoch": 0.963506139154161, "grad_norm": 0.6237069964408875, "learning_rate": 7.042341691229083e-09, "loss": 0.4888, "step": 5650 }, { "epoch": 0.9636766712141883, "grad_norm": 0.7165541648864746, "learning_rate": 6.97705999281162e-09, "loss": 0.4915, "step": 5651 }, { "epoch": 0.9638472032742156, "grad_norm": 0.48227575421333313, "learning_rate": 6.91208122268916e-09, "loss": 0.4855, "step": 5652 }, { "epoch": 0.9640177353342428, "grad_norm": 0.5969128608703613, "learning_rate": 6.847405400683627e-09, "loss": 0.493, "step": 5653 }, { "epoch": 0.9641882673942701, "grad_norm": 0.4721926748752594, "learning_rate": 6.783032546524904e-09, "loss": 0.4832, "step": 5654 }, { "epoch": 0.9643587994542974, "grad_norm": 0.634792685508728, "learning_rate": 6.718962679850505e-09, "loss": 0.4922, "step": 5655 }, { "epoch": 0.9645293315143247, "grad_norm": 0.4622402489185333, "learning_rate": 6.65519582020513e-09, "loss": 0.4808, "step": 5656 }, { "epoch": 0.964699863574352, "grad_norm": 0.6484578847885132, "learning_rate": 6.591731987041439e-09, "loss": 0.4934, "step": 5657 }, { "epoch": 0.9648703956343793, "grad_norm": 0.539602518081665, "learning_rate": 6.528571199719503e-09, "loss": 0.5055, "step": 5658 }, { "epoch": 0.9650409276944065, "grad_norm": 0.36174818873405457, "learning_rate": 6.465713477506908e-09, "loss": 0.4803, "step": 5659 }, { "epoch": 0.9652114597544338, "grad_norm": 0.5808553695678711, "learning_rate": 6.403158839578761e-09, "loss": 0.5017, "step": 5660 }, { "epoch": 0.9653819918144612, "grad_norm": 0.44459226727485657, "learning_rate": 6.340907305017907e-09, "loss": 0.4927, "step": 5661 }, { "epoch": 0.9655525238744884, "grad_norm": 0.5725284814834595, "learning_rate": 6.27895889281438e-09, "loss": 0.4887, "step": 5662 }, { "epoch": 0.9657230559345157, "grad_norm": 0.5165860056877136, "learning_rate": 6.217313621866284e-09, "loss": 0.4948, "step": 5663 }, { "epoch": 0.965893587994543, "grad_norm": 0.4258648157119751, "learning_rate": 6.155971510978576e-09, "loss": 0.4918, "step": 5664 }, { "epoch": 0.9660641200545702, "grad_norm": 0.4650655686855316, "learning_rate": 6.094932578864288e-09, "loss": 0.4911, "step": 5665 }, { "epoch": 0.9662346521145976, "grad_norm": 0.5605490803718567, "learning_rate": 6.034196844143748e-09, "loss": 0.5022, "step": 5666 }, { "epoch": 0.9664051841746248, "grad_norm": 0.5298847556114197, "learning_rate": 5.9737643253446895e-09, "loss": 0.4785, "step": 5667 }, { "epoch": 0.9665757162346521, "grad_norm": 0.36176204681396484, "learning_rate": 5.913635040902477e-09, "loss": 0.4832, "step": 5668 }, { "epoch": 0.9667462482946794, "grad_norm": 0.5243964791297913, "learning_rate": 5.853809009160106e-09, "loss": 0.4855, "step": 5669 }, { "epoch": 0.9669167803547067, "grad_norm": 0.42310968041419983, "learning_rate": 5.794286248367753e-09, "loss": 0.4961, "step": 5670 }, { "epoch": 0.967087312414734, "grad_norm": 0.45878174901008606, "learning_rate": 5.73506677668323e-09, "loss": 0.4881, "step": 5671 }, { "epoch": 0.9672578444747613, "grad_norm": 0.481548547744751, "learning_rate": 5.676150612171972e-09, "loss": 0.4867, "step": 5672 }, { "epoch": 0.9674283765347885, "grad_norm": 0.3911304175853729, "learning_rate": 5.617537772806603e-09, "loss": 0.4869, "step": 5673 }, { "epoch": 0.9675989085948158, "grad_norm": 0.4614737331867218, "learning_rate": 5.559228276467377e-09, "loss": 0.5035, "step": 5674 }, { "epoch": 0.9677694406548432, "grad_norm": 0.4647872745990753, "learning_rate": 5.501222140942064e-09, "loss": 0.4828, "step": 5675 }, { "epoch": 0.9679399727148704, "grad_norm": 0.4832058250904083, "learning_rate": 5.443519383925845e-09, "loss": 0.4924, "step": 5676 }, { "epoch": 0.9681105047748977, "grad_norm": 0.37751418352127075, "learning_rate": 5.386120023021191e-09, "loss": 0.4899, "step": 5677 }, { "epoch": 0.968281036834925, "grad_norm": 0.4673527479171753, "learning_rate": 5.329024075738321e-09, "loss": 0.4967, "step": 5678 }, { "epoch": 0.9684515688949522, "grad_norm": 0.4402042031288147, "learning_rate": 5.272231559494634e-09, "loss": 0.4943, "step": 5679 }, { "epoch": 0.9686221009549796, "grad_norm": 0.4784039556980133, "learning_rate": 5.215742491615161e-09, "loss": 0.4939, "step": 5680 }, { "epoch": 0.9687926330150068, "grad_norm": 0.4998716413974762, "learning_rate": 5.1595568893321186e-09, "loss": 0.4975, "step": 5681 }, { "epoch": 0.9689631650750341, "grad_norm": 0.44880411028862, "learning_rate": 5.103674769785461e-09, "loss": 0.4754, "step": 5682 }, { "epoch": 0.9691336971350614, "grad_norm": 0.5895913243293762, "learning_rate": 5.0480961500223306e-09, "loss": 0.4812, "step": 5683 }, { "epoch": 0.9693042291950886, "grad_norm": 0.384233295917511, "learning_rate": 4.992821046997388e-09, "loss": 0.4783, "step": 5684 }, { "epoch": 0.969474761255116, "grad_norm": 0.4828539788722992, "learning_rate": 4.937849477572588e-09, "loss": 0.4828, "step": 5685 }, { "epoch": 0.9696452933151433, "grad_norm": 0.5714468955993652, "learning_rate": 4.883181458517406e-09, "loss": 0.4972, "step": 5686 }, { "epoch": 0.9698158253751705, "grad_norm": 0.4602378308773041, "learning_rate": 4.8288170065088354e-09, "loss": 0.4891, "step": 5687 }, { "epoch": 0.9699863574351978, "grad_norm": 0.5014626979827881, "learning_rate": 4.774756138130832e-09, "loss": 0.4879, "step": 5688 }, { "epoch": 0.970156889495225, "grad_norm": 0.5613033771514893, "learning_rate": 4.720998869875316e-09, "loss": 0.4889, "step": 5689 }, { "epoch": 0.9703274215552524, "grad_norm": 0.5589002370834351, "learning_rate": 4.667545218141057e-09, "loss": 0.4767, "step": 5690 }, { "epoch": 0.9704979536152797, "grad_norm": 0.5097793936729431, "learning_rate": 4.614395199234679e-09, "loss": 0.4862, "step": 5691 }, { "epoch": 0.970668485675307, "grad_norm": 0.5597699880599976, "learning_rate": 4.561548829369766e-09, "loss": 0.4955, "step": 5692 }, { "epoch": 0.9708390177353342, "grad_norm": 0.5028160214424133, "learning_rate": 4.509006124667648e-09, "loss": 0.4849, "step": 5693 }, { "epoch": 0.9710095497953616, "grad_norm": 0.5753107070922852, "learning_rate": 4.456767101156723e-09, "loss": 0.4922, "step": 5694 }, { "epoch": 0.9711800818553888, "grad_norm": 0.6566035151481628, "learning_rate": 4.404831774772689e-09, "loss": 0.4899, "step": 5695 }, { "epoch": 0.9713506139154161, "grad_norm": 0.48711341619491577, "learning_rate": 4.353200161358984e-09, "loss": 0.4811, "step": 5696 }, { "epoch": 0.9715211459754434, "grad_norm": 0.7718820571899414, "learning_rate": 4.301872276666119e-09, "loss": 0.4848, "step": 5697 }, { "epoch": 0.9716916780354706, "grad_norm": 0.6576699018478394, "learning_rate": 4.250848136352015e-09, "loss": 0.4974, "step": 5698 }, { "epoch": 0.971862210095498, "grad_norm": 0.588111400604248, "learning_rate": 4.200127755981887e-09, "loss": 0.4956, "step": 5699 }, { "epoch": 0.9720327421555253, "grad_norm": 0.7305927872657776, "learning_rate": 4.149711151028357e-09, "loss": 0.5016, "step": 5700 }, { "epoch": 0.9722032742155525, "grad_norm": 0.45234251022338867, "learning_rate": 4.099598336871347e-09, "loss": 0.4784, "step": 5701 }, { "epoch": 0.9723738062755798, "grad_norm": 0.6457910537719727, "learning_rate": 4.049789328798071e-09, "loss": 0.5076, "step": 5702 }, { "epoch": 0.972544338335607, "grad_norm": 0.4973987340927124, "learning_rate": 4.0002841420032635e-09, "loss": 0.4925, "step": 5703 }, { "epoch": 0.9727148703956344, "grad_norm": 0.608955442905426, "learning_rate": 3.951082791588512e-09, "loss": 0.4946, "step": 5704 }, { "epoch": 0.9728854024556617, "grad_norm": 0.48888248205184937, "learning_rate": 3.902185292563365e-09, "loss": 0.4911, "step": 5705 }, { "epoch": 0.973055934515689, "grad_norm": 0.5876244306564331, "learning_rate": 3.853591659844003e-09, "loss": 0.4878, "step": 5706 }, { "epoch": 0.9732264665757162, "grad_norm": 0.5642983317375183, "learning_rate": 3.8053019082544555e-09, "loss": 0.4797, "step": 5707 }, { "epoch": 0.9733969986357435, "grad_norm": 0.5118812322616577, "learning_rate": 3.757316052525828e-09, "loss": 0.4955, "step": 5708 }, { "epoch": 0.9735675306957708, "grad_norm": 0.6304372549057007, "learning_rate": 3.709634107296412e-09, "loss": 0.5018, "step": 5709 }, { "epoch": 0.9737380627557981, "grad_norm": 0.44215402007102966, "learning_rate": 3.662256087111904e-09, "loss": 0.4891, "step": 5710 }, { "epoch": 0.9739085948158254, "grad_norm": 0.6735357642173767, "learning_rate": 3.615182006425411e-09, "loss": 0.4917, "step": 5711 }, { "epoch": 0.9740791268758526, "grad_norm": 0.603859543800354, "learning_rate": 3.5684118795971115e-09, "loss": 0.496, "step": 5712 }, { "epoch": 0.9742496589358799, "grad_norm": 0.570151150226593, "learning_rate": 3.521945720894593e-09, "loss": 0.4985, "step": 5713 }, { "epoch": 0.9744201909959073, "grad_norm": 0.7017441391944885, "learning_rate": 3.4757835444926282e-09, "loss": 0.4774, "step": 5714 }, { "epoch": 0.9745907230559345, "grad_norm": 0.48164358735084534, "learning_rate": 3.4299253644732856e-09, "loss": 0.4926, "step": 5715 }, { "epoch": 0.9747612551159618, "grad_norm": 0.5758684873580933, "learning_rate": 3.384371194825931e-09, "loss": 0.487, "step": 5716 }, { "epoch": 0.974931787175989, "grad_norm": 0.38753560185432434, "learning_rate": 3.3391210494472253e-09, "loss": 0.502, "step": 5717 }, { "epoch": 0.9751023192360164, "grad_norm": 0.600566029548645, "learning_rate": 3.2941749421409044e-09, "loss": 0.4966, "step": 5718 }, { "epoch": 0.9752728512960437, "grad_norm": 0.5640249252319336, "learning_rate": 3.2495328866182233e-09, "loss": 0.4949, "step": 5719 }, { "epoch": 0.975443383356071, "grad_norm": 0.450433611869812, "learning_rate": 3.2051948964973993e-09, "loss": 0.4962, "step": 5720 }, { "epoch": 0.9756139154160982, "grad_norm": 0.5050912499427795, "learning_rate": 3.1611609853041684e-09, "loss": 0.4887, "step": 5721 }, { "epoch": 0.9757844474761255, "grad_norm": 0.43759477138519287, "learning_rate": 3.1174311664712302e-09, "loss": 0.4776, "step": 5722 }, { "epoch": 0.9759549795361528, "grad_norm": 0.5738310217857361, "learning_rate": 3.074005453338802e-09, "loss": 0.4947, "step": 5723 }, { "epoch": 0.9761255115961801, "grad_norm": 0.41161057353019714, "learning_rate": 3.030883859154066e-09, "loss": 0.5027, "step": 5724 }, { "epoch": 0.9762960436562074, "grad_norm": 0.4722995460033417, "learning_rate": 2.98806639707161e-09, "loss": 0.4945, "step": 5725 }, { "epoch": 0.9764665757162346, "grad_norm": 0.5092844367027283, "learning_rate": 2.9455530801532076e-09, "loss": 0.4894, "step": 5726 }, { "epoch": 0.9766371077762619, "grad_norm": 0.49190133810043335, "learning_rate": 2.9033439213677077e-09, "loss": 0.4892, "step": 5727 }, { "epoch": 0.9768076398362893, "grad_norm": 0.7835355997085571, "learning_rate": 2.8614389335914763e-09, "loss": 0.4967, "step": 5728 }, { "epoch": 0.9769781718963165, "grad_norm": 0.5099844336509705, "learning_rate": 2.819838129607844e-09, "loss": 0.4944, "step": 5729 }, { "epoch": 0.9771487039563438, "grad_norm": 0.5194500684738159, "learning_rate": 2.7785415221073257e-09, "loss": 0.4908, "step": 5730 }, { "epoch": 0.977319236016371, "grad_norm": 0.5863021016120911, "learning_rate": 2.737549123687844e-09, "loss": 0.5031, "step": 5731 }, { "epoch": 0.9774897680763983, "grad_norm": 0.49079203605651855, "learning_rate": 2.6968609468543964e-09, "loss": 0.4804, "step": 5732 }, { "epoch": 0.9776603001364257, "grad_norm": 0.5268674492835999, "learning_rate": 2.6564770040190537e-09, "loss": 0.5013, "step": 5733 }, { "epoch": 0.977830832196453, "grad_norm": 0.3950529396533966, "learning_rate": 2.616397307501295e-09, "loss": 0.491, "step": 5734 }, { "epoch": 0.9780013642564802, "grad_norm": 0.5526773929595947, "learning_rate": 2.5766218695277845e-09, "loss": 0.4882, "step": 5735 }, { "epoch": 0.9781718963165075, "grad_norm": 0.6158766150474548, "learning_rate": 2.537150702232261e-09, "loss": 0.4936, "step": 5736 }, { "epoch": 0.9783424283765347, "grad_norm": 0.46131011843681335, "learning_rate": 2.497983817655536e-09, "loss": 0.4892, "step": 5737 }, { "epoch": 0.9785129604365621, "grad_norm": 0.5086356401443481, "learning_rate": 2.4591212277457202e-09, "loss": 0.4899, "step": 5738 }, { "epoch": 0.9786834924965894, "grad_norm": 0.5378406643867493, "learning_rate": 2.420562944358329e-09, "loss": 0.4934, "step": 5739 }, { "epoch": 0.9788540245566166, "grad_norm": 0.4971339702606201, "learning_rate": 2.3823089792556212e-09, "loss": 0.5008, "step": 5740 }, { "epoch": 0.9790245566166439, "grad_norm": 0.5195417404174805, "learning_rate": 2.3443593441073723e-09, "loss": 0.4954, "step": 5741 }, { "epoch": 0.9791950886766713, "grad_norm": 0.43583396077156067, "learning_rate": 2.306714050490211e-09, "loss": 0.4881, "step": 5742 }, { "epoch": 0.9793656207366985, "grad_norm": 0.39053624868392944, "learning_rate": 2.2693731098881734e-09, "loss": 0.4844, "step": 5743 }, { "epoch": 0.9795361527967258, "grad_norm": 0.4212055802345276, "learning_rate": 2.2323365336924807e-09, "loss": 0.4957, "step": 5744 }, { "epoch": 0.979706684856753, "grad_norm": 0.4515783488750458, "learning_rate": 2.1956043332010957e-09, "loss": 0.4839, "step": 5745 }, { "epoch": 0.9798772169167803, "grad_norm": 0.41012388467788696, "learning_rate": 2.1591765196197213e-09, "loss": 0.4913, "step": 5746 }, { "epoch": 0.9800477489768077, "grad_norm": 0.48675966262817383, "learning_rate": 2.1230531040606906e-09, "loss": 0.498, "step": 5747 }, { "epoch": 0.980218281036835, "grad_norm": 0.644092857837677, "learning_rate": 2.0872340975438556e-09, "loss": 0.4929, "step": 5748 }, { "epoch": 0.9803888130968622, "grad_norm": 0.5100440979003906, "learning_rate": 2.0517195109959198e-09, "loss": 0.4862, "step": 5749 }, { "epoch": 0.9805593451568895, "grad_norm": 0.43121039867401123, "learning_rate": 2.016509355250884e-09, "loss": 0.4912, "step": 5750 }, { "epoch": 0.9807298772169167, "grad_norm": 0.5692516565322876, "learning_rate": 1.981603641049934e-09, "loss": 0.4878, "step": 5751 }, { "epoch": 0.9809004092769441, "grad_norm": 0.4968946874141693, "learning_rate": 1.9470023790411077e-09, "loss": 0.4923, "step": 5752 }, { "epoch": 0.9810709413369714, "grad_norm": 0.4381271302700043, "learning_rate": 1.912705579779961e-09, "loss": 0.4882, "step": 5753 }, { "epoch": 0.9812414733969986, "grad_norm": 0.5335595011711121, "learning_rate": 1.8787132537287923e-09, "loss": 0.5084, "step": 5754 }, { "epoch": 0.9814120054570259, "grad_norm": 0.40526238083839417, "learning_rate": 1.8450254112571954e-09, "loss": 0.4812, "step": 5755 }, { "epoch": 0.9815825375170532, "grad_norm": 0.5060843229293823, "learning_rate": 1.8116420626420606e-09, "loss": 0.4897, "step": 5756 }, { "epoch": 0.9817530695770805, "grad_norm": 0.5042193531990051, "learning_rate": 1.77856321806702e-09, "loss": 0.4972, "step": 5757 }, { "epoch": 0.9819236016371078, "grad_norm": 0.44039374589920044, "learning_rate": 1.7457888876230012e-09, "loss": 0.4933, "step": 5758 }, { "epoch": 0.982094133697135, "grad_norm": 0.6233049035072327, "learning_rate": 1.7133190813081182e-09, "loss": 0.4858, "step": 5759 }, { "epoch": 0.9822646657571623, "grad_norm": 0.39624595642089844, "learning_rate": 1.6811538090274473e-09, "loss": 0.4862, "step": 5760 }, { "epoch": 0.9824351978171897, "grad_norm": 0.437248557806015, "learning_rate": 1.6492930805932507e-09, "loss": 0.507, "step": 5761 }, { "epoch": 0.982605729877217, "grad_norm": 0.5529453754425049, "learning_rate": 1.617736905724754e-09, "loss": 0.4903, "step": 5762 }, { "epoch": 0.9827762619372442, "grad_norm": 0.412514865398407, "learning_rate": 1.5864852940485899e-09, "loss": 0.4861, "step": 5763 }, { "epoch": 0.9829467939972715, "grad_norm": 0.40067386627197266, "learning_rate": 1.5555382550981325e-09, "loss": 0.5007, "step": 5764 }, { "epoch": 0.9831173260572987, "grad_norm": 0.6422335505485535, "learning_rate": 1.5248957983139413e-09, "loss": 0.4917, "step": 5765 }, { "epoch": 0.9832878581173261, "grad_norm": 0.5342501997947693, "learning_rate": 1.4945579330437611e-09, "loss": 0.4907, "step": 5766 }, { "epoch": 0.9834583901773534, "grad_norm": 0.48356303572654724, "learning_rate": 1.464524668542411e-09, "loss": 0.4908, "step": 5767 }, { "epoch": 0.9836289222373806, "grad_norm": 0.5423564910888672, "learning_rate": 1.4347960139716734e-09, "loss": 0.491, "step": 5768 }, { "epoch": 0.9837994542974079, "grad_norm": 0.39374396204948425, "learning_rate": 1.405371978400516e-09, "loss": 0.493, "step": 5769 }, { "epoch": 0.9839699863574352, "grad_norm": 0.5703930854797363, "learning_rate": 1.3762525708048703e-09, "loss": 0.4961, "step": 5770 }, { "epoch": 0.9841405184174625, "grad_norm": 0.5441506505012512, "learning_rate": 1.3474378000678524e-09, "loss": 0.4895, "step": 5771 }, { "epoch": 0.9843110504774898, "grad_norm": 0.498138964176178, "learning_rate": 1.3189276749795422e-09, "loss": 0.4976, "step": 5772 }, { "epoch": 0.984481582537517, "grad_norm": 0.43553170561790466, "learning_rate": 1.290722204237316e-09, "loss": 0.4865, "step": 5773 }, { "epoch": 0.9846521145975443, "grad_norm": 0.45070740580558777, "learning_rate": 1.2628213964452908e-09, "loss": 0.4881, "step": 5774 }, { "epoch": 0.9848226466575716, "grad_norm": 0.7565078735351562, "learning_rate": 1.2352252601147697e-09, "loss": 0.4945, "step": 5775 }, { "epoch": 0.984993178717599, "grad_norm": 0.4107791781425476, "learning_rate": 1.207933803664241e-09, "loss": 0.4875, "step": 5776 }, { "epoch": 0.9851637107776262, "grad_norm": 0.5069323182106018, "learning_rate": 1.1809470354190446e-09, "loss": 0.5019, "step": 5777 }, { "epoch": 0.9853342428376535, "grad_norm": 0.49351727962493896, "learning_rate": 1.1542649636118176e-09, "loss": 0.4886, "step": 5778 }, { "epoch": 0.9855047748976807, "grad_norm": 0.463966429233551, "learning_rate": 1.127887596381938e-09, "loss": 0.4981, "step": 5779 }, { "epoch": 0.985675306957708, "grad_norm": 0.371059775352478, "learning_rate": 1.1018149417761915e-09, "loss": 0.4928, "step": 5780 }, { "epoch": 0.9858458390177354, "grad_norm": 0.4316602349281311, "learning_rate": 1.0760470077479935e-09, "loss": 0.4868, "step": 5781 }, { "epoch": 0.9860163710777626, "grad_norm": 0.42240771651268005, "learning_rate": 1.0505838021581672e-09, "loss": 0.4803, "step": 5782 }, { "epoch": 0.9861869031377899, "grad_norm": 0.3554317057132721, "learning_rate": 1.025425332774388e-09, "loss": 0.5001, "step": 5783 }, { "epoch": 0.9863574351978172, "grad_norm": 0.5055869817733765, "learning_rate": 1.0005716072714057e-09, "loss": 0.4855, "step": 5784 }, { "epoch": 0.9865279672578445, "grad_norm": 0.38027331233024597, "learning_rate": 9.76022633231155e-10, "loss": 0.4928, "step": 5785 }, { "epoch": 0.9866984993178718, "grad_norm": 0.5682066082954407, "learning_rate": 9.51778418142202e-10, "loss": 0.4864, "step": 5786 }, { "epoch": 0.986869031377899, "grad_norm": 0.5194091200828552, "learning_rate": 9.278389694006296e-10, "loss": 0.4938, "step": 5787 }, { "epoch": 0.9870395634379263, "grad_norm": 0.59715735912323, "learning_rate": 9.042042943092634e-10, "loss": 0.4848, "step": 5788 }, { "epoch": 0.9872100954979536, "grad_norm": 0.5079013109207153, "learning_rate": 8.808744000780023e-10, "loss": 0.4793, "step": 5789 }, { "epoch": 0.987380627557981, "grad_norm": 0.5454519391059875, "learning_rate": 8.578492938238204e-10, "loss": 0.4825, "step": 5790 }, { "epoch": 0.9875511596180082, "grad_norm": 0.6010236740112305, "learning_rate": 8.351289825705434e-10, "loss": 0.49, "step": 5791 }, { "epoch": 0.9877216916780355, "grad_norm": 0.4637555480003357, "learning_rate": 8.127134732494047e-10, "loss": 0.4798, "step": 5792 }, { "epoch": 0.9878922237380627, "grad_norm": 0.7311561107635498, "learning_rate": 7.906027726981569e-10, "loss": 0.4778, "step": 5793 }, { "epoch": 0.98806275579809, "grad_norm": 0.5522876977920532, "learning_rate": 7.687968876619601e-10, "loss": 0.4859, "step": 5794 }, { "epoch": 0.9882332878581174, "grad_norm": 0.4935724437236786, "learning_rate": 7.472958247928264e-10, "loss": 0.5005, "step": 5795 }, { "epoch": 0.9884038199181446, "grad_norm": 0.544166088104248, "learning_rate": 7.260995906497315e-10, "loss": 0.4781, "step": 5796 }, { "epoch": 0.9885743519781719, "grad_norm": 0.44626158475875854, "learning_rate": 7.052081916988363e-10, "loss": 0.4846, "step": 5797 }, { "epoch": 0.9887448840381992, "grad_norm": 0.5568285584449768, "learning_rate": 6.84621634313043e-10, "loss": 0.4868, "step": 5798 }, { "epoch": 0.9889154160982264, "grad_norm": 0.4396195113658905, "learning_rate": 6.643399247725502e-10, "loss": 0.489, "step": 5799 }, { "epoch": 0.9890859481582538, "grad_norm": 0.4277687668800354, "learning_rate": 6.443630692645198e-10, "loss": 0.475, "step": 5800 }, { "epoch": 0.989256480218281, "grad_norm": 0.491654634475708, "learning_rate": 6.246910738827439e-10, "loss": 0.4924, "step": 5801 }, { "epoch": 0.9894270122783083, "grad_norm": 0.4532473683357239, "learning_rate": 6.053239446285331e-10, "loss": 0.4878, "step": 5802 }, { "epoch": 0.9895975443383356, "grad_norm": 0.3788377642631531, "learning_rate": 5.862616874099392e-10, "loss": 0.4857, "step": 5803 }, { "epoch": 0.9897680763983628, "grad_norm": 0.4752003848552704, "learning_rate": 5.675043080418663e-10, "loss": 0.4864, "step": 5804 }, { "epoch": 0.9899386084583902, "grad_norm": 0.4157637655735016, "learning_rate": 5.49051812246515e-10, "loss": 0.483, "step": 5805 }, { "epoch": 0.9901091405184175, "grad_norm": 0.3814486563205719, "learning_rate": 5.309042056529379e-10, "loss": 0.477, "step": 5806 }, { "epoch": 0.9902796725784447, "grad_norm": 0.407847136259079, "learning_rate": 5.130614937971512e-10, "loss": 0.4838, "step": 5807 }, { "epoch": 0.990450204638472, "grad_norm": 0.3975071310997009, "learning_rate": 4.955236821222454e-10, "loss": 0.4962, "step": 5808 }, { "epoch": 0.9906207366984994, "grad_norm": 0.4644838571548462, "learning_rate": 4.78290775978163e-10, "loss": 0.4848, "step": 5809 }, { "epoch": 0.9907912687585266, "grad_norm": 0.4575129747390747, "learning_rate": 4.6136278062203225e-10, "loss": 0.4894, "step": 5810 }, { "epoch": 0.9909618008185539, "grad_norm": 0.5090674161911011, "learning_rate": 4.447397012177224e-10, "loss": 0.4721, "step": 5811 }, { "epoch": 0.9911323328785812, "grad_norm": 0.5588932037353516, "learning_rate": 4.284215428361771e-10, "loss": 0.4961, "step": 5812 }, { "epoch": 0.9913028649386084, "grad_norm": 0.39936065673828125, "learning_rate": 4.1240831045552546e-10, "loss": 0.4954, "step": 5813 }, { "epoch": 0.9914733969986358, "grad_norm": 0.5699079036712646, "learning_rate": 3.967000089607487e-10, "loss": 0.4795, "step": 5814 }, { "epoch": 0.991643929058663, "grad_norm": 0.4913797378540039, "learning_rate": 3.812966431435694e-10, "loss": 0.483, "step": 5815 }, { "epoch": 0.9918144611186903, "grad_norm": 0.5006852149963379, "learning_rate": 3.6619821770289557e-10, "loss": 0.4835, "step": 5816 }, { "epoch": 0.9919849931787176, "grad_norm": 0.6642242670059204, "learning_rate": 3.514047372448204e-10, "loss": 0.4936, "step": 5817 }, { "epoch": 0.9921555252387448, "grad_norm": 0.4854762554168701, "learning_rate": 3.3691620628195646e-10, "loss": 0.481, "step": 5818 }, { "epoch": 0.9923260572987722, "grad_norm": 0.6153271794319153, "learning_rate": 3.227326292343236e-10, "loss": 0.4844, "step": 5819 }, { "epoch": 0.9924965893587995, "grad_norm": 0.4837683439254761, "learning_rate": 3.08854010428572e-10, "loss": 0.4844, "step": 5820 }, { "epoch": 0.9926671214188267, "grad_norm": 0.45445963740348816, "learning_rate": 2.952803540985372e-10, "loss": 0.4774, "step": 5821 }, { "epoch": 0.992837653478854, "grad_norm": 0.614289402961731, "learning_rate": 2.820116643849069e-10, "loss": 0.4912, "step": 5822 }, { "epoch": 0.9930081855388813, "grad_norm": 0.4718392491340637, "learning_rate": 2.6904794533544336e-10, "loss": 0.489, "step": 5823 }, { "epoch": 0.9931787175989086, "grad_norm": 0.5621843934059143, "learning_rate": 2.5638920090476083e-10, "loss": 0.4929, "step": 5824 }, { "epoch": 0.9933492496589359, "grad_norm": 0.4721529185771942, "learning_rate": 2.440354349545482e-10, "loss": 0.4777, "step": 5825 }, { "epoch": 0.9935197817189632, "grad_norm": 0.39175862073898315, "learning_rate": 2.3198665125334643e-10, "loss": 0.4988, "step": 5826 }, { "epoch": 0.9936903137789904, "grad_norm": 0.48671218752861023, "learning_rate": 2.202428534767709e-10, "loss": 0.4873, "step": 5827 }, { "epoch": 0.9938608458390177, "grad_norm": 0.49351176619529724, "learning_rate": 2.0880404520728926e-10, "loss": 0.491, "step": 5828 }, { "epoch": 0.994031377899045, "grad_norm": 0.4758182168006897, "learning_rate": 1.9767022993444352e-10, "loss": 0.4853, "step": 5829 }, { "epoch": 0.9942019099590723, "grad_norm": 0.3767922520637512, "learning_rate": 1.8684141105462796e-10, "loss": 0.4894, "step": 5830 }, { "epoch": 0.9943724420190996, "grad_norm": 0.4667741060256958, "learning_rate": 1.7631759187131117e-10, "loss": 0.4783, "step": 5831 }, { "epoch": 0.9945429740791268, "grad_norm": 0.4266509711742401, "learning_rate": 1.6609877559481405e-10, "loss": 0.4867, "step": 5832 }, { "epoch": 0.9947135061391542, "grad_norm": 0.4854908287525177, "learning_rate": 1.5618496534242079e-10, "loss": 0.486, "step": 5833 }, { "epoch": 0.9948840381991815, "grad_norm": 0.5938842296600342, "learning_rate": 1.4657616413837895e-10, "loss": 0.4876, "step": 5834 }, { "epoch": 0.9950545702592087, "grad_norm": 0.4664316475391388, "learning_rate": 1.372723749141214e-10, "loss": 0.4848, "step": 5835 }, { "epoch": 0.995225102319236, "grad_norm": 0.44046300649642944, "learning_rate": 1.2827360050760018e-10, "loss": 0.4853, "step": 5836 }, { "epoch": 0.9953956343792633, "grad_norm": 0.5642649531364441, "learning_rate": 1.1957984366406384e-10, "loss": 0.4956, "step": 5837 }, { "epoch": 0.9955661664392906, "grad_norm": 0.7079784870147705, "learning_rate": 1.111911070356131e-10, "loss": 0.484, "step": 5838 }, { "epoch": 0.9957366984993179, "grad_norm": 0.4249776303768158, "learning_rate": 1.03107393181201e-10, "loss": 0.481, "step": 5839 }, { "epoch": 0.9959072305593452, "grad_norm": 0.5063623785972595, "learning_rate": 9.532870456685495e-11, "loss": 0.4966, "step": 5840 }, { "epoch": 0.9960777626193724, "grad_norm": 0.5348143577575684, "learning_rate": 8.785504356556564e-11, "loss": 0.4853, "step": 5841 }, { "epoch": 0.9962482946793997, "grad_norm": 0.5114048719406128, "learning_rate": 8.068641245717602e-11, "loss": 0.4763, "step": 5842 }, { "epoch": 0.996418826739427, "grad_norm": 0.5542433261871338, "learning_rate": 7.38228134284924e-11, "loss": 0.4853, "step": 5843 }, { "epoch": 0.9965893587994543, "grad_norm": 0.4604029357433319, "learning_rate": 6.726424857339543e-11, "loss": 0.479, "step": 5844 }, { "epoch": 0.9967598908594816, "grad_norm": 0.585250735282898, "learning_rate": 6.101071989261798e-11, "loss": 0.4891, "step": 5845 }, { "epoch": 0.9969304229195088, "grad_norm": 0.46360349655151367, "learning_rate": 5.506222929374528e-11, "loss": 0.469, "step": 5846 }, { "epoch": 0.9971009549795361, "grad_norm": 0.4242927134037018, "learning_rate": 4.941877859143685e-11, "loss": 0.491, "step": 5847 }, { "epoch": 0.9972714870395635, "grad_norm": 0.5808072090148926, "learning_rate": 4.408036950731554e-11, "loss": 0.4865, "step": 5848 }, { "epoch": 0.9974420190995907, "grad_norm": 0.4071265161037445, "learning_rate": 3.904700366985648e-11, "loss": 0.4936, "step": 5849 }, { "epoch": 0.997612551159618, "grad_norm": 0.5109954476356506, "learning_rate": 3.431868261449812e-11, "loss": 0.4887, "step": 5850 }, { "epoch": 0.9977830832196453, "grad_norm": 0.5592871904373169, "learning_rate": 2.9895407783753216e-11, "loss": 0.4916, "step": 5851 }, { "epoch": 0.9979536152796725, "grad_norm": 0.43258005380630493, "learning_rate": 2.5777180526764813e-11, "loss": 0.4855, "step": 5852 }, { "epoch": 0.9981241473396999, "grad_norm": 0.5342608094215393, "learning_rate": 2.1964002100083317e-11, "loss": 0.4842, "step": 5853 }, { "epoch": 0.9982946793997272, "grad_norm": 0.4164401888847351, "learning_rate": 1.8455873666778368e-11, "loss": 0.4969, "step": 5854 }, { "epoch": 0.9984652114597544, "grad_norm": 0.43266117572784424, "learning_rate": 1.5252796296993946e-11, "loss": 0.4852, "step": 5855 }, { "epoch": 0.9986357435197817, "grad_norm": 0.5233867764472961, "learning_rate": 1.2354770968059371e-11, "loss": 0.4971, "step": 5856 }, { "epoch": 0.998806275579809, "grad_norm": 0.42787232995033264, "learning_rate": 9.761798563823201e-12, "loss": 0.4788, "step": 5857 }, { "epoch": 0.9989768076398363, "grad_norm": 0.5615535974502563, "learning_rate": 7.473879875319335e-12, "loss": 0.4991, "step": 5858 }, { "epoch": 0.9991473396998636, "grad_norm": 0.47109538316726685, "learning_rate": 5.491015600656014e-12, "loss": 0.4858, "step": 5859 }, { "epoch": 0.9993178717598908, "grad_norm": 0.5316367149353027, "learning_rate": 3.813206344571719e-12, "loss": 0.479, "step": 5860 }, { "epoch": 0.9994884038199181, "grad_norm": 0.5030397176742554, "learning_rate": 2.4404526188792634e-12, "loss": 0.4877, "step": 5861 }, { "epoch": 0.9996589358799455, "grad_norm": 0.398891419172287, "learning_rate": 1.3727548423547732e-12, "loss": 0.4851, "step": 5862 }, { "epoch": 0.9998294679399727, "grad_norm": 0.41367173194885254, "learning_rate": 6.101133408487059e-13, "loss": 0.4926, "step": 5863 }, { "epoch": 1.0, "grad_norm": 0.4404045343399048, "learning_rate": 1.5252834684176266e-13, "loss": 0.483, "step": 5864 }, { "epoch": 1.0, "step": 5864, "total_flos": 5.461479216500315e+20, "train_loss": 0.09150631395949999, "train_runtime": 9502.0046, "train_samples_per_second": 17.525, "train_steps_per_second": 0.617 } ], "logging_steps": 1.0, "max_steps": 5864, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.461479216500315e+20, "train_batch_size": 1, "trial_name": null, "trial_params": null }